path: root/drivers/gpu/drm/amd/amdkfd
author     Linus Torvalds <torvalds@linux-foundation.org>  2022-03-25 02:19:43 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>  2022-03-25 02:19:43 +0300
commit     b14ffae378aa1db993e62b01392e70d1e585fb23 (patch)
tree       0ac179d24e8a62ec4c2732ed18d90d83da4b82d7 /drivers/gpu/drm/amd/amdkfd
parent     52deda9551a01879b3562e7b41748e85c591f14c (diff)
parent     c6e90a1c660874736bd09c1fec6312b4b4c2ff7b (diff)
download   linux-b14ffae378aa1db993e62b01392e70d1e585fb23.tar.xz
Merge tag 'drm-next-2022-03-24' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
 "Lots of work all over, Intel improving DG2 support, amdkfd CRIU
  support, msm new hw support, and faster fbdev support.

  dma-buf:
   - rename dma-buf-map to iosys-map

  core:
   - move buddy allocator to core
   - add pci/platform init macros
   - improve EDID parser deep color handling
   - EDID timing type 7 support
   - add GPD Win Max quirk
   - add yes/no helpers to string_helpers
   - flatten syncobj chains
   - add nomodeset support to lots of drivers
   - improve fb-helper clipping support
   - add default property value interface

  fbdev:
   - improve fbdev ops speed

  ttm:
   - add a backpointer from ttm bo->ttm resource

  dp:
   - move displayport headers
   - add a dp helper module

  bridge:
   - anx7625 atomic support, HDCP support

  panel:
   - split out panel-lvds and lvds bindings
   - find panels in OF subnodes

  privacy:
   - add chromeos privacy screen support

  fb:
   - hot unplug fw fb on forced removal

  simpledrm:
   - request region instead of marking ioresource busy
   - add panel orientation property

  udmabuf:
   - fix oops with 0 pages

  amdgpu:
   - power management code cleanup
   - Enable freesync video mode by default
   - RAS code cleanup
   - Improve VRAM access for debug using SDMA
   - SR-IOV rework special register access and fixes
   - profiling power state request ioctl
   - expose IP discovery via sysfs
   - Cyan skillfish updates
   - GC 10.3.7, SDMA 5.2.7, DCN 3.1.6 updates
   - expose benchmark tests via debugfs
   - add module param to disable XGMI for testing
   - GPU reset debugfs register dumping support

  amdkfd:
   - CRIU support
   - SDMA queue fixes

  radeon:
   - UVD suspend fix
   - iMac backlight fix

  i915:
   - minimal parallel submission for execlists
   - DG2-G12 subplatform added
   - DG2 programming workarounds
   - DG2 accelerated migration support
   - flat CCS and CCS engine support for XeHP
   - initial small BAR support
   - drop fake LMEM support
   - ADL-N PCH support
   - bigjoiner updates
   - introduce VMA resources and async unbinding
   - register definitions cleanups
   - multi-FBC refactoring
   - DG1 OPROM over SPI support
   - ADL-N platform enabling
   - opregion mailbox #5 support
   - DP MST ESI improvements
   - drm device based logging
   - async flip optimisation for DG2
   - CPU arch abstraction fixes
   - improve GuC ADS init to work on aarch64
   - tweak TTM LRU priority hint
   - GuC 69.0.3 support
   - remove short term execbuf pins

  nouveau:
   - higher DP/eDP bitrates
   - backlight fixes

  msm:
   - dpu + dp support for sc8180x
   - dp support for sm8350
   - dpu + dsi support for qcm2290
   - 10nm dsi phy tuning support
   - bridge support for dp encoder
   - gpu support for additional 7c3 SKUs

  ingenic:
   - HDMI support for JZ4780
   - aux channel EDID support

  ast:
   - AST2600 support
   - add wide screen support
   - create DP/DVI connectors

  omapdrm:
   - fix implicit dma_buf fencing

  vc4:
   - add CSC + full range support
   - better display firmware handoff

  panfrost:
   - add initial dual-core GPU support

  stm:
   - new revision support
   - fb handover support

  mediatek:
   - transfer display binding document to yaml format.
   - add mt8195 display device binding.
   - allow commands to be sent during video mode.
   - add wait_for_event for crtc disable by cmdq.

  tegra:
   - YUV format support

  rcar-du:
   - LVDS support for M3-W+ (R8A77961)

  exynos:
   - BGR pixel format for FIMD device"

* tag 'drm-next-2022-03-24' of git://anongit.freedesktop.org/drm/drm: (1529 commits)
  drm/i915/display: Do not re-enable PSR after it was marked as not reliable
  drm/i915/display: Fix HPD short pulse handling for eDP
  drm/amdgpu: Use drm_mode_copy()
  drm/radeon: Use drm_mode_copy()
  drm/amdgpu: Use ternary operator in `vcn_v1_0_start()`
  drm/amdgpu: Remove pointless on stack mode copies
  drm/amd/pm: fix indenting in __smu_cmn_reg_print_error()
  drm/amdgpu/dc: fix typos in comments
  drm/amdgpu: fix typos in comments
  drm/amd/pm: fix typos in comments
  drm/amdgpu: Add stolen reserved memory for MI25 SRIOV.
  drm/amdgpu: Merge get_reserved_allocation to get_vbios_allocations.
  drm/amdkfd: evict svm bo worker handle error
  drm/amdgpu/vcn: fix vcn ring test failure in igt reload test
  drm/amdgpu: only allow secure submission on rings which support that
  drm/amdgpu: fixed the warnings reported by kernel test robot
  drm/amd/display: 3.2.177
  drm/amd/display: [FW Promotion] Release 0.0.108.0
  drm/amd/display: Add save/restore PANEL_PWRSEQ_REF_DIV2
  drm/amd/display: Wait for hubp read line for Pollock
  ...
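
The amdkfd highlight of this pull, CRIU (checkpoint/restore) support, is driven from user space through a single staged ioctl whose handlers (criu_process_info, criu_checkpoint, criu_unpause, criu_restore, criu_resume) appear in the kfd_chardev.c diff below. The following is a minimal checkpoint-side sketch, assuming the uapi names this series adds (AMDKFD_IOC_CRIU_OP, the KFD_CRIU_OP_* op codes and the op field of struct kfd_ioctl_criu_args) and assuming the checkpointer already holds the target process's /dev/kfd file descriptor, as the CRIU amdgpu plugin does:

    /* Sketch only: PROCESS_INFO pauses the target's queues and sizes the
     * dump, CHECKPOINT fills caller-allocated buckets, UNPAUSE lets the
     * queues run again. */
    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kfd_ioctl.h>   /* struct kfd_ioctl_criu_args and bucket structs */

    static int kfd_criu_op(int kfd_fd, struct kfd_ioctl_criu_args *args, __u32 op)
    {
            args->op = op;
            return ioctl(kfd_fd, AMDKFD_IOC_CRIU_OP, args);
    }

    int checkpoint_kfd_process(int kfd_fd)
    {
            struct kfd_ioctl_criu_args args = {0};

            /* Stage 1: evict the target's queues and learn the dump sizes. */
            if (kfd_criu_op(kfd_fd, &args, KFD_CRIU_OP_PROCESS_INFO))
                    return -1;

            /* Allocate the buffers the kernel just sized for us. */
            args.devices   = (uintptr_t)calloc(args.num_devices,
                                               sizeof(struct kfd_criu_device_bucket));
            args.bos       = (uintptr_t)calloc(args.num_bos,
                                               sizeof(struct kfd_criu_bo_bucket));
            args.priv_data = (uintptr_t)calloc(1, args.priv_data_size);
            if (!args.devices || !args.bos || !args.priv_data)
                    return -1;

            /* Stage 2: dump devices, BOs, queues, events and SVM ranges. */
            if (kfd_criu_op(kfd_fd, &args, KFD_CRIU_OP_CHECKPOINT))
                    return -1;

            /* ... write the buckets and the priv_data blob to image files ... */

            /* Stage 3: let the target's queues run again. */
            return kfd_criu_op(kfd_fd, &args, KFD_CRIU_OP_UNPAUSE);
    }

On the restore side the same ioctl is called with KFD_CRIU_OP_RESTORE by the restorer holding the new process's /dev/kfd fd, followed by KFD_CRIU_OP_RESUME (targeted by pid) once memory contents have been re-populated, mirroring the criu_restore() and criu_resume() handlers in the diff below.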
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/Makefile | 2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 1585
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 845
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h | 230
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c | 158
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h | 293
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 67
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 274
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 62
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_events.c | 316
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_events.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | 15
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_iommu.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 53
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_module.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 67
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 44
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 126
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 155
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 175
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 151
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c | 52
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c | 29
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pasid.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 196
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 95
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 375
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 77
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 467
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 39
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 3
55 files changed, 3426 insertions, 2662 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index c4f3aff11072..19cfbf9577b4 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -51,8 +51,6 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_events.o \
$(AMDKFD_PATH)/cik_event_interrupt.o \
$(AMDKFD_PATH)/kfd_int_process_v9.o \
- $(AMDKFD_PATH)/kfd_dbgdev.o \
- $(AMDKFD_PATH)/kfd_dbgmgr.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
$(AMDKFD_PATH)/kfd_crat.o
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index d60576ce10cd..5c8023cba196 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -110,7 +110,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
struct kfd_vm_fault_info info;
kfd_smi_event_update_vmfault(dev, pasid);
- kfd_process_vm_fault(dev->dqm, pasid);
+ kfd_dqm_evict_pasid(dev->dqm, pasid);
memset(&info, 0, sizeof(info));
amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->adev, &info);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 4bfc0c8ab764..607f65ab39ac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -33,14 +34,16 @@
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
+#include <linux/ptrace.h>
#include <linux/dma-buf.h>
-#include <asm/processor.h>
+#include <linux/fdtable.h>
+#include <linux/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
-#include "kfd_dbgmgr.h"
#include "kfd_svm.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
+#include "amdgpu_dma_buf.h"
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@@ -101,11 +104,6 @@ void kfd_chardev_exit(void)
kfd_device = NULL;
}
-struct device *kfd_chardev(void)
-{
- return kfd_device;
-}
-
static int kfd_open(struct inode *inode, struct file *filep)
{
@@ -292,14 +290,17 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
return err;
pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev) {
- pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
- return -EINVAL;
- }
mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
+ err = -EINVAL;
+ goto err_pdd;
+ }
+ dev = pdd->dev;
+
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
err = -ESRCH;
@@ -310,7 +311,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
p->pasid,
dev->id);
- err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
+ err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL, NULL,
&doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
@@ -344,6 +345,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
err_create_queue:
err_bind_process:
+err_pdd:
mutex_unlock(&p->mutex);
return err;
}
@@ -490,7 +492,6 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_set_memory_policy_args *args = data;
- struct kfd_dev *dev;
int err = 0;
struct kfd_process_device *pdd;
enum cache_policy default_policy, alternate_policy;
@@ -505,13 +506,15 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
return -EINVAL;
}
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
+ err = -EINVAL;
+ goto err_pdd;
+ }
- pdd = kfd_bind_process_to_device(dev, p);
+ pdd = kfd_bind_process_to_device(pdd->dev, p);
if (IS_ERR(pdd)) {
err = -ESRCH;
goto out;
@@ -524,7 +527,7 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
? cache_policy_coherent : cache_policy_noncoherent;
- if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
+ if (!pdd->dev->dqm->ops.set_cache_memory_policy(pdd->dev->dqm,
&pdd->qpd,
default_policy,
alternate_policy,
@@ -533,6 +536,7 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
err = -EINVAL;
out:
+err_pdd:
mutex_unlock(&p->mutex);
return err;
@@ -542,17 +546,18 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_set_trap_handler_args *args = data;
- struct kfd_dev *dev;
int err = 0;
struct kfd_process_device *pdd;
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
mutex_lock(&p->mutex);
- pdd = kfd_bind_process_to_device(dev, p);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ err = -EINVAL;
+ goto err_pdd;
+ }
+
+ pdd = kfd_bind_process_to_device(pdd->dev, p);
if (IS_ERR(pdd)) {
err = -ESRCH;
goto out;
@@ -561,6 +566,7 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);
out:
+err_pdd:
mutex_unlock(&p->mutex);
return err;
@@ -569,289 +575,40 @@ out:
static int kfd_ioctl_dbg_register(struct file *filep,
struct kfd_process *p, void *data)
{
- struct kfd_ioctl_dbg_register_args *args = data;
- struct kfd_dev *dev;
- struct kfd_dbgmgr *dbgmgr_ptr;
- struct kfd_process_device *pdd;
- bool create_ok;
- long status = 0;
-
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
- if (dev->adev->asic_type == CHIP_CARRIZO) {
- pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
- return -EINVAL;
- }
-
- mutex_lock(&p->mutex);
- mutex_lock(kfd_get_dbgmgr_mutex());
-
- /*
- * make sure that we have pdd, if this the first queue created for
- * this process
- */
- pdd = kfd_bind_process_to_device(dev, p);
- if (IS_ERR(pdd)) {
- status = PTR_ERR(pdd);
- goto out;
- }
-
- if (!dev->dbgmgr) {
- /* In case of a legal call, we have no dbgmgr yet */
- create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
- if (create_ok) {
- status = kfd_dbgmgr_register(dbgmgr_ptr, p);
- if (status != 0)
- kfd_dbgmgr_destroy(dbgmgr_ptr);
- else
- dev->dbgmgr = dbgmgr_ptr;
- }
- } else {
- pr_debug("debugger already registered\n");
- status = -EINVAL;
- }
-
-out:
- mutex_unlock(kfd_get_dbgmgr_mutex());
- mutex_unlock(&p->mutex);
-
- return status;
+ return -EPERM;
}
static int kfd_ioctl_dbg_unregister(struct file *filep,
struct kfd_process *p, void *data)
{
- struct kfd_ioctl_dbg_unregister_args *args = data;
- struct kfd_dev *dev;
- long status;
-
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev || !dev->dbgmgr)
- return -EINVAL;
-
- if (dev->adev->asic_type == CHIP_CARRIZO) {
- pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
- return -EINVAL;
- }
-
- mutex_lock(kfd_get_dbgmgr_mutex());
-
- status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
- if (!status) {
- kfd_dbgmgr_destroy(dev->dbgmgr);
- dev->dbgmgr = NULL;
- }
-
- mutex_unlock(kfd_get_dbgmgr_mutex());
-
- return status;
+ return -EPERM;
}
-/*
- * Parse and generate variable size data structure for address watch.
- * Total size of the buffer and # watch points is limited in order
- * to prevent kernel abuse. (no bearing to the much smaller HW limitation
- * which is enforced by dbgdev module)
- * please also note that the watch address itself are not "copied from user",
- * since it be set into the HW in user mode values.
- *
- */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
struct kfd_process *p, void *data)
{
- struct kfd_ioctl_dbg_address_watch_args *args = data;
- struct kfd_dev *dev;
- struct dbg_address_watch_info aw_info;
- unsigned char *args_buff;
- long status;
- void __user *cmd_from_user;
- uint64_t watch_mask_value = 0;
- unsigned int args_idx = 0;
-
- memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
-
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
- if (dev->adev->asic_type == CHIP_CARRIZO) {
- pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
- return -EINVAL;
- }
-
- cmd_from_user = (void __user *) args->content_ptr;
-
- /* Validate arguments */
-
- if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
- (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
- (cmd_from_user == NULL))
- return -EINVAL;
-
- /* this is the actual buffer to work with */
- args_buff = memdup_user(cmd_from_user,
- args->buf_size_in_bytes - sizeof(*args));
- if (IS_ERR(args_buff))
- return PTR_ERR(args_buff);
-
- aw_info.process = p;
-
- aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
- args_idx += sizeof(aw_info.num_watch_points);
-
- aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
- args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
-
- /*
- * set watch address base pointer to point on the array base
- * within args_buff
- */
- aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
-
- /* skip over the addresses buffer */
- args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
-
- if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
- status = -EINVAL;
- goto out;
- }
-
- watch_mask_value = (uint64_t) args_buff[args_idx];
-
- if (watch_mask_value > 0) {
- /*
- * There is an array of masks.
- * set watch mask base pointer to point on the array base
- * within args_buff
- */
- aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
-
- /* skip over the masks buffer */
- args_idx += sizeof(aw_info.watch_mask) *
- aw_info.num_watch_points;
- } else {
- /* just the NULL mask, set to NULL and skip over it */
- aw_info.watch_mask = NULL;
- args_idx += sizeof(aw_info.watch_mask);
- }
-
- if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
- status = -EINVAL;
- goto out;
- }
-
- /* Currently HSA Event is not supported for DBG */
- aw_info.watch_event = NULL;
-
- mutex_lock(kfd_get_dbgmgr_mutex());
-
- status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
-
- mutex_unlock(kfd_get_dbgmgr_mutex());
-
-out:
- kfree(args_buff);
-
- return status;
+ return -EPERM;
}
/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
struct kfd_process *p, void *data)
{
- struct kfd_ioctl_dbg_wave_control_args *args = data;
- struct kfd_dev *dev;
- struct dbg_wave_control_info wac_info;
- unsigned char *args_buff;
- uint32_t computed_buff_size;
- long status;
- void __user *cmd_from_user;
- unsigned int args_idx = 0;
-
- memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
-
- /* we use compact form, independent of the packing attribute value */
- computed_buff_size = sizeof(*args) +
- sizeof(wac_info.mode) +
- sizeof(wac_info.operand) +
- sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
- sizeof(wac_info.dbgWave_msg.MemoryVA) +
- sizeof(wac_info.trapId);
-
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
- if (dev->adev->asic_type == CHIP_CARRIZO) {
- pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
- return -EINVAL;
- }
-
- /* input size must match the computed "compact" size */
- if (args->buf_size_in_bytes != computed_buff_size) {
- pr_debug("size mismatch, computed : actual %u : %u\n",
- args->buf_size_in_bytes, computed_buff_size);
- return -EINVAL;
- }
-
- cmd_from_user = (void __user *) args->content_ptr;
-
- if (cmd_from_user == NULL)
- return -EINVAL;
-
- /* copy the entire buffer from user */
-
- args_buff = memdup_user(cmd_from_user,
- args->buf_size_in_bytes - sizeof(*args));
- if (IS_ERR(args_buff))
- return PTR_ERR(args_buff);
-
- /* move ptr to the start of the "pay-load" area */
- wac_info.process = p;
-
- wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
- args_idx += sizeof(wac_info.operand);
-
- wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
- args_idx += sizeof(wac_info.mode);
-
- wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
- args_idx += sizeof(wac_info.trapId);
-
- wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
- *((uint32_t *)(&args_buff[args_idx]));
- wac_info.dbgWave_msg.MemoryVA = NULL;
-
- mutex_lock(kfd_get_dbgmgr_mutex());
-
- pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
- wac_info.process, wac_info.operand,
- wac_info.mode, wac_info.trapId,
- wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
-
- status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
-
- pr_debug("Returned status of dbg manager is %ld\n", status);
-
- mutex_unlock(kfd_get_dbgmgr_mutex());
-
- kfree(args_buff);
-
- return status;
+ return -EPERM;
}
static int kfd_ioctl_get_clock_counters(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_get_clock_counters_args *args = data;
- struct kfd_dev *dev;
+ struct kfd_process_device *pdd;
- dev = kfd_device_by_id(args->gpu_id);
- if (dev)
+ mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ mutex_unlock(&p->mutex);
+ if (pdd)
/* Reading GPU clock counter from KGD */
- args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->adev);
+ args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(pdd->dev->adev);
else
/* Node without GPU resource */
args->gpu_clock_counter = 0;
@@ -1007,57 +764,11 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
* through the event_page_offset field.
*/
if (args->event_page_offset) {
- struct kfd_dev *kfd;
- struct kfd_process_device *pdd;
- void *mem, *kern_addr;
- uint64_t size;
-
- kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
- if (!kfd) {
- pr_err("Getting device by id failed in %s\n", __func__);
- return -EINVAL;
- }
-
mutex_lock(&p->mutex);
-
- if (p->signal_page) {
- pr_err("Event page is already set\n");
- err = -EINVAL;
- goto out_unlock;
- }
-
- pdd = kfd_bind_process_to_device(kfd, p);
- if (IS_ERR(pdd)) {
- err = PTR_ERR(pdd);
- goto out_unlock;
- }
-
- mem = kfd_process_device_translate_handle(pdd,
- GET_IDR_HANDLE(args->event_page_offset));
- if (!mem) {
- pr_err("Can't find BO, offset is 0x%llx\n",
- args->event_page_offset);
- err = -EINVAL;
- goto out_unlock;
- }
-
- err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->adev,
- mem, &kern_addr, &size);
- if (err) {
- pr_err("Failed to map event page to kernel\n");
- goto out_unlock;
- }
-
- err = kfd_event_page_set(p, kern_addr, size);
- if (err) {
- pr_err("Failed to set event page\n");
- amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->adev, mem);
- goto out_unlock;
- }
-
- p->signal_handle = args->event_page_offset;
-
+ err = kfd_kmap_event_page(p, args->event_page_offset);
mutex_unlock(&p->mutex);
+ if (err)
+ return err;
}
err = kfd_event_create(filp, p, args->event_type,
@@ -1066,10 +777,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
&args->event_page_offset,
&args->event_slot_index);
- return err;
-
-out_unlock:
- mutex_unlock(&p->mutex);
+ pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);
return err;
}
@@ -1118,11 +826,13 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
struct kfd_dev *dev;
long err;
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ err = -EINVAL;
+ goto err_pdd;
+ }
+ dev = pdd->dev;
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
@@ -1142,6 +852,7 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
return 0;
bind_process_to_device_fail:
+err_pdd:
mutex_unlock(&p->mutex);
return err;
}
@@ -1150,15 +861,17 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_get_tile_config_args *args = data;
- struct kfd_dev *dev;
+ struct kfd_process_device *pdd;
struct tile_config config;
int err = 0;
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
+ mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ mutex_unlock(&p->mutex);
+ if (!pdd)
return -EINVAL;
- amdgpu_amdkfd_get_tile_config(dev->adev, &config);
+ amdgpu_amdkfd_get_tile_config(pdd->dev->adev, &config);
args->gb_addr_config = config.gb_addr_config;
args->num_banks = config.num_banks;
@@ -1193,40 +906,37 @@ static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
{
struct kfd_ioctl_acquire_vm_args *args = data;
struct kfd_process_device *pdd;
- struct kfd_dev *dev;
struct file *drm_file;
int ret;
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
drm_file = fget(args->drm_fd);
if (!drm_file)
return -EINVAL;
mutex_lock(&p->mutex);
-
- pdd = kfd_get_process_device_data(dev, p);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
if (!pdd) {
ret = -EINVAL;
- goto err_unlock;
+ goto err_pdd;
}
if (pdd->drm_file) {
ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
- goto err_unlock;
+ goto err_drm_file;
}
ret = kfd_process_device_init_vm(pdd, drm_file);
if (ret)
goto err_unlock;
+
/* On success, the PDD keeps the drm_file reference */
mutex_unlock(&p->mutex);
return 0;
err_unlock:
+err_pdd:
+err_drm_file:
mutex_unlock(&p->mutex);
fput(drm_file);
return ret;
@@ -1283,19 +993,23 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
}
mutex_unlock(&p->svms.lock);
#endif
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
+ mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ err = -EINVAL;
+ goto err_pdd;
+ }
+
+ dev = pdd->dev;
if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
!kfd_dev_is_large_bar(dev)) {
pr_err("Alloc host visible vram on small bar is not allowed\n");
- return -EINVAL;
+ err = -EINVAL;
+ goto err_large_bar;
}
- mutex_lock(&p->mutex);
-
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
err = PTR_ERR(pdd);
@@ -1323,7 +1037,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
dev->adev, args->va_addr, args->size,
pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
- flags);
+ flags, false);
if (err)
goto err_unlock;
@@ -1356,6 +1070,8 @@ err_free:
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,
pdd->drm_priv, NULL);
err_unlock:
+err_pdd:
+err_large_bar:
mutex_unlock(&p->mutex);
return err;
}
@@ -1366,14 +1082,9 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
struct kfd_ioctl_free_memory_of_gpu_args *args = data;
struct kfd_process_device *pdd;
void *mem;
- struct kfd_dev *dev;
int ret;
uint64_t size = 0;
- dev = kfd_device_by_id(GET_GPU_ID(args->handle));
- if (!dev)
- return -EINVAL;
-
mutex_lock(&p->mutex);
/*
* Safeguard to prevent user space from freeing signal BO.
@@ -1385,11 +1096,11 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
goto err_unlock;
}
- pdd = kfd_get_process_device_data(dev, p);
+ pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
if (!pdd) {
pr_err("Process device data doesn't exist\n");
ret = -EINVAL;
- goto err_unlock;
+ goto err_pdd;
}
mem = kfd_process_device_translate_handle(
@@ -1399,7 +1110,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
goto err_unlock;
}
- ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev,
+ ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev,
(struct kgd_mem *)mem, pdd->drm_priv, &size);
/* If freeing the buffer failed, leave the handle in place for
@@ -1412,26 +1123,31 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
err_unlock:
+err_pdd:
mutex_unlock(&p->mutex);
return ret;
}
+static bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
+{
+ return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) &&
+ dev->adev->sdma.instance[0].fw_version >= 18) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
+}
+
static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_map_memory_to_gpu_args *args = data;
struct kfd_process_device *pdd, *peer_pdd;
void *mem;
- struct kfd_dev *dev, *peer;
+ struct kfd_dev *dev;
long err = 0;
int i;
uint32_t *devices_arr = NULL;
bool table_freed = false;
- dev = kfd_device_by_id(GET_GPU_ID(args->handle));
- if (!dev)
- return -EINVAL;
-
if (!args->n_devices) {
pr_debug("Device IDs array empty\n");
return -EINVAL;
@@ -1455,6 +1171,12 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
}
mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
+ if (!pdd) {
+ err = -EINVAL;
+ goto get_process_device_data_failed;
+ }
+ dev = pdd->dev;
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
@@ -1470,25 +1192,33 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
}
for (i = args->n_success; i < args->n_devices; i++) {
- peer = kfd_device_by_id(devices_arr[i]);
- if (!peer) {
+ peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
+ if (!peer_pdd) {
pr_debug("Getting device by id failed for 0x%x\n",
devices_arr[i]);
err = -EINVAL;
goto get_mem_obj_from_handle_failed;
}
- peer_pdd = kfd_bind_process_to_device(peer, p);
+ peer_pdd = kfd_bind_process_to_device(peer_pdd->dev, p);
if (IS_ERR(peer_pdd)) {
err = PTR_ERR(peer_pdd);
goto get_mem_obj_from_handle_failed;
}
+
err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- peer->adev, (struct kgd_mem *)mem,
+ peer_pdd->dev->adev, (struct kgd_mem *)mem,
peer_pdd->drm_priv, &table_freed);
if (err) {
- pr_err("Failed to map to gpu %d/%d\n",
- i, args->n_devices);
+ struct pci_dev *pdev = peer_pdd->dev->adev->pdev;
+
+ dev_err(dev->adev->dev,
+ "Failed to map peer:%04x:%02x:%02x.%d mem_domain:%d\n",
+ pci_domain_nr(pdev->bus),
+ pdev->bus->number,
+ PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn),
+ ((struct kgd_mem *)mem)->domain);
goto map_memory_to_gpu_failed;
}
args->n_success = i+1;
@@ -1503,12 +1233,9 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
}
/* Flush TLBs after waiting for the page table updates to complete */
- if (table_freed) {
+ if (table_freed || !kfd_flush_tlb_after_unmap(dev)) {
for (i = 0; i < args->n_devices; i++) {
- peer = kfd_device_by_id(devices_arr[i]);
- if (WARN_ON_ONCE(!peer))
- continue;
- peer_pdd = kfd_get_process_device_data(peer, p);
+ peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
if (WARN_ON_ONCE(!peer_pdd))
continue;
kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
@@ -1518,6 +1245,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
return err;
+get_process_device_data_failed:
bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
@@ -1535,14 +1263,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
struct kfd_process_device *pdd, *peer_pdd;
void *mem;
- struct kfd_dev *dev, *peer;
long err = 0;
uint32_t *devices_arr = NULL, i;
- dev = kfd_device_by_id(GET_GPU_ID(args->handle));
- if (!dev)
- return -EINVAL;
-
if (!args->n_devices) {
pr_debug("Device IDs array empty\n");
return -EINVAL;
@@ -1566,8 +1289,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
}
mutex_lock(&p->mutex);
-
- pdd = kfd_get_process_device_data(dev, p);
+ pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
if (!pdd) {
err = -EINVAL;
goto bind_process_to_device_failed;
@@ -1581,19 +1303,13 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
}
for (i = args->n_success; i < args->n_devices; i++) {
- peer = kfd_device_by_id(devices_arr[i]);
- if (!peer) {
- err = -EINVAL;
- goto get_mem_obj_from_handle_failed;
- }
-
- peer_pdd = kfd_get_process_device_data(peer, p);
+ peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
if (!peer_pdd) {
- err = -ENODEV;
+ err = -EINVAL;
goto get_mem_obj_from_handle_failed;
}
err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- peer->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);
+ peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);
if (err) {
pr_err("Failed to unmap from gpu %d/%d\n",
i, args->n_devices);
@@ -1603,8 +1319,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
}
mutex_unlock(&p->mutex);
- if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) {
- err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev,
+ if (kfd_flush_tlb_after_unmap(pdd->dev)) {
+ err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
(struct kgd_mem *) mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
@@ -1613,10 +1329,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
/* Flush TLBs after waiting for the page table updates to complete */
for (i = 0; i < args->n_devices; i++) {
- peer = kfd_device_by_id(devices_arr[i]);
- if (WARN_ON_ONCE(!peer))
- continue;
- peer_pdd = kfd_get_process_device_data(peer, p);
+ peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
if (WARN_ON_ONCE(!peer_pdd))
continue;
kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
@@ -1736,29 +1449,29 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
struct kfd_ioctl_import_dmabuf_args *args = data;
struct kfd_process_device *pdd;
struct dma_buf *dmabuf;
- struct kfd_dev *dev;
int idr_handle;
uint64_t size;
void *mem;
int r;
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
dmabuf = dma_buf_get(args->dmabuf_fd);
if (IS_ERR(dmabuf))
return PTR_ERR(dmabuf);
mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ r = -EINVAL;
+ goto err_unlock;
+ }
- pdd = kfd_bind_process_to_device(dev, p);
+ pdd = kfd_bind_process_to_device(pdd->dev, p);
if (IS_ERR(pdd)) {
r = PTR_ERR(pdd);
goto err_unlock;
}
- r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->adev, dmabuf,
+ r = amdgpu_amdkfd_gpuvm_import_dmabuf(pdd->dev->adev, dmabuf,
args->va_addr, pdd->drm_priv,
(struct kgd_mem **)&mem, &size,
NULL);
@@ -1779,7 +1492,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
return 0;
err_free:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, (struct kgd_mem *)mem,
pdd->drm_priv, NULL);
err_unlock:
mutex_unlock(&p->mutex);
@@ -1792,13 +1505,16 @@ static int kfd_ioctl_smi_events(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_smi_events_args *args = data;
- struct kfd_dev *dev;
+ struct kfd_process_device *pdd;
- dev = kfd_device_by_id(args->gpuid);
- if (!dev)
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_process_device_data_by_id(p, args->gpuid);
+ mutex_unlock(&p->mutex);
+ if (!pdd)
return -EINVAL;
- return kfd_smi_event_open(dev, &args->anon_fd);
+ return kfd_smi_event_open(pdd->dev, &args->anon_fd);
}
static int kfd_ioctl_set_xnack_mode(struct file *filep,
@@ -1840,13 +1556,9 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
if (!args->start_addr || !args->size)
return -EINVAL;
- mutex_lock(&p->mutex);
-
r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
args->attrs);
- mutex_unlock(&p->mutex);
-
return r;
}
#else
@@ -1856,6 +1568,993 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
}
#endif
+static int criu_checkpoint_process(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset)
+{
+ struct kfd_criu_process_priv_data process_priv;
+ int ret;
+
+ memset(&process_priv, 0, sizeof(process_priv));
+
+ process_priv.version = KFD_CRIU_PRIV_VERSION;
+ /* For CR, we don't consider negative xnack mode which is used for
+ * querying without changing it, here 0 simply means disabled and 1
+ * means enabled so retry for finding a valid PTE.
+ */
+ process_priv.xnack_mode = p->xnack_enabled ? 1 : 0;
+
+ ret = copy_to_user(user_priv_data + *priv_offset,
+ &process_priv, sizeof(process_priv));
+
+ if (ret) {
+ pr_err("Failed to copy process information to user\n");
+ ret = -EFAULT;
+ }
+
+ *priv_offset += sizeof(process_priv);
+ return ret;
+}
+
+static int criu_checkpoint_devices(struct kfd_process *p,
+ uint32_t num_devices,
+ uint8_t __user *user_addr,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset)
+{
+ struct kfd_criu_device_priv_data *device_priv = NULL;
+ struct kfd_criu_device_bucket *device_buckets = NULL;
+ int ret = 0, i;
+
+ device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), GFP_KERNEL);
+ if (!device_buckets) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL);
+ if (!device_priv) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ for (i = 0; i < num_devices; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ device_buckets[i].user_gpu_id = pdd->user_gpu_id;
+ device_buckets[i].actual_gpu_id = pdd->dev->id;
+
+ /*
+ * priv_data does not contain useful information for now and is reserved for
+ * future use, so we do not set its contents.
+ */
+ }
+
+ ret = copy_to_user(user_addr, device_buckets, num_devices * sizeof(*device_buckets));
+ if (ret) {
+ pr_err("Failed to copy device information to user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ ret = copy_to_user(user_priv_data + *priv_offset,
+ device_priv,
+ num_devices * sizeof(*device_priv));
+ if (ret) {
+ pr_err("Failed to copy device information to user\n");
+ ret = -EFAULT;
+ }
+ *priv_offset += num_devices * sizeof(*device_priv);
+
+exit:
+ kvfree(device_buckets);
+ kvfree(device_priv);
+ return ret;
+}
+
+static uint32_t get_process_num_bos(struct kfd_process *p)
+{
+ uint32_t num_of_bos = 0;
+ int i;
+
+ /* Run over all PDDs of the process */
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+ void *mem;
+ int id;
+
+ idr_for_each_entry(&pdd->alloc_idr, mem, id) {
+ struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
+
+ if ((uint64_t)kgd_mem->va > pdd->gpuvm_base)
+ num_of_bos++;
+ }
+ }
+ return num_of_bos;
+}
+
+static int criu_get_prime_handle(struct drm_gem_object *gobj, int flags,
+ u32 *shared_fd)
+{
+ struct dma_buf *dmabuf;
+ int ret;
+
+ dmabuf = amdgpu_gem_prime_export(gobj, flags);
+ if (IS_ERR(dmabuf)) {
+ ret = PTR_ERR(dmabuf);
+ pr_err("dmabuf export failed for the BO\n");
+ return ret;
+ }
+
+ ret = dma_buf_fd(dmabuf, flags);
+ if (ret < 0) {
+ pr_err("dmabuf create fd failed, ret:%d\n", ret);
+ goto out_free_dmabuf;
+ }
+
+ *shared_fd = ret;
+ return 0;
+
+out_free_dmabuf:
+ dma_buf_put(dmabuf);
+ return ret;
+}
+
+static int criu_checkpoint_bos(struct kfd_process *p,
+ uint32_t num_bos,
+ uint8_t __user *user_bos,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset)
+{
+ struct kfd_criu_bo_bucket *bo_buckets;
+ struct kfd_criu_bo_priv_data *bo_privs;
+ int ret = 0, pdd_index, bo_index = 0, id;
+ void *mem;
+
+ bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);
+ if (!bo_buckets)
+ return -ENOMEM;
+
+ bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL);
+ if (!bo_privs) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
+ struct kfd_process_device *pdd = p->pdds[pdd_index];
+ struct amdgpu_bo *dumper_bo;
+ struct kgd_mem *kgd_mem;
+
+ idr_for_each_entry(&pdd->alloc_idr, mem, id) {
+ struct kfd_criu_bo_bucket *bo_bucket;
+ struct kfd_criu_bo_priv_data *bo_priv;
+ int i, dev_idx = 0;
+
+ if (!mem) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ kgd_mem = (struct kgd_mem *)mem;
+ dumper_bo = kgd_mem->bo;
+
+ if ((uint64_t)kgd_mem->va <= pdd->gpuvm_base)
+ continue;
+
+ bo_bucket = &bo_buckets[bo_index];
+ bo_priv = &bo_privs[bo_index];
+
+ bo_bucket->gpu_id = pdd->user_gpu_id;
+ bo_bucket->addr = (uint64_t)kgd_mem->va;
+ bo_bucket->size = amdgpu_bo_size(dumper_bo);
+ bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags;
+ bo_priv->idr_handle = id;
+
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo,
+ &bo_priv->user_addr);
+ if (ret) {
+ pr_err("Failed to obtain user address for user-pointer bo\n");
+ goto exit;
+ }
+ }
+ if (bo_bucket->alloc_flags
+ & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
+ ret = criu_get_prime_handle(&dumper_bo->tbo.base,
+ bo_bucket->alloc_flags &
+ KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
+ &bo_bucket->dmabuf_fd);
+ if (ret)
+ goto exit;
+ } else {
+ bo_bucket->dmabuf_fd = KFD_INVALID_FD;
+ }
+
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
+ bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
+ KFD_MMAP_GPU_ID(pdd->dev->id);
+ else if (bo_bucket->alloc_flags &
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
+ bo_bucket->offset = KFD_MMAP_TYPE_MMIO |
+ KFD_MMAP_GPU_ID(pdd->dev->id);
+ else
+ bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo);
+
+ for (i = 0; i < p->n_pdds; i++) {
+ if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->dev->adev, kgd_mem))
+ bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id;
+ }
+
+ pr_debug("bo_size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n"
+ "gpu_id = 0x%x alloc_flags = 0x%x idr_handle = 0x%x",
+ bo_bucket->size,
+ bo_bucket->addr,
+ bo_bucket->offset,
+ bo_bucket->gpu_id,
+ bo_bucket->alloc_flags,
+ bo_priv->idr_handle);
+ bo_index++;
+ }
+ }
+
+ ret = copy_to_user(user_bos, bo_buckets, num_bos * sizeof(*bo_buckets));
+ if (ret) {
+ pr_err("Failed to copy BO information to user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ ret = copy_to_user(user_priv_data + *priv_offset, bo_privs, num_bos * sizeof(*bo_privs));
+ if (ret) {
+ pr_err("Failed to copy BO priv information to user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ *priv_offset += num_bos * sizeof(*bo_privs);
+
+exit:
+ while (ret && bo_index--) {
+ if (bo_buckets[bo_index].alloc_flags
+ & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
+ close_fd(bo_buckets[bo_index].dmabuf_fd);
+ }
+
+ kvfree(bo_buckets);
+ kvfree(bo_privs);
+ return ret;
+}
+
+static int criu_get_process_object_info(struct kfd_process *p,
+ uint32_t *num_devices,
+ uint32_t *num_bos,
+ uint32_t *num_objects,
+ uint64_t *objs_priv_size)
+{
+ uint64_t queues_priv_data_size, svm_priv_data_size, priv_size;
+ uint32_t num_queues, num_events, num_svm_ranges;
+ int ret;
+
+ *num_devices = p->n_pdds;
+ *num_bos = get_process_num_bos(p);
+
+ ret = kfd_process_get_queue_info(p, &num_queues, &queues_priv_data_size);
+ if (ret)
+ return ret;
+
+ num_events = kfd_get_num_events(p);
+
+ ret = svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
+ if (ret)
+ return ret;
+
+ *num_objects = num_queues + num_events + num_svm_ranges;
+
+ if (objs_priv_size) {
+ priv_size = sizeof(struct kfd_criu_process_priv_data);
+ priv_size += *num_devices * sizeof(struct kfd_criu_device_priv_data);
+ priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
+ priv_size += queues_priv_data_size;
+ priv_size += num_events * sizeof(struct kfd_criu_event_priv_data);
+ priv_size += svm_priv_data_size;
+ *objs_priv_size = priv_size;
+ }
+ return 0;
+}
+
+static int criu_checkpoint(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ int ret;
+ uint32_t num_devices, num_bos, num_objects;
+ uint64_t priv_size, priv_offset = 0;
+
+ if (!args->devices || !args->bos || !args->priv_data)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ if (!p->n_pdds) {
+ pr_err("No pdd for given process\n");
+ ret = -ENODEV;
+ goto exit_unlock;
+ }
+
+ /* Confirm all process queues are evicted */
+ if (!p->queues_paused) {
+ pr_err("Cannot dump process when queues are not in evicted state\n");
+ /* CRIU plugin did not call op PROCESS_INFO before checkpointing */
+ ret = -EINVAL;
+ goto exit_unlock;
+ }
+
+ ret = criu_get_process_object_info(p, &num_devices, &num_bos, &num_objects, &priv_size);
+ if (ret)
+ goto exit_unlock;
+
+ if (num_devices != args->num_devices ||
+ num_bos != args->num_bos ||
+ num_objects != args->num_objects ||
+ priv_size != args->priv_data_size) {
+
+ ret = -EINVAL;
+ goto exit_unlock;
+ }
+
+ /* each function will store private data inside priv_data and adjust priv_offset */
+ ret = criu_checkpoint_process(p, (uint8_t __user *)args->priv_data, &priv_offset);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_checkpoint_devices(p, num_devices, (uint8_t __user *)args->devices,
+ (uint8_t __user *)args->priv_data, &priv_offset);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
+ (uint8_t __user *)args->priv_data, &priv_offset);
+ if (ret)
+ goto exit_unlock;
+
+ if (num_objects) {
+ ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,
+ &priv_offset);
+ if (ret)
+ goto close_bo_fds;
+
+ ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
+ &priv_offset);
+ if (ret)
+ goto close_bo_fds;
+
+ ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);
+ if (ret)
+ goto close_bo_fds;
+ }
+
+close_bo_fds:
+ if (ret) {
+ /* If IOCTL returns err, user assumes all FDs opened in criu_dump_bos are closed */
+ uint32_t i;
+ struct kfd_criu_bo_bucket *bo_buckets = (struct kfd_criu_bo_bucket *) args->bos;
+
+ for (i = 0; i < num_bos; i++) {
+ if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+ close_fd(bo_buckets[i].dmabuf_fd);
+ }
+ }
+
+exit_unlock:
+ mutex_unlock(&p->mutex);
+ if (ret)
+ pr_err("Failed to dump CRIU ret:%d\n", ret);
+ else
+ pr_debug("CRIU dump ret:%d\n", ret);
+
+ return ret;
+}
+
+static int criu_restore_process(struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args,
+ uint64_t *priv_offset,
+ uint64_t max_priv_data_size)
+{
+ int ret = 0;
+ struct kfd_criu_process_priv_data process_priv;
+
+ if (*priv_offset + sizeof(process_priv) > max_priv_data_size)
+ return -EINVAL;
+
+ ret = copy_from_user(&process_priv,
+ (void __user *)(args->priv_data + *priv_offset),
+ sizeof(process_priv));
+ if (ret) {
+ pr_err("Failed to copy process private information from user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+ *priv_offset += sizeof(process_priv);
+
+ if (process_priv.version != KFD_CRIU_PRIV_VERSION) {
+ pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n",
+ process_priv.version, KFD_CRIU_PRIV_VERSION);
+ return -EINVAL;
+ }
+
+ pr_debug("Setting XNACK mode\n");
+ if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) {
+ pr_err("xnack mode cannot be set\n");
+ ret = -EPERM;
+ goto exit;
+ } else {
+ pr_debug("set xnack mode: %d\n", process_priv.xnack_mode);
+ p->xnack_enabled = process_priv.xnack_mode;
+ }
+
+exit:
+ return ret;
+}
+
+static int criu_restore_devices(struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args,
+ uint64_t *priv_offset,
+ uint64_t max_priv_data_size)
+{
+ struct kfd_criu_device_bucket *device_buckets;
+ struct kfd_criu_device_priv_data *device_privs;
+ int ret = 0;
+ uint32_t i;
+
+ if (args->num_devices != p->n_pdds)
+ return -EINVAL;
+
+ if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size)
+ return -EINVAL;
+
+ device_buckets = kmalloc_array(args->num_devices, sizeof(*device_buckets), GFP_KERNEL);
+ if (!device_buckets)
+ return -ENOMEM;
+
+ ret = copy_from_user(device_buckets, (void __user *)args->devices,
+ args->num_devices * sizeof(*device_buckets));
+ if (ret) {
+ pr_err("Failed to copy devices buckets from user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ for (i = 0; i < args->num_devices; i++) {
+ struct kfd_dev *dev;
+ struct kfd_process_device *pdd;
+ struct file *drm_file;
+
+ /* device private data is not currently used */
+
+ if (!device_buckets[i].user_gpu_id) {
+ pr_err("Invalid user gpu_id\n");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ dev = kfd_device_by_id(device_buckets[i].actual_gpu_id);
+ if (!dev) {
+ pr_err("Failed to find device with gpu_id = %x\n",
+ device_buckets[i].actual_gpu_id);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd) {
+ pr_err("Failed to get pdd for gpu_id = %x\n",
+ device_buckets[i].actual_gpu_id);
+ ret = -EINVAL;
+ goto exit;
+ }
+ pdd->user_gpu_id = device_buckets[i].user_gpu_id;
+
+ drm_file = fget(device_buckets[i].drm_fd);
+ if (!drm_file) {
+ pr_err("Invalid render node file descriptor sent from plugin (%d)\n",
+ device_buckets[i].drm_fd);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (pdd->drm_file) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ /* create the vm using render nodes for kfd pdd */
+ if (kfd_process_device_init_vm(pdd, drm_file)) {
+ pr_err("could not init vm for given pdd\n");
+ /* On success, the PDD keeps the drm_file reference */
+ fput(drm_file);
+ ret = -EINVAL;
+ goto exit;
+ }
+ /*
+ * pdd now already has the vm bound to render node so below api won't create a new
+ * exclusive kfd mapping but use existing one with renderDXXX but is still needed
+ * for iommu v2 binding and runtime pm.
+ */
+ pdd = kfd_bind_process_to_device(dev, p);
+ if (IS_ERR(pdd)) {
+ ret = PTR_ERR(pdd);
+ goto exit;
+ }
+ }
+
+ /*
+ * We are not copying device private data from user as we are not using the data for now,
+ * but we still adjust for its private data.
+ */
+ *priv_offset += args->num_devices * sizeof(*device_privs);
+
+exit:
+ kfree(device_buckets);
+ return ret;
+}
+
+static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
+ struct kfd_criu_bo_bucket *bo_bucket,
+ struct kfd_criu_bo_priv_data *bo_priv,
+ struct kgd_mem **kgd_mem)
+{
+ int idr_handle;
+ int ret;
+ const bool criu_resume = true;
+ u64 offset;
+
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
+ if (bo_bucket->size != kfd_doorbell_process_slice(pdd->dev))
+ return -EINVAL;
+
+ offset = kfd_get_process_doorbells(pdd);
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+ /* MMIO BOs need remapped bus address */
+ if (bo_bucket->size != PAGE_SIZE) {
+ pr_err("Invalid page size\n");
+ return -EINVAL;
+ }
+ offset = pdd->dev->adev->rmmio_remap.bus_addr;
+ if (!offset) {
+ pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n");
+ return -ENOMEM;
+ }
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ offset = bo_priv->user_addr;
+ }
+ /* Create the BO */
+ ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr,
+ bo_bucket->size, pdd->drm_priv, kgd_mem,
+ &offset, bo_bucket->alloc_flags, criu_resume);
+ if (ret) {
+ pr_err("Could not create the BO\n");
+ return ret;
+ }
+ pr_debug("New BO created: size:0x%llx addr:0x%llx offset:0x%llx\n",
+ bo_bucket->size, bo_bucket->addr, offset);
+
+ /* Restore previous IDR handle */
+ pr_debug("Restoring old IDR handle for the BO");
+ idr_handle = idr_alloc(&pdd->alloc_idr, *kgd_mem, bo_priv->idr_handle,
+ bo_priv->idr_handle + 1, GFP_KERNEL);
+
+ if (idr_handle < 0) {
+ pr_err("Could not allocate idr\n");
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, *kgd_mem, pdd->drm_priv,
+ NULL);
+ return -ENOMEM;
+ }
+
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
+ bo_bucket->restored_offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id);
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+ bo_bucket->restored_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(pdd->dev->id);
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+ bo_bucket->restored_offset = offset;
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ bo_bucket->restored_offset = offset;
+ /* Update the VRAM usage count */
+ WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+ }
+ return 0;
+}
+
+static int criu_restore_bo(struct kfd_process *p,
+ struct kfd_criu_bo_bucket *bo_bucket,
+ struct kfd_criu_bo_priv_data *bo_priv)
+{
+ struct kfd_process_device *pdd;
+ struct kgd_mem *kgd_mem;
+ int ret;
+ int j;
+
+ pr_debug("Restoring BO size:0x%llx addr:0x%llx gpu_id:0x%x flags:0x%x idr_handle:0x%x\n",
+ bo_bucket->size, bo_bucket->addr, bo_bucket->gpu_id, bo_bucket->alloc_flags,
+ bo_priv->idr_handle);
+
+ pdd = kfd_process_device_data_by_id(p, bo_bucket->gpu_id);
+ if (!pdd) {
+ pr_err("Failed to get pdd\n");
+ return -ENODEV;
+ }
+
+ ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem);
+ if (ret)
+ return ret;
+
+ /* now map these BOs to GPU/s */
+ for (j = 0; j < p->n_pdds; j++) {
+ struct kfd_dev *peer;
+ struct kfd_process_device *peer_pdd;
+
+ if (!bo_priv->mapped_gpuids[j])
+ break;
+
+ peer_pdd = kfd_process_device_data_by_id(p, bo_priv->mapped_gpuids[j]);
+ if (!peer_pdd)
+ return -EINVAL;
+
+ peer = peer_pdd->dev;
+
+ peer_pdd = kfd_bind_process_to_device(peer, p);
+ if (IS_ERR(peer_pdd))
+ return PTR_ERR(peer_pdd);
+
+ ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem, peer_pdd->drm_priv,
+ NULL);
+ if (ret) {
+ pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds);
+ return ret;
+ }
+ }
+
+ pr_debug("map memory was successful for the BO\n");
+ /* create the dmabuf object and export the bo */
+ if (bo_bucket->alloc_flags
+ & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
+ ret = criu_get_prime_handle(&kgd_mem->bo->tbo.base, DRM_RDWR,
+ &bo_bucket->dmabuf_fd);
+ if (ret)
+ return ret;
+ } else {
+ bo_bucket->dmabuf_fd = KFD_INVALID_FD;
+ }
+
+ return 0;
+}
+
+static int criu_restore_bos(struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args,
+ uint64_t *priv_offset,
+ uint64_t max_priv_data_size)
+{
+ struct kfd_criu_bo_bucket *bo_buckets = NULL;
+ struct kfd_criu_bo_priv_data *bo_privs = NULL;
+ int ret = 0;
+ uint32_t i = 0;
+
+ if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
+ return -EINVAL;
+
+ /* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */
+ amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);
+
+ bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
+ if (!bo_buckets)
+ return -ENOMEM;
+
+ ret = copy_from_user(bo_buckets, (void __user *)args->bos,
+ args->num_bos * sizeof(*bo_buckets));
+ if (ret) {
+ pr_err("Failed to copy BOs information from user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
+ if (!bo_privs) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
+ args->num_bos * sizeof(*bo_privs));
+ if (ret) {
+ pr_err("Failed to copy BOs information from user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+ *priv_offset += args->num_bos * sizeof(*bo_privs);
+
+ /* Create and map new BOs */
+ for (; i < args->num_bos; i++) {
+ ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
+ if (ret) {
+ pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
+ goto exit;
+ }
+ } /* done */
+
+ /* Copy only the buckets back so user can read bo_buckets[N].restored_offset */
+ ret = copy_to_user((void __user *)args->bos,
+ bo_buckets,
+ (args->num_bos * sizeof(*bo_buckets)));
+ if (ret)
+ ret = -EFAULT;
+
+exit:
+ while (ret && i--) {
+ if (bo_buckets[i].alloc_flags
+ & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
+ close_fd(bo_buckets[i].dmabuf_fd);
+ }
+ kvfree(bo_buckets);
+ kvfree(bo_privs);
+ return ret;
+}
+
+static int criu_restore_objects(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args,
+ uint64_t *priv_offset,
+ uint64_t max_priv_data_size)
+{
+ int ret = 0;
+ uint32_t i;
+
+ BUILD_BUG_ON(offsetof(struct kfd_criu_queue_priv_data, object_type));
+ BUILD_BUG_ON(offsetof(struct kfd_criu_event_priv_data, object_type));
+ BUILD_BUG_ON(offsetof(struct kfd_criu_svm_range_priv_data, object_type));
+
+ for (i = 0; i < args->num_objects; i++) {
+ uint32_t object_type;
+
+ if (*priv_offset + sizeof(object_type) > max_priv_data_size) {
+ pr_err("Invalid private data size\n");
+ return -EINVAL;
+ }
+
+ ret = get_user(object_type, (uint32_t __user *)(args->priv_data + *priv_offset));
+ if (ret) {
+ pr_err("Failed to copy private information from user\n");
+ goto exit;
+ }
+
+ switch (object_type) {
+ case KFD_CRIU_OBJECT_TYPE_QUEUE:
+ ret = kfd_criu_restore_queue(p, (uint8_t __user *)args->priv_data,
+ priv_offset, max_priv_data_size);
+ if (ret)
+ goto exit;
+ break;
+ case KFD_CRIU_OBJECT_TYPE_EVENT:
+ ret = kfd_criu_restore_event(filep, p, (uint8_t __user *)args->priv_data,
+ priv_offset, max_priv_data_size);
+ if (ret)
+ goto exit;
+ break;
+ case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
+ ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data,
+ priv_offset, max_priv_data_size);
+ if (ret)
+ goto exit;
+ break;
+ default:
+ pr_err("Invalid object type:%u at index:%d\n", object_type, i);
+ ret = -EINVAL;
+ goto exit;
+ }
+ }
+exit:
+ return ret;
+}
+
+static int criu_restore(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ uint64_t priv_offset = 0;
+ int ret = 0;
+
+ pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
+ args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
+
+ if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||
+ !args->num_devices || !args->num_bos)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ /*
+ * Set the process to evicted state to avoid running any new queues before all the memory
+ * mappings are ready.
+ */
+ ret = kfd_process_evict_queues(p);
+ if (ret)
+ goto exit_unlock;
+
+ /* Each function will adjust priv_offset based on how many bytes they consumed */
+ ret = criu_restore_process(p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_restore_devices(p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ if (priv_offset != args->priv_data_size) {
+ pr_err("Invalid private data size\n");
+ ret = -EINVAL;
+ }
+
+exit_unlock:
+ mutex_unlock(&p->mutex);
+ if (ret)
+ pr_err("Failed to restore CRIU ret:%d\n", ret);
+ else
+ pr_debug("CRIU restore successful\n");
+
+ return ret;
+}
+
+static int criu_unpause(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ int ret;
+
+ mutex_lock(&p->mutex);
+
+ if (!p->queues_paused) {
+ mutex_unlock(&p->mutex);
+ return -EINVAL;
+ }
+
+ ret = kfd_process_restore_queues(p);
+ if (ret)
+ pr_err("Failed to unpause queues ret:%d\n", ret);
+ else
+ p->queues_paused = false;
+
+ mutex_unlock(&p->mutex);
+
+ return ret;
+}
+
+static int criu_resume(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ struct kfd_process *target = NULL;
+ struct pid *pid = NULL;
+ int ret = 0;
+
+ pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,
+ args->pid);
+
+ pid = find_get_pid(args->pid);
+ if (!pid) {
+ pr_err("Cannot find pid info for %i\n", args->pid);
+ return -ESRCH;
+ }
+
+ pr_debug("calling kfd_lookup_process_by_pid\n");
+ target = kfd_lookup_process_by_pid(pid);
+
+ put_pid(pid);
+
+ if (!target) {
+ pr_debug("Cannot find process info for %i\n", args->pid);
+ return -ESRCH;
+ }
+
+ mutex_lock(&target->mutex);
+ ret = kfd_criu_resume_svm(target);
+ if (ret) {
+ pr_err("kfd_criu_resume_svm failed for %i\n", args->pid);
+ goto exit;
+ }
+
+ ret = amdgpu_amdkfd_criu_resume(target->kgd_process_info);
+ if (ret)
+ pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid);
+
+exit:
+ mutex_unlock(&target->mutex);
+
+ kfd_unref_process(target);
+ return ret;
+}
+
+static int criu_process_info(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ int ret = 0;
+
+ mutex_lock(&p->mutex);
+
+ if (!p->n_pdds) {
+ pr_err("No pdd for given process\n");
+ ret = -ENODEV;
+ goto err_unlock;
+ }
+
+ ret = kfd_process_evict_queues(p);
+ if (ret)
+ goto err_unlock;
+
+ p->queues_paused = true;
+
+ args->pid = task_pid_nr_ns(p->lead_thread,
+ task_active_pid_ns(p->lead_thread));
+
+ ret = criu_get_process_object_info(p, &args->num_devices, &args->num_bos,
+ &args->num_objects, &args->priv_data_size);
+ if (ret)
+ goto err_unlock;
+
+ dev_dbg(kfd_device, "Num of devices:%u bos:%u objects:%u priv_data_size:%lld\n",
+ args->num_devices, args->num_bos, args->num_objects,
+ args->priv_data_size);
+
+err_unlock:
+ if (ret) {
+ kfd_process_restore_queues(p);
+ p->queues_paused = false;
+ }
+ mutex_unlock(&p->mutex);
+ return ret;
+}
+
+static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_criu_args *args = data;
+ int ret;
+
+ dev_dbg(kfd_device, "CRIU operation: %d\n", args->op);
+ switch (args->op) {
+ case KFD_CRIU_OP_PROCESS_INFO:
+ ret = criu_process_info(filep, p, args);
+ break;
+ case KFD_CRIU_OP_CHECKPOINT:
+ ret = criu_checkpoint(filep, p, args);
+ break;
+ case KFD_CRIU_OP_UNPAUSE:
+ ret = criu_unpause(filep, p, args);
+ break;
+ case KFD_CRIU_OP_RESTORE:
+ ret = criu_restore(filep, p, args);
+ break;
+ case KFD_CRIU_OP_RESUME:
+ ret = criu_resume(filep, p, args);
+ break;
+ default:
+ dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret)
+ dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret);
+
+ return ret;
+}
+
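For orientation, the five ops dispatched above are meant to be driven from a userspace checkpoint tool in a fixed order (PROCESS_INFO, CHECKPOINT, UNPAUSE on the dump side; RESTORE, RESUME on the restore side). The sketch below is illustrative only and is not part of this patch: it uses the kfd_ioctl_criu_args fields and op names introduced by this series, assumes installed uapi headers providing linux/kfd_ioctl.h, and the helper name and kfd_fd descriptor are hypothetical; buffer allocation and error reporting are trimmed.

/* Illustrative checkpoint-side sequence; see note above for assumptions. */
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int kfd_checkpoint_sketch(int kfd_fd)
{
	struct kfd_ioctl_criu_args args = {0};
	int ret;

	/* Stage 1: evict queues and query object counts and private data size. */
	args.op = KFD_CRIU_OP_PROCESS_INFO;
	ret = ioctl(kfd_fd, AMDKFD_IOC_CRIU_OP, &args);
	if (ret)
		return ret;

	/*
	 * The caller would now allocate args.num_devices, args.num_bos and
	 * args.num_objects buckets plus args.priv_data_size bytes, and point
	 * args.devices, args.bos and args.priv_data at them (omitted here).
	 */

	/* Stage 2: dump process, device, BO and object state into those buffers. */
	args.op = KFD_CRIU_OP_CHECKPOINT;
	ret = ioctl(kfd_fd, AMDKFD_IOC_CRIU_OP, &args);
	if (ret)
		return ret;

	/* Stage 3: let the target's queues run again. */
	args.op = KFD_CRIU_OP_UNPAUSE;
	return ioctl(kfd_fd, AMDKFD_IOC_CRIU_OP, &args);
}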
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
@@ -1898,16 +2597,16 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
kfd_ioctl_wait_events, 0),
- AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER_DEPRECATED,
kfd_ioctl_dbg_register, 0),
- AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED,
kfd_ioctl_dbg_unregister, 0),
- AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED,
kfd_ioctl_dbg_address_watch, 0),
- AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED,
kfd_ioctl_dbg_wave_control, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
@@ -1959,6 +2658,10 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
kfd_ioctl_set_xnack_mode, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
+ kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),
+
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
@@ -1973,6 +2676,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
char *kdata = NULL;
unsigned int usize, asize;
int retcode = -EINVAL;
+ bool ptrace_attached = false;
if (nr >= AMDKFD_CORE_IOCTL_COUNT)
goto err_i1;
@@ -1998,7 +2702,15 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
* processes need to create their own KFD device context.
*/
process = filep->private_data;
- if (process->lead_thread != current->group_leader) {
+
+ rcu_read_lock();
+ if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) &&
+ ptrace_parent(process->lead_thread) == current)
+ ptrace_attached = true;
+ rcu_read_unlock();
+
+ if (process->lead_thread != current->group_leader
+ && !ptrace_attached) {
dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
retcode = -EBADF;
goto err_i1;
@@ -2013,6 +2725,19 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
goto err_i1;
}
+ /*
+ * Versions of docker shipped in Ubuntu 18.xx and 20.xx do not support
+ * CAP_CHECKPOINT_RESTORE, so we also allow access if CAP_SYS_ADMIN is set,
+ * as CAP_SYS_ADMIN is a more privileged capability.
+ */
+ if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) {
+ if (!capable(CAP_CHECKPOINT_RESTORE) &&
+ !capable(CAP_SYS_ADMIN)) {
+ retcode = -EACCES;
+ goto err_i1;
+ }
+ }
+
if (cmd & (IOC_IN | IOC_OUT)) {
if (asize <= sizeof(stack_kdata)) {
kdata = stack_kdata;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 9624bbe8b501..1eaabd2cb41b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2015-2017 Advanced Micro Devices, Inc.
+ * Copyright 2015-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -1381,7 +1382,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
break;
default:
- switch(KFD_GC_VERSION(kdev)) {
+ switch (KFD_GC_VERSION(kdev)) {
case IP_VERSION(9, 0, 1):
pcache_info = vega10_cache_info;
num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
@@ -1411,6 +1412,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
pcache_info = navi10_cache_info;
num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
break;
@@ -1567,7 +1569,7 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
/* Fetch the CRAT table from ACPI */
status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
if (status == AE_NOT_FOUND) {
- pr_warn("CRAT table not found\n");
+ pr_info("CRAT table not found\n");
return -ENODATA;
} else if (ACPI_FAILURE(status)) {
const char *err = acpi_format_exception(status);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index d54ceebd346b..482ba84a728d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -232,7 +233,7 @@ struct crat_subtype_ccompute {
#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2)
#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3)
#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4)
-#define CRAT_IOLINK_FLAGS_BI_DIRECTIONAL (1 << 31)
+#define CRAT_IOLINK_FLAGS_BI_DIRECTIONAL (1 << 31)
#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0x7fffffe0
/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
deleted file mode 100644
index 1e30717b5253..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ /dev/null
@@ -1,845 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/log2.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/device.h>
-
-#include "kfd_pm4_headers.h"
-#include "kfd_pm4_headers_diq.h"
-#include "kfd_kernel_queue.h"
-#include "kfd_priv.h"
-#include "kfd_pm4_opcodes.h"
-#include "cik_regs.h"
-#include "kfd_dbgmgr.h"
-#include "kfd_dbgdev.h"
-#include "kfd_device_queue_manager.h"
-
-static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
-{
- dev->kfd2kgd->address_watch_disable(dev->adev);
-}
-
-static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
- u32 pasid, uint64_t vmid0_address,
- uint32_t *packet_buff, size_t size_in_bytes)
-{
- struct pm4__release_mem *rm_packet;
- struct pm4__indirect_buffer_pasid *ib_packet;
- struct kfd_mem_obj *mem_obj;
- size_t pq_packets_size_in_bytes;
- union ULARGE_INTEGER *largep;
- union ULARGE_INTEGER addr;
- struct kernel_queue *kq;
- uint64_t *rm_state;
- unsigned int *ib_packet_buff;
- int status;
-
- if (WARN_ON(!size_in_bytes))
- return -EINVAL;
-
- kq = dbgdev->kq;
-
- pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
- sizeof(struct pm4__indirect_buffer_pasid);
-
- /*
- * We acquire a buffer from DIQ
- * The receive packet buff will be sitting on the Indirect Buffer
- * and in the PQ we put the IB packet + sync packet(s).
- */
- status = kq_acquire_packet_buffer(kq,
- pq_packets_size_in_bytes / sizeof(uint32_t),
- &ib_packet_buff);
- if (status) {
- pr_err("kq_acquire_packet_buffer failed\n");
- return status;
- }
-
- memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
-
- ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
-
- ib_packet->header.count = 3;
- ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
- ib_packet->header.type = PM4_TYPE_3;
-
- largep = (union ULARGE_INTEGER *) &vmid0_address;
-
- ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
- ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
-
- ib_packet->control = (1 << 23) | (1 << 31) |
- ((size_in_bytes / 4) & 0xfffff);
-
- ib_packet->bitfields5.pasid = pasid;
-
- /*
- * for now we use release mem for GPU-CPU synchronization
- * Consider WaitRegMem + WriteData as a better alternative
- * we get a GART allocations ( gpu/cpu mapping),
- * for the sync variable, and wait until:
- * (a) Sync with HW
- * (b) Sync var is written by CP to mem.
- */
- rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
- (sizeof(struct pm4__indirect_buffer_pasid) /
- sizeof(unsigned int)));
-
- status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
- &mem_obj);
-
- if (status) {
- pr_err("Failed to allocate GART memory\n");
- kq_rollback_packet(kq);
- return status;
- }
-
- rm_state = (uint64_t *) mem_obj->cpu_ptr;
-
- *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
-
- rm_packet->header.opcode = IT_RELEASE_MEM;
- rm_packet->header.type = PM4_TYPE_3;
- rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;
-
- rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
- rm_packet->bitfields2.event_index =
- event_index___release_mem__end_of_pipe;
-
- rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
- rm_packet->bitfields2.atc = 0;
- rm_packet->bitfields2.tc_wb_action_ena = 1;
-
- addr.quad_part = mem_obj->gpu_addr;
-
- rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
- rm_packet->address_hi = addr.u.high_part;
-
- rm_packet->bitfields3.data_sel =
- data_sel___release_mem__send_64_bit_data;
-
- rm_packet->bitfields3.int_sel =
- int_sel___release_mem__send_data_after_write_confirm;
-
- rm_packet->bitfields3.dst_sel =
- dst_sel___release_mem__memory_controller;
-
- rm_packet->data_lo = QUEUESTATE__ACTIVE;
-
- kq_submit_packet(kq);
-
- /* Wait till CP writes sync code: */
- status = amdkfd_fence_wait_timeout(
- rm_state,
- QUEUESTATE__ACTIVE, 1500);
-
- kfd_gtt_sa_free(dbgdev->dev, mem_obj);
-
- return status;
-}
-
-static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
-{
- /*
- * no action is needed in this case,
- * just make sure diq will not be used
- */
-
- dbgdev->kq = NULL;
-
- return 0;
-}
-
-static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
-{
- struct queue_properties properties;
- unsigned int qid;
- struct kernel_queue *kq = NULL;
- int status;
-
- properties.type = KFD_QUEUE_TYPE_DIQ;
-
- status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
- &properties, &qid, NULL);
-
- if (status) {
- pr_err("Failed to create DIQ\n");
- return status;
- }
-
- pr_debug("DIQ Created with queue id: %d\n", qid);
-
- kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
-
- if (!kq) {
- pr_err("Error getting DIQ\n");
- pqm_destroy_queue(dbgdev->pqm, qid);
- return -EFAULT;
- }
-
- dbgdev->kq = kq;
-
- return status;
-}
-
-static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
-{
- /* disable watch address */
- dbgdev_address_watch_disable_nodiq(dbgdev->dev);
- return 0;
-}
-
-static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
-{
- /* todo - disable address watch */
- int status;
-
- status = pqm_destroy_queue(dbgdev->pqm,
- dbgdev->kq->queue->properties.queue_id);
- dbgdev->kq = NULL;
-
- return status;
-}
-
-static void dbgdev_address_watch_set_registers(
- const struct dbg_address_watch_info *adw_info,
- union TCP_WATCH_ADDR_H_BITS *addrHi,
- union TCP_WATCH_ADDR_L_BITS *addrLo,
- union TCP_WATCH_CNTL_BITS *cntl,
- unsigned int index, unsigned int vmid)
-{
- union ULARGE_INTEGER addr;
-
- addr.quad_part = 0;
- addrHi->u32All = 0;
- addrLo->u32All = 0;
- cntl->u32All = 0;
-
- if (adw_info->watch_mask)
- cntl->bitfields.mask =
- (uint32_t) (adw_info->watch_mask[index] &
- ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
- else
- cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
-
- addr.quad_part = (unsigned long long) adw_info->watch_address[index];
-
- addrHi->bitfields.addr = addr.u.high_part &
- ADDRESS_WATCH_REG_ADDHIGH_MASK;
- addrLo->bitfields.addr =
- (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
-
- cntl->bitfields.mode = adw_info->watch_mode[index];
- cntl->bitfields.vmid = (uint32_t) vmid;
- /* for now assume it is an ATC address */
- cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
-
- pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
- pr_debug("\t\t%20s %08x\n", "set reg add high :",
- addrHi->bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "set reg add low :",
- addrLo->bitfields.addr);
-}
-
-static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
- struct dbg_address_watch_info *adw_info)
-{
- union TCP_WATCH_ADDR_H_BITS addrHi;
- union TCP_WATCH_ADDR_L_BITS addrLo;
- union TCP_WATCH_CNTL_BITS cntl;
- struct kfd_process_device *pdd;
- unsigned int i;
-
- /* taking the vmid for that process on the safe way using pdd */
- pdd = kfd_get_process_device_data(dbgdev->dev,
- adw_info->process);
- if (!pdd) {
- pr_err("Failed to get pdd for wave control no DIQ\n");
- return -EFAULT;
- }
-
- addrHi.u32All = 0;
- addrLo.u32All = 0;
- cntl.u32All = 0;
-
- if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
- (adw_info->num_watch_points == 0)) {
- pr_err("num_watch_points is invalid\n");
- return -EINVAL;
- }
-
- if (!adw_info->watch_mode || !adw_info->watch_address) {
- pr_err("adw_info fields are not valid\n");
- return -EINVAL;
- }
-
- for (i = 0; i < adw_info->num_watch_points; i++) {
- dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
- &cntl, i, pdd->qpd.vmid);
-
- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
- pr_debug("\t\t%20s %08x\n", "register index :", i);
- pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
- pr_debug("\t\t%20s %08x\n", "Address Low is :",
- addrLo.bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "Address high is :",
- addrHi.bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "Address high is :",
- addrHi.bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "Control Mask is :",
- cntl.bitfields.mask);
- pr_debug("\t\t%20s %08x\n", "Control Mode is :",
- cntl.bitfields.mode);
- pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
- cntl.bitfields.vmid);
- pr_debug("\t\t%20s %08x\n", "Control atc is :",
- cntl.bitfields.atc);
- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
-
- pdd->dev->kfd2kgd->address_watch_execute(
- dbgdev->dev->adev,
- i,
- cntl.u32All,
- addrHi.u32All,
- addrLo.u32All);
- }
-
- return 0;
-}
-
-static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
- struct dbg_address_watch_info *adw_info)
-{
- struct pm4__set_config_reg *packets_vec;
- union TCP_WATCH_ADDR_H_BITS addrHi;
- union TCP_WATCH_ADDR_L_BITS addrLo;
- union TCP_WATCH_CNTL_BITS cntl;
- struct kfd_mem_obj *mem_obj;
- unsigned int aw_reg_add_dword;
- uint32_t *packet_buff_uint;
- unsigned int i;
- int status;
- size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
- /* we do not control the vmid in DIQ mode, just a place holder */
- unsigned int vmid = 0;
-
- addrHi.u32All = 0;
- addrLo.u32All = 0;
- cntl.u32All = 0;
-
- if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
- (adw_info->num_watch_points == 0)) {
- pr_err("num_watch_points is invalid\n");
- return -EINVAL;
- }
-
- if (!adw_info->watch_mode || !adw_info->watch_address) {
- pr_err("adw_info fields are not valid\n");
- return -EINVAL;
- }
-
- status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
-
- if (status) {
- pr_err("Failed to allocate GART memory\n");
- return status;
- }
-
- packet_buff_uint = mem_obj->cpu_ptr;
-
- memset(packet_buff_uint, 0, ib_size);
-
- packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
-
- packets_vec[0].header.count = 1;
- packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
- packets_vec[0].header.type = PM4_TYPE_3;
- packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
- packets_vec[0].bitfields2.insert_vmid = 1;
- packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
- packets_vec[1].bitfields2.insert_vmid = 0;
- packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
- packets_vec[2].bitfields2.insert_vmid = 0;
- packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
- packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
- packets_vec[3].bitfields2.insert_vmid = 1;
-
- for (i = 0; i < adw_info->num_watch_points; i++) {
- dbgdev_address_watch_set_registers(adw_info,
- &addrHi,
- &addrLo,
- &cntl,
- i,
- vmid);
-
- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
- pr_debug("\t\t%20s %08x\n", "register index :", i);
- pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
- pr_debug("\t\t%20s %p\n", "Add ptr is :",
- adw_info->watch_address);
- pr_debug("\t\t%20s %08llx\n", "Add is :",
- adw_info->watch_address[i]);
- pr_debug("\t\t%20s %08x\n", "Address Low is :",
- addrLo.bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "Address high is :",
- addrHi.bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "Control Mask is :",
- cntl.bitfields.mask);
- pr_debug("\t\t%20s %08x\n", "Control Mode is :",
- cntl.bitfields.mode);
- pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
- cntl.bitfields.vmid);
- pr_debug("\t\t%20s %08x\n", "Control atc is :",
- cntl.bitfields.atc);
- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
-
- aw_reg_add_dword =
- dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->adev,
- i,
- ADDRESS_WATCH_REG_CNTL);
-
- packets_vec[0].bitfields2.reg_offset =
- aw_reg_add_dword - AMD_CONFIG_REG_BASE;
-
- packets_vec[0].reg_data[0] = cntl.u32All;
-
- aw_reg_add_dword =
- dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->adev,
- i,
- ADDRESS_WATCH_REG_ADDR_HI);
-
- packets_vec[1].bitfields2.reg_offset =
- aw_reg_add_dword - AMD_CONFIG_REG_BASE;
- packets_vec[1].reg_data[0] = addrHi.u32All;
-
- aw_reg_add_dword =
- dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->adev,
- i,
- ADDRESS_WATCH_REG_ADDR_LO);
-
- packets_vec[2].bitfields2.reg_offset =
- aw_reg_add_dword - AMD_CONFIG_REG_BASE;
- packets_vec[2].reg_data[0] = addrLo.u32All;
-
- /* enable watch flag if address is not zero*/
- if (adw_info->watch_address[i] > 0)
- cntl.bitfields.valid = 1;
- else
- cntl.bitfields.valid = 0;
-
- aw_reg_add_dword =
- dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->adev,
- i,
- ADDRESS_WATCH_REG_CNTL);
-
- packets_vec[3].bitfields2.reg_offset =
- aw_reg_add_dword - AMD_CONFIG_REG_BASE;
- packets_vec[3].reg_data[0] = cntl.u32All;
-
- status = dbgdev_diq_submit_ib(
- dbgdev,
- adw_info->process->pasid,
- mem_obj->gpu_addr,
- packet_buff_uint,
- ib_size);
-
- if (status) {
- pr_err("Failed to submit IB to DIQ\n");
- break;
- }
- }
-
- kfd_gtt_sa_free(dbgdev->dev, mem_obj);
- return status;
-}
-
-static int dbgdev_wave_control_set_registers(
- struct dbg_wave_control_info *wac_info,
- union SQ_CMD_BITS *in_reg_sq_cmd,
- union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
-{
- int status = 0;
- union SQ_CMD_BITS reg_sq_cmd;
- union GRBM_GFX_INDEX_BITS reg_gfx_index;
- struct HsaDbgWaveMsgAMDGen2 *pMsg;
-
- reg_sq_cmd.u32All = 0;
- reg_gfx_index.u32All = 0;
- pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
-
- switch (wac_info->mode) {
- /* Send command to single wave */
- case HSA_DBG_WAVEMODE_SINGLE:
- /*
- * Limit access to the process waves only,
- * by setting vmid check
- */
- reg_sq_cmd.bits.check_vmid = 1;
- reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
- reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
- reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
-
- reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
- reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
- reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
-
- break;
-
- /* Send command to all waves with matching VMID */
- case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
-
- reg_gfx_index.bits.sh_broadcast_writes = 1;
- reg_gfx_index.bits.se_broadcast_writes = 1;
- reg_gfx_index.bits.instance_broadcast_writes = 1;
-
- reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
-
- break;
-
- /* Send command to all CU waves with matching VMID */
- case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
-
- reg_sq_cmd.bits.check_vmid = 1;
- reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
-
- reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
- reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
- reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
-
- break;
-
- default:
- return -EINVAL;
- }
-
- switch (wac_info->operand) {
- case HSA_DBG_WAVEOP_HALT:
- reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
- break;
-
- case HSA_DBG_WAVEOP_RESUME:
- reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
- break;
-
- case HSA_DBG_WAVEOP_KILL:
- reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
- break;
-
- case HSA_DBG_WAVEOP_DEBUG:
- reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
- break;
-
- case HSA_DBG_WAVEOP_TRAP:
- if (wac_info->trapId < MAX_TRAPID) {
- reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
- reg_sq_cmd.bits.trap_id = wac_info->trapId;
- } else {
- status = -EINVAL;
- }
- break;
-
- default:
- status = -EINVAL;
- break;
- }
-
- if (status == 0) {
- *in_reg_sq_cmd = reg_sq_cmd;
- *in_reg_gfx_index = reg_gfx_index;
- }
-
- return status;
-}
-
-static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
- struct dbg_wave_control_info *wac_info)
-{
-
- int status;
- union SQ_CMD_BITS reg_sq_cmd;
- union GRBM_GFX_INDEX_BITS reg_gfx_index;
- struct kfd_mem_obj *mem_obj;
- uint32_t *packet_buff_uint;
- struct pm4__set_config_reg *packets_vec;
- size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
-
- reg_sq_cmd.u32All = 0;
-
- status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
- &reg_gfx_index);
- if (status) {
- pr_err("Failed to set wave control registers\n");
- return status;
- }
-
- /* we do not control the VMID in DIQ, so reset it to a known value */
- reg_sq_cmd.bits.vm_id = 0;
-
- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
-
- pr_debug("\t\t mode is: %u\n", wac_info->mode);
- pr_debug("\t\t operand is: %u\n", wac_info->operand);
- pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
- pr_debug("\t\t msg value is: %u\n",
- wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
- pr_debug("\t\t vmid is: N/A\n");
-
- pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
- pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
- pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
- pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
- pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
- pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
- pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
-
- pr_debug("\t\t ibw is : %u\n",
- reg_gfx_index.bitfields.instance_broadcast_writes);
- pr_debug("\t\t ii is : %u\n",
- reg_gfx_index.bitfields.instance_index);
- pr_debug("\t\t sebw is : %u\n",
- reg_gfx_index.bitfields.se_broadcast_writes);
- pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
- pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
- pr_debug("\t\t sbw is : %u\n",
- reg_gfx_index.bitfields.sh_broadcast_writes);
-
- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
-
- status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
-
- if (status != 0) {
- pr_err("Failed to allocate GART memory\n");
- return status;
- }
-
- packet_buff_uint = mem_obj->cpu_ptr;
-
- memset(packet_buff_uint, 0, ib_size);
-
- packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
- packets_vec[0].header.count = 1;
- packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
- packets_vec[0].header.type = PM4_TYPE_3;
- packets_vec[0].bitfields2.reg_offset =
- GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
-
- packets_vec[0].bitfields2.insert_vmid = 0;
- packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
-
- packets_vec[1].header.count = 1;
- packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
- packets_vec[1].header.type = PM4_TYPE_3;
- packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;
-
- packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
- packets_vec[1].bitfields2.insert_vmid = 1;
- packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
-
- /* Restore the GRBM_GFX_INDEX register */
-
- reg_gfx_index.u32All = 0;
- reg_gfx_index.bits.sh_broadcast_writes = 1;
- reg_gfx_index.bits.instance_broadcast_writes = 1;
- reg_gfx_index.bits.se_broadcast_writes = 1;
-
-
- packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
- packets_vec[2].bitfields2.reg_offset =
- GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
-
- packets_vec[2].bitfields2.insert_vmid = 0;
- packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
-
- status = dbgdev_diq_submit_ib(
- dbgdev,
- wac_info->process->pasid,
- mem_obj->gpu_addr,
- packet_buff_uint,
- ib_size);
-
- if (status)
- pr_err("Failed to submit IB to DIQ\n");
-
- kfd_gtt_sa_free(dbgdev->dev, mem_obj);
-
- return status;
-}
-
-static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
- struct dbg_wave_control_info *wac_info)
-{
- int status;
- union SQ_CMD_BITS reg_sq_cmd;
- union GRBM_GFX_INDEX_BITS reg_gfx_index;
- struct kfd_process_device *pdd;
-
- reg_sq_cmd.u32All = 0;
-
- /* taking the VMID for that process on the safe way using PDD */
- pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
-
- if (!pdd) {
- pr_err("Failed to get pdd for wave control no DIQ\n");
- return -EFAULT;
- }
- status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
- &reg_gfx_index);
- if (status) {
- pr_err("Failed to set wave control registers\n");
- return status;
- }
-
- /* for non DIQ we need to patch the VMID: */
-
- reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
-
- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
-
- pr_debug("\t\t mode is: %u\n", wac_info->mode);
- pr_debug("\t\t operand is: %u\n", wac_info->operand);
- pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
- pr_debug("\t\t msg value is: %u\n",
- wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
- pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);
-
- pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
- pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
- pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
- pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
- pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
- pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
- pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
-
- pr_debug("\t\t ibw is : %u\n",
- reg_gfx_index.bitfields.instance_broadcast_writes);
- pr_debug("\t\t ii is : %u\n",
- reg_gfx_index.bitfields.instance_index);
- pr_debug("\t\t sebw is : %u\n",
- reg_gfx_index.bitfields.se_broadcast_writes);
- pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
- pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
- pr_debug("\t\t sbw is : %u\n",
- reg_gfx_index.bitfields.sh_broadcast_writes);
-
- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
-
- return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->adev,
- reg_gfx_index.u32All,
- reg_sq_cmd.u32All);
-}
-
-int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
-{
- int status = 0;
- unsigned int vmid;
- uint16_t queried_pasid;
- union SQ_CMD_BITS reg_sq_cmd;
- union GRBM_GFX_INDEX_BITS reg_gfx_index;
- struct kfd_process_device *pdd;
- struct dbg_wave_control_info wac_info;
- int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
- int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
-
- reg_sq_cmd.u32All = 0;
- status = 0;
-
- wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
- wac_info.operand = HSA_DBG_WAVEOP_KILL;
-
- pr_debug("Killing all process wavefronts\n");
-
- /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
- * ATC_VMID15_PASID_MAPPING
- * to check which VMID the current process is mapped to.
- */
-
- for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
- status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
- (dev->adev, vmid, &queried_pasid);
-
- if (status && queried_pasid == p->pasid) {
- pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
- vmid, p->pasid);
- break;
- }
- }
-
- if (vmid > last_vmid_to_scan) {
- pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
- return -EFAULT;
- }
-
- /* taking the VMID for that process on the safe way using PDD */
- pdd = kfd_get_process_device_data(dev, p);
- if (!pdd)
- return -EFAULT;
-
- status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
- &reg_gfx_index);
- if (status != 0)
- return -EINVAL;
-
- /* for non DIQ we need to patch the VMID: */
- reg_sq_cmd.bits.vm_id = vmid;
-
- dev->kfd2kgd->wave_control_execute(dev->adev,
- reg_gfx_index.u32All,
- reg_sq_cmd.u32All);
-
- return 0;
-}
-
-void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
- enum DBGDEV_TYPE type)
-{
- pdbgdev->dev = pdev;
- pdbgdev->kq = NULL;
- pdbgdev->type = type;
- pdbgdev->pqm = NULL;
-
- switch (type) {
- case DBGDEV_TYPE_NODIQ:
- pdbgdev->dbgdev_register = dbgdev_register_nodiq;
- pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
- pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
- pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
- break;
- case DBGDEV_TYPE_DIQ:
- default:
- pdbgdev->dbgdev_register = dbgdev_register_diq;
- pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
- pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
- pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
- break;
- }
-
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
deleted file mode 100644
index 0619c777b47e..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef KFD_DBGDEV_H_
-#define KFD_DBGDEV_H_
-
-enum {
- SQ_CMD_VMID_OFFSET = 28,
- ADDRESS_WATCH_CNTL_OFFSET = 24
-};
-
-enum {
- PRIV_QUEUE_SYNC_TIME_MS = 200
-};
-
-/* CONTEXT reg space definition */
-enum {
- CONTEXT_REG_BASE = 0xA000,
- CONTEXT_REG_END = 0xA400,
- CONTEXT_REG_SIZE = CONTEXT_REG_END - CONTEXT_REG_BASE
-};
-
-/* USER CONFIG reg space definition */
-enum {
- USERCONFIG_REG_BASE = 0xC000,
- USERCONFIG_REG_END = 0x10000,
- USERCONFIG_REG_SIZE = USERCONFIG_REG_END - USERCONFIG_REG_BASE
-};
-
-/* CONFIG reg space definition */
-enum {
- AMD_CONFIG_REG_BASE = 0x2000, /* in dwords */
- AMD_CONFIG_REG_END = 0x2B00,
- AMD_CONFIG_REG_SIZE = AMD_CONFIG_REG_END - AMD_CONFIG_REG_BASE
-};
-
-/* SH reg space definition */
-enum {
- SH_REG_BASE = 0x2C00,
- SH_REG_END = 0x3000,
- SH_REG_SIZE = SH_REG_END - SH_REG_BASE
-};
-
-/* SQ_CMD definitions */
-#define SQ_CMD 0x8DEC
-
-enum SQ_IND_CMD_CMD {
- SQ_IND_CMD_CMD_NULL = 0x00000000,
- SQ_IND_CMD_CMD_HALT = 0x00000001,
- SQ_IND_CMD_CMD_RESUME = 0x00000002,
- SQ_IND_CMD_CMD_KILL = 0x00000003,
- SQ_IND_CMD_CMD_DEBUG = 0x00000004,
- SQ_IND_CMD_CMD_TRAP = 0x00000005,
-};
-
-enum SQ_IND_CMD_MODE {
- SQ_IND_CMD_MODE_SINGLE = 0x00000000,
- SQ_IND_CMD_MODE_BROADCAST = 0x00000001,
- SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002,
- SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003,
- SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004,
-};
-
-union SQ_IND_INDEX_BITS {
- struct {
- uint32_t wave_id:4;
- uint32_t simd_id:2;
- uint32_t thread_id:6;
- uint32_t:1;
- uint32_t force_read:1;
- uint32_t read_timeout:1;
- uint32_t unindexed:1;
- uint32_t index:16;
-
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union SQ_IND_CMD_BITS {
- struct {
- uint32_t data:32;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union SQ_CMD_BITS {
- struct {
- uint32_t cmd:3;
- uint32_t:1;
- uint32_t mode:3;
- uint32_t check_vmid:1;
- uint32_t trap_id:3;
- uint32_t:5;
- uint32_t wave_id:4;
- uint32_t simd_id:2;
- uint32_t:2;
- uint32_t queue_id:3;
- uint32_t:1;
- uint32_t vm_id:4;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union SQ_IND_DATA_BITS {
- struct {
- uint32_t data:32;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union GRBM_GFX_INDEX_BITS {
- struct {
- uint32_t instance_index:8;
- uint32_t sh_index:8;
- uint32_t se_index:8;
- uint32_t:5;
- uint32_t sh_broadcast_writes:1;
- uint32_t instance_broadcast_writes:1;
- uint32_t se_broadcast_writes:1;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union TCP_WATCH_ADDR_H_BITS {
- struct {
- uint32_t addr:16;
- uint32_t:16;
-
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union TCP_WATCH_ADDR_L_BITS {
- struct {
- uint32_t:6;
- uint32_t addr:26;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-enum {
- QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */
- QUEUESTATE__ACTIVE_COMPLETION_PENDING,
- QUEUESTATE__ACTIVE
-};
-
-union ULARGE_INTEGER {
- struct {
- uint32_t low_part;
- uint32_t high_part;
- } u;
- unsigned long long quad_part;
-};
-
-
-#define KFD_CIK_VMID_START_OFFSET (8)
-#define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8))
-
-
-void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
- enum DBGDEV_TYPE type);
-
-union TCP_WATCH_CNTL_BITS {
- struct {
- uint32_t mask:24;
- uint32_t vmid:4;
- uint32_t atc:1;
- uint32_t mode:2;
- uint32_t valid:1;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-enum {
- ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
- ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
- ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
- /* extend the mask to 26 bits in order to match the low address field */
- ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
- ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
-};
-
-enum {
- MAX_TRAPID = 8, /* 3 bits in the bitfield. */
- MAX_WATCH_ADDRESSES = 4
-};
-
-enum {
- ADDRESS_WATCH_REG_ADDR_HI = 0,
- ADDRESS_WATCH_REG_ADDR_LO,
- ADDRESS_WATCH_REG_CNTL,
- ADDRESS_WATCH_REG_MAX
-};
-
-#endif /* KFD_DBGDEV_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
deleted file mode 100644
index 9bfa50633654..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/log2.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-
-#include "kfd_priv.h"
-#include "cik_regs.h"
-#include "kfd_pm4_headers.h"
-#include "kfd_pm4_headers_diq.h"
-#include "kfd_dbgmgr.h"
-#include "kfd_dbgdev.h"
-#include "kfd_device_queue_manager.h"
-
-static DEFINE_MUTEX(kfd_dbgmgr_mutex);
-
-struct mutex *kfd_get_dbgmgr_mutex(void)
-{
- return &kfd_dbgmgr_mutex;
-}
-
-
-static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)
-{
- kfree(pmgr->dbgdev);
-
- pmgr->dbgdev = NULL;
- pmgr->pasid = 0;
- pmgr->dev = NULL;
-}
-
-void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr)
-{
- if (pmgr) {
- kfd_dbgmgr_uninitialize(pmgr);
- kfree(pmgr);
- }
-}
-
-bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
-{
- enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ;
- struct kfd_dbgmgr *new_buff;
-
- if (WARN_ON(!pdev->init_complete))
- return false;
-
- new_buff = kfd_alloc_struct(new_buff);
- if (!new_buff) {
- pr_err("Failed to allocate dbgmgr instance\n");
- return false;
- }
-
- new_buff->pasid = 0;
- new_buff->dev = pdev;
- new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev);
- if (!new_buff->dbgdev) {
- pr_err("Failed to allocate dbgdev instance\n");
- kfree(new_buff);
- return false;
- }
-
- /* get actual type of DBGDevice cpsch or not */
- if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
- type = DBGDEV_TYPE_NODIQ;
-
- kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
- *ppmgr = new_buff;
-
- return true;
-}
-
-long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
-{
- if (pmgr->pasid != 0) {
- pr_debug("H/W debugger is already active using pasid 0x%x\n",
- pmgr->pasid);
- return -EBUSY;
- }
-
- /* remember pasid */
- pmgr->pasid = p->pasid;
-
- /* provide the pqm for diq generation */
- pmgr->dbgdev->pqm = &p->pqm;
-
- /* activate the actual registering */
- pmgr->dbgdev->dbgdev_register(pmgr->dbgdev);
-
- return 0;
-}
-
-long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
-{
- /* Is the requests coming from the already registered process? */
- if (pmgr->pasid != p->pasid) {
- pr_debug("H/W debugger is not registered by calling pasid 0x%x\n",
- p->pasid);
- return -EINVAL;
- }
-
- pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev);
-
- pmgr->pasid = 0;
-
- return 0;
-}
-
-long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
- struct dbg_wave_control_info *wac_info)
-{
- /* Is the requests coming from the already registered process? */
- if (pmgr->pasid != wac_info->process->pasid) {
- pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n",
- wac_info->process->pasid);
- return -EINVAL;
- }
-
- return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info);
-}
-
-long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
- struct dbg_address_watch_info *adw_info)
-{
- /* Is the requests coming from the already registered process? */
- if (pmgr->pasid != adw_info->process->pasid) {
- pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n",
- adw_info->process->pasid);
- return -EINVAL;
- }
-
- return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev,
- adw_info);
-}
-
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
deleted file mode 100644
index f9c6df1fdc5c..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef KFD_DBGMGR_H_
-#define KFD_DBGMGR_H_
-
-#include "kfd_priv.h"
-
-/* must align with hsakmttypes definition */
-#pragma pack(push, 4)
-
-enum HSA_DBG_WAVEOP {
- HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */
- HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */
- HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */
- HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter dbg mode */
- HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take a trap */
- HSA_DBG_NUM_WAVEOP = 5,
- HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF
-};
-
-enum HSA_DBG_WAVEMODE {
- /* send command to a single wave */
- HSA_DBG_WAVEMODE_SINGLE = 0,
- /*
- * Broadcast to all wavefronts of all processes is not
- * supported for HSA user mode
- */
-
- /* send to waves within current process */
- HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2,
- /* send to waves within current process on CU */
- HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3,
- HSA_DBG_NUM_WAVEMODE = 3,
- HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF
-};
-
-enum HSA_DBG_WAVEMSG_TYPE {
- HSA_DBG_WAVEMSG_AUTO = 0,
- HSA_DBG_WAVEMSG_USER = 1,
- HSA_DBG_WAVEMSG_ERROR = 2,
- HSA_DBG_NUM_WAVEMSG,
- HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF
-};
-
-enum HSA_DBG_WATCH_MODE {
- HSA_DBG_WATCH_READ = 0, /* Read operations only */
- HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */
- HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */
- HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */
- HSA_DBG_WATCH_NUM,
- HSA_DBG_WATCH_SIZE = 0xFFFFFFFF
-};
-
-/* This structure is hardware specific and may change in the future */
-struct HsaDbgWaveMsgAMDGen2 {
- union {
- struct ui32 {
- uint32_t UserData:8; /* user data */
- uint32_t ShaderArray:1; /* Shader array */
- uint32_t Priv:1; /* Privileged */
- uint32_t Reserved0:4; /* Reserved, should be 0 */
- uint32_t WaveId:4; /* wave id */
- uint32_t SIMD:2; /* SIMD id */
- uint32_t HSACU:4; /* Compute unit */
- uint32_t ShaderEngine:2;/* Shader engine */
- uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */
- uint32_t Reserved1:4; /* Reserved, should be 0 */
- } ui32;
- uint32_t Value;
- };
- uint32_t Reserved2;
-};
-
-union HsaDbgWaveMessageAMD {
- struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2;
- /* for future HsaDbgWaveMsgAMDGen3; */
-};
-
-struct HsaDbgWaveMessage {
- void *MemoryVA; /* ptr to associated host-accessible data */
- union HsaDbgWaveMessageAMD DbgWaveMsg;
-};
-
-/*
- * TODO: This definitions to be MOVED to kfd_event, once it is implemented.
- *
- * HSA sync primitive, Event and HW Exception notification API definitions.
- * The API functions allow the runtime to define a so-called sync-primitive,
- * a SW object combining a user-mode provided "syncvar" and a scheduler event
- * that can be signaled through a defined GPU interrupt. A syncvar is
- * a process virtual memory location of a certain size that can be accessed
- * by CPU and GPU shader code within the process to set and query the content
- * within that memory. The definition of the content is determined by the HSA
- * runtime and potentially GPU shader code interfacing with the HSA runtime.
- * The syncvar values may be commonly written through an PM4 WRITE_DATA packet
- * in the user mode instruction stream. The OS scheduler event is typically
- * associated and signaled by an interrupt issued by the GPU, but other HSA
- * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced
- * by the KFD by this mechanism, too.
- */
-
-/* these are the new definitions for events */
-enum HSA_EVENTTYPE {
- HSA_EVENTTYPE_SIGNAL = 0, /* user-mode generated GPU signal */
- HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */
- HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change
- * (start/stop)
- */
- HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */
- HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */
- HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */
- HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */
- HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state
- * (EOP pm4)
- */
- /* ... */
- HSA_EVENTTYPE_MAXID,
- HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF
-};
-
-/* Sub-definitions for various event types: Syncvar */
-struct HsaSyncVar {
- union SyncVar {
- void *UserData; /* pointer to user mode data */
- uint64_t UserDataPtrValue; /* 64bit compatibility of value */
- } SyncVar;
- uint64_t SyncVarSize;
-};
-
-/* Sub-definitions for various event types: NodeChange */
-
-enum HSA_EVENTTYPE_NODECHANGE_FLAGS {
- HSA_EVENTTYPE_NODECHANGE_ADD = 0,
- HSA_EVENTTYPE_NODECHANGE_REMOVE = 1,
- HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF
-};
-
-struct HsaNodeChange {
- /* HSA node added/removed on the platform */
- enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags;
-};
-
-/* Sub-definitions for various event types: DeviceStateChange */
-enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS {
- /* device started (and available) */
- HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0,
- /* device stopped (i.e. unavailable) */
- HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1,
- HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF
-};
-
-enum HSA_DEVICE {
- HSA_DEVICE_CPU = 0,
- HSA_DEVICE_GPU = 1,
- MAX_HSA_DEVICE = 2
-};
-
-struct HsaDeviceStateChange {
- uint32_t NodeId; /* F-NUMA node that contains the device */
- enum HSA_DEVICE Device; /* device type: GPU or CPU */
- enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */
-};
-
-struct HsaEventData {
- enum HSA_EVENTTYPE EventType; /* event type */
- union EventData {
- /*
- * return data associated with HSA_EVENTTYPE_SIGNAL
- * and other events
- */
- struct HsaSyncVar SyncVar;
-
- /* data associated with HSA_EVENTTYPE_NODE_CHANGE */
- struct HsaNodeChange NodeChangeState;
-
- /* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */
- struct HsaDeviceStateChange DeviceState;
- } EventData;
-
- /* the following data entries are internal to the KFD & thunk itself */
-
- /* internal thunk store for Event data (OsEventHandle) */
- uint64_t HWData1;
- /* internal thunk store for Event data (HWAddress) */
- uint64_t HWData2;
- /* internal thunk store for Event data (HWData) */
- uint32_t HWData3;
-};
-
-struct HsaEventDescriptor {
- /* event type to allocate */
- enum HSA_EVENTTYPE EventType;
- /* H-NUMA node containing GPU device that is event source */
- uint32_t NodeId;
- /* pointer to user mode syncvar data, syncvar->UserDataPtrValue
- * may be NULL
- */
- struct HsaSyncVar SyncVar;
-};
-
-struct HsaEvent {
- uint32_t EventId;
- struct HsaEventData EventData;
-};
-
-#pragma pack(pop)
-
-enum DBGDEV_TYPE {
- DBGDEV_TYPE_ILLEGAL = 0,
- DBGDEV_TYPE_NODIQ = 1,
- DBGDEV_TYPE_DIQ = 2,
- DBGDEV_TYPE_TEST = 3
-};
-
-struct dbg_address_watch_info {
- struct kfd_process *process;
- enum HSA_DBG_WATCH_MODE *watch_mode;
- uint64_t *watch_address;
- uint64_t *watch_mask;
- struct HsaEvent *watch_event;
- uint32_t num_watch_points;
-};
-
-struct dbg_wave_control_info {
- struct kfd_process *process;
- uint32_t trapId;
- enum HSA_DBG_WAVEOP operand;
- enum HSA_DBG_WAVEMODE mode;
- struct HsaDbgWaveMessage dbgWave_msg;
-};
-
-struct kfd_dbgdev {
-
- /* The device that owns this data. */
- struct kfd_dev *dev;
-
- /* kernel queue for DIQ */
- struct kernel_queue *kq;
-
- /* a pointer to the pqm of the calling process */
- struct process_queue_manager *pqm;
-
- /* type of debug device ( DIQ, non DIQ, etc. ) */
- enum DBGDEV_TYPE type;
-
- /* virtualized function pointers to device dbg */
- int (*dbgdev_register)(struct kfd_dbgdev *dbgdev);
- int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev);
- int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev,
- struct dbg_address_watch_info *adw_info);
- int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev,
- struct dbg_wave_control_info *wac_info);
-
-};
-
-struct kfd_dbgmgr {
- u32 pasid;
- struct kfd_dev *dev;
- struct kfd_dbgdev *dbgdev;
-};
-
-/* prototypes for debug manager functions */
-struct mutex *kfd_get_dbgmgr_mutex(void);
-void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr);
-bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev);
-long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
-long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
-long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
- struct dbg_wave_control_info *wac_info);
-long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
- struct dbg_address_watch_info *adw_info);
-#endif /* KFD_DBGMGR_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
index 673d5e34f213..581c3a30fee1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2016-2017 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -35,7 +36,7 @@ static int kfd_debugfs_open(struct inode *inode, struct file *file)
}
static int kfd_debugfs_hang_hws_read(struct seq_file *m, void *data)
{
- seq_printf(m, "echo gpu_id > hang_hws\n");
+ seq_puts(m, "echo gpu_id > hang_hws\n");
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 2b65d0acae2c..339e12c94cff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -64,34 +65,33 @@ static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)
uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
switch (sdma_version) {
- case IP_VERSION(4, 0, 0):/* VEGA10 */
- case IP_VERSION(4, 0, 1):/* VEGA12 */
- case IP_VERSION(4, 1, 0):/* RAVEN */
- case IP_VERSION(4, 1, 1):/* RAVEN */
- case IP_VERSION(4, 1, 2):/* RENOIR */
- case IP_VERSION(5, 2, 1):/* VANGOGH */
- case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
- kfd->device_info.num_sdma_queues_per_engine = 2;
- break;
- case IP_VERSION(4, 2, 0):/* VEGA20 */
- case IP_VERSION(4, 2, 2):/* ARCTURUS */
- case IP_VERSION(4, 4, 0):/* ALDEBARAN */
- case IP_VERSION(5, 0, 0):/* NAVI10 */
- case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
- case IP_VERSION(5, 0, 2):/* NAVI14 */
- case IP_VERSION(5, 0, 5):/* NAVI12 */
- case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
- case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */
- case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
- case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */
- kfd->device_info.num_sdma_queues_per_engine = 8;
- break;
- default:
- dev_warn(kfd_device,
- "Default sdma queue per engine(8) is set due to "
- "mismatch of sdma ip block(SDMA_HWIP:0x%x).\n",
- sdma_version);
- kfd->device_info.num_sdma_queues_per_engine = 8;
+ case IP_VERSION(4, 0, 0):/* VEGA10 */
+ case IP_VERSION(4, 0, 1):/* VEGA12 */
+ case IP_VERSION(4, 1, 0):/* RAVEN */
+ case IP_VERSION(4, 1, 1):/* RAVEN */
+ case IP_VERSION(4, 1, 2):/* RENOIR */
+ case IP_VERSION(5, 2, 1):/* VANGOGH */
+ case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
+ kfd->device_info.num_sdma_queues_per_engine = 2;
+ break;
+ case IP_VERSION(4, 2, 0):/* VEGA20 */
+ case IP_VERSION(4, 2, 2):/* ARCTURUS */
+ case IP_VERSION(4, 4, 0):/* ALDEBARAN */
+ case IP_VERSION(5, 0, 0):/* NAVI10 */
+ case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
+ case IP_VERSION(5, 0, 2):/* NAVI14 */
+ case IP_VERSION(5, 0, 5):/* NAVI12 */
+ case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
+ case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */
+ case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
+ case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */
+ kfd->device_info.num_sdma_queues_per_engine = 8;
+ break;
+ default:
+ dev_warn(kfd_device,
+ "Default sdma queue per engine(8) is set due to mismatch of sdma ip block(SDMA_HWIP:0x%x).\n",
+ sdma_version);
+ kfd->device_info.num_sdma_queues_per_engine = 8;
}
}
@@ -111,6 +111,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
case IP_VERSION(10, 3, 1): /* VANGOGH */
case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
+ case IP_VERSION(10, 1, 4):
case IP_VERSION(10, 1, 10): /* NAVI10 */
case IP_VERSION(10, 1, 2): /* NAVI12 */
case IP_VERSION(10, 1, 1): /* NAVI14 */
@@ -308,6 +309,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
break;
/* Cyan Skillfish */
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
gfx_target_version = 100103;
if (!vf)
f2g = &gfx_v10_kfd2kgd;
@@ -437,7 +439,8 @@ static int kfd_gws_init(struct kfd_dev *kfd)
return ret;
}
-static void kfd_smi_init(struct kfd_dev *dev) {
+static void kfd_smi_init(struct kfd_dev *dev)
+{
INIT_LIST_HEAD(&dev->smi_clients);
spin_lock_init(&dev->smi_lock);
}
@@ -570,14 +573,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
svm_migrate_init(kfd->adev);
- if(kgd2kfd_resume_iommu(kfd))
+ if (kgd2kfd_resume_iommu(kfd))
goto device_iommu_error;
if (kfd_resume(kfd))
goto kfd_resume_error;
- kfd->dbgmgr = NULL;
-
if (kfd_topology_add_device(kfd)) {
dev_err(kfd_device, "Error adding device to topology\n");
goto kfd_topology_add_device_error;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 4b6814949aad..acf4f7975850 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -58,7 +59,7 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm,
struct queue *q);
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
- struct queue *q);
+ struct queue *q, const uint32_t *restore_sdma_id);
static void kfd_process_hw_exception(struct work_struct *work);
static inline
@@ -144,7 +145,13 @@ static void decrement_queue_count(struct device_queue_manager *dqm,
dqm->active_cp_queue_count--;
}
-static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
+/*
+ * Allocate a doorbell ID to this queue.
+ * If doorbell_id is passed in, make sure requested ID is valid then allocate it.
+ */
+static int allocate_doorbell(struct qcm_process_device *qpd,
+ struct queue *q,
+ uint32_t const *restore_id)
{
struct kfd_dev *dev = qpd->dqm->dev;
@@ -152,6 +159,10 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
/* On pre-SOC15 chips we need to use the queue ID to
* preserve the user mode ABI.
*/
+
+ if (restore_id && *restore_id != q->properties.queue_id)
+ return -EINVAL;
+
q->doorbell_id = q->properties.queue_id;
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
@@ -160,25 +171,37 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
	 * The doorbell index distance between RLC (2*i) and (2*i+1)
	 * for an SDMA engine is 512.
*/
- uint32_t *idx_offset =
- dev->shared_resources.sdma_doorbell_idx;
- q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
- + (q->properties.sdma_queue_id & 1)
- * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
- + (q->properties.sdma_queue_id >> 1);
+ uint32_t *idx_offset = dev->shared_resources.sdma_doorbell_idx;
+ uint32_t valid_id = idx_offset[q->properties.sdma_engine_id]
+ + (q->properties.sdma_queue_id & 1)
+ * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
+ + (q->properties.sdma_queue_id >> 1);
+
+ if (restore_id && *restore_id != valid_id)
+ return -EINVAL;
+ q->doorbell_id = valid_id;
} else {
- /* For CP queues on SOC15 reserve a free doorbell ID */
- unsigned int found;
-
- found = find_first_zero_bit(qpd->doorbell_bitmap,
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
- if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
- pr_debug("No doorbells available");
- return -EBUSY;
+ /* For CP queues on SOC15 */
+ if (restore_id) {
+ /* make sure that ID is free */
+ if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
+ return -EINVAL;
+
+ q->doorbell_id = *restore_id;
+ } else {
+ /* or reserve a free doorbell ID */
+ unsigned int found;
+
+ found = find_first_zero_bit(qpd->doorbell_bitmap,
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+ if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
+ pr_debug("No doorbells available");
+ return -EBUSY;
+ }
+ set_bit(found, qpd->doorbell_bitmap);
+ q->doorbell_id = found;
}
- set_bit(found, qpd->doorbell_bitmap);
- q->doorbell_id = found;
}
q->properties.doorbell_off =
@@ -299,7 +322,9 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
static int create_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
- struct qcm_process_device *qpd)
+ struct qcm_process_device *qpd,
+ const struct kfd_criu_queue_priv_data *qd,
+ const void *restore_mqd, const void *restore_ctl_stack)
{
struct mqd_manager *mqd_mgr;
int retval;
@@ -339,13 +364,13 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
q->pipe, q->queue);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
- retval = allocate_sdma_queue(dqm, q);
+ retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
if (retval)
goto deallocate_vmid;
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
}
- retval = allocate_doorbell(qpd, q);
+ retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
if (retval)
goto out_deallocate_hqd;
@@ -358,8 +383,15 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
retval = -ENOMEM;
goto out_deallocate_doorbell;
}
- mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
- &q->gart_mqd_addr, &q->properties);
+
+ if (qd)
+ mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
+ &q->properties, restore_mqd, restore_ctl_stack,
+ qd->ctl_stack_size);
+ else
+ mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
+ &q->gart_mqd_addr, &q->properties);
+
if (q->properties.is_active) {
if (!dqm->sched_running) {
WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
@@ -449,6 +481,70 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm,
dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}
+#define SQ_IND_CMD_CMD_KILL 0x00000003
+#define SQ_IND_CMD_MODE_BROADCAST 0x00000001
+
+static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
+{
+ int status = 0;
+ unsigned int vmid;
+ uint16_t queried_pasid;
+ union SQ_CMD_BITS reg_sq_cmd;
+ union GRBM_GFX_INDEX_BITS reg_gfx_index;
+ struct kfd_process_device *pdd;
+ int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
+ int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
+
+ reg_sq_cmd.u32All = 0;
+ reg_gfx_index.u32All = 0;
+
+ pr_debug("Killing all process wavefronts\n");
+
+ if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
+ pr_err("no vmid pasid mapping supported \n");
+ return -EOPNOTSUPP;
+ }
+
+ /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
+ * ATC_VMID15_PASID_MAPPING
+ * to check which VMID the current process is mapped to.
+ */
+
+ for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
+ status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
+ (dev->adev, vmid, &queried_pasid);
+
+ if (status && queried_pasid == p->pasid) {
+ pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
+ vmid, p->pasid);
+ break;
+ }
+ }
+
+ if (vmid > last_vmid_to_scan) {
+ pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
+ return -EFAULT;
+ }
+
+ /* taking the VMID for that process on the safe way using PDD */
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd)
+ return -EFAULT;
+
+ reg_gfx_index.bits.sh_broadcast_writes = 1;
+ reg_gfx_index.bits.se_broadcast_writes = 1;
+ reg_gfx_index.bits.instance_broadcast_writes = 1;
+ reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
+ reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
+ reg_sq_cmd.bits.vm_id = vmid;
+
+ dev->kfd2kgd->wave_control_execute(dev->adev,
+ reg_gfx_index.u32All,
+ reg_sq_cmd.u32All);
+
+ return 0;
+}
+
/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
* to avoid asynchronized access
*/
@@ -586,9 +682,9 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
}
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
- (dqm->dev->cwsr_enabled?
- KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
- KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
+ (dqm->dev->cwsr_enabled ?
+ KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
+ KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
if (retval) {
pr_err("destroy mqd failed\n");
@@ -681,9 +777,9 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
continue;
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
- (dqm->dev->cwsr_enabled?
- KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
- KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
+ (dqm->dev->cwsr_enabled ?
+ KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
+ KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
if (retval && !ret)
/* Return the first error, but keep going to
@@ -1008,7 +1104,7 @@ static int start_nocpsch(struct device_queue_manager *dqm)
pr_info("SW scheduler is used");
init_interrupts(dqm);
-
+
if (dqm->dev->adev->asic_type == CHIP_HAWAII)
r = pm_init(&dqm->packet_mgr, dqm);
if (!r)
@@ -1034,7 +1130,7 @@ static void pre_reset(struct device_queue_manager *dqm)
}
static int allocate_sdma_queue(struct device_queue_manager *dqm,
- struct queue *q)
+ struct queue *q, const uint32_t *restore_sdma_id)
{
int bit;
@@ -1044,9 +1140,21 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
return -ENOMEM;
}
- bit = __ffs64(dqm->sdma_bitmap);
- dqm->sdma_bitmap &= ~(1ULL << bit);
- q->sdma_id = bit;
+ if (restore_sdma_id) {
+ /* Re-use existing sdma_id */
+ if (!(dqm->sdma_bitmap & (1ULL << *restore_sdma_id))) {
+ pr_err("SDMA queue already in use\n");
+ return -EBUSY;
+ }
+ dqm->sdma_bitmap &= ~(1ULL << *restore_sdma_id);
+ q->sdma_id = *restore_sdma_id;
+ } else {
+ /* Find first available sdma_id */
+ bit = __ffs64(dqm->sdma_bitmap);
+ dqm->sdma_bitmap &= ~(1ULL << bit);
+ q->sdma_id = bit;
+ }
+
q->properties.sdma_engine_id = q->sdma_id %
kfd_get_num_sdma_engines(dqm->dev);
q->properties.sdma_queue_id = q->sdma_id /
@@ -1056,9 +1164,19 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
pr_err("No more XGMI SDMA queue to allocate\n");
return -ENOMEM;
}
- bit = __ffs64(dqm->xgmi_sdma_bitmap);
- dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
- q->sdma_id = bit;
+ if (restore_sdma_id) {
+ /* Re-use existing sdma_id */
+ if (!(dqm->xgmi_sdma_bitmap & (1ULL << *restore_sdma_id))) {
+ pr_err("SDMA queue already in use\n");
+ return -EBUSY;
+ }
+ dqm->xgmi_sdma_bitmap &= ~(1ULL << *restore_sdma_id);
+ q->sdma_id = *restore_sdma_id;
+ } else {
+ bit = __ffs64(dqm->xgmi_sdma_bitmap);
+ dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
+ q->sdma_id = bit;
+ }
/* sdma_engine_id is sdma id including
* both PCIe-optimized SDMAs and XGMI-
* optimized SDMAs. The calculation below
@@ -1288,7 +1406,9 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
}
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
- struct qcm_process_device *qpd)
+ struct qcm_process_device *qpd,
+ const struct kfd_criu_queue_priv_data *qd,
+ const void *restore_mqd, const void *restore_ctl_stack)
{
int retval;
struct mqd_manager *mqd_mgr;
@@ -1303,13 +1423,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
dqm_lock(dqm);
- retval = allocate_sdma_queue(dqm, q);
+ retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
dqm_unlock(dqm);
if (retval)
goto out;
}
- retval = allocate_doorbell(qpd, q);
+ retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
if (retval)
goto out_deallocate_sdma_queue;
@@ -1334,8 +1454,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
* updates the is_evicted flag but is a no-op otherwise.
*/
q->properties.is_evicted = !!qpd->evicted;
- mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
- &q->gart_mqd_addr, &q->properties);
+
+ if (qd)
+ mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
+ &q->properties, restore_mqd, restore_ctl_stack,
+ qd->ctl_stack_size);
+ else
+ mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
+ &q->gart_mqd_addr, &q->properties);
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
@@ -1434,8 +1560,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
if (!dqm->active_runlist)
return retval;
- retval = pm_send_unmap_queue(&dqm->packet_mgr, KFD_QUEUE_TYPE_COMPUTE,
- filter, filter_param, reset, 0);
+ retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
if (retval)
return retval;
@@ -1738,6 +1863,56 @@ static int get_wave_state(struct device_queue_manager *dqm,
ctl_stack_used_size, save_area_used_size);
}
+static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
+ const struct queue *q,
+ u32 *mqd_size,
+ u32 *ctl_stack_size)
+{
+ struct mqd_manager *mqd_mgr;
+ enum KFD_MQD_TYPE mqd_type =
+ get_mqd_type_from_queue_type(q->properties.type);
+
+ dqm_lock(dqm);
+ mqd_mgr = dqm->mqd_mgrs[mqd_type];
+ *mqd_size = mqd_mgr->mqd_size;
+ *ctl_stack_size = 0;
+
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
+ mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
+
+ dqm_unlock(dqm);
+}
+
+static int checkpoint_mqd(struct device_queue_manager *dqm,
+ const struct queue *q,
+ void *mqd,
+ void *ctl_stack)
+{
+ struct mqd_manager *mqd_mgr;
+ int r = 0;
+ enum KFD_MQD_TYPE mqd_type =
+ get_mqd_type_from_queue_type(q->properties.type);
+
+ dqm_lock(dqm);
+
+ if (q->properties.is_active || !q->device->cwsr_enabled) {
+ r = -EINVAL;
+ goto dqm_unlock;
+ }
+
+ mqd_mgr = dqm->mqd_mgrs[mqd_type];
+ if (!mqd_mgr->checkpoint_mqd) {
+ r = -EOPNOTSUPP;
+ goto dqm_unlock;
+ }
+
+ mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);
+
+dqm_unlock:
+ dqm_unlock(dqm);
+ return r;
+}
+
static int process_termination_cpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
@@ -1915,6 +2090,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.restore_process_queues = restore_process_queues_cpsch;
dqm->ops.get_wave_state = get_wave_state;
dqm->ops.reset_queues = reset_queues_cpsch;
+ dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
+ dqm->ops.checkpoint_mqd = checkpoint_mqd;
break;
case KFD_SCHED_POLICY_NO_HWS:
/* initialize dqm for no cp scheduling */
@@ -1934,6 +2111,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.restore_process_queues =
restore_process_queues_nocpsch;
dqm->ops.get_wave_state = get_wave_state;
+ dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
+ dqm->ops.checkpoint_mqd = checkpoint_mqd;
break;
default:
pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
@@ -2005,7 +2184,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm)
kfree(dqm);
}
-int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid)
+int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
{
struct kfd_process_device *pdd;
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
@@ -2060,8 +2239,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
int r = 0;
if (!dqm->sched_running) {
- seq_printf(m, " Device is stopped\n");
-
+ seq_puts(m, " Device is stopped\n");
return 0;
}
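
The allocate_sdma_queue() and allocate_doorbell() changes above follow one pattern: a normal create takes the first free ID from a bitmap, while a CRIU restore passes in the checkpointed ID and the allocation fails unless that exact bit is still free. Below is a minimal userspace sketch of that pattern; alloc_id() and the 64-entry bitmap are illustrative stand-ins, not the kernel API.

/*
 * Standalone sketch (not kernel code) of restore-aware ID allocation:
 * a fresh create takes the first free bit, a restore insists on the
 * exact bit it owned before the checkpoint.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

static int alloc_id(uint64_t *bitmap, const uint32_t *restore_id,
		    uint32_t *out_id)
{
	if (!*bitmap)
		return -ENOMEM;				/* no IDs left at all */

	if (restore_id) {
		if (!(*bitmap & (1ULL << *restore_id)))
			return -EBUSY;			/* checkpointed ID now taken */
		*out_id = *restore_id;
	} else {
		*out_id = __builtin_ctzll(*bitmap);	/* first free bit, like __ffs64() */
	}
	*bitmap &= ~(1ULL << *out_id);			/* mark the ID as used */
	return 0;
}

int main(void)
{
	uint64_t bitmap = ~0ULL;	/* all 64 IDs free */
	uint32_t id, want = 5;

	alloc_id(&bitmap, NULL, &id);	/* normal create path: id == 0 */
	alloc_id(&bitmap, &want, &id);	/* CRIU restore path: id == 5 */
	printf("restored id %u\n", id);
	return 0;
}
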
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index e145e4deb53a..3d539d6483e0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -39,6 +40,41 @@ struct device_process_node {
struct list_head list;
};
+union SQ_CMD_BITS {
+ struct {
+ uint32_t cmd:3;
+ uint32_t:1;
+ uint32_t mode:3;
+ uint32_t check_vmid:1;
+ uint32_t trap_id:3;
+ uint32_t:5;
+ uint32_t wave_id:4;
+ uint32_t simd_id:2;
+ uint32_t:2;
+ uint32_t queue_id:3;
+ uint32_t:1;
+ uint32_t vm_id:4;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+union GRBM_GFX_INDEX_BITS {
+ struct {
+ uint32_t instance_index:8;
+ uint32_t sh_index:8;
+ uint32_t se_index:8;
+ uint32_t:5;
+ uint32_t sh_broadcast_writes:1;
+ uint32_t instance_broadcast_writes:1;
+ uint32_t se_broadcast_writes:1;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
/**
* struct device_queue_manager_ops
*
@@ -56,7 +92,7 @@ struct device_process_node {
*
* @initialize: Initializes the pipelines and memory module for that device.
*
- * @start: Initializes the resources/modules the the device needs for queues
+ * @start: Initializes the resources/modules the device needs for queues
* execution. This function is called on device initialization and after the
* system woke up after suspension.
*
@@ -77,18 +113,24 @@ struct device_process_node {
*
* @evict_process_queues: Evict all active queues of a process
*
- * @restore_process_queues: Restore all evicted queues queues of a process
+ * @restore_process_queues: Restore all evicted queues of a process
*
* @get_wave_state: Retrieves context save state and optionally copies the
* control stack, if kept in the MQD, to the given userspace address.
*
* @reset_queues: reset queues which consume RAS poison
+ * @get_queue_checkpoint_info: Retrieves queue size information for CRIU checkpoint.
+ *
+ * @checkpoint_mqd: checkpoint queue MQD contents for CRIU.
*/
struct device_queue_manager_ops {
int (*create_queue)(struct device_queue_manager *dqm,
struct queue *q,
- struct qcm_process_device *qpd);
+ struct qcm_process_device *qpd,
+ const struct kfd_criu_queue_priv_data *qd,
+ const void *restore_mqd,
+ const void *restore_ctl_stack);
int (*destroy_queue)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
@@ -139,6 +181,14 @@ struct device_queue_manager_ops {
int (*reset_queues)(struct device_queue_manager *dqm,
uint16_t pasid);
+ void (*get_queue_checkpoint_info)(struct device_queue_manager *dqm,
+ const struct queue *q, u32 *mqd_size,
+ u32 *ctl_stack_size);
+
+ int (*checkpoint_mqd)(struct device_queue_manager *dqm,
+ const struct queue *q,
+ void *mqd,
+ void *ctl_stack);
};
struct device_queue_manager_asic_ops {
@@ -253,9 +303,7 @@ static inline void dqm_unlock(struct device_queue_manager *dqm)
static inline int read_sdma_queue_counter(uint64_t __user *q_rptr, uint64_t *val)
{
- /*
- * SDMA activity counter is stored at queue's RPTR + 0x8 location.
- */
+ /* SDMA activity counter is stored at queue's RPTR + 0x8 location. */
return get_user(*val, q_rptr + 1);
}
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
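
The SQ_CMD_BITS and GRBM_GFX_INDEX_BITS unions above let dbgdev_wave_reset_wavefronts() compose the 32-bit register values handed to wave_control_execute() field by field. The sketch below shows that composition under the assumption of the same field layout; bitfield packing is implementation-defined, so this is illustrative rather than the kernel definition.

/*
 * Illustrative sketch of composing a broadcast wave-kill command the way
 * dbgdev_wave_reset_wavefronts() does; not the kernel code.
 */
#include <stdint.h>
#include <stdio.h>

union sq_cmd_bits {
	struct {
		uint32_t cmd:3;
		uint32_t:1;
		uint32_t mode:3;
		uint32_t check_vmid:1;
		uint32_t trap_id:3;
		uint32_t:5;
		uint32_t wave_id:4;
		uint32_t simd_id:2;
		uint32_t:2;
		uint32_t queue_id:3;
		uint32_t:1;
		uint32_t vm_id:4;
	} bits;
	uint32_t u32All;
};

int main(void)
{
	union sq_cmd_bits cmd = { .u32All = 0 };

	cmd.bits.cmd = 3;	/* SQ_IND_CMD_CMD_KILL */
	cmd.bits.mode = 1;	/* SQ_IND_CMD_MODE_BROADCAST */
	cmd.bits.vm_id = 8;	/* VMID the target process is mapped to */

	/* The packed value is what wave_control_execute() ultimately receives. */
	printf("SQ_CMD = 0x%08x\n", cmd.u32All);
	return 0;
}
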
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 0d26506798cf..b1ab5b0775e1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
index ad0593342333..f1a1f5753e65 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -61,15 +62,6 @@ static int update_qpd_v10(struct device_queue_manager *dqm,
(SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
(3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
-#if 0
- /* TODO:
- * This shouldn't be an issue with Navi10. Verify.
- */
- if (vega10_noretry)
- qpd->sh_mem_config |=
- 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
-#endif
-
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index f20434d9980e..d119070956fb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index 3a7cb2f88366..d7d45832df0f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 0dbcf54657ed..5401b6317f25 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index afe72dd11325..deecccebe5b6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -55,7 +56,6 @@ struct kfd_signal_page {
bool need_to_free_pages;
};
-
static uint64_t *page_slots(struct kfd_signal_page *page)
{
return page->kernel_address;
@@ -92,7 +92,8 @@ fail_alloc_signal_store:
}
static int allocate_event_notification_slot(struct kfd_process *p,
- struct kfd_event *ev)
+ struct kfd_event *ev,
+ const int *restore_id)
{
int id;
@@ -104,14 +105,19 @@ static int allocate_event_notification_slot(struct kfd_process *p,
p->signal_mapped_size = 256*8;
}
- /*
- * Compatibility with old user mode: Only use signal slots
- * user mode has mapped, may be less than
- * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
- * of the event limit without breaking user mode.
- */
- id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
- GFP_KERNEL);
+ if (restore_id) {
+ id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
+ GFP_KERNEL);
+ } else {
+ /*
+ * Compatibility with old user mode: Only use signal slots
+ * user mode has mapped, may be less than
+ * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
+ * of the event limit without breaking user mode.
+ */
+ id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
+ GFP_KERNEL);
+ }
if (id < 0)
return id;
@@ -178,9 +184,8 @@ static struct kfd_event *lookup_signaled_event_by_partial_id(
return ev;
}
-static int create_signal_event(struct file *devkfd,
- struct kfd_process *p,
- struct kfd_event *ev)
+static int create_signal_event(struct file *devkfd, struct kfd_process *p,
+ struct kfd_event *ev, const int *restore_id)
{
int ret;
@@ -193,7 +198,7 @@ static int create_signal_event(struct file *devkfd,
return -ENOSPC;
}
- ret = allocate_event_notification_slot(p, ev);
+ ret = allocate_event_notification_slot(p, ev, restore_id);
if (ret) {
pr_warn("Signal event wasn't created because out of kernel memory\n");
return ret;
@@ -209,16 +214,22 @@ static int create_signal_event(struct file *devkfd,
return 0;
}
-static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
+static int create_other_event(struct kfd_process *p, struct kfd_event *ev, const int *restore_id)
{
- /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
- * intentional integer overflow to -1 without a compiler
- * warning. idr_alloc treats a negative value as "maximum
- * signed integer".
- */
- int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
- (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
- GFP_KERNEL);
+ int id;
+
+ if (restore_id)
+ id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
+ GFP_KERNEL);
+ else
+ /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
+ * intentional integer overflow to -1 without a compiler
+ * warning. idr_alloc treats a negative value as "maximum
+ * signed integer".
+ */
+ id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
+ (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
+ GFP_KERNEL);
if (id < 0)
return id;
@@ -295,8 +306,8 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
return ev->type == KFD_EVENT_TYPE_SIGNAL;
}
-int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
- uint64_t size)
+static int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
+ uint64_t size, uint64_t user_handle)
{
struct kfd_signal_page *page;
@@ -315,10 +326,57 @@ int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
p->signal_page = page;
p->signal_mapped_size = size;
-
+ p->signal_handle = user_handle;
return 0;
}
+int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
+{
+ struct kfd_dev *kfd;
+ struct kfd_process_device *pdd;
+ void *mem, *kern_addr;
+ uint64_t size;
+ int err = 0;
+
+ if (p->signal_page) {
+ pr_err("Event page is already set\n");
+ return -EINVAL;
+ }
+
+ pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(event_page_offset));
+ if (!pdd) {
+ pr_err("Getting device by id failed in %s\n", __func__);
+ return -EINVAL;
+ }
+ kfd = pdd->dev;
+
+ pdd = kfd_bind_process_to_device(kfd, p);
+ if (IS_ERR(pdd))
+ return PTR_ERR(pdd);
+
+ mem = kfd_process_device_translate_handle(pdd,
+ GET_IDR_HANDLE(event_page_offset));
+ if (!mem) {
+ pr_err("Can't find BO, offset is 0x%llx\n", event_page_offset);
+ return -EINVAL;
+ }
+
+ err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->adev,
+ mem, &kern_addr, &size);
+ if (err) {
+ pr_err("Failed to map event page to kernel\n");
+ return err;
+ }
+
+ err = kfd_event_page_set(p, kern_addr, size, event_page_offset);
+ if (err) {
+ pr_err("Failed to set event page\n");
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->adev, mem);
+ return err;
+ }
+ return err;
+}
+
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint32_t event_type, bool auto_reset, uint32_t node_id,
uint32_t *event_id, uint32_t *event_trigger_data,
@@ -343,14 +401,14 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
switch (event_type) {
case KFD_EVENT_TYPE_SIGNAL:
case KFD_EVENT_TYPE_DEBUG:
- ret = create_signal_event(devkfd, p, ev);
+ ret = create_signal_event(devkfd, p, ev, NULL);
if (!ret) {
*event_page_offset = KFD_MMAP_TYPE_EVENTS;
*event_slot_index = ev->event_id;
}
break;
default:
- ret = create_other_event(p, ev);
+ ret = create_other_event(p, ev, NULL);
break;
}
@@ -366,6 +424,166 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
return ret;
}
+int kfd_criu_restore_event(struct file *devkfd,
+ struct kfd_process *p,
+ uint8_t __user *user_priv_ptr,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size)
+{
+ struct kfd_criu_event_priv_data *ev_priv;
+ struct kfd_event *ev = NULL;
+ int ret = 0;
+
+ ev_priv = kmalloc(sizeof(*ev_priv), GFP_KERNEL);
+ if (!ev_priv)
+ return -ENOMEM;
+
+ ev = kzalloc(sizeof(*ev), GFP_KERNEL);
+ if (!ev) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ if (*priv_data_offset + sizeof(*ev_priv) > max_priv_data_size) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ ret = copy_from_user(ev_priv, user_priv_ptr + *priv_data_offset, sizeof(*ev_priv));
+ if (ret) {
+ ret = -EFAULT;
+ goto exit;
+ }
+ *priv_data_offset += sizeof(*ev_priv);
+
+ if (ev_priv->user_handle) {
+ ret = kfd_kmap_event_page(p, ev_priv->user_handle);
+ if (ret)
+ goto exit;
+ }
+
+ ev->type = ev_priv->type;
+ ev->auto_reset = ev_priv->auto_reset;
+ ev->signaled = ev_priv->signaled;
+
+ init_waitqueue_head(&ev->wq);
+
+ mutex_lock(&p->event_mutex);
+ switch (ev->type) {
+ case KFD_EVENT_TYPE_SIGNAL:
+ case KFD_EVENT_TYPE_DEBUG:
+ ret = create_signal_event(devkfd, p, ev, &ev_priv->event_id);
+ break;
+ case KFD_EVENT_TYPE_MEMORY:
+ memcpy(&ev->memory_exception_data,
+ &ev_priv->memory_exception_data,
+ sizeof(struct kfd_hsa_memory_exception_data));
+
+ ret = create_other_event(p, ev, &ev_priv->event_id);
+ break;
+ case KFD_EVENT_TYPE_HW_EXCEPTION:
+ memcpy(&ev->hw_exception_data,
+ &ev_priv->hw_exception_data,
+ sizeof(struct kfd_hsa_hw_exception_data));
+
+ ret = create_other_event(p, ev, &ev_priv->event_id);
+ break;
+ }
+
+exit:
+ if (ret)
+ kfree(ev);
+
+ kfree(ev_priv);
+
+ mutex_unlock(&p->event_mutex);
+
+ return ret;
+}
+
+int kfd_criu_checkpoint_events(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset)
+{
+ struct kfd_criu_event_priv_data *ev_privs;
+ int i = 0;
+ int ret = 0;
+ struct kfd_event *ev;
+ uint32_t ev_id;
+
+ uint32_t num_events = kfd_get_num_events(p);
+
+ if (!num_events)
+ return 0;
+
+ ev_privs = kvzalloc(num_events * sizeof(*ev_privs), GFP_KERNEL);
+ if (!ev_privs)
+ return -ENOMEM;
+
+
+ idr_for_each_entry(&p->event_idr, ev, ev_id) {
+ struct kfd_criu_event_priv_data *ev_priv;
+
+ /*
+	 * Currently, all events have the same size of private_data, but the
+	 * current ioctls and the CRIU plugin support private_data of variable sizes
+ */
+ ev_priv = &ev_privs[i];
+
+ ev_priv->object_type = KFD_CRIU_OBJECT_TYPE_EVENT;
+
+ /* We store the user_handle with the first event */
+ if (i == 0 && p->signal_page)
+ ev_priv->user_handle = p->signal_handle;
+
+ ev_priv->event_id = ev->event_id;
+ ev_priv->auto_reset = ev->auto_reset;
+ ev_priv->type = ev->type;
+ ev_priv->signaled = ev->signaled;
+
+ if (ev_priv->type == KFD_EVENT_TYPE_MEMORY)
+ memcpy(&ev_priv->memory_exception_data,
+ &ev->memory_exception_data,
+ sizeof(struct kfd_hsa_memory_exception_data));
+ else if (ev_priv->type == KFD_EVENT_TYPE_HW_EXCEPTION)
+ memcpy(&ev_priv->hw_exception_data,
+ &ev->hw_exception_data,
+ sizeof(struct kfd_hsa_hw_exception_data));
+
+ pr_debug("Checkpointed event[%d] id = 0x%08x auto_reset = %x type = %x signaled = %x\n",
+ i,
+ ev_priv->event_id,
+ ev_priv->auto_reset,
+ ev_priv->type,
+ ev_priv->signaled);
+ i++;
+ }
+
+ ret = copy_to_user(user_priv_data + *priv_data_offset,
+ ev_privs, num_events * sizeof(*ev_privs));
+ if (ret) {
+ pr_err("Failed to copy events priv to user\n");
+ ret = -EFAULT;
+ }
+
+ *priv_data_offset += num_events * sizeof(*ev_privs);
+
+ kvfree(ev_privs);
+ return ret;
+}
+
+int kfd_get_num_events(struct kfd_process *p)
+{
+ struct kfd_event *ev;
+ uint32_t id;
+ u32 num_events = 0;
+
+ idr_for_each_entry(&p->event_idr, ev, id)
+ num_events++;
+
+ return num_events;
+}
+
/* Assumes that p is current. */
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
{
@@ -878,6 +1096,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
{
struct kfd_hsa_memory_exception_data memory_exception_data;
struct vm_area_struct *vma;
+ int user_gpu_id;
/*
* Because we are called from arbitrary context (workqueue) as opposed
@@ -899,12 +1118,17 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
return; /* Process is exiting */
}
+ user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+ if (unlikely(user_gpu_id == -EINVAL)) {
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ return;
+ }
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
mmap_read_lock(mm);
vma = find_vma(mm, address);
- memory_exception_data.gpu_id = dev->id;
+ memory_exception_data.gpu_id = user_gpu_id;
memory_exception_data.va = address;
/* Set failure reason */
memory_exception_data.failure.NotPresent = 1;
@@ -980,11 +1204,19 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
uint32_t id;
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
struct kfd_hsa_memory_exception_data memory_exception_data;
+ int user_gpu_id;
if (!p)
return; /* Presumably process exited. */
+
+ user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+ if (unlikely(user_gpu_id == -EINVAL)) {
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ return;
+ }
+
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
- memory_exception_data.gpu_id = dev->id;
+ memory_exception_data.gpu_id = user_gpu_id;
memory_exception_data.failure.imprecise = true;
/* Set failure reason */
if (info) {
@@ -1024,27 +1256,34 @@ void kfd_signal_reset_event(struct kfd_dev *dev)
/* Whole gpu reset caused by GPU hang and memory is lost */
memset(&hw_exception_data, 0, sizeof(hw_exception_data));
- hw_exception_data.gpu_id = dev->id;
hw_exception_data.memory_lost = 1;
hw_exception_data.reset_cause = reset_cause;
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
memory_exception_data.ErrorType = KFD_MEM_ERR_SRAM_ECC;
- memory_exception_data.gpu_id = dev->id;
memory_exception_data.failure.imprecise = true;
idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+ int user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+
+ if (unlikely(user_gpu_id == -EINVAL)) {
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ continue;
+ }
+
mutex_lock(&p->event_mutex);
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
idr_for_each_entry_continue(&p->event_idr, ev, id) {
if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
ev->hw_exception_data = hw_exception_data;
+ ev->hw_exception_data.gpu_id = user_gpu_id;
set_event(ev);
}
if (ev->type == KFD_EVENT_TYPE_MEMORY &&
reset_cause == KFD_HW_EXCEPTION_ECC) {
ev->memory_exception_data = memory_exception_data;
+ ev->memory_exception_data.gpu_id = user_gpu_id;
set_event(ev);
}
}
@@ -1060,18 +1299,25 @@ void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid)
struct kfd_hsa_hw_exception_data hw_exception_data;
struct kfd_event *ev;
uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+ int user_gpu_id;
if (!p)
return; /* Presumably process exited. */
+ user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+ if (unlikely(user_gpu_id == -EINVAL)) {
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ return;
+ }
+
memset(&hw_exception_data, 0, sizeof(hw_exception_data));
- hw_exception_data.gpu_id = dev->id;
+ hw_exception_data.gpu_id = user_gpu_id;
hw_exception_data.memory_lost = 1;
hw_exception_data.reset_cause = KFD_HW_EXCEPTION_ECC;
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
memory_exception_data.ErrorType = KFD_MEM_ERR_POISON_CONSUMED;
- memory_exception_data.gpu_id = dev->id;
+ memory_exception_data.gpu_id = user_gpu_id;
memory_exception_data.failure.imprecise = true;
mutex_lock(&p->event_mutex);
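
kfd_criu_checkpoint_events() walks the process event IDR, fills one fixed-size private-data record per event, and copies the whole array to the user buffer at *priv_data_offset, which it then advances; restore later replays each record through create_signal_event()/create_other_event() at the saved event ID. Below is a simplified userspace sketch of the packing side, with an illustrative ev_priv record standing in for the real kfd_criu_event_priv_data layout.

/*
 * Userspace-style sketch of the checkpoint packing: one fixed-size record
 * per event appended at *priv_data_offset. The ev_priv layout is
 * illustrative, not the actual ABI.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct ev_priv {
	uint32_t event_id;	/* IDR id; restore re-allocates exactly this id */
	uint32_t type;
	uint32_t auto_reset;
	uint32_t signaled;
};

static int checkpoint_events(uint8_t *dst, uint64_t *priv_data_offset,
			     const struct ev_priv *events, size_t num_events)
{
	size_t bytes = num_events * sizeof(*events);

	if (!num_events)
		return 0;

	/* The kernel uses copy_to_user() here; memcpy stands in for it. */
	memcpy(dst + *priv_data_offset, events, bytes);
	*priv_data_offset += bytes;	/* caller continues after this blob */
	return 0;
}

int main(void)
{
	struct ev_priv evs[2] = {
		{ .event_id = 0, .type = 0 },			/* signal event */
		{ .event_id = 7, .type = 2, .signaled = 1 },	/* memory event */
	};
	uint8_t buf[256];
	uint64_t off = 0;

	checkpoint_events(buf, &off, evs, 2);
	printf("wrote %zu bytes of event private data\n", (size_t)off);
	return 0;
}
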
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index c8fe5dbdad55..1238af11916e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 2e2b7ceb71db..8aebe408c544 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -34,7 +35,7 @@
#include "kfd_priv.h"
#include <linux/mm.h>
#include <linux/mman.h>
-#include <asm/processor.h>
+#include <linux/processor.h>
/*
* The primary memory I/O features being added for revisions of gfxip
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index e8bc28009c22..7eedbcd14828 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -109,8 +110,7 @@ static void event_interrupt_poison_consumption(struct kfd_dev *dev,
switch (source_id) {
case SOC15_INTSRC_SQ_INTERRUPT_MSG:
- if (dev->dqm->ops.reset_queues)
- ret = dev->dqm->ops.reset_queues(dev->dqm, pasid);
+ ret = kfd_dqm_evict_pasid(dev->dqm, pasid);
break;
case SOC15_INTSRC_SDMA_ECC:
default:
@@ -120,7 +120,8 @@ static void event_interrupt_poison_consumption(struct kfd_dev *dev,
kfd_signal_poison_consumed_event(dev, pasid);
/* resetting queue passes, do page retirement without gpu reset
- resetting queue fails, fallback to gpu reset solution */
+ * resetting queue fails, fallback to gpu reset solution
+ */
if (!ret)
amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false);
else
@@ -308,7 +309,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
info.prot_write = ring_id & 0x20;
kfd_smi_event_update_vmfault(dev, pasid);
- kfd_process_vm_fault(dev->dqm, pasid);
+ kfd_dqm_evict_pasid(dev->dqm, pasid);
kfd_signal_vm_fault_event(dev, pasid, &info);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 81887c2013c9..9178cfe34f20 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -57,14 +58,14 @@ int kfd_interrupt_init(struct kfd_dev *kfd)
KFD_IH_NUM_ENTRIES * kfd->device_info.ih_ring_entry_size,
GFP_KERNEL);
if (r) {
- dev_err(kfd_chardev(), "Failed to allocate IH fifo\n");
+ dev_err(kfd->adev->dev, "Failed to allocate IH fifo\n");
return r;
}
kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
if (unlikely(!kfd->ih_wq)) {
kfifo_free(&kfd->ih_fifo);
- dev_err(kfd_chardev(), "Failed to allocate KFD IH workqueue\n");
+ dev_err(kfd->adev->dev, "Failed to allocate KFD IH workqueue\n");
return -ENOMEM;
}
spin_lock_init(&kfd->interrupt_lock);
@@ -116,7 +117,7 @@ bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
count = kfifo_in(&kfd->ih_fifo, ih_ring_entry,
kfd->device_info.ih_ring_entry_size);
if (count != kfd->device_info.ih_ring_entry_size) {
- dev_err_ratelimited(kfd_chardev(),
+ dev_dbg_ratelimited(kfd->adev->dev,
"Interrupt ring overflow, dropping interrupt %d\n",
count);
return false;
@@ -147,7 +148,7 @@ static void interrupt_wq(struct work_struct *work)
uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
- dev_err_once(kfd_chardev(), "Ring entry too small\n");
+ dev_err_once(dev->adev->dev, "Ring entry too small\n");
return;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
index 66ad8d0b8f7f..fbd0afe4da42 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -30,7 +31,6 @@
#include <linux/pci.h>
#include <linux/amd-iommu.h>
#include "kfd_priv.h"
-#include "kfd_dbgmgr.h"
#include "kfd_topology.h"
#include "kfd_iommu.h"
@@ -163,17 +163,6 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, u32 pasid)
pr_debug("Unbinding process 0x%x from IOMMU\n", pasid);
- mutex_lock(kfd_get_dbgmgr_mutex());
-
- if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
- if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
- kfd_dbgmgr_destroy(dev->dbgmgr);
- dev->dbgmgr = NULL;
- }
- }
-
- mutex_unlock(kfd_get_dbgmgr_mutex());
-
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(dev, p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h
index afd420b01a0c..8cf0fcbe87c2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 16f8bc4ca7f6..bcf7bc3302c9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index f4cfe9f1871c..383202fd1ea2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index e27ca3758762..7e3a7fcb9fe6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -37,7 +37,7 @@
#ifdef dev_fmt
#undef dev_fmt
#endif
-#define dev_fmt(fmt) "kfd_migrate: %s: " fmt, __func__
+#define dev_fmt(fmt) "kfd_migrate: " fmt
static uint64_t
svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
@@ -87,10 +87,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
cpu_addr = &job->ibs[0].ptr[num_dw];
- r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
- if (r)
- goto error_free;
-
+ amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
if (r)
@@ -315,7 +312,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
r = svm_range_vram_node_new(adev, prange, true);
if (r) {
- dev_err(adev->dev, "fail %d to alloc vram\n", r);
+ dev_dbg(adev->dev, "fail %d to alloc vram\n", r);
goto out;
}
@@ -334,7 +331,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
DMA_TO_DEVICE);
r = dma_mapping_error(dev, src[i]);
if (r) {
- dev_err(adev->dev, "fail %d dma_map_page\n", r);
+ dev_err(adev->dev, "%s: fail %d dma_map_page\n",
+ __func__, r);
goto out_free_vram_pages;
}
} else {
@@ -365,7 +363,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
if (r)
goto out_free_vram_pages;
amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE);
- j= 0;
+ j = 0;
} else {
j++;
}
@@ -435,8 +433,8 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
r = migrate_vma_setup(&migrate);
if (r) {
- dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r,
- prange->start, prange->last);
+ dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
+ __func__, r, prange->start, prange->last);
goto out_free;
}
@@ -614,7 +612,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
r = dma_mapping_error(dev, dst[i]);
if (r) {
- dev_err(adev->dev, "fail %d dma_map_page\n", r);
+ dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
goto out_oom;
}
@@ -640,6 +638,22 @@ out_oom:
return r;
}
+/**
+ * svm_migrate_vma_to_ram - migrate range inside one vma from device to system
+ *
+ * @adev: amdgpu device to migrate from
+ * @prange: svm range structure
+ * @vma: vm_area_struct that range [start, end] belongs to
+ * @start: range start virtual address in pages
+ * @end: range end virtual address in pages
+ *
+ * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
+ *
+ * Return:
+ * 0 - success with all pages migrated
+ * negative values - indicate error
+ * positive values - partial migration, number of pages not migrated
+ */
static long
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start, uint64_t end)
@@ -674,8 +688,8 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
r = migrate_vma_setup(&migrate);
if (r) {
- dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r,
- prange->start, prange->last);
+ dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
+ __func__, r, prange->start, prange->last);
goto out_free;
}
@@ -711,8 +725,6 @@ out:
pdd = svm_range_get_pdd_by_adev(prange, adev);
if (pdd)
WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
-
- return upages;
}
return r ? r : upages;
}
@@ -722,7 +734,7 @@ out:
* @prange: range structure
* @mm: process mm, use current->mm if NULL
*
- * Context: Process context, caller hold mmap read lock, svms lock, prange lock
+ * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
*
* Return:
* 0 - OK, otherwise error code
@@ -761,13 +773,16 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
unsigned long next;
vma = find_vma(mm, addr);
- if (!vma || addr < vma->vm_start)
+ if (!vma || addr < vma->vm_start) {
+ pr_debug("failed to find vma for prange %p\n", prange);
+ r = -EFAULT;
break;
+ }
next = min(vma->vm_end, end);
r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
if (r < 0) {
- pr_debug("failed %ld to migrate\n", r);
+ pr_debug("failed %ld to migrate prange %p\n", r, prange);
break;
} else {
upages += r;
@@ -775,7 +790,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
addr = next;
}
- if (!upages) {
+ if (r >= 0 && !upages) {
svm_range_vram_node_free(prange);
prange->actual_loc = 0;
}
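
The svm_migrate_vma_to_ram() kerneldoc above defines a three-way return convention: negative on error, zero when every page migrated, and a positive count of pages left behind. svm_migrate_vram_to_ram() accumulates those counts and only frees the VRAM node when no error occurred and nothing remains resident. A small sketch of that caller pattern, with migrate_chunk() as an invented stand-in:

/*
 * Sketch of the partial-migration return convention: negative is an
 * error, zero means everything moved, a positive value is the number of
 * pages still resident in VRAM.
 */
#include <stdio.h>

static long migrate_chunk(long npages, int fail)
{
	if (fail)
		return -14;		/* EFAULT-style error */
	return npages / 4;		/* pretend a quarter could not move */
}

int main(void)
{
	long chunks[] = { 16, 8, 32 };
	long r = 0, upages = 0;
	int i;

	for (i = 0; i < 3; i++) {
		r = migrate_chunk(chunks[i], 0);
		if (r < 0)
			break;		/* stop on error, keep the VRAM node */
		upages += r;		/* accumulate pages left behind */
	}

	if (r >= 0 && !upages)
		printf("all pages migrated, VRAM node can be freed\n");
	else
		printf("partial migration, %ld pages still in VRAM\n", upages);
	return 0;
}
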
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 5e90fe642192..09b966dc3768 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index e2825ad4d699..5ac209209613 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -100,6 +101,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
struct kfd_cu_info cu_info;
uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
int i, se, sh, cu;
+
amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
if (cu_mask_count > cu_info.cu_active_number)
@@ -173,3 +175,66 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
}
}
}
+
+int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,
+ queue_id, p->doorbell_off);
+}
+
+int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type, timeout,
+ pipe_id, queue_id);
+}
+
+void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+ if (mqd_mem_obj->gtt_mem) {
+ amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, mqd_mem_obj->gtt_mem);
+ kfree(mqd_mem_obj);
+ } else {
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+ }
+}
+
+bool kfd_is_occupied_cp(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev, queue_address,
+ pipe_id, queue_id);
+}
+
+int kfd_load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
+ (uint32_t __user *)p->write_ptr,
+ mms);
+}
+
+/*
+ * preempt type here is ignored because there is only one way
+ * to preempt sdma queue
+ */
+int kfd_destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
+}
+
+bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
+}
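
The helpers added above (kfd_hiq_load_mqd_kiq(), kfd_destroy_mqd_cp(), kfd_free_mqd_cp(), kfd_is_occupied_sdma() and friends) exist so each ASIC-specific mqd_manager can point its ops at one shared implementation instead of keeping its own static copy, as the CIK changes below then do. A tiny sketch of that ops-table deduplication, with illustrative types rather than the real mqd_manager layout:

/*
 * Sketch of ops-table deduplication: several ASIC back-ends point at one
 * shared helper instead of identical static copies.
 */
#include <stdio.h>

struct mqd_ops {
	int (*destroy)(void *mqd, unsigned int timeout_ms);
};

/* One shared implementation replaces the per-file duplicates. */
static int shared_destroy(void *mqd, unsigned int timeout_ms)
{
	printf("destroy mqd %p, timeout %u ms\n", mqd, timeout_ms);
	return 0;
}

static const struct mqd_ops cik_ops = { .destroy = shared_destroy };
static const struct mqd_ops vi_ops  = { .destroy = shared_destroy };

int main(void)
{
	int dummy;

	cik_ops.destroy((void *)&dummy, 100);
	vi_ops.destroy((void *)&dummy, 100);
	return 0;
}
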
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 965e17c5dbb4..57f900ccaa10 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -100,6 +101,20 @@ struct mqd_manager {
u32 *ctl_stack_used_size,
u32 *save_area_used_size);
+ void (*get_checkpoint_info)(struct mqd_manager *mm, void *mqd, uint32_t *ctl_stack_size);
+
+ void (*checkpoint_mqd)(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst);
+
+ void (*restore_mqd)(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *p,
+ const void *mqd_src,
+ const void *ctl_stack_src,
+ const u32 ctl_stack_size);
+
#if defined(CONFIG_DEBUG_FS)
int (*debugfs_show_mqd)(struct seq_file *m, void *data);
#endif
@@ -122,4 +137,31 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask);
+int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms);
+
+int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id);
+
+void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj);
+
+bool kfd_is_occupied_cp(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id);
+
+int kfd_load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms);
+
+int kfd_destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id);
+
+bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id);
+
#endif /* KFD_MQD_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index e9a8e21e144e..4889865c725c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -156,13 +157,6 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
mm->update_mqd(mm, m, q, NULL);
}
-static void free_mqd(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-}
-
-
static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
uint32_t queue_id, struct queue_properties *p,
struct mm_struct *mms)
@@ -176,15 +170,6 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
wptr_shift, wptr_mask, mms);
}
-static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
- (uint32_t __user *)p->write_ptr,
- mms);
-}
-
static void __update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q, struct mqd_update_info *minfo,
unsigned int atc_bit)
@@ -271,42 +256,75 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static int destroy_mqd(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
{
- return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type, timeout,
- pipe_id, queue_id);
+ struct cik_mqd *m;
+
+ m = get_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct cik_mqd));
}
-/*
- * preempt type here is ignored because there is only one way
- * to preempt sdma queue
- */
-static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
+ uint64_t addr;
+ struct cik_mqd *m;
+
+ m = (struct cik_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(qp->doorbell_off);
+
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ qp->is_active = 0;
}
-static bool is_occupied(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst)
{
+ struct cik_sdma_rlc_registers *m;
- return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev, queue_address,
- pipe_id, queue_id);
+ m = get_sdma_mqd(mqd);
+ memcpy(mqd_dst, m, sizeof(struct cik_sdma_rlc_registers));
}
-static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
+ uint64_t addr;
+ struct cik_sdma_rlc_registers *m;
+
+ m = (struct cik_sdma_rlc_registers *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ m->sdma_rlc_doorbell =
+ qp->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ qp->is_active = 0;
}
/*
@@ -389,11 +407,13 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_CP:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
+ mqd->checkpoint_mqd = checkpoint_mqd;
+ mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct cik_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -405,8 +425,8 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct cik_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -416,11 +436,11 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct cik_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -430,10 +450,12 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_sdma_mqd;
mqd->init_mqd = init_mqd_sdma;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = load_mqd_sdma;
+ mqd->load_mqd = kfd_load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
- mqd->destroy_mqd = destroy_mqd_sdma;
- mqd->is_occupied = is_occupied_sdma;
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+ mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
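One pattern repeats in every restore path of this series: the saved MQD image is copied back verbatim, but the doorbell field is then overwritten from the new queue_properties, because the doorbell assigned after restore generally differs from the one captured at checkpoint. A user-space model of that re-patching follows; the shift value is a placeholder, not the real SDMA0_RLC0_DOORBELL__OFFSET__SHIFT.

/* Illustrative model only; field layout and shift value are made up. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define FAKE_DOORBELL_SHIFT 2

struct fake_sdma_mqd {
	uint32_t sdma_rlc_doorbell;
	uint32_t other[7];
};

static void restore_sdma_mqd(struct fake_sdma_mqd *m, const void *mqd_src,
			     uint32_t new_doorbell_off)
{
	memcpy(m, mqd_src, sizeof(*m));			/* take the saved image */
	m->sdma_rlc_doorbell =				/* ...but use the new doorbell */
		new_doorbell_off << FAKE_DOORBELL_SHIFT;
}

int main(void)
{
	struct fake_sdma_mqd saved = { .sdma_rlc_doorbell = 0x10 << FAKE_DOORBELL_SHIFT };
	struct fake_sdma_mqd restored;

	restore_sdma_mqd(&restored, &saved, 0x22);
	printf("doorbell after restore: 0x%x\n", restored.sdma_rlc_doorbell);
	return 0;
}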
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index d74d8a6ac27a..d3e2b6a599a4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -154,14 +155,6 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
return r;
}
-static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,
- queue_id, p->doorbell_off);
-}
-
static void update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q,
struct mqd_update_info *minfo)
@@ -213,7 +206,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
/* GC 10 removed WPP_CLAMP from PQ Control */
m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
- 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT ;
+ 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT;
m->cp_hqd_pq_doorbell_control |=
1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
}
@@ -233,31 +226,6 @@ static uint32_t read_doorbell_id(void *mqd)
return m->queue_doorbell_id0;
}
-static int destroy_mqd(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_destroy
- (mm->dev->adev, mqd, type, timeout,
- pipe_id, queue_id);
-}
-
-static void free_mqd(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-}
-
-static bool is_occupied(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_is_occupied(
- mm->dev->adev, queue_address,
- pipe_id, queue_id);
-}
-
static int get_wave_state(struct mqd_manager *mm, void *mqd,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
@@ -285,6 +253,42 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
return 0;
}
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+ struct v10_compute_mqd *m;
+
+ m = get_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct v10_compute_mqd));
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+ uint64_t addr;
+ struct v10_compute_mqd *m;
+
+ m = (struct v10_compute_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ m->cp_hqd_pq_doorbell_control =
+ qp->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ qp->is_active = 0;
+}
+
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -316,15 +320,6 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
mm->update_mqd(mm, m, q, NULL);
}
-static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
- (uint32_t __user *)p->write_ptr,
- mms);
-}
-
#define SDMA_RLC_DUMMY_DEFAULT 0xf
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
@@ -354,23 +349,41 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-/*
- * * preempt type here is ignored because there is only one way
- * * to preempt sdma queue
- */
-static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
+ struct v10_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct v10_sdma_mqd));
}
-static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src,
+ const u32 ctl_stack_size)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
+ uint64_t addr;
+ struct v10_sdma_mqd *m;
+
+ m = (struct v10_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ m->sdmax_rlcx_doorbell_offset =
+ qp->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ qp->is_active = 0;
}
#if defined(CONFIG_DEBUG_FS)
@@ -410,13 +423,15 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
pr_debug("%s@%i\n", __func__, __LINE__);
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
mqd->get_wave_state = get_wave_state;
+ mqd->checkpoint_mqd = checkpoint_mqd;
+ mqd->restore_mqd = restore_mqd;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
@@ -427,10 +442,10 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_hiq_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = hiq_load_mqd_kiq;
+ mqd->load_mqd = kfd_hiq_load_mqd_kiq;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -441,11 +456,11 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -456,10 +471,12 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_sdma_mqd;
mqd->init_mqd = init_mqd_sdma;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = load_mqd_sdma;
+ mqd->load_mqd = kfd_load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
- mqd->destroy_mqd = destroy_mqd_sdma;
- mqd->is_occupied = is_occupied_sdma;
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+ mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct v10_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 326eb2285029..0778e587a2d6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -204,14 +205,6 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
wptr_shift, 0, mms);
}
-static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,
- queue_id, p->doorbell_off);
-}
-
static void update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q,
struct mqd_update_info *minfo)
@@ -285,38 +278,6 @@ static uint32_t read_doorbell_id(void *mqd)
return m->queue_doorbell_id0;
}
-static int destroy_mqd(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_destroy
- (mm->dev->adev, mqd, type, timeout,
- pipe_id, queue_id);
-}
-
-static void free_mqd(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- struct kfd_dev *kfd = mm->dev;
-
- if (mqd_mem_obj->gtt_mem) {
- amdgpu_amdkfd_free_gtt_mem(kfd->adev, mqd_mem_obj->gtt_mem);
- kfree(mqd_mem_obj);
- } else {
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
- }
-}
-
-static bool is_occupied(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_is_occupied(
- mm->dev->adev, queue_address,
- pipe_id, queue_id);
-}
-
static int get_wave_state(struct mqd_manager *mm, void *mqd,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
@@ -340,6 +301,57 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
return 0;
}
+static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
+{
+ struct v9_mqd *m = get_mqd(mqd);
+
+ *ctl_stack_size = m->cp_hqd_cntl_stack_size;
+}
+
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+ struct v9_mqd *m;
+ /* Control stack is located one page after MQD. */
+ void *ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
+
+ m = get_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct v9_mqd));
+ memcpy(ctl_stack_dst, ctl_stack, m->cp_hqd_cntl_stack_size);
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, u32 ctl_stack_size)
+{
+ uint64_t addr;
+ struct v9_mqd *m;
+ void *ctl_stack;
+
+ m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ /* Control stack is located one page after MQD. */
+ ctl_stack = (void *)((uintptr_t)*mqd + PAGE_SIZE);
+ memcpy(ctl_stack, ctl_stack_src, ctl_stack_size);
+
+ m->cp_hqd_pq_doorbell_control =
+ qp->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ qp->is_active = 0;
+}
+
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -371,15 +383,6 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
mm->update_mqd(mm, m, q, NULL);
}
-static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
- (uint32_t __user *)p->write_ptr,
- mms);
-}
-
#define SDMA_RLC_DUMMY_DEFAULT 0xf
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
@@ -409,23 +412,40 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-/*
- * * preempt type here is ignored because there is only one way
- * * to preempt sdma queue
- */
-static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
+ struct v9_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct v9_sdma_mqd));
}
-static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
+ uint64_t addr;
+ struct v9_sdma_mqd *m;
+
+ m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ m->sdmax_rlcx_doorbell_offset =
+ qp->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ qp->is_active = 0;
}
#if defined(CONFIG_DEBUG_FS)
@@ -464,12 +484,15 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_CP:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->get_wave_state = get_wave_state;
+ mqd->get_checkpoint_info = get_checkpoint_info;
+ mqd->checkpoint_mqd = checkpoint_mqd;
+ mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -479,10 +502,10 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_hiq_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = hiq_load_mqd_kiq;
+ mqd->load_mqd = kfd_hiq_load_mqd_kiq;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -492,11 +515,11 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -506,10 +529,12 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_sdma_mqd;
mqd->init_mqd = init_mqd_sdma;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = load_mqd_sdma;
+ mqd->load_mqd = kfd_load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
- mqd->destroy_mqd = destroy_mqd_sdma;
- mqd->is_occupied = is_occupied_sdma;
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+ mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct v9_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
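GFX9 is the only generation in this series that snapshots the control stack from kernel memory: it lives in the same allocation as the MQD, one page further in, so checkpoint_mqd() and restore_mqd() simply offset the MQD base pointer by PAGE_SIZE. A small user-space model of that layout, assuming a 4 KiB page and made-up sizes:

/* Illustrative layout model; sizes and page size are assumptions. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define FAKE_PAGE_SIZE	4096u
#define FAKE_MQD_SIZE	512u	/* stands in for sizeof(struct v9_mqd) */

int main(void)
{
	/* one allocation: the MQD at offset 0, the control stack one page in */
	uint8_t *mqd = calloc(2, FAKE_PAGE_SIZE);
	uint8_t *ctl_stack = mqd + FAKE_PAGE_SIZE;
	uint32_t ctl_stack_size = 256;	/* would come from cp_hqd_cntl_stack_size */

	uint8_t *mqd_dst = malloc(FAKE_MQD_SIZE);
	uint8_t *ctl_dst = malloc(ctl_stack_size);

	/* checkpoint: copy both regions out */
	memcpy(mqd_dst, mqd, FAKE_MQD_SIZE);
	memcpy(ctl_dst, ctl_stack, ctl_stack_size);

	/* restore: copy both regions back into the fresh allocation */
	memcpy(mqd, mqd_dst, FAKE_MQD_SIZE);
	memcpy(ctl_stack, ctl_dst, ctl_stack_size);

	printf("round-tripped %u-byte MQD and %u-byte control stack\n",
	       FAKE_MQD_SIZE, ctl_stack_size);
	free(mqd_dst);
	free(ctl_dst);
	free(mqd);
	return 0;
}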
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index d456e950ce1d..530ba6f5b57e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -259,31 +260,6 @@ static void update_mqd_tonga(struct mqd_manager *mm, void *mqd,
__update_mqd(mm, mqd, q, minfo, MTYPE_UC, 0);
}
-static int destroy_mqd(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_destroy
- (mm->dev->adev, mqd, type, timeout,
- pipe_id, queue_id);
-}
-
-static void free_mqd(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-}
-
-static bool is_occupied(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_is_occupied(
- mm->dev->adev, queue_address,
- pipe_id, queue_id);
-}
-
static int get_wave_state(struct mqd_manager *mm, void *mqd,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
@@ -306,11 +282,54 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
return 0;
}
+static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
+{
+ /* Control stack is stored in user mode */
+ *ctl_stack_size = 0;
+}
+
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+ struct vi_mqd *m;
+
+ m = get_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct vi_mqd));
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+ uint64_t addr;
+ struct vi_mqd *m;
+
+ m = (struct vi_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ m->cp_hqd_pq_doorbell_control =
+ qp->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ qp->is_active = 0;
+}
+
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
{
struct vi_mqd *m;
+
init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
m = get_mqd(*mqd);
@@ -343,15 +362,6 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
mm->update_mqd(mm, m, q, NULL);
}
-static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
- (uint32_t __user *)p->write_ptr,
- mms);
-}
-
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
struct queue_properties *q,
struct mqd_update_info *minfo)
@@ -380,27 +390,45 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-/*
- * * preempt type here is ignored because there is only one way
- * * to preempt sdma queue
- */
-static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
+ struct vi_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct vi_sdma_mqd));
}
-static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
+ uint64_t addr;
+ struct vi_sdma_mqd *m;
+
+ m = (struct vi_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ m->sdmax_rlcx_doorbell =
+ qp->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ qp->is_active = 0;
}
#if defined(CONFIG_DEBUG_FS)
+
static int debugfs_show_mqd(struct seq_file *m, void *data)
{
seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
@@ -435,12 +463,15 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_CP:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->get_wave_state = get_wave_state;
+ mqd->get_checkpoint_info = get_checkpoint_info;
+ mqd->checkpoint_mqd = checkpoint_mqd;
+ mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -452,8 +483,8 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -463,11 +494,11 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -477,10 +508,12 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_sdma_mqd;
mqd->init_mqd = init_mqd_sdma;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = load_mqd_sdma;
+ mqd->load_mqd = kfd_load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
- mqd->destroy_mqd = destroy_mqd_sdma;
- mqd->is_occupied = is_occupied_sdma;
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+ mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct vi_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 1439420925a0..ed02b6d8bf63 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -368,10 +369,9 @@ out:
return retval;
}
-int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
+int pm_send_unmap_queue(struct packet_manager *pm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine)
+ uint32_t filter_param, bool reset)
{
uint32_t *buffer, size;
int retval = 0;
@@ -386,8 +386,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
goto out;
}
- retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
- reset, sdma_engine);
+ retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);
if (!retval)
kq_submit_packet(pm->priv_queue);
else
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 7ea3f671b325..18250845a989 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -178,6 +179,11 @@ static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
+static inline bool pm_use_ext_eng(struct kfd_dev *dev)
+{
+ return dev->adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 2, 0);
+}
+
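pm_use_ext_eng() keys the packet encoding off the SDMA IP version rather than the ASIC name: engines at SDMA 5.2.0 or newer select SDMA queues through the extended engine_sel field of the MES packets, which is why the per-engine switch statements below can be dropped. IP_VERSION packs major/minor/revision into a single integer so an ordinary >= comparison is enough; the packing shown here mirrors the upstream macro but should be read as an assumption of this sketch.

#include <stdio.h>

/* assumed definition of amdgpu's IP_VERSION() packing */
#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))

int main(void)
{
	unsigned int sdma_ver = IP_VERSION(5, 2, 2);	/* example device */

	printf("extended engine_sel in use: %s\n",
	       sdma_ver >= IP_VERSION(5, 2, 0) ? "yes" : "no");
	return 0;
}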
static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
struct queue *q, bool is_static)
{
@@ -214,7 +220,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
use_static = false; /* no static queues under SDMA */
- if (q->properties.sdma_engine_id < 2)
+ if (q->properties.sdma_engine_id < 2 && !pm_use_ext_eng(q->device))
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
engine_sel__mes_map_queues__sdma0_vi;
else {
@@ -246,10 +252,8 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
}
static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
- enum kfd_queue_type type,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine)
+ uint32_t filter_param, bool reset)
{
struct pm4_mes_unmap_queues *packet;
@@ -258,31 +262,13 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
sizeof(struct pm4_mes_unmap_queues));
- switch (type) {
- case KFD_QUEUE_TYPE_COMPUTE:
- case KFD_QUEUE_TYPE_DIQ:
- packet->bitfields2.extended_engine_sel =
- extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__compute;
- break;
- case KFD_QUEUE_TYPE_SDMA:
- case KFD_QUEUE_TYPE_SDMA_XGMI:
- if (sdma_engine < 2) {
- packet->bitfields2.extended_engine_sel =
- extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
- } else {
- packet->bitfields2.extended_engine_sel =
- extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel;
- packet->bitfields2.engine_sel = sdma_engine;
- }
- break;
- default:
- WARN(1, "queue type %d", type);
- return -EINVAL;
- }
+
+ packet->bitfields2.extended_engine_sel = pm_use_ext_eng(pm->dqm->dev) ?
+ extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel :
+ extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
+
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__compute;
if (reset)
packet->bitfields2.action =
@@ -292,12 +278,6 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
action__mes_unmap_queues__preempt_queues;
switch (filter) {
- case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
- packet->bitfields2.num_queues = 1;
- packet->bitfields3b.doorbell_offset0 = filter_param;
- break;
case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
packet->bitfields2.queue_sel =
queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
index 3c0658e32e93..4f951eaa6ee8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -197,10 +198,8 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
}
static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
- enum kfd_queue_type type,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine)
+ uint32_t filter_param, bool reset)
{
struct pm4_mes_unmap_queues *packet;
@@ -209,21 +208,9 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
sizeof(struct pm4_mes_unmap_queues));
- switch (type) {
- case KFD_QUEUE_TYPE_COMPUTE:
- case KFD_QUEUE_TYPE_DIQ:
- packet->bitfields2.engine_sel =
+
+ packet->bitfields2.engine_sel =
engine_sel__mes_unmap_queues__compute;
- break;
- case KFD_QUEUE_TYPE_SDMA:
- case KFD_QUEUE_TYPE_SDMA_XGMI:
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
- break;
- default:
- WARN(1, "queue type %d", type);
- return -EINVAL;
- }
if (reset)
packet->bitfields2.action =
@@ -233,12 +220,6 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
action__mes_unmap_queues__preempt_queues;
switch (filter) {
- case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
- packet->bitfields2.num_queues = 1;
- packet->bitfields3b.doorbell_offset0 = filter_param;
- break;
case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
packet->bitfields2.queue_sel =
queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
index af5816f51e55..e3b250918f39 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
index e50f73d25de6..7274edfd3f38 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
index 4d7add843746..a666710ed403 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2016 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
index f795ec815e2a..38f5cb6a222a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2020 Advanced Micro Devices, Inc.
+ * Copyright 2020-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
index a0ff34878163..f9cd28690151 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
index 5466cfe1c3cc..8147395c083b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -101,8 +102,8 @@ struct pm4_mes_set_resources {
struct pm4_mes_runlist {
union {
- union PM4_MES_TYPE_3_HEADER header; /* header */
- uint32_t ordinal1;
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
};
union {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
index b72fa3b8c2d4..5bfd0f9cbe23 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 6d643b4b791d..9967a73d5b0f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -122,7 +123,26 @@
*/
#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512
-
+/**
+ * enum kfd_ioctl_flags - KFD ioctl flags
+ * Various flags that can be set in &amdkfd_ioctl_desc.flags to control how
+ * userspace can use a given ioctl.
+ */
+enum kfd_ioctl_flags {
+ /*
+ * @KFD_IOC_FLAG_CHECKPOINT_RESTORE:
+ * Certain KFD ioctls such as AMDKFD_IOC_CRIU_OP can potentially
+ * perform privileged operations and load arbitrary data into MQDs and
+ * eventually HQD registers when the queue is mapped by HWS. In order to
+ * prevent this, additional security checks are performed: these ioctls
+ * are only allowed for callers with the CHECKPOINT_RESTORE capability.
+ *
+ * Note: Since earlier versions of Docker do not support CHECKPOINT_RESTORE,
+ * we also allow callers with the SYS_ADMIN capability.
+ */
+ KFD_IOC_FLAG_CHECKPOINT_RESTORE = BIT(0),
+};
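The flag itself is consumed in the kfd_ioctl() dispatcher, which is outside this hunk; the gate it implies is roughly the following (a simplified sketch using the kernel's capable() helper, not the exact dispatcher code; the descriptor's flags field name is assumed):

	/* Sketch: reject privileged CRIU ioctls from unprivileged callers. */
	if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) &&
	    !capable(CAP_CHECKPOINT_RESTORE) &&
	    !capable(CAP_SYS_ADMIN))
		return -EACCES;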
/*
* Kernel module parameter to specify maximum number of supported queues per
* device
@@ -282,9 +302,6 @@ struct kfd_dev {
*/
bool interrupts_active;
- /* Debug manager */
- struct kfd_dbgmgr *dbgmgr;
-
/* Firmware versions */
uint16_t mec_fw_version;
uint16_t mec2_fw_version;
@@ -339,25 +356,24 @@ enum kfd_mempool {
/* Character device interface */
int kfd_chardev_init(void);
void kfd_chardev_exit(void);
-struct device *kfd_chardev(void);
/**
* enum kfd_unmap_queues_filter - Enum for queue filters.
*
- * @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts single queue.
- *
* @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the
* running queues list.
*
+ * @KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: Preempts all non-static queues
+ * in the run list.
+ *
 * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belong to a
 * specific process.
*
*/
enum kfd_unmap_queues_filter {
- KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE,
- KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
- KFD_UNMAP_QUEUES_FILTER_BY_PASID
+ KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES = 1,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES = 2,
+ KFD_UNMAP_QUEUES_FILTER_BY_PASID = 3
};
/**
@@ -443,6 +459,7 @@ enum KFD_QUEUE_PRIORITY {
* it's user mode or kernel mode queue.
*
*/
+
struct queue_properties {
enum kfd_queue_type type;
enum kfd_queue_format format;
@@ -755,6 +772,12 @@ struct kfd_process_device {
uint64_t faults;
uint64_t page_in;
uint64_t page_out;
+ /*
+ * If this process has been checkpointed before, then the user
+ * application will use the original gpu_id on the
+ * checkpointed node to refer to this device.
+ */
+ uint32_t user_gpu_id;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -765,12 +788,13 @@ struct svm_range_list {
struct list_head list;
struct work_struct deferred_list_work;
struct list_head deferred_range_list;
+ struct list_head criu_svm_metadata_list;
spinlock_t deferred_list_lock;
atomic_t evicted_ranges;
atomic_t drain_pagefaults;
struct delayed_work restore_work;
DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
- struct task_struct *faulting_task;
+ struct task_struct *faulting_task;
};
/* Process data */
@@ -859,6 +883,8 @@ struct kfd_process {
bool xnack_enabled;
atomic_t poison;
+ /* Queues are in a paused state because we are in the process of doing a CRIU checkpoint */
+ bool queues_paused;
};
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
@@ -889,7 +915,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev);
int kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
struct kfd_process *kfd_create_process(struct file *filep);
-struct kfd_process *kfd_get_process(const struct task_struct *);
+struct kfd_process *kfd_get_process(const struct task_struct *task);
struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
@@ -912,6 +938,11 @@ int kfd_process_restore_queues(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);
+struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *process,
+ uint32_t gpu_id);
+
+int kfd_process_get_user_gpu_id(struct kfd_process *p, uint32_t actual_gpu_id);
+
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
struct file *drm_file);
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
@@ -933,6 +964,7 @@ void *kfd_process_device_translate_handle(struct kfd_process_device *p,
int handle);
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
int handle);
+struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);
/* PASIDs */
int kfd_pasid_init(void);
@@ -1007,6 +1039,116 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
uint64_t tba_addr,
uint64_t tma_addr);
+/* CRIU */
+/*
+ * Need to increment KFD_CRIU_PRIV_VERSION each time a change is made to any of the CRIU private
+ * structures:
+ * kfd_criu_process_priv_data
+ * kfd_criu_device_priv_data
+ * kfd_criu_bo_priv_data
+ * kfd_criu_queue_priv_data
+ * kfd_criu_event_priv_data
+ * kfd_criu_svm_range_priv_data
+ */
+
+#define KFD_CRIU_PRIV_VERSION 1
+
+struct kfd_criu_process_priv_data {
+ uint32_t version;
+ uint32_t xnack_mode;
+};
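On restore, the process-level blob is read back first and its version field is expected to be compared against the running kernel's KFD_CRIU_PRIV_VERSION, with a mismatch aborting the restore. That check lives in kfd_chardev.c, outside this hunk, but is conceptually:

	/* Sketch: refuse blobs written with a different private-data layout. */
	if (process_priv.version != KFD_CRIU_PRIV_VERSION) {
		pr_err("CRIU private data version mismatch (saved %d, current %d)\n",
		       process_priv.version, KFD_CRIU_PRIV_VERSION);
		return -EINVAL;
	}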
+
+struct kfd_criu_device_priv_data {
+ /* For future use */
+ uint64_t reserved;
+};
+
+struct kfd_criu_bo_priv_data {
+ uint64_t user_addr;
+ uint32_t idr_handle;
+ uint32_t mapped_gpuids[MAX_GPU_INSTANCE];
+};
+
+/*
+ * The first 4 bytes of kfd_criu_queue_priv_data, kfd_criu_event_priv_data,
+ * kfd_criu_svm_range_priv_data is the object type
+ */
+enum kfd_criu_object_type {
+ KFD_CRIU_OBJECT_TYPE_QUEUE,
+ KFD_CRIU_OBJECT_TYPE_EVENT,
+ KFD_CRIU_OBJECT_TYPE_SVM_RANGE,
+};
+
+struct kfd_criu_svm_range_priv_data {
+ uint32_t object_type;
+ uint64_t start_addr;
+ uint64_t size;
+ /* Variable length array of attributes */
+ struct kfd_ioctl_svm_attribute attrs[];
+};
+
+struct kfd_criu_queue_priv_data {
+ uint32_t object_type;
+ uint64_t q_address;
+ uint64_t q_size;
+ uint64_t read_ptr_addr;
+ uint64_t write_ptr_addr;
+ uint64_t doorbell_off;
+ uint64_t eop_ring_buffer_address;
+ uint64_t ctx_save_restore_area_address;
+ uint32_t gpu_id;
+ uint32_t type;
+ uint32_t format;
+ uint32_t q_id;
+ uint32_t priority;
+ uint32_t q_percent;
+ uint32_t doorbell_id;
+ uint32_t is_gws;
+ uint32_t sdma_id;
+ uint32_t eop_ring_buffer_size;
+ uint32_t ctx_save_restore_area_size;
+ uint32_t ctl_stack_size;
+ uint32_t mqd_size;
+};
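The mqd_size and ctl_stack_size members at the end describe variable-length blobs that are expected to follow this fixed header in the checkpoint stream (raw MQD image first, then control stack), with the caller advancing its priv_data_offset past all three pieces. The serialization itself happens in kfd_chardev.c and the process queue manager rather than here, so the walker below only models the assumed framing with a made-up header:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Minimal stand-in for the fixed part of kfd_criu_queue_priv_data. */
struct fake_q_priv {
	uint32_t q_id;
	uint32_t ctl_stack_size;
	uint32_t mqd_size;
};

/* Walk one serialized queue record: header, MQD blob, control-stack blob. */
static size_t walk_one_queue(const uint8_t *buf)
{
	struct fake_q_priv hdr;

	memcpy(&hdr, buf, sizeof(hdr));
	printf("qid %u: %u-byte MQD, %u-byte ctl stack\n",
	       hdr.q_id, hdr.mqd_size, hdr.ctl_stack_size);
	return sizeof(hdr) + hdr.mqd_size + hdr.ctl_stack_size;
}

int main(void)
{
	uint8_t stream[256] = { 0 };
	struct fake_q_priv hdr = { .q_id = 3, .ctl_stack_size = 16, .mqd_size = 64 };

	memcpy(stream, &hdr, sizeof(hdr));
	printf("next record at offset %zu\n", walk_one_queue(stream));
	return 0;
}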
+
+struct kfd_criu_event_priv_data {
+ uint32_t object_type;
+ uint64_t user_handle;
+ uint32_t event_id;
+ uint32_t auto_reset;
+ uint32_t type;
+ uint32_t signaled;
+
+ union {
+ struct kfd_hsa_memory_exception_data memory_exception_data;
+ struct kfd_hsa_hw_exception_data hw_exception_data;
+ };
+};
+
+int kfd_process_get_queue_info(struct kfd_process *p,
+ uint32_t *num_queues,
+ uint64_t *priv_data_sizes);
+
+int kfd_criu_checkpoint_queues(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset);
+
+int kfd_criu_restore_queue(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size);
+
+int kfd_criu_checkpoint_events(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset);
+
+int kfd_criu_restore_event(struct file *devkfd,
+ struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size);
+/* CRIU - End */
+
/* Queue Context Management */
int init_queue(struct queue **q, const struct queue_properties *properties);
void uninit_queue(struct queue *q);
@@ -1030,7 +1172,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq, bool hanging);
-int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid);
+int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid);
/* Process Queue Manager */
struct process_queue_node {
@@ -1048,6 +1190,9 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
+ const struct kfd_criu_queue_priv_data *q_data,
+ const void *restore_mqd,
+ const void *restore_ctl_stack,
uint32_t *p_doorbell_offset_in_process);
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue_properties(struct process_queue_manager *pqm, unsigned int qid,
@@ -1070,6 +1215,10 @@ int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
uint64_t fence_value,
unsigned int timeout_ms);
+int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
+ unsigned int qid,
+ u32 *mqd_size,
+ u32 *ctl_stack_size);
/* Packet Manager */
#define KFD_FENCE_COMPLETED (100)
@@ -1098,10 +1247,8 @@ struct packet_manager_funcs {
int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
struct queue *q, bool is_static);
int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
- enum kfd_queue_type type,
enum kfd_unmap_queues_filter mode,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine);
+ uint32_t filter_param, bool reset);
int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
uint64_t fence_address, uint64_t fence_value);
int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
@@ -1128,10 +1275,9 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
uint64_t fence_value);
-int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
+int pm_send_unmap_queue(struct packet_manager *pm,
enum kfd_unmap_queues_filter mode,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine);
+ uint32_t filter_param, bool reset);
void pm_release_ib(struct packet_manager *pm);
@@ -1161,12 +1307,14 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
void kfd_signal_hw_exception_event(u32 pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
-int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
- uint64_t size);
+int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset);
+
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint32_t event_type, bool auto_reset, uint32_t node_id,
uint32_t *event_id, uint32_t *event_trigger_data,
uint64_t *event_page_offset, uint32_t *event_slot_index);
+
+int kfd_get_num_events(struct kfd_process *p);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
@@ -1178,8 +1326,6 @@ void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
-int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
-
bool kfd_is_locked(void);
/* Compute profile */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d1145da5348f..59c04b2d383b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -40,7 +41,6 @@ struct mm_struct;
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
-#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"
#include "kfd_svm.h"
@@ -64,7 +64,8 @@ static struct workqueue_struct *kfd_process_wq;
*/
static struct workqueue_struct *kfd_restore_wq;
-static struct kfd_process *find_process(const struct task_struct *thread);
+static struct kfd_process *find_process(const struct task_struct *thread,
+ bool ref);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread);
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
@@ -715,7 +716,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
int err;
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,
- pdd->drm_priv, mem, NULL, flags);
+ pdd->drm_priv, mem, NULL,
+ flags, false);
if (err)
goto err_alloc_mem;
@@ -816,7 +818,7 @@ struct kfd_process *kfd_create_process(struct file *filep)
mutex_lock(&kfd_processes_mutex);
/* A prior open of /dev/kfd could have already created the process. */
- process = find_process(thread);
+ process = find_process(thread, false);
if (process) {
pr_debug("Process already found\n");
} else {
@@ -884,7 +886,7 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread)
if (thread->group_leader->mm != thread->mm)
return ERR_PTR(-EINVAL);
- process = find_process(thread);
+ process = find_process(thread, false);
if (!process)
return ERR_PTR(-EINVAL);
@@ -903,13 +905,16 @@ static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
return NULL;
}
-static struct kfd_process *find_process(const struct task_struct *thread)
+static struct kfd_process *find_process(const struct task_struct *thread,
+ bool ref)
{
struct kfd_process *p;
int idx;
idx = srcu_read_lock(&kfd_processes_srcu);
p = find_process_by_mm(thread->mm);
+ if (p && ref)
+ kref_get(&p->ref);
srcu_read_unlock(&kfd_processes_srcu, idx);
return p;
@@ -920,6 +925,26 @@ void kfd_unref_process(struct kfd_process *p)
kref_put(&p->ref, kfd_process_ref_release);
}
+/* This increments the process->ref counter. */
+struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid)
+{
+ struct task_struct *task = NULL;
+ struct kfd_process *p = NULL;
+
+ if (!pid) {
+ task = current;
+ get_task_struct(task);
+ } else {
+ task = get_pid_task(pid, PIDTYPE_PID);
+ }
+
+ if (task) {
+ p = find_process(task, true);
+ put_task_struct(task);
+ }
+
+ return p;
+}
static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
@@ -1133,7 +1158,6 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
struct kfd_process *p;
- int i;
/*
* The kfd_process structure can not be free because the
@@ -1150,27 +1174,9 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
- cancel_delayed_work_sync(&p->svms.restore_work);
mutex_lock(&p->mutex);
- /* Iterate over all process device data structures and if the
- * pdd is in debug mode, we should first force unregistration,
- * then we will be able to destroy the queues
- */
- for (i = 0; i < p->n_pdds; i++) {
- struct kfd_dev *dev = p->pdds[i]->dev;
-
- mutex_lock(kfd_get_dbgmgr_mutex());
- if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
- if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
- kfd_dbgmgr_destroy(dev->dbgmgr);
- dev->dbgmgr = NULL;
- }
- }
- mutex_unlock(kfd_get_dbgmgr_mutex());
- }
-
kfd_process_dequeue_from_all_devices(p);
pqm_uninit(&p->pqm);
@@ -1360,6 +1366,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
process->mm = thread->mm;
process->lead_thread = thread->group_leader;
process->n_pdds = 0;
+ process->queues_paused = false;
INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
process->last_restore_timestamp = get_jiffies_64();
@@ -1501,6 +1508,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
pdd->runtime_inuse = false;
pdd->vram_usage = 0;
pdd->sdma_past_activity_counter = 0;
+ pdd->user_gpu_id = dev->id;
atomic64_set(&pdd->evict_duration_counter, 0);
p->pdds[p->n_pdds++] = pdd;
@@ -1771,7 +1779,7 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
int i;
for (i = 0; i < p->n_pdds; i++)
- if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id)
+ if (p->pdds[i] && gpu_id == p->pdds[i]->user_gpu_id)
return i;
return -EINVAL;
}
@@ -1784,7 +1792,7 @@ kfd_process_gpuid_from_adev(struct kfd_process *p, struct amdgpu_device *adev,
for (i = 0; i < p->n_pdds; i++)
if (p->pdds[i] && p->pdds[i]->dev->adev == adev) {
- *gpuid = p->pdds[i]->dev->id;
+ *gpuid = p->pdds[i]->user_gpu_id;
*gpuidx = i;
return 0;
}
@@ -1956,6 +1964,37 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
}
}
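+/* Look up a pdd by the user-visible GPU ID; returns NULL if gpu_id is 0 or unknown. */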
+struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, uint32_t gpu_id)
+{
+ int i;
+
+ if (gpu_id) {
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ if (pdd->user_gpu_id == gpu_id)
+ return pdd;
+ }
+ }
+ return NULL;
+}
+
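+/* Map an actual device ID to the user-visible GPU ID recorded in the pdd. */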
+int kfd_process_get_user_gpu_id(struct kfd_process *p, uint32_t actual_gpu_id)
+{
+ int i;
+
+ if (!actual_gpu_id)
+ return 0;
+
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ if (pdd->dev->id == actual_gpu_id)
+ return pdd->user_gpu_id;
+ }
+ return -EINVAL;
+}
+
#if defined(CONFIG_DEBUG_FS)
int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 5e5c84a8e1ef..6eca9509f2e3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -42,6 +43,20 @@ static inline struct process_queue_node *get_queue_by_qid(
return NULL;
}
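+/* Reserve a specific queue ID, used when restoring a queue from a CRIU checkpoint. */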
+static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
+ unsigned int qid)
+{
+ if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
+ return -EINVAL;
+
+ if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
+ pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
static int find_available_queue_slot(struct process_queue_manager *pqm,
unsigned int *qid)
{
@@ -193,6 +208,9 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
+ const struct kfd_criu_queue_priv_data *q_data,
+ const void *restore_mqd,
+ const void *restore_ctl_stack,
uint32_t *p_doorbell_offset_in_process)
{
int retval;
@@ -224,7 +242,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (pdd->qpd.queue_count >= max_queues)
return -ENOSPC;
- retval = find_available_queue_slot(pqm, qid);
+ if (q_data) {
+ retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
+ *qid = q_data->q_id;
+ } else
+ retval = find_available_queue_slot(pqm, qid);
+
if (retval != 0)
return retval;
@@ -252,7 +275,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
- retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd);
+ retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
+ restore_mqd, restore_ctl_stack);
print_queue(q);
break;
@@ -272,7 +296,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
- retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd);
+ retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
+ restore_mqd, restore_ctl_stack);
print_queue(q);
break;
case KFD_QUEUE_TYPE_DIQ:
@@ -497,6 +522,348 @@ int pqm_get_wave_state(struct process_queue_manager *pqm,
save_area_used_size);
}
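+/* Query the MQD and control-stack sizes needed to checkpoint one queue. */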
+static int get_queue_data_sizes(struct kfd_process_device *pdd,
+ struct queue *q,
+ uint32_t *mqd_size,
+ uint32_t *ctl_stack_size)
+{
+ int ret;
+
+ ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
+ q->properties.queue_id,
+ mqd_size,
+ ctl_stack_size);
+ if (ret)
+ pr_err("Failed to get queue dump info (%d)\n", ret);
+
+ return ret;
+}
+
+int kfd_process_get_queue_info(struct kfd_process *p,
+ uint32_t *num_queues,
+ uint64_t *priv_data_sizes)
+{
+ uint32_t extra_data_sizes = 0;
+ struct queue *q;
+ int i;
+ int ret;
+
+ *num_queues = 0;
+
+ /* Run over all PDDs of the process */
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ list_for_each_entry(q, &pdd->qpd.queues_list, list) {
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ uint32_t mqd_size, ctl_stack_size;
+
+ *num_queues = *num_queues + 1;
+
+ ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
+ if (ret)
+ return ret;
+
+ extra_data_sizes += mqd_size + ctl_stack_size;
+ } else {
+ pr_err("Unsupported queue type (%d)\n", q->properties.type);
+ return -EOPNOTSUPP;
+ }
+ }
+ }
+ *priv_data_sizes = extra_data_sizes +
+ (*num_queues * sizeof(struct kfd_criu_queue_priv_data));
+
+ return 0;
+}
+
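+/* Checkpoint a queue's MQD and control stack through the device queue manager. */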
+static int pqm_checkpoint_mqd(struct process_queue_manager *pqm,
+ unsigned int qid,
+ void *mqd,
+ void *ctl_stack)
+{
+ struct process_queue_node *pqn;
+
+ pqn = get_queue_by_qid(pqm, qid);
+ if (!pqn) {
+ pr_debug("amdkfd: No queue %d exists for operation\n", qid);
+ return -EFAULT;
+ }
+
+ if (!pqn->q->device->dqm->ops.checkpoint_mqd) {
+ pr_err("amdkfd: queue dumping not supported on this device\n");
+ return -EOPNOTSUPP;
+ }
+
+ return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm,
+ pqn->q, mqd, ctl_stack);
+}
+
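+/* Fill q_data from the queue's properties, then checkpoint its MQD and control stack right after it. */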
+static int criu_checkpoint_queue(struct kfd_process_device *pdd,
+ struct queue *q,
+ struct kfd_criu_queue_priv_data *q_data)
+{
+ uint8_t *mqd, *ctl_stack;
+ int ret;
+
+ mqd = (void *)(q_data + 1);
+ ctl_stack = mqd + q_data->mqd_size;
+
+ q_data->gpu_id = pdd->user_gpu_id;
+ q_data->type = q->properties.type;
+ q_data->format = q->properties.format;
+ q_data->q_id = q->properties.queue_id;
+ q_data->q_address = q->properties.queue_address;
+ q_data->q_size = q->properties.queue_size;
+ q_data->priority = q->properties.priority;
+ q_data->q_percent = q->properties.queue_percent;
+ q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr;
+ q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr;
+ q_data->doorbell_id = q->doorbell_id;
+
+ q_data->sdma_id = q->sdma_id;
+
+ q_data->eop_ring_buffer_address =
+ q->properties.eop_ring_buffer_address;
+
+ q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size;
+
+ q_data->ctx_save_restore_area_address =
+ q->properties.ctx_save_restore_area_address;
+
+ q_data->ctx_save_restore_area_size =
+ q->properties.ctx_save_restore_area_size;
+
+ ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
+ if (ret) {
+ pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
+ return ret;
+ }
+
+ pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
+ return ret;
+}
+
+static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
+ uint8_t __user *user_priv,
+ unsigned int *q_index,
+ uint64_t *queues_priv_data_offset)
+{
+ unsigned int q_private_data_size = 0;
+ uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */
+ struct queue *q;
+ int ret = 0;
+
+ list_for_each_entry(q, &pdd->qpd.queues_list, list) {
+ struct kfd_criu_queue_priv_data *q_data;
+ uint64_t q_data_size;
+ uint32_t mqd_size;
+ uint32_t ctl_stack_size;
+
+ if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE &&
+ q->properties.type != KFD_QUEUE_TYPE_SDMA &&
+ q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) {
+
+ pr_err("Unsupported queue type (%d)\n", q->properties.type);
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
+ if (ret)
+ break;
+
+ q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size;
+
+ /* Increase local buffer space if needed */
+ if (q_private_data_size < q_data_size) {
+ kfree(q_private_data);
+
+ q_private_data = kzalloc(q_data_size, GFP_KERNEL);
+ if (!q_private_data) {
+ ret = -ENOMEM;
+ break;
+ }
+ q_private_data_size = q_data_size;
+ }
+
+ q_data = (struct kfd_criu_queue_priv_data *)q_private_data;
+
+ /* data stored in this order: priv_data, mqd, ctl_stack */
+ q_data->mqd_size = mqd_size;
+ q_data->ctl_stack_size = ctl_stack_size;
+
+ ret = criu_checkpoint_queue(pdd, q, q_data);
+ if (ret)
+ break;
+
+ q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;
+
+ ret = copy_to_user(user_priv + *queues_priv_data_offset,
+ q_data, q_data_size);
+ if (ret) {
+ ret = -EFAULT;
+ break;
+ }
+ *queues_priv_data_offset += q_data_size;
+ *q_index = *q_index + 1;
+ }
+
+ kfree(q_private_data);
+
+ return ret;
+}
+
+int kfd_criu_checkpoint_queues(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset)
+{
+ int ret = 0, pdd_index, q_index = 0;
+
+ for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
+ struct kfd_process_device *pdd = p->pdds[pdd_index];
+
+ /*
+ * criu_checkpoint_queues_device will copy data to user and update q_index and
+ * queues_priv_data_offset
+ */
+ ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
+ priv_data_offset);
+
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
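+/* Rebuild queue_properties from the checkpointed private data before re-creating the queue. */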
+static void set_queue_properties_from_criu(struct queue_properties *qp,
+ struct kfd_criu_queue_priv_data *q_data)
+{
+ qp->is_interop = false;
+ qp->is_gws = q_data->is_gws;
+ qp->queue_percent = q_data->q_percent;
+ qp->priority = q_data->priority;
+ qp->queue_address = q_data->q_address;
+ qp->queue_size = q_data->q_size;
+ qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
+ qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
+ qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
+ qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
+ qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
+ qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
+ qp->ctl_stack_size = q_data->ctl_stack_size;
+ qp->type = q_data->type;
+ qp->format = q_data->format;
+}
+
+int kfd_criu_restore_queue(struct kfd_process *p,
+ uint8_t __user *user_priv_ptr,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size)
+{
+ uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
+ struct kfd_criu_queue_priv_data *q_data;
+ struct kfd_process_device *pdd;
+ uint64_t q_extra_data_size;
+ struct queue_properties qp;
+ unsigned int queue_id;
+ int ret = 0;
+
+ if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
+ return -EINVAL;
+
+ q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
+ if (!q_data)
+ return -ENOMEM;
+
+ ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
+ if (ret) {
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ *priv_data_offset += sizeof(*q_data);
+ q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size;
+
+ if (*priv_data_offset + q_extra_data_size > max_priv_data_size) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
+ if (!q_extra_data) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
+ if (ret) {
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ *priv_data_offset += q_extra_data_size;
+
+ pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
+ if (!pdd) {
+ pr_err("Failed to get pdd\n");
+ ret = -EINVAL;
+ goto exit;
+ }
+ /* data stored in this order: mqd, ctl_stack */
+ mqd = q_extra_data;
+ ctl_stack = mqd + q_data->mqd_size;
+
+ memset(&qp, 0, sizeof(qp));
+ set_queue_properties_from_criu(&qp, q_data);
+
+ print_queue_properties(&qp);
+
+ ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack,
+ NULL);
+ if (ret) {
+ pr_err("Failed to create new queue err:%d\n", ret);
+ ret = -EINVAL;
+ }
+
+exit:
+ if (ret)
+ pr_err("Failed to create queue (%d)\n", ret);
+ else
+ pr_debug("Queue id %d was restored successfully\n", queue_id);
+
+ kfree(q_data);
+
+ return ret;
+}
+
+int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
+ unsigned int qid,
+ uint32_t *mqd_size,
+ uint32_t *ctl_stack_size)
+{
+ struct process_queue_node *pqn;
+
+ pqn = get_queue_by_qid(pqm, qid);
+ if (!pqn) {
+ pr_debug("amdkfd: No queue %d exists for operation\n", qid);
+ return -EFAULT;
+ }
+
+ if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) {
+ pr_err("amdkfd: queue dumping not supported on this device\n");
+ return -EOPNOTSUPP;
+ }
+
+ pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
+ pqn->q, mqd_size,
+ ctl_stack_size);
+ return 0;
+}
+
#if defined(CONFIG_DEBUG_FS)
int pqm_debugfs_mqds(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index 6dcd621e5b71..0f6992b1895c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index deae12dc777d..e4beebb1c80a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2020 Advanced Micro Devices, Inc.
+ * Copyright 2020-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -81,7 +82,8 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
struct kfd_smi_client *client = filep->private_data;
unsigned char *buf;
- buf = kmalloc_array(MAX_KFIFO_SIZE, sizeof(*buf), GFP_KERNEL);
+ size = min_t(size_t, size, MAX_KFIFO_SIZE);
+ buf = kmalloc(size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -95,7 +97,7 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
ret = -EAGAIN;
goto ret_err;
}
- to_copy = min3(size, sizeof(buf), to_copy);
+ to_copy = min(size, to_copy);
ret = kfifo_out(&client->fifo, buf, to_copy);
spin_unlock(&client->lock);
if (ret <= 0) {
@@ -174,22 +176,29 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
rcu_read_unlock();
}
-void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
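+/* Format "<event id in hex> <message>" and push it to every SMI client fifo. */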
+__printf(3, 4)
+static void kfd_smi_event_add(struct kfd_dev *dev, unsigned int event,
+ char *fmt, ...)
{
- /*
- * GpuReset msg = Reset seq number (incremented for
- * every reset message sent before GPU reset).
- * 1 byte event + 1 byte space + 8 bytes seq num +
- * 1 byte \n + 1 byte \0 = 12
- */
- char fifo_in[12];
+ char fifo_in[KFD_SMI_EVENT_MSG_SIZE];
int len;
- unsigned int event;
+ va_list args;
if (list_empty(&dev->smi_clients))
return;
- memset(fifo_in, 0x0, sizeof(fifo_in));
+ len = snprintf(fifo_in, sizeof(fifo_in), "%x ", event);
+
+ va_start(args, fmt);
+ len += vsnprintf(fifo_in + len, sizeof(fifo_in) - len, fmt, args);
+ va_end(args);
+
+ add_event_to_kfifo(dev, event, fifo_in, len);
+}
+
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
+{
+ unsigned int event;
if (post_reset) {
event = KFD_SMI_EVENT_GPU_POST_RESET;
@@ -197,48 +206,20 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
event = KFD_SMI_EVENT_GPU_PRE_RESET;
++(dev->reset_seq_num);
}
-
- len = snprintf(fifo_in, sizeof(fifo_in), "%x %x\n", event,
- dev->reset_seq_num);
-
- add_event_to_kfifo(dev, event, fifo_in, len);
+ kfd_smi_event_add(dev, event, "%x\n", dev->reset_seq_num);
}
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint64_t throttle_bitmask)
{
- /*
- * ThermalThrottle msg = throttle_bitmask(8):
- * thermal_interrupt_count(16):
- * 1 byte event + 1 byte space + 16 byte throttle_bitmask +
- * 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
- * 1 byte \0 = 37
- */
- char fifo_in[37];
- int len;
-
- if (list_empty(&dev->smi_clients))
- return;
-
- len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",
- KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
- atomic64_read(&dev->adev->smu.throttle_int_counter));
-
- add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
+ kfd_smi_event_add(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
+ throttle_bitmask,
+ amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
}
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
{
struct amdgpu_task_info task_info;
- /* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
- /* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n +
- * 1 byte \0 = 29
- */
- char fifo_in[29];
- int len;
-
- if (list_empty(&dev->smi_clients))
- return;
memset(&task_info, 0, sizeof(struct amdgpu_task_info));
amdgpu_vm_get_task_info(dev->adev, pasid, &task_info);
@@ -246,10 +227,8 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
if (!task_info.pid)
return;
- len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
- task_info.pid, task_info.task_name);
-
- add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
+ kfd_smi_event_add(dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
+ task_info.pid, task_info.task_name);
}
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index bffd0c32b060..dfe101c21166 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2020 Advanced Micro Devices, Inc.
+ * Copyright 2020-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f2805ba74c80..3b8856b4cece 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -45,6 +45,11 @@
*/
#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000
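+/* SVM range metadata collected by kfd_criu_restore_svm() and replayed in kfd_criu_resume_svm(). */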
+struct criu_svm_metadata {
+ struct list_head list;
+ struct kfd_criu_svm_range_priv_data data;
+};
+
static void svm_range_evict_svm_bo_worker(struct work_struct *work);
static bool
svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
@@ -1224,19 +1229,20 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
if (r)
break;
}
- amdgpu_amdkfd_flush_gpu_tlb_pasid(pdd->dev->adev,
- p->pasid, TLB_FLUSH_HEAVYWEIGHT);
+ kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);
}
return r;
}
static int
-svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- struct svm_range *prange, unsigned long offset,
- unsigned long npages, bool readonly, dma_addr_t *dma_addr,
- struct amdgpu_device *bo_adev, struct dma_fence **fence)
+svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
+ unsigned long offset, unsigned long npages, bool readonly,
+ dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
+ struct dma_fence **fence)
{
+ struct amdgpu_device *adev = pdd->dev->adev;
+ struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
bool table_freed = false;
uint64_t pte_flags;
unsigned long last_start;
@@ -1300,12 +1306,8 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (fence)
*fence = dma_fence_get(vm->last_update);
- if (table_freed) {
- struct kfd_process *p;
-
- p = container_of(prange->svms, struct kfd_process, svms);
- amdgpu_amdkfd_flush_gpu_tlb_pasid(adev, p->pasid, TLB_FLUSH_LEGACY);
- }
+ if (table_freed)
+ kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
out:
return r;
}
@@ -1346,8 +1348,7 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
continue;
}
- r = svm_range_map_to_gpu(pdd->dev->adev, drm_priv_to_vm(pdd->drm_priv),
- prange, offset, npages, readonly,
+ r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
prange->dma_addr[gpuidx],
bo_adev, wait ? &fence : NULL);
if (r)
@@ -1628,6 +1629,7 @@ retry_flush_work:
static void svm_range_restore_work(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
+ struct amdkfd_process_info *process_info;
struct svm_range_list *svms;
struct svm_range *prange;
struct kfd_process *p;
@@ -1643,14 +1645,17 @@ static void svm_range_restore_work(struct work_struct *work)
pr_debug("restore svm ranges\n");
- /* kfd_process_notifier_release destroys this worker thread. So during
- * the lifetime of this thread, kfd_process and mm will be valid.
- */
p = container_of(svms, struct kfd_process, svms);
- mm = p->mm;
- if (!mm)
+ process_info = p->kgd_process_info;
+
+ /* Hold an mm reference while svm_range_validate_and_map maps the ranges */
+ mm = get_task_mm(p->lead_thread);
+ if (!mm) {
+ pr_debug("svms 0x%p process mm gone\n", svms);
return;
+ }
+ mutex_lock(&process_info->lock);
svm_range_list_lock_and_flush_work(svms, mm);
mutex_lock(&svms->lock);
@@ -1703,6 +1708,8 @@ static void svm_range_restore_work(struct work_struct *work)
out_reschedule:
mutex_unlock(&svms->lock);
mmap_write_unlock(mm);
+ mutex_unlock(&process_info->lock);
+ mmput(mm);
/* If validation failed, reschedule another attempt */
if (evicted_ranges) {
@@ -1985,10 +1992,9 @@ svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
}
static void
-svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
+svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange,
+ struct mm_struct *mm)
{
- struct mm_struct *mm = prange->work_item.mm;
-
switch (prange->work_item.op) {
case SVM_OP_NULL:
pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n",
@@ -2065,40 +2071,44 @@ static void svm_range_deferred_list_work(struct work_struct *work)
struct svm_range_list *svms;
struct svm_range *prange;
struct mm_struct *mm;
- struct kfd_process *p;
svms = container_of(work, struct svm_range_list, deferred_list_work);
pr_debug("enter svms 0x%p\n", svms);
- p = container_of(svms, struct kfd_process, svms);
- /* Avoid mm is gone when inserting mmu notifier */
- mm = get_task_mm(p->lead_thread);
- if (!mm) {
- pr_debug("svms 0x%p process mm gone\n", svms);
- return;
- }
-retry:
- mmap_write_lock(mm);
-
- /* Checking for the need to drain retry faults must be inside
- * mmap write lock to serialize with munmap notifiers.
- */
- if (unlikely(atomic_read(&svms->drain_pagefaults))) {
- mmap_write_unlock(mm);
- svm_range_drain_retry_fault(svms);
- goto retry;
- }
-
spin_lock(&svms->deferred_list_lock);
while (!list_empty(&svms->deferred_range_list)) {
prange = list_first_entry(&svms->deferred_range_list,
struct svm_range, deferred_list);
- list_del_init(&prange->deferred_list);
spin_unlock(&svms->deferred_list_lock);
pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
prange->start, prange->last, prange->work_item.op);
+ mm = prange->work_item.mm;
+retry:
+ mmap_write_lock(mm);
+
+ /* Checking for the need to drain retry faults must be inside
+ * mmap write lock to serialize with munmap notifiers.
+ */
+ if (unlikely(atomic_read(&svms->drain_pagefaults))) {
+ mmap_write_unlock(mm);
+ svm_range_drain_retry_fault(svms);
+ goto retry;
+ }
+
+ /* Removal from deferred_list must happen inside the mmap write
+ * lock, to avoid two races:
+ * 1. unmap_from_cpu may change work_item.op and add the range
+ * to deferred_list again, causing a use-after-free bug.
+ * 2. svm_range_list_lock_and_flush_work may take the mmap write
+ * lock and continue because deferred_list is empty, while the
+ * deferred_list work is actually still waiting for the mmap lock.
+ */
+ spin_lock(&svms->deferred_list_lock);
+ list_del_init(&prange->deferred_list);
+ spin_unlock(&svms->deferred_list_lock);
+
mutex_lock(&svms->lock);
mutex_lock(&prange->migrate_mutex);
while (!list_empty(&prange->child_list)) {
@@ -2109,19 +2119,20 @@ retry:
pr_debug("child prange 0x%p op %d\n", pchild,
pchild->work_item.op);
list_del_init(&pchild->child_list);
- svm_range_handle_list_op(svms, pchild);
+ svm_range_handle_list_op(svms, pchild, mm);
}
mutex_unlock(&prange->migrate_mutex);
- svm_range_handle_list_op(svms, prange);
+ svm_range_handle_list_op(svms, prange, mm);
mutex_unlock(&svms->lock);
+ mmap_write_unlock(mm);
+
+ /* Pairs with mmget in svm_range_add_list_work */
+ mmput(mm);
spin_lock(&svms->deferred_list_lock);
}
spin_unlock(&svms->deferred_list_lock);
-
- mmap_write_unlock(mm);
- mmput(mm);
pr_debug("exit svms 0x%p\n", svms);
}
@@ -2139,6 +2150,9 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
prange->work_item.op = op;
} else {
prange->work_item.op = op;
+
+ /* Pairs with mmput in deferred_list_work */
+ mmget(mm);
prange->work_item.mm = mm;
list_add_tail(&prange->deferred_list,
&prange->svms->deferred_range_list);
@@ -2830,6 +2844,8 @@ void svm_range_list_fini(struct kfd_process *p)
pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms);
+ cancel_delayed_work_sync(&p->svms.restore_work);
+
/* Ensure list work is finished before process is destroyed */
flush_work(&p->svms.deferred_list_work);
@@ -2840,7 +2856,6 @@ void svm_range_list_fini(struct kfd_process *p)
atomic_inc(&p->svms.drain_pagefaults);
svm_range_drain_retry_fault(&p->svms);
-
list_for_each_entry_safe(prange, next, &p->svms.list, list) {
svm_range_unlink(prange);
svm_range_remove_notifier(prange);
@@ -2865,6 +2880,7 @@ int svm_range_list_init(struct kfd_process *p)
INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
INIT_LIST_HEAD(&svms->deferred_range_list);
+ INIT_LIST_HEAD(&svms->criu_svm_metadata_list);
spin_lock_init(&svms->deferred_list_lock);
for (i = 0; i < p->n_pdds; i++)
@@ -3139,6 +3155,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
struct svm_range_bo *svm_bo;
struct kfd_process *p;
struct mm_struct *mm;
+ int r = 0;
svm_bo = container_of(work, struct svm_range_bo, eviction_work);
if (!svm_bo_ref_unless_zero(svm_bo))
@@ -3154,7 +3171,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
mmap_read_lock(mm);
spin_lock(&svm_bo->list_lock);
- while (!list_empty(&svm_bo->range_list)) {
+ while (!list_empty(&svm_bo->range_list) && !r) {
struct svm_range *prange =
list_first_entry(&svm_bo->range_list,
struct svm_range, svm_bo_list);
@@ -3168,15 +3185,18 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
mutex_lock(&prange->migrate_mutex);
do {
- svm_migrate_vram_to_ram(prange,
+ r = svm_migrate_vram_to_ram(prange,
svm_bo->eviction_fence->mm);
- } while (prange->actual_loc && --retries);
- WARN(prange->actual_loc, "Migration failed during eviction");
+ } while (!r && prange->actual_loc && --retries);
- mutex_lock(&prange->lock);
- prange->svm_bo = NULL;
- mutex_unlock(&prange->lock);
+ if (!r && prange->actual_loc)
+ pr_info_once("Migration failed during eviction");
+ if (!prange->actual_loc) {
+ mutex_lock(&prange->lock);
+ prange->svm_bo = NULL;
+ mutex_unlock(&prange->lock);
+ }
mutex_unlock(&prange->migrate_mutex);
spin_lock(&svm_bo->list_lock);
@@ -3185,18 +3205,20 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
mmap_read_unlock(mm);
dma_fence_signal(&svm_bo->eviction_fence->base);
+
/* This is the last reference to svm_bo, after svm_range_vram_node_free
* has been called in svm_migrate_vram_to_ram
*/
- WARN_ONCE(kref_read(&svm_bo->kref) != 1, "This was not the last reference\n");
+ WARN_ONCE(!r && kref_read(&svm_bo->kref) != 1, "This was not the last reference\n");
svm_range_bo_unref(svm_bo);
}
static int
-svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
- uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
+ uint64_t start, uint64_t size, uint32_t nattr,
+ struct kfd_ioctl_svm_attribute *attrs)
{
- struct mm_struct *mm = current->mm;
+ struct amdkfd_process_info *process_info = p->kgd_process_info;
struct list_head update_list;
struct list_head insert_list;
struct list_head remove_list;
@@ -3214,6 +3236,8 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
svms = &p->svms;
+ mutex_lock(&process_info->lock);
+
svm_range_list_lock_and_flush_work(svms, mm);
r = svm_range_is_valid(p, start, size);
@@ -3288,6 +3312,8 @@ out_unlock_range:
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
out:
+ mutex_unlock(&process_info->lock);
+
pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
&p->svms, start, start + size - 1, r);
@@ -3295,8 +3321,9 @@ out:
}
static int
-svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
- uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
+ uint64_t start, uint64_t size, uint32_t nattr,
+ struct kfd_ioctl_svm_attribute *attrs)
{
DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
@@ -3306,7 +3333,6 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
bool get_accessible = false;
bool get_flags = false;
uint64_t last = start + size - 1UL;
- struct mm_struct *mm = current->mm;
uint8_t granularity = 0xff;
struct interval_tree_node *node;
struct svm_range_list *svms;
@@ -3471,10 +3497,321 @@ fill_values:
return 0;
}
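+/* Replay the SVM range attributes collected during restore; called in the CRIU resume stage. */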
+int kfd_criu_resume_svm(struct kfd_process *p)
+{
+ struct kfd_ioctl_svm_attribute *set_attr_new, *set_attr = NULL;
+ int nattr_common = 4, nattr_accessibility = 1;
+ struct criu_svm_metadata *criu_svm_md = NULL;
+ struct svm_range_list *svms = &p->svms;
+ struct criu_svm_metadata *next = NULL;
+ uint32_t set_flags = 0xffffffff;
+ int i, j, num_attrs, ret = 0;
+ uint64_t set_attr_size;
+ struct mm_struct *mm;
+
+ if (list_empty(&svms->criu_svm_metadata_list)) {
+ pr_debug("No SVM data from CRIU restore stage 2\n");
+ return ret;
+ }
+
+ mm = get_task_mm(p->lead_thread);
+ if (!mm) {
+ pr_err("failed to get mm for the target process\n");
+ return -ESRCH;
+ }
+
+ num_attrs = nattr_common + (nattr_accessibility * p->n_pdds);
+
+ i = j = 0;
+ list_for_each_entry(criu_svm_md, &svms->criu_svm_metadata_list, list) {
+ pr_debug("criu_svm_md[%d]\n\tstart: 0x%llx size: 0x%llx (npages)\n",
+ i, criu_svm_md->data.start_addr, criu_svm_md->data.size);
+
+ for (j = 0; j < num_attrs; j++) {
+ pr_debug("\ncriu_svm_md[%d]->attrs[%d].type : 0x%x\ncriu_svm_md[%d]->attrs[%d].value : 0x%x\n",
+ i, j, criu_svm_md->data.attrs[j].type,
+ i, j, criu_svm_md->data.attrs[j].value);
+ switch (criu_svm_md->data.attrs[j].type) {
+ /* During the checkpoint operation, a query for the
+ * KFD_IOCTL_SVM_ATTR_PREFETCH_LOC attribute might return
+ * KFD_IOCTL_SVM_LOCATION_UNDEFINED if it was not used by
+ * the range that was checkpointed. Take care not to restore
+ * such an invalid value, otherwise the gpuidx would be
+ * invalid and set_attr would eventually fail; just replace
+ * those entries with a harmless dummy attribute such as
+ * KFD_IOCTL_SVM_ATTR_SET_FLAGS.
+ */
+ case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+ if (criu_svm_md->data.attrs[j].value ==
+ KFD_IOCTL_SVM_LOCATION_UNDEFINED) {
+ criu_svm_md->data.attrs[j].type =
+ KFD_IOCTL_SVM_ATTR_SET_FLAGS;
+ criu_svm_md->data.attrs[j].value = 0;
+ }
+ break;
+ case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+ set_flags = criu_svm_md->data.attrs[j].value;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* CLR_FLAGS is not available via get_attr during checkpoint, but
+ * it needs to be inserted into the attribute list before restoring
+ * the ranges, so allocate extra space for it before calling set_attr.
+ */
+ set_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+ (num_attrs + 1);
+ set_attr_new = krealloc(set_attr, set_attr_size,
+ GFP_KERNEL);
+ if (!set_attr_new) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+ set_attr = set_attr_new;
+
+ memcpy(set_attr, criu_svm_md->data.attrs, num_attrs *
+ sizeof(struct kfd_ioctl_svm_attribute));
+ set_attr[num_attrs].type = KFD_IOCTL_SVM_ATTR_CLR_FLAGS;
+ set_attr[num_attrs].value = ~set_flags;
+
+ ret = svm_range_set_attr(p, mm, criu_svm_md->data.start_addr,
+ criu_svm_md->data.size, num_attrs + 1,
+ set_attr);
+ if (ret) {
+ pr_err("CRIU: failed to set range attributes\n");
+ goto exit;
+ }
+
+ i++;
+ }
+exit:
+ kfree(set_attr);
+ list_for_each_entry_safe(criu_svm_md, next, &svms->criu_svm_metadata_list, list) {
+ pr_debug("freeing criu_svm_md[]\n\tstart: 0x%llx\n",
+ criu_svm_md->data.start_addr);
+ kfree(criu_svm_md);
+ }
+
+ mmput(mm);
+ return ret;
+
+}
+
+int kfd_criu_restore_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_ptr,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size)
+{
+ uint64_t svm_priv_data_size, svm_object_md_size, svm_attrs_size;
+ int nattr_common = 4, nattr_accessibility = 1;
+ struct criu_svm_metadata *criu_svm_md = NULL;
+ struct svm_range_list *svms = &p->svms;
+ uint32_t num_devices;
+ int ret = 0;
+
+ num_devices = p->n_pdds;
+ /* Handle one SVM range object at a time. The number of GPUs is
+ * assumed to be the same on the restore node; this must be verified
+ * earlier, while evaluating the topology.
+ */
+
+ svm_attrs_size = sizeof(struct kfd_ioctl_svm_attribute) *
+ (nattr_common + nattr_accessibility * num_devices);
+ svm_object_md_size = sizeof(struct criu_svm_metadata) + svm_attrs_size;
+
+ svm_priv_data_size = sizeof(struct kfd_criu_svm_range_priv_data) +
+ svm_attrs_size;
+
+ criu_svm_md = kzalloc(svm_object_md_size, GFP_KERNEL);
+ if (!criu_svm_md) {
+ pr_err("failed to allocate memory to store svm metadata\n");
+ return -ENOMEM;
+ }
+ if (*priv_data_offset + svm_priv_data_size > max_priv_data_size) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ ret = copy_from_user(&criu_svm_md->data, user_priv_ptr + *priv_data_offset,
+ svm_priv_data_size);
+ if (ret) {
+ ret = -EFAULT;
+ goto exit;
+ }
+ *priv_data_offset += svm_priv_data_size;
+
+ list_add_tail(&criu_svm_md->list, &svms->criu_svm_metadata_list);
+
+ return 0;
+
+
+exit:
+ kfree(criu_svm_md);
+ return ret;
+}
+
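+/* Count the process's SVM ranges and the private data size needed to checkpoint them. */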
+int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size)
+{
+ uint64_t total_size, accessibility_size, common_attr_size;
+ int nattr_common = 4, nattr_accessibility = 1;
+ int num_devices = p->n_pdds;
+ struct svm_range_list *svms;
+ struct svm_range *prange;
+ uint32_t count = 0;
+
+ *svm_priv_data_size = 0;
+
+ svms = &p->svms;
+ if (!svms)
+ return -EINVAL;
+
+ mutex_lock(&svms->lock);
+ list_for_each_entry(prange, &svms->list, list) {
+ pr_debug("prange: 0x%p start: 0x%lx\t npages: 0x%llx\t end: 0x%llx\n",
+ prange, prange->start, prange->npages,
+ prange->start + prange->npages - 1);
+ count++;
+ }
+ mutex_unlock(&svms->lock);
+
+ *num_svm_ranges = count;
+ /* Only the accessibility attributes need to be queried for each GPU
+ * individually; the remaining ones apply to the entire process
+ * regardless of the individual gpu nodes. Of the remaining attributes,
+ * KFD_IOCTL_SVM_ATTR_CLR_FLAGS need not be saved.
+ *
+ * KFD_IOCTL_SVM_ATTR_PREFERRED_LOC
+ * KFD_IOCTL_SVM_ATTR_PREFETCH_LOC
+ * KFD_IOCTL_SVM_ATTR_SET_FLAGS
+ * KFD_IOCTL_SVM_ATTR_GRANULARITY
+ *
+ * ** ACCESSIBILITY ATTRIBUTES **
+ * (Considered as one, type is altered during query, value is gpuid)
+ * KFD_IOCTL_SVM_ATTR_ACCESS
+ * KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE
+ * KFD_IOCTL_SVM_ATTR_NO_ACCESS
+ */
+ if (*num_svm_ranges > 0) {
+ common_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+ nattr_common;
+ accessibility_size = sizeof(struct kfd_ioctl_svm_attribute) *
+ nattr_accessibility * num_devices;
+
+ total_size = sizeof(struct kfd_criu_svm_range_priv_data) +
+ common_attr_size + accessibility_size;
+
+ *svm_priv_data_size = *num_svm_ranges * total_size;
+ }
+
+ pr_debug("num_svm_ranges %u total_priv_size %llu\n", *num_svm_ranges,
+ *svm_priv_data_size);
+ return 0;
+}
+
+int kfd_criu_checkpoint_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset)
+{
+ struct kfd_criu_svm_range_priv_data *svm_priv = NULL;
+ struct kfd_ioctl_svm_attribute *query_attr = NULL;
+ uint64_t svm_priv_data_size, query_attr_size = 0;
+ int index, nattr_common = 4, ret = 0;
+ struct svm_range_list *svms;
+ int num_devices = p->n_pdds;
+ struct svm_range *prange;
+ struct mm_struct *mm;
+
+ svms = &p->svms;
+ if (!svms)
+ return -EINVAL;
+
+ mm = get_task_mm(p->lead_thread);
+ if (!mm) {
+ pr_err("failed to get mm for the target process\n");
+ return -ESRCH;
+ }
+
+ query_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+ (nattr_common + num_devices);
+
+ query_attr = kzalloc(query_attr_size, GFP_KERNEL);
+ if (!query_attr) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ query_attr[0].type = KFD_IOCTL_SVM_ATTR_PREFERRED_LOC;
+ query_attr[1].type = KFD_IOCTL_SVM_ATTR_PREFETCH_LOC;
+ query_attr[2].type = KFD_IOCTL_SVM_ATTR_SET_FLAGS;
+ query_attr[3].type = KFD_IOCTL_SVM_ATTR_GRANULARITY;
+
+ for (index = 0; index < num_devices; index++) {
+ struct kfd_process_device *pdd = p->pdds[index];
+
+ query_attr[index + nattr_common].type =
+ KFD_IOCTL_SVM_ATTR_ACCESS;
+ query_attr[index + nattr_common].value = pdd->user_gpu_id;
+ }
+
+ svm_priv_data_size = sizeof(*svm_priv) + query_attr_size;
+
+ svm_priv = kzalloc(svm_priv_data_size, GFP_KERNEL);
+ if (!svm_priv) {
+ ret = -ENOMEM;
+ goto exit_query;
+ }
+
+ index = 0;
+ list_for_each_entry(prange, &svms->list, list) {
+
+ svm_priv->object_type = KFD_CRIU_OBJECT_TYPE_SVM_RANGE;
+ svm_priv->start_addr = prange->start;
+ svm_priv->size = prange->npages;
+ memcpy(&svm_priv->attrs, query_attr, query_attr_size);
+ pr_debug("CRIU: prange: 0x%p start: 0x%lx\t npages: 0x%llx end: 0x%llx\t size: 0x%llx\n",
+ prange, prange->start, prange->npages,
+ prange->start + prange->npages - 1,
+ prange->npages * PAGE_SIZE);
+
+ ret = svm_range_get_attr(p, mm, svm_priv->start_addr,
+ svm_priv->size,
+ (nattr_common + num_devices),
+ svm_priv->attrs);
+ if (ret) {
+ pr_err("CRIU: failed to obtain range attributes\n");
+ goto exit_priv;
+ }
+
+ if (copy_to_user(user_priv_data + *priv_data_offset, svm_priv,
+ svm_priv_data_size)) {
+ pr_err("Failed to copy svm priv to user\n");
+ ret = -EFAULT;
+ goto exit_priv;
+ }
+
+ *priv_data_offset += svm_priv_data_size;
+
+ }
+
+
+exit_priv:
+ kfree(svm_priv);
+exit_query:
+ kfree(query_attr);
+exit:
+ mmput(mm);
+ return ret;
+}
+
int
svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs)
{
+ struct mm_struct *mm = current->mm;
int r;
start >>= PAGE_SHIFT;
@@ -3482,10 +3819,10 @@ svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
switch (op) {
case KFD_IOCTL_SVM_OP_SET_ATTR:
- r = svm_range_set_attr(p, start, size, nattrs, attrs);
+ r = svm_range_set_attr(p, mm, start, size, nattrs, attrs);
break;
case KFD_IOCTL_SVM_OP_GET_ATTR:
- r = svm_range_get_attr(p, start, size, nattrs, attrs);
+ r = svm_range_get_attr(p, mm, start, size, nattrs, attrs);
break;
default:
r = EINVAL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 949b477e2f4c..66c77f00ac3e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -183,6 +183,16 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
void svm_range_free_dma_mappings(struct svm_range *prange);
void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
void *owner);
+int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size);
+int kfd_criu_checkpoint_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset);
+int kfd_criu_restore_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_ptr,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size);
+int kfd_criu_resume_svm(struct kfd_process *p);
struct kfd_process_device *
svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev);
void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_struct *mm);
@@ -220,6 +230,35 @@ static inline int svm_range_schedule_evict_svm_bo(
return -EINVAL;
}
+static inline int svm_range_get_info(struct kfd_process *p,
+ uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size)
+{
+ *num_svm_ranges = 0;
+ *svm_priv_data_size = 0;
+ return 0;
+}
+
+static inline int kfd_criu_checkpoint_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset)
+{
+ return 0;
+}
+
+static inline int kfd_criu_restore_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_ptr,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size)
+{
+ return -EINVAL;
+}
+
+static inline int kfd_criu_resume_svm(struct kfd_process *p)
+{
+ return 0;
+}
+
#define KFD_IS_SVM_API_SUPPORTED(dev) false
#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 948fbb39336e..3bdcae239bc0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -1441,9 +1442,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
}
/*
- * Overwrite ATS capability according to needs_iommu_device to fix
- * potential missing corresponding bit in CRAT of BIOS.
- */
+ * Overwrite ATS capability according to needs_iommu_device to fix
+ * potential missing corresponding bit in CRAT of BIOS.
+ */
if (dev->gpu->use_iommu_v2)
dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
else
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index f0cc59d2fd5d..4f80d2ea1000 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),