summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2018-11-07 15:55:01 +0300
committerAlex Deucher <alexander.deucher@amd.com>2019-03-19 23:36:58 +0300
commitc1a8abd99da5a035f54cd7633b060033650ff3e0 (patch)
treeb0e00e326f07b5b869cdb299fc47840481c42a1c /drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
parent8c65fe5fc81c37c98269389759bb1d90c4658953 (diff)
downloadlinux-c1a8abd99da5a035f54cd7633b060033650ff3e0.tar.xz
drm/amdgpu: use ring/hash for fault handling on GMC9 v3
Further testing showed that the idea with the chash doesn't work as expected. Especially we can't predict when we can remove the entries from the hash again. So replace the chash with a ring buffer/hash mix where entries in the container age automatically based on their timestamp. v2: use ring buffer / hash mix v3: check the timeout to make sure all entries age Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> (v2) Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h34
1 files changed, 34 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 6ce45664ff87..071145ac67b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -43,9 +43,35 @@
*/
#define AMDGPU_GMC_HOLE_MASK 0x0000ffffffffffffULL
+/*
+ * Ring size as power of two for the log of recent faults.
+ */
+#define AMDGPU_GMC_FAULT_RING_ORDER 8
+#define AMDGPU_GMC_FAULT_RING_SIZE (1 << AMDGPU_GMC_FAULT_RING_ORDER)
+
+/*
+ * Hash size as power of two for the log of recent faults
+ */
+#define AMDGPU_GMC_FAULT_HASH_ORDER 8
+#define AMDGPU_GMC_FAULT_HASH_SIZE (1 << AMDGPU_GMC_FAULT_HASH_ORDER)
+
+/*
+ * Number of IH timestamp ticks until a fault is considered handled
+ */
+#define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL
+
struct firmware;
/*
+ * GMC page fault information
+ */
+struct amdgpu_gmc_fault {
+ uint64_t timestamp;
+ uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER;
+ uint64_t key:52;
+};
+
+/*
* VMHUB structures, functions & helpers
*/
struct amdgpu_vmhub {
@@ -141,6 +167,12 @@ struct amdgpu_gmc {
struct kfd_vm_fault_info *vm_fault_info;
atomic_t vm_fault_info_updated;
+ struct amdgpu_gmc_fault fault_ring[AMDGPU_GMC_FAULT_RING_SIZE];
+ struct {
+ uint64_t idx:AMDGPU_GMC_FAULT_RING_ORDER;
+ } fault_hash[AMDGPU_GMC_FAULT_HASH_SIZE];
+ uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER;
+
const struct amdgpu_gmc_funcs *gmc_funcs;
struct amdgpu_xgmi xgmi;
@@ -195,5 +227,7 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
+bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
+ uint16_t pasid, uint64_t timestamp);
#endif