diff options
author | Dave Airlie <airlied@redhat.com> | 2023-10-18 09:08:07 +0300 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2023-10-18 09:08:07 +0300 |
commit | 27442758e9b4e083bef3f164a1739475c01f3202 (patch) | |
tree | 58f4a82f1b9e41c11b6247d8f7b9c3c9b8b52711 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | |
parent | 08057253366d916a73e62bafb913d9b659228cc1 (diff) | |
parent | cd90511557fdfb394bb4ac4c3b539b007383914c (diff) | |
download | linux-27442758e9b4e083bef3f164a1739475c01f3202.tar.xz |
Merge tag 'amd-drm-next-6.7-2023-10-13' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.7-2023-10-13:
amdgpu:
- DC replay fixes
- Misc code cleanups and spelling fixes
- Documentation updates
- RAS EEPROM Updates
- FRU EEPROM Updates
- IP discovery updates
- SR-IOV fixes
- RAS updates
- DC PQ fixes
- SMU 13.0.6 updates
- GC 11.5 Support
- NBIO 7.11 Support
- GMC 11 Updates
- Reset fixes
- SMU 11.5 Updates
- SMU 13.0 OD support
- Use flexible arrays for bo list handling
- W=1 Fixes
- SubVP fixes
- DPIA fixes
- DCN 3.5 Support
- Devcoredump fixes
- VPE 6.1 support
- VCN 4.0 Updates
- S/G display fixes
- DML fixes
- DML2 Support
- MST fixes
- VRR fixes
- Enable seamless boot in more cases
- Enable content type property for HDMI
- OLED fixes
- Rework and clean up GPUVM TLB flushing
- DC ODM fixes
- DP 2.x fixes
- AGP aperture fixes
- SDMA firmware loading cleanups
- Cyan Skillfish GPU clock counter fix
- GC 11 GART fix
- Cache GPU fault info for userspace queries
- DC cursor check fixes
- eDP fixes
- DC FP handling fixes
- Variable sized array fixes
- SMU 13.0.x fixes
- IB start and size alignment fixes for VCN
- SMU 14 Support
- Suspend and resume sequence rework
- vkms fix
amdkfd:
- GC 11 fixes
- GC 10 fixes
- Doorbell fixes
- CWSR fixes
- SVM fixes
- Clean up GC info enumeration
- Rework memory limit handling
- Coherent memory handling fixes
- Use partial migrations in GPU faults
- TLB flush fixes
- DMA unmap fixes
- GC 9.4.3 fixes
- SQ interrupt fix
- GTT mapping fix
- GC 11.5 Support
radeon:
- Misc code cleanups
- W=1 Fixes
- Fix possible buffer overflow
- Fix possible NULL pointer dereference
UAPI:
- Add EXT_COHERENT memory allocation flags. These allow for system scope atomics.
Proposed userspace: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/pull/88
- Add support for new VPE engine. This is a memory to memory copy engine with advanced scaling, CSC, and color management features
Proposed mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25713
- Add INFO IOCTL interface to query GPU faults
Proposed Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23238
Proposed libdrm MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/298
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231013175758.1735031-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 35 |
1 files changed, 33 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index ffb49b2d533a..0a5c8a107fb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -28,6 +28,7 @@ #include <linux/list.h> #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" +#include "amdgpu_smuio.h" struct amdgpu_iv_entry; @@ -389,9 +390,12 @@ struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ uint32_t features; + uint32_t schema; struct list_head head; /* sysfs */ struct device_attribute features_attr; + struct device_attribute version_attr; + struct device_attribute schema_attr; struct bin_attribute badpages_attr; struct dentry *de_ras_eeprom_table; /* block array */ @@ -436,17 +440,33 @@ struct amdgpu_ras { }; struct ras_fs_data { - char sysfs_name[32]; + char sysfs_name[48]; char debugfs_name[32]; }; +struct ras_err_info { + struct amdgpu_smuio_mcm_config_info mcm_info; + u64 ce_count; + u64 ue_count; +}; + +struct ras_err_node { + struct list_head node; + struct ras_err_info err_info; +}; + struct ras_err_data { unsigned long ue_count; unsigned long ce_count; unsigned long err_addr_cnt; struct eeprom_table_record *err_addr; + u32 err_list_count; + struct list_head err_node_list; }; +#define for_each_ras_error(err_node, err_data) \ + list_for_each_entry(err_node, &(err_data)->err_node_list, node) + struct ras_err_handler_data { /* point to bad page records array */ struct eeprom_table_record *bps; @@ -493,7 +513,10 @@ struct ras_manager { /* IH data */ struct ras_ih_data ih_data; - struct ras_err_data err_data; + struct { + unsigned long ue_count; + unsigned long ce_count; + } err_data; }; struct ras_badpage { @@ -767,4 +790,12 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev, const struct amdgpu_ras_err_status_reg_entry *reg_list, uint32_t reg_list_size, uint32_t instance); + +int amdgpu_ras_error_data_init(struct ras_err_data *err_data); +void amdgpu_ras_error_data_fini(struct ras_err_data *err_data); +int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, + struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count); +int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, + struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count); + #endif |