summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/radeon/cik.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/radeon/cik.c')
-rw-r--r--drivers/gpu/drm/radeon/cik.c3196
1 files changed, 2288 insertions, 908 deletions
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index ed1d91025928..a3bba0587276 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -22,7 +22,6 @@
* Authors: Alex Deucher
*/
#include <linux/firmware.h>
-#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
@@ -31,22 +30,8 @@
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
-
-/* GFX */
-#define CIK_PFP_UCODE_SIZE 2144
-#define CIK_ME_UCODE_SIZE 2144
-#define CIK_CE_UCODE_SIZE 2144
-/* compute */
-#define CIK_MEC_UCODE_SIZE 4192
-/* interrupts */
-#define BONAIRE_RLC_UCODE_SIZE 2048
-#define KB_RLC_UCODE_SIZE 2560
-#define KV_RLC_UCODE_SIZE 2560
-/* gddr controller */
-#define CIK_MC_UCODE_SIZE 7866
-/* sdma */
-#define CIK_SDMA_UCODE_SIZE 1050
-#define CIK_SDMA_UCODE_VERSION 64
+#include "radeon_ucode.h"
+#include "clearstate_ci.h"
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
@@ -55,6 +40,7 @@ MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
+MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
@@ -73,10 +59,61 @@ extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
+extern void sumo_rlc_fini(struct radeon_device *rdev);
+extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
-extern void si_rlc_fini(struct radeon_device *rdev);
-extern int si_rlc_init(struct radeon_device *rdev);
+extern void si_rlc_reset(struct radeon_device *rdev);
+extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
+extern int cik_sdma_resume(struct radeon_device *rdev);
+extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
+extern void cik_sdma_fini(struct radeon_device *rdev);
+extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
+ struct radeon_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint32_t flags);
static void cik_rlc_stop(struct radeon_device *rdev);
+static void cik_pcie_gen3_enable(struct radeon_device *rdev);
+static void cik_program_aspm(struct radeon_device *rdev);
+static void cik_init_pg(struct radeon_device *rdev);
+static void cik_init_cg(struct radeon_device *rdev);
+
+/* get temperature in millidegrees */
+int ci_get_temp(struct radeon_device *rdev)
+{
+ u32 temp;
+ int actual_temp = 0;
+
+ temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
+ CTF_TEMP_SHIFT;
+
+ if (temp & 0x200)
+ actual_temp = 255;
+ else
+ actual_temp = temp & 0x1ff;
+
+ actual_temp = actual_temp * 1000;
+
+ return actual_temp;
+}
+
+/* get temperature in millidegrees */
+int kv_get_temp(struct radeon_device *rdev)
+{
+ u32 temp;
+ int actual_temp = 0;
+
+ temp = RREG32_SMC(0xC0300E0C);
+
+ if (temp)
+ actual_temp = (temp / 8) - 49;
+ else
+ actual_temp = 0;
+
+ actual_temp = actual_temp * 1000;
+
+ return actual_temp;
+}
/*
* Indirect registers accessor
@@ -99,6 +136,778 @@ void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
(void)RREG32(PCIE_DATA);
}
+static const u32 spectre_rlc_save_restore_register_list[] =
+{
+ (0x0e00 << 16) | (0xc12c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc140 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc150 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc15c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc168 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc170 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc178 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc204 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2b8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2bc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2c0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8228 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x829c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x869c >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x98f4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x98f8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9900 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc260 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x90e8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c000 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c00c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c1c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9700 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x8e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x9e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0xae00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0xbe00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x89bc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8900 >> 2),
+ 0x00000000,
+ 0x3,
+ (0x0e00 << 16) | (0xc130 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc134 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc1fc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc208 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc264 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc268 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc26c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc270 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc274 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc278 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc27c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc280 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc284 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc288 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc28c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc290 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc294 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc298 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc29c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2a0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2a4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2a8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2ac >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2b0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x301d0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30238 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30250 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30254 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30258 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3025c >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x8e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x9e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0xae00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0xbe00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x8e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x9e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0xae00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0xbe00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x8e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x9e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0xae00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0xbe00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x8e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x9e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0xae00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0xbe00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x8e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x9e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0xae00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0xbe00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc99c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9834 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f00 >> 2),
+ 0x00000000,
+ (0x0001 << 16) | (0x30f00 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f04 >> 2),
+ 0x00000000,
+ (0x0001 << 16) | (0x30f04 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f08 >> 2),
+ 0x00000000,
+ (0x0001 << 16) | (0x30f08 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f0c >> 2),
+ 0x00000000,
+ (0x0001 << 16) | (0x30f0c >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x9b7c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8a14 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8a18 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a00 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8bf0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8bcc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8b24 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30a04 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a10 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a14 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a18 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a2c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc700 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc704 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc708 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc768 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc770 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc774 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc778 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc77c >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc780 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc784 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc788 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc78c >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc798 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc79c >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc7a0 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc7a4 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc7a8 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc7ac >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc7b0 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc7b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9100 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c010 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92a8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92ac >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92b8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92bc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92c0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92c4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92c8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92cc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92d0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c00 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c04 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c20 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c38 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c3c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xae00 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9604 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac08 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac0c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac10 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac14 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac58 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac68 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac6c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac70 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac74 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac78 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac7c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac80 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac84 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac88 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac8c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x970c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9714 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9718 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x971c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x8e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x9e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0xae00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0xbe00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xcd10 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xcd14 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88b0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88b8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88bc >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0x89c0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88c4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88c8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88d0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88d4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88d8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8980 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30938 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3093c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30940 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x89a0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30900 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30904 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x89b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c210 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c214 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c218 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8904 >> 2),
+ 0x00000000,
+ 0x5,
+ (0x0e00 << 16) | (0x8c28 >> 2),
+ (0x0e00 << 16) | (0x8c2c >> 2),
+ (0x0e00 << 16) | (0x8c30 >> 2),
+ (0x0e00 << 16) | (0x8c34 >> 2),
+ (0x0e00 << 16) | (0x9600 >> 2),
+};
+
+static const u32 kalindi_rlc_save_restore_register_list[] =
+{
+ (0x0e00 << 16) | (0xc12c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc140 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc150 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc15c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc168 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc170 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc204 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2b8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2bc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2c0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8228 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x829c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x869c >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x98f4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x98f8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9900 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc260 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x90e8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c000 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c00c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c1c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9700 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x89bc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8900 >> 2),
+ 0x00000000,
+ 0x3,
+ (0x0e00 << 16) | (0xc130 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc134 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc1fc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc208 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc264 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc268 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc26c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc270 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc274 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc28c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc290 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc294 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc298 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2a0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2a4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2a8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2ac >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x301d0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30238 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30250 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30254 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30258 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3025c >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc99c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9834 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f00 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f04 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f08 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f0c >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x9b7c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8a14 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8a18 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a00 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8bf0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8bcc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8b24 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30a04 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a10 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a14 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a18 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a2c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc700 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc704 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc708 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc768 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc770 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc774 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc798 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc79c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9100 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c010 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c00 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c04 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c20 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c38 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c3c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xae00 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9604 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac08 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac0c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac10 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac14 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac58 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac68 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac6c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac70 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac74 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac78 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac7c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac80 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac84 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac88 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac8c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x970c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9714 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9718 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x971c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xcd10 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xcd14 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88b0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88b8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88bc >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0x89c0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88c4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88c8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88d0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88d4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88d8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8980 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30938 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3093c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30940 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x89a0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30900 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30904 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x89b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3e1fc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c210 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c214 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c218 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8904 >> 2),
+ 0x00000000,
+ 0x5,
+ (0x0e00 << 16) | (0x8c28 >> 2),
+ (0x0e00 << 16) | (0x8c2c >> 2),
+ (0x0e00 << 16) | (0x8c30 >> 2),
+ (0x0e00 << 16) | (0x8c34 >> 2),
+ (0x0e00 << 16) | (0x9600 >> 2),
+};
+
static const u32 bonaire_golden_spm_registers[] =
{
0x30800, 0xe0ffffff, 0xe0000000
@@ -742,23 +1551,15 @@ static int ci_mc_load_microcode(struct radeon_device *rdev)
*/
static int cik_init_microcode(struct radeon_device *rdev)
{
- struct platform_device *pdev;
const char *chip_name;
size_t pfp_req_size, me_req_size, ce_req_size,
mec_req_size, rlc_req_size, mc_req_size,
- sdma_req_size;
+ sdma_req_size, smc_req_size;
char fw_name[30];
int err;
DRM_DEBUG("\n");
- pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
- err = IS_ERR(pdev);
- if (err) {
- printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
- return -EINVAL;
- }
-
switch (rdev->family) {
case CHIP_BONAIRE:
chip_name = "BONAIRE";
@@ -769,6 +1570,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
mc_req_size = CIK_MC_UCODE_SIZE * 4;
sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
+ smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
break;
case CHIP_KAVERI:
chip_name = "KAVERI";
@@ -794,7 +1596,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
DRM_INFO("Loading %s Microcode\n", chip_name);
snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
- err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
+ err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
if (err)
goto out;
if (rdev->pfp_fw->size != pfp_req_size) {
@@ -806,7 +1608,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
}
snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
- err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
+ err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
if (err)
goto out;
if (rdev->me_fw->size != me_req_size) {
@@ -817,7 +1619,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
}
snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
- err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
+ err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
if (err)
goto out;
if (rdev->ce_fw->size != ce_req_size) {
@@ -828,7 +1630,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
}
snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
- err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
+ err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
if (err)
goto out;
if (rdev->mec_fw->size != mec_req_size) {
@@ -839,7 +1641,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
}
snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
- err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
+ err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
if (err)
goto out;
if (rdev->rlc_fw->size != rlc_req_size) {
@@ -850,7 +1652,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
}
snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
- err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
+ err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
if (err)
goto out;
if (rdev->sdma_fw->size != sdma_req_size) {
@@ -860,10 +1662,10 @@ static int cik_init_microcode(struct radeon_device *rdev)
err = -EINVAL;
}
- /* No MC ucode on APUs */
+ /* No SMC, MC ucode on APUs */
if (!(rdev->flags & RADEON_IS_IGP)) {
snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
- err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
+ err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
if (err)
goto out;
if (rdev->mc_fw->size != mc_req_size) {
@@ -872,11 +1674,24 @@ static int cik_init_microcode(struct radeon_device *rdev)
rdev->mc_fw->size, fw_name);
err = -EINVAL;
}
+
+ snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
+ err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
+ if (err) {
+ printk(KERN_ERR
+ "smc: error loading firmware \"%s\"\n",
+ fw_name);
+ release_firmware(rdev->smc_fw);
+ rdev->smc_fw = NULL;
+ } else if (rdev->smc_fw->size != smc_req_size) {
+ printk(KERN_ERR
+ "cik_smc: Bogus length %zu in firmware \"%s\"\n",
+ rdev->smc_fw->size, fw_name);
+ err = -EINVAL;
+ }
}
out:
- platform_device_unregister(pdev);
-
if (err) {
if (err != -EINVAL)
printk(KERN_ERR
@@ -892,6 +1707,8 @@ out:
rdev->rlc_fw = NULL;
release_firmware(rdev->mc_fw);
rdev->mc_fw = NULL;
+ release_firmware(rdev->smc_fw);
+ rdev->smc_fw = NULL;
}
return err;
}
@@ -1891,7 +2708,46 @@ static void cik_gpu_init(struct radeon_device *rdev)
gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_KAVERI:
- /* TODO */
+ rdev->config.cik.max_shader_engines = 1;
+ rdev->config.cik.max_tile_pipes = 4;
+ if ((rdev->pdev->device == 0x1304) ||
+ (rdev->pdev->device == 0x1305) ||
+ (rdev->pdev->device == 0x130C) ||
+ (rdev->pdev->device == 0x130F) ||
+ (rdev->pdev->device == 0x1310) ||
+ (rdev->pdev->device == 0x1311) ||
+ (rdev->pdev->device == 0x131C)) {
+ rdev->config.cik.max_cu_per_sh = 8;
+ rdev->config.cik.max_backends_per_se = 2;
+ } else if ((rdev->pdev->device == 0x1309) ||
+ (rdev->pdev->device == 0x130A) ||
+ (rdev->pdev->device == 0x130D) ||
+ (rdev->pdev->device == 0x1313)) {
+ rdev->config.cik.max_cu_per_sh = 6;
+ rdev->config.cik.max_backends_per_se = 2;
+ } else if ((rdev->pdev->device == 0x1306) ||
+ (rdev->pdev->device == 0x1307) ||
+ (rdev->pdev->device == 0x130B) ||
+ (rdev->pdev->device == 0x130E) ||
+ (rdev->pdev->device == 0x1315) ||
+ (rdev->pdev->device == 0x131B)) {
+ rdev->config.cik.max_cu_per_sh = 4;
+ rdev->config.cik.max_backends_per_se = 1;
+ } else {
+ rdev->config.cik.max_cu_per_sh = 3;
+ rdev->config.cik.max_backends_per_se = 1;
+ }
+ rdev->config.cik.max_sh_per_se = 1;
+ rdev->config.cik.max_texture_channel_caches = 4;
+ rdev->config.cik.max_gprs = 256;
+ rdev->config.cik.max_gs_threads = 16;
+ rdev->config.cik.max_hw_contexts = 8;
+
+ rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
+ rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
+ rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
+ rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_KABINI:
default:
@@ -2546,8 +3402,8 @@ static int cik_cp_gfx_resume(struct radeon_device *rdev)
/* ring 0 - compute and gfx */
/* Set ring buffer size */
ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
- rb_bufsz = drm_order(ring->ring_size / 8);
- tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
tmp |= BUF_SWAP_32BIT;
#endif
@@ -2598,11 +3454,12 @@ u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
if (rdev->wb.enabled) {
rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
} else {
+ mutex_lock(&rdev->srbm_mutex);
cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
rptr = RREG32(CP_HQD_PQ_RPTR);
cik_srbm_select(rdev, 0, 0, 0, 0);
+ mutex_unlock(&rdev->srbm_mutex);
}
- rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
return rptr;
}
@@ -2615,11 +3472,12 @@ u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
if (rdev->wb.enabled) {
wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
} else {
+ mutex_lock(&rdev->srbm_mutex);
cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
wptr = RREG32(CP_HQD_PQ_WPTR);
cik_srbm_select(rdev, 0, 0, 0, 0);
+ mutex_unlock(&rdev->srbm_mutex);
}
- wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
return wptr;
}
@@ -2627,10 +3485,8 @@ u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
void cik_compute_ring_set_wptr(struct radeon_device *rdev,
struct radeon_ring *ring)
{
- u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
-
- rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
- WDOORBELL32(ring->doorbell_offset, wptr);
+ rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
+ WDOORBELL32(ring->doorbell_offset, ring->wptr);
}
/**
@@ -2908,6 +3764,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
WREG32(CP_CPF_DEBUG, tmp);
/* init the pipes */
+ mutex_lock(&rdev->srbm_mutex);
for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
int me = (i < 4) ? 1 : 2;
int pipe = (i < 4) ? i : (i - 4);
@@ -2926,10 +3783,11 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
tmp = RREG32(CP_HPD_EOP_CONTROL);
tmp &= ~EOP_SIZE_MASK;
- tmp |= drm_order(MEC_HPD_SIZE / 8);
+ tmp |= order_base_2(MEC_HPD_SIZE / 8);
WREG32(CP_HPD_EOP_CONTROL, tmp);
}
cik_srbm_select(rdev, 0, 0, 0, 0);
+ mutex_unlock(&rdev->srbm_mutex);
/* init the queues. Just two for now. */
for (i = 0; i < 2; i++) {
@@ -2983,6 +3841,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
mqd->static_thread_mgmt23[0] = 0xffffffff;
mqd->static_thread_mgmt23[1] = 0xffffffff;
+ mutex_lock(&rdev->srbm_mutex);
cik_srbm_select(rdev, rdev->ring[idx].me,
rdev->ring[idx].pipe,
rdev->ring[idx].queue, 0);
@@ -3041,9 +3900,9 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
mqd->queue_state.cp_hqd_pq_control |=
- drm_order(rdev->ring[idx].ring_size / 8);
+ order_base_2(rdev->ring[idx].ring_size / 8);
mqd->queue_state.cp_hqd_pq_control |=
- (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
+ (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
@@ -3110,6 +3969,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
cik_srbm_select(rdev, 0, 0, 0, 0);
+ mutex_unlock(&rdev->srbm_mutex);
radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
@@ -3153,13 +4013,6 @@ static int cik_cp_resume(struct radeon_device *rdev)
{
int r;
- /* Reset all cp blocks */
- WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
- RREG32(GRBM_SOFT_RESET);
- mdelay(15);
- WREG32(GRBM_SOFT_RESET, 0);
- RREG32(GRBM_SOFT_RESET);
-
r = cik_cp_load_microcode(rdev);
if (r)
return r;
@@ -3174,579 +4027,6 @@ static int cik_cp_resume(struct radeon_device *rdev)
return 0;
}
-/*
- * sDMA - System DMA
- * Starting with CIK, the GPU has new asynchronous
- * DMA engines. These engines are used for compute
- * and gfx. There are two DMA engines (SDMA0, SDMA1)
- * and each one supports 1 ring buffer used for gfx
- * and 2 queues used for compute.
- *
- * The programming model is very similar to the CP
- * (ring buffer, IBs, etc.), but sDMA has it's own
- * packet format that is different from the PM4 format
- * used by the CP. sDMA supports copying data, writing
- * embedded data, solid fills, and a number of other
- * things. It also has support for tiling/detiling of
- * buffers.
- */
-/**
- * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
- *
- * @rdev: radeon_device pointer
- * @ib: IB object to schedule
- *
- * Schedule an IB in the DMA ring (CIK).
- */
-void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
- struct radeon_ib *ib)
-{
- struct radeon_ring *ring = &rdev->ring[ib->ring];
- u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
-
- if (rdev->wb.enabled) {
- u32 next_rptr = ring->wptr + 5;
- while ((next_rptr & 7) != 4)
- next_rptr++;
- next_rptr += 4;
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
- radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
- radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
- radeon_ring_write(ring, 1); /* number of DWs to follow */
- radeon_ring_write(ring, next_rptr);
- }
-
- /* IB packet must end on a 8 DW boundary */
- while ((ring->wptr & 7) != 4)
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
- radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
- radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
- radeon_ring_write(ring, ib->length_dw);
-
-}
-
-/**
- * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
- *
- * @rdev: radeon_device pointer
- * @fence: radeon fence object
- *
- * Add a DMA fence packet to the ring to write
- * the fence seq number and DMA trap packet to generate
- * an interrupt if needed (CIK).
- */
-void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
- struct radeon_fence *fence)
-{
- struct radeon_ring *ring = &rdev->ring[fence->ring];
- u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
- u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
- SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
- u32 ref_and_mask;
-
- if (fence->ring == R600_RING_TYPE_DMA_INDEX)
- ref_and_mask = SDMA0;
- else
- ref_and_mask = SDMA1;
-
- /* write the fence */
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
- radeon_ring_write(ring, addr & 0xffffffff);
- radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
- radeon_ring_write(ring, fence->seq);
- /* generate an interrupt */
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
- /* flush HDP */
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
- radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
- radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
- radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
- radeon_ring_write(ring, ref_and_mask); /* MASK */
- radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
-}
-
-/**
- * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- * @semaphore: radeon semaphore object
- * @emit_wait: wait or signal semaphore
- *
- * Add a DMA semaphore packet to the ring wait on or signal
- * other rings (CIK).
- */
-void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
- struct radeon_ring *ring,
- struct radeon_semaphore *semaphore,
- bool emit_wait)
-{
- u64 addr = semaphore->gpu_addr;
- u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
-
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
- radeon_ring_write(ring, addr & 0xfffffff8);
- radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
-}
-
-/**
- * cik_sdma_gfx_stop - stop the gfx async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Stop the gfx async dma ring buffers (CIK).
- */
-static void cik_sdma_gfx_stop(struct radeon_device *rdev)
-{
- u32 rb_cntl, reg_offset;
- int i;
-
- radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
-
- for (i = 0; i < 2; i++) {
- if (i == 0)
- reg_offset = SDMA0_REGISTER_OFFSET;
- else
- reg_offset = SDMA1_REGISTER_OFFSET;
- rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
- rb_cntl &= ~SDMA_RB_ENABLE;
- WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
- WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
- }
-}
-
-/**
- * cik_sdma_rlc_stop - stop the compute async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Stop the compute async dma queues (CIK).
- */
-static void cik_sdma_rlc_stop(struct radeon_device *rdev)
-{
- /* XXX todo */
-}
-
-/**
- * cik_sdma_enable - stop the async dma engines
- *
- * @rdev: radeon_device pointer
- * @enable: enable/disable the DMA MEs.
- *
- * Halt or unhalt the async dma engines (CIK).
- */
-static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
-{
- u32 me_cntl, reg_offset;
- int i;
-
- for (i = 0; i < 2; i++) {
- if (i == 0)
- reg_offset = SDMA0_REGISTER_OFFSET;
- else
- reg_offset = SDMA1_REGISTER_OFFSET;
- me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
- if (enable)
- me_cntl &= ~SDMA_HALT;
- else
- me_cntl |= SDMA_HALT;
- WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
- }
-}
-
-/**
- * cik_sdma_gfx_resume - setup and start the async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Set up the gfx DMA ring buffers and enable them (CIK).
- * Returns 0 for success, error for failure.
- */
-static int cik_sdma_gfx_resume(struct radeon_device *rdev)
-{
- struct radeon_ring *ring;
- u32 rb_cntl, ib_cntl;
- u32 rb_bufsz;
- u32 reg_offset, wb_offset;
- int i, r;
-
- for (i = 0; i < 2; i++) {
- if (i == 0) {
- ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
- reg_offset = SDMA0_REGISTER_OFFSET;
- wb_offset = R600_WB_DMA_RPTR_OFFSET;
- } else {
- ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
- reg_offset = SDMA1_REGISTER_OFFSET;
- wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
- }
-
- WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
- WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
-
- /* Set ring buffer size in dwords */
- rb_bufsz = drm_order(ring->ring_size / 4);
- rb_cntl = rb_bufsz << 1;
-#ifdef __BIG_ENDIAN
- rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
-#endif
- WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
-
- /* Initialize the ring buffer's read and write pointers */
- WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
- WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
-
- /* set the wb address whether it's enabled or not */
- WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
- upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
- WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
- ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
-
- if (rdev->wb.enabled)
- rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
-
- WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
- WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
-
- ring->wptr = 0;
- WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
-
- ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
-
- /* enable DMA RB */
- WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
-
- ib_cntl = SDMA_IB_ENABLE;
-#ifdef __BIG_ENDIAN
- ib_cntl |= SDMA_IB_SWAP_ENABLE;
-#endif
- /* enable DMA IBs */
- WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
-
- ring->ready = true;
-
- r = radeon_ring_test(rdev, ring->idx, ring);
- if (r) {
- ring->ready = false;
- return r;
- }
- }
-
- radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
-
- return 0;
-}
-
-/**
- * cik_sdma_rlc_resume - setup and start the async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Set up the compute DMA queues and enable them (CIK).
- * Returns 0 for success, error for failure.
- */
-static int cik_sdma_rlc_resume(struct radeon_device *rdev)
-{
- /* XXX todo */
- return 0;
-}
-
-/**
- * cik_sdma_load_microcode - load the sDMA ME ucode
- *
- * @rdev: radeon_device pointer
- *
- * Loads the sDMA0/1 ucode.
- * Returns 0 for success, -EINVAL if the ucode is not available.
- */
-static int cik_sdma_load_microcode(struct radeon_device *rdev)
-{
- const __be32 *fw_data;
- int i;
-
- if (!rdev->sdma_fw)
- return -EINVAL;
-
- /* stop the gfx rings and rlc compute queues */
- cik_sdma_gfx_stop(rdev);
- cik_sdma_rlc_stop(rdev);
-
- /* halt the MEs */
- cik_sdma_enable(rdev, false);
-
- /* sdma0 */
- fw_data = (const __be32 *)rdev->sdma_fw->data;
- WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
- for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
- WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
- WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
-
- /* sdma1 */
- fw_data = (const __be32 *)rdev->sdma_fw->data;
- WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
- for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
- WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
- WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
-
- WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
- WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
- return 0;
-}
-
-/**
- * cik_sdma_resume - setup and start the async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Set up the DMA engines and enable them (CIK).
- * Returns 0 for success, error for failure.
- */
-static int cik_sdma_resume(struct radeon_device *rdev)
-{
- int r;
-
- /* Reset dma */
- WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
- RREG32(SRBM_SOFT_RESET);
- udelay(50);
- WREG32(SRBM_SOFT_RESET, 0);
- RREG32(SRBM_SOFT_RESET);
-
- r = cik_sdma_load_microcode(rdev);
- if (r)
- return r;
-
- /* unhalt the MEs */
- cik_sdma_enable(rdev, true);
-
- /* start the gfx rings and rlc compute queues */
- r = cik_sdma_gfx_resume(rdev);
- if (r)
- return r;
- r = cik_sdma_rlc_resume(rdev);
- if (r)
- return r;
-
- return 0;
-}
-
-/**
- * cik_sdma_fini - tear down the async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Stop the async dma engines and free the rings (CIK).
- */
-static void cik_sdma_fini(struct radeon_device *rdev)
-{
- /* stop the gfx rings and rlc compute queues */
- cik_sdma_gfx_stop(rdev);
- cik_sdma_rlc_stop(rdev);
- /* halt the MEs */
- cik_sdma_enable(rdev, false);
- radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
- radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
- /* XXX - compute dma queue tear down */
-}
-
-/**
- * cik_copy_dma - copy pages using the DMA engine
- *
- * @rdev: radeon_device pointer
- * @src_offset: src GPU address
- * @dst_offset: dst GPU address
- * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
- *
- * Copy GPU paging using the DMA engine (CIK).
- * Used by the radeon ttm implementation to move pages if
- * registered as the asic copy callback.
- */
-int cik_copy_dma(struct radeon_device *rdev,
- uint64_t src_offset, uint64_t dst_offset,
- unsigned num_gpu_pages,
- struct radeon_fence **fence)
-{
- struct radeon_semaphore *sem = NULL;
- int ring_index = rdev->asic->copy.dma_ring_index;
- struct radeon_ring *ring = &rdev->ring[ring_index];
- u32 size_in_bytes, cur_size_in_bytes;
- int i, num_loops;
- int r = 0;
-
- r = radeon_semaphore_create(rdev, &sem);
- if (r) {
- DRM_ERROR("radeon: moving bo (%d).\n", r);
- return r;
- }
-
- size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
- num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
- r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
- if (r) {
- DRM_ERROR("radeon: moving bo (%d).\n", r);
- radeon_semaphore_free(rdev, &sem, NULL);
- return r;
- }
-
- if (radeon_fence_need_sync(*fence, ring->idx)) {
- radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
- ring->idx);
- radeon_fence_note_sync(*fence, ring->idx);
- } else {
- radeon_semaphore_free(rdev, &sem, NULL);
- }
-
- for (i = 0; i < num_loops; i++) {
- cur_size_in_bytes = size_in_bytes;
- if (cur_size_in_bytes > 0x1fffff)
- cur_size_in_bytes = 0x1fffff;
- size_in_bytes -= cur_size_in_bytes;
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
- radeon_ring_write(ring, cur_size_in_bytes);
- radeon_ring_write(ring, 0); /* src/dst endian swap */
- radeon_ring_write(ring, src_offset & 0xffffffff);
- radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
- radeon_ring_write(ring, dst_offset & 0xfffffffc);
- radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
- src_offset += cur_size_in_bytes;
- dst_offset += cur_size_in_bytes;
- }
-
- r = radeon_fence_emit(rdev, fence, ring->idx);
- if (r) {
- radeon_ring_unlock_undo(rdev, ring);
- return r;
- }
-
- radeon_ring_unlock_commit(rdev, ring);
- radeon_semaphore_free(rdev, &sem, *fence);
-
- return r;
-}
-
-/**
- * cik_sdma_ring_test - simple async dma engine test
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- *
- * Test the DMA engine by writing using it to write an
- * value to memory. (CIK).
- * Returns 0 for success, error for failure.
- */
-int cik_sdma_ring_test(struct radeon_device *rdev,
- struct radeon_ring *ring)
-{
- unsigned i;
- int r;
- void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
- u32 tmp;
-
- if (!ptr) {
- DRM_ERROR("invalid vram scratch pointer\n");
- return -EINVAL;
- }
-
- tmp = 0xCAFEDEAD;
- writel(tmp, ptr);
-
- r = radeon_ring_lock(rdev, ring, 4);
- if (r) {
- DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
- return r;
- }
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
- radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
- radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
- radeon_ring_write(ring, 1); /* number of DWs to follow */
- radeon_ring_write(ring, 0xDEADBEEF);
- radeon_ring_unlock_commit(rdev, ring);
-
- for (i = 0; i < rdev->usec_timeout; i++) {
- tmp = readl(ptr);
- if (tmp == 0xDEADBEEF)
- break;
- DRM_UDELAY(1);
- }
-
- if (i < rdev->usec_timeout) {
- DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
- } else {
- DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
- ring->idx, tmp);
- r = -EINVAL;
- }
- return r;
-}
-
-/**
- * cik_sdma_ib_test - test an IB on the DMA engine
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- *
- * Test a simple IB in the DMA ring (CIK).
- * Returns 0 on success, error on failure.
- */
-int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
-{
- struct radeon_ib ib;
- unsigned i;
- int r;
- void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
- u32 tmp = 0;
-
- if (!ptr) {
- DRM_ERROR("invalid vram scratch pointer\n");
- return -EINVAL;
- }
-
- tmp = 0xCAFEDEAD;
- writel(tmp, ptr);
-
- r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
- if (r) {
- DRM_ERROR("radeon: failed to get ib (%d).\n", r);
- return r;
- }
-
- ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
- ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
- ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
- ib.ptr[3] = 1;
- ib.ptr[4] = 0xDEADBEEF;
- ib.length_dw = 5;
-
- r = radeon_ib_schedule(rdev, &ib, NULL);
- if (r) {
- radeon_ib_free(rdev, &ib);
- DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
- return r;
- }
- r = radeon_fence_wait(ib.fence, false);
- if (r) {
- DRM_ERROR("radeon: fence wait failed (%d).\n", r);
- return r;
- }
- for (i = 0; i < rdev->usec_timeout; i++) {
- tmp = readl(ptr);
- if (tmp == 0xDEADBEEF)
- break;
- DRM_UDELAY(1);
- }
- if (i < rdev->usec_timeout) {
- DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
- } else {
- DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
- r = -EINVAL;
- }
- radeon_ib_free(rdev, &ib);
- return r;
-}
-
-
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
@@ -3796,7 +4076,7 @@ static void cik_print_gpu_status_regs(struct radeon_device *rdev)
* mask to be used by cik_gpu_soft_reset().
* Returns a mask of the blocks to be reset.
*/
-static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
+u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
u32 reset_mask = 0;
u32 tmp;
@@ -4047,34 +4327,6 @@ bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
return radeon_ring_test_lockup(rdev, ring);
}
-/**
- * cik_sdma_is_lockup - Check if the DMA engine is locked up
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- *
- * Check if the async DMA engine is locked up (CIK).
- * Returns true if the engine appears to be locked up, false if not.
- */
-bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
-{
- u32 reset_mask = cik_gpu_check_soft_reset(rdev);
- u32 mask;
-
- if (ring->idx == R600_RING_TYPE_DMA_INDEX)
- mask = RADEON_RESET_DMA;
- else
- mask = RADEON_RESET_DMA1;
-
- if (!(reset_mask & mask)) {
- radeon_ring_lockup_update(ring);
- return false;
- }
- /* force ring activities */
- radeon_ring_force_activity(rdev, ring);
- return radeon_ring_test_lockup(rdev, ring);
-}
-
/* MC */
/**
* cik_mc_program - program the GPU memory controller
@@ -4331,6 +4583,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev)
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&rdev->srbm_mutex);
for (i = 0; i < 16; i++) {
cik_srbm_select(rdev, 0, 0, 0, i);
/* CP and shaders */
@@ -4346,6 +4599,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev)
/* XXX SDMA RLC - todo */
}
cik_srbm_select(rdev, 0, 0, 0, 0);
+ mutex_unlock(&rdev->srbm_mutex);
cik_pcie_gart_tlb_flush(rdev);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -4453,6 +4707,29 @@ void cik_vm_fini(struct radeon_device *rdev)
}
/**
+ * cik_vm_decode_fault - print human readable fault info
+ *
+ * @rdev: radeon_device pointer
+ * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
+ * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
+ *
+ * Print human readable fault information (CIK).
+ */
+static void cik_vm_decode_fault(struct radeon_device *rdev,
+ u32 status, u32 addr, u32 mc_client)
+{
+ u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
+ u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
+ u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
+ char *block = (char *)&mc_client;
+
+ printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
+ protections, vmid, addr,
+ (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
+ block, mc_id);
+}
+
+/**
* cik_vm_flush - cik vm flush using the CP
*
* @rdev: radeon_device pointer
@@ -4586,131 +4863,8 @@ void cik_vm_set_page(struct radeon_device *rdev,
}
} else {
/* DMA */
- if (flags & RADEON_VM_PAGE_SYSTEM) {
- while (count) {
- ndw = count * 2;
- if (ndw > 0xFFFFE)
- ndw = 0xFFFFE;
-
- /* for non-physically contiguous pages (system) */
- ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
- ib->ptr[ib->length_dw++] = pe;
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
- ib->ptr[ib->length_dw++] = ndw;
- for (; ndw > 0; ndw -= 2, --count, pe += 8) {
- if (flags & RADEON_VM_PAGE_SYSTEM) {
- value = radeon_vm_map_gart(rdev, addr);
- value &= 0xFFFFFFFFFFFFF000ULL;
- } else if (flags & RADEON_VM_PAGE_VALID) {
- value = addr;
- } else {
- value = 0;
- }
- addr += incr;
- value |= r600_flags;
- ib->ptr[ib->length_dw++] = value;
- ib->ptr[ib->length_dw++] = upper_32_bits(value);
- }
- }
- } else {
- while (count) {
- ndw = count;
- if (ndw > 0x7FFFF)
- ndw = 0x7FFFF;
-
- if (flags & RADEON_VM_PAGE_VALID)
- value = addr;
- else
- value = 0;
- /* for physically contiguous pages (vram) */
- ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
- ib->ptr[ib->length_dw++] = pe; /* dst addr */
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
- ib->ptr[ib->length_dw++] = r600_flags; /* mask */
- ib->ptr[ib->length_dw++] = 0;
- ib->ptr[ib->length_dw++] = value; /* value */
- ib->ptr[ib->length_dw++] = upper_32_bits(value);
- ib->ptr[ib->length_dw++] = incr; /* increment size */
- ib->ptr[ib->length_dw++] = 0;
- ib->ptr[ib->length_dw++] = ndw; /* number of entries */
- pe += ndw * 8;
- addr += ndw * incr;
- count -= ndw;
- }
- }
- while (ib->length_dw & 0x7)
- ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
- }
-}
-
-/**
- * cik_dma_vm_flush - cik vm flush using sDMA
- *
- * @rdev: radeon_device pointer
- *
- * Update the page table base and flush the VM TLB
- * using sDMA (CIK).
- */
-void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
-{
- struct radeon_ring *ring = &rdev->ring[ridx];
- u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
- SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
- u32 ref_and_mask;
-
- if (vm == NULL)
- return;
-
- if (ridx == R600_RING_TYPE_DMA_INDEX)
- ref_and_mask = SDMA0;
- else
- ref_and_mask = SDMA1;
-
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
- if (vm->id < 8) {
- radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
- } else {
- radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+ cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
}
- radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
-
- /* update SH_MEM_* regs */
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
- radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
- radeon_ring_write(ring, VMID(vm->id));
-
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
- radeon_ring_write(ring, SH_MEM_BASES >> 2);
- radeon_ring_write(ring, 0);
-
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
- radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
- radeon_ring_write(ring, 0);
-
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
- radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
- radeon_ring_write(ring, 1);
-
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
- radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
- radeon_ring_write(ring, 0);
-
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
- radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
- radeon_ring_write(ring, VMID(0));
-
- /* flush HDP */
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
- radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
- radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
- radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
- radeon_ring_write(ring, ref_and_mask); /* MASK */
- radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
-
- /* flush TLB */
- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
- radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
- radeon_ring_write(ring, 1 << vm->id);
}
/*
@@ -4719,31 +4873,34 @@ void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm
* variety of functions, the most important of which is
* the interrupt controller.
*/
-/**
- * cik_rlc_stop - stop the RLC ME
- *
- * @rdev: radeon_device pointer
- *
- * Halt the RLC ME (MicroEngine) (CIK).
- */
-static void cik_rlc_stop(struct radeon_device *rdev)
+static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
+ bool enable)
{
- int i, j, k;
- u32 mask, tmp;
+ u32 tmp = RREG32(CP_INT_CNTL_RING0);
- tmp = RREG32(CP_INT_CNTL_RING0);
- tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+ if (enable)
+ tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+ else
+ tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
WREG32(CP_INT_CNTL_RING0, tmp);
+}
- RREG32(CB_CGTT_SCLK_CTRL);
- RREG32(CB_CGTT_SCLK_CTRL);
- RREG32(CB_CGTT_SCLK_CTRL);
- RREG32(CB_CGTT_SCLK_CTRL);
+static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
+{
+ u32 tmp;
- tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
- WREG32(RLC_CGCG_CGLS_CTRL, tmp);
+ tmp = RREG32(RLC_LB_CNTL);
+ if (enable)
+ tmp |= LOAD_BALANCE_ENABLE;
+ else
+ tmp &= ~LOAD_BALANCE_ENABLE;
+ WREG32(RLC_LB_CNTL, tmp);
+}
- WREG32(RLC_CNTL, 0);
+static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
+{
+ u32 i, j, k;
+ u32 mask;
for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
@@ -4765,6 +4922,84 @@ static void cik_rlc_stop(struct radeon_device *rdev)
}
}
+static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
+{
+ u32 tmp;
+
+ tmp = RREG32(RLC_CNTL);
+ if (tmp != rlc)
+ WREG32(RLC_CNTL, rlc);
+}
+
+static u32 cik_halt_rlc(struct radeon_device *rdev)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(RLC_CNTL);
+
+ if (data & RLC_ENABLE) {
+ u32 i;
+
+ data &= ~RLC_ENABLE;
+ WREG32(RLC_CNTL, data);
+
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
+ break;
+ udelay(1);
+ }
+
+ cik_wait_for_rlc_serdes(rdev);
+ }
+
+ return orig;
+}
+
+void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
+{
+ u32 tmp, i, mask;
+
+ tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
+ WREG32(RLC_GPR_REG2, tmp);
+
+ mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ if ((RREG32(RLC_GPM_STAT) & mask) == mask)
+ break;
+ udelay(1);
+ }
+
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
+ break;
+ udelay(1);
+ }
+}
+
+void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
+ WREG32(RLC_GPR_REG2, tmp);
+}
+
+/**
+ * cik_rlc_stop - stop the RLC ME
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Halt the RLC ME (MicroEngine) (CIK).
+ */
+static void cik_rlc_stop(struct radeon_device *rdev)
+{
+ WREG32(RLC_CNTL, 0);
+
+ cik_enable_gui_idle_interrupt(rdev, false);
+
+ cik_wait_for_rlc_serdes(rdev);
+}
+
/**
* cik_rlc_start - start the RLC ME
*
@@ -4774,13 +5009,9 @@ static void cik_rlc_stop(struct radeon_device *rdev)
*/
static void cik_rlc_start(struct radeon_device *rdev)
{
- u32 tmp;
-
WREG32(RLC_CNTL, RLC_ENABLE);
- tmp = RREG32(CP_INT_CNTL_RING0);
- tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
- WREG32(CP_INT_CNTL_RING0, tmp);
+ cik_enable_gui_idle_interrupt(rdev, true);
udelay(50);
}
@@ -4796,8 +5027,7 @@ static void cik_rlc_start(struct radeon_device *rdev)
*/
static int cik_rlc_resume(struct radeon_device *rdev)
{
- u32 i, size;
- u32 clear_state_info[3];
+ u32 i, size, tmp;
const __be32 *fw_data;
if (!rdev->rlc_fw)
@@ -4818,12 +5048,15 @@ static int cik_rlc_resume(struct radeon_device *rdev)
cik_rlc_stop(rdev);
- WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
- RREG32(GRBM_SOFT_RESET);
- udelay(50);
- WREG32(GRBM_SOFT_RESET, 0);
- RREG32(GRBM_SOFT_RESET);
- udelay(50);
+ /* disable CG */
+ tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
+ WREG32(RLC_CGCG_CGLS_CTRL, tmp);
+
+ si_rlc_reset(rdev);
+
+ cik_init_pg(rdev);
+
+ cik_init_cg(rdev);
WREG32(RLC_LB_CNTR_INIT, 0);
WREG32(RLC_LB_CNTR_MAX, 0x00008000);
@@ -4842,20 +5075,757 @@ static int cik_rlc_resume(struct radeon_device *rdev)
WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
WREG32(RLC_GPM_UCODE_ADDR, 0);
- /* XXX */
- clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
- clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
- clear_state_info[2] = 0;//cik_default_size;
- WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
- for (i = 0; i < 3; i++)
- WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
- WREG32(RLC_DRIVER_DMA_STATUS, 0);
+ /* XXX - find out what chips support lbpw */
+ cik_enable_lbpw(rdev, false);
+
+ if (rdev->family == CHIP_BONAIRE)
+ WREG32(RLC_DRIVER_DMA_STATUS, 0);
cik_rlc_start(rdev);
return 0;
}
+static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
+{
+ u32 data, orig, tmp, tmp2;
+
+ orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
+
+ if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
+ cik_enable_gui_idle_interrupt(rdev, true);
+
+ tmp = cik_halt_rlc(rdev);
+
+ cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+ WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+ WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+ tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
+ WREG32(RLC_SERDES_WR_CTRL, tmp2);
+
+ cik_update_rlc(rdev, tmp);
+
+ data |= CGCG_EN | CGLS_EN;
+ } else {
+ cik_enable_gui_idle_interrupt(rdev, false);
+
+ RREG32(CB_CGTT_SCLK_CTRL);
+ RREG32(CB_CGTT_SCLK_CTRL);
+ RREG32(CB_CGTT_SCLK_CTRL);
+ RREG32(CB_CGTT_SCLK_CTRL);
+
+ data &= ~(CGCG_EN | CGLS_EN);
+ }
+
+ if (orig != data)
+ WREG32(RLC_CGCG_CGLS_CTRL, data);
+
+}
+
+static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
+{
+ u32 data, orig, tmp = 0;
+
+ if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
+ if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
+ if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
+ orig = data = RREG32(CP_MEM_SLP_CNTL);
+ data |= CP_MEM_LS_EN;
+ if (orig != data)
+ WREG32(CP_MEM_SLP_CNTL, data);
+ }
+ }
+
+ orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
+ data &= 0xfffffffd;
+ if (orig != data)
+ WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
+
+ tmp = cik_halt_rlc(rdev);
+
+ cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+ WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+ WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+ data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
+ WREG32(RLC_SERDES_WR_CTRL, data);
+
+ cik_update_rlc(rdev, tmp);
+
+ if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
+ orig = data = RREG32(CGTS_SM_CTRL_REG);
+ data &= ~SM_MODE_MASK;
+ data |= SM_MODE(0x2);
+ data |= SM_MODE_ENABLE;
+ data &= ~CGTS_OVERRIDE;
+ if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
+ (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
+ data &= ~CGTS_LS_OVERRIDE;
+ data &= ~ON_MONITOR_ADD_MASK;
+ data |= ON_MONITOR_ADD_EN;
+ data |= ON_MONITOR_ADD(0x96);
+ if (orig != data)
+ WREG32(CGTS_SM_CTRL_REG, data);
+ }
+ } else {
+ orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
+ data |= 0x00000002;
+ if (orig != data)
+ WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
+
+ data = RREG32(RLC_MEM_SLP_CNTL);
+ if (data & RLC_MEM_LS_EN) {
+ data &= ~RLC_MEM_LS_EN;
+ WREG32(RLC_MEM_SLP_CNTL, data);
+ }
+
+ data = RREG32(CP_MEM_SLP_CNTL);
+ if (data & CP_MEM_LS_EN) {
+ data &= ~CP_MEM_LS_EN;
+ WREG32(CP_MEM_SLP_CNTL, data);
+ }
+
+ orig = data = RREG32(CGTS_SM_CTRL_REG);
+ data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
+ if (orig != data)
+ WREG32(CGTS_SM_CTRL_REG, data);
+
+ tmp = cik_halt_rlc(rdev);
+
+ cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+ WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+ WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+ data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
+ WREG32(RLC_SERDES_WR_CTRL, data);
+
+ cik_update_rlc(rdev, tmp);
+ }
+}
+
+static const u32 mc_cg_registers[] =
+{
+ MC_HUB_MISC_HUB_CG,
+ MC_HUB_MISC_SIP_CG,
+ MC_HUB_MISC_VM_CG,
+ MC_XPB_CLK_GAT,
+ ATC_MISC_CG,
+ MC_CITF_MISC_WR_CG,
+ MC_CITF_MISC_RD_CG,
+ MC_CITF_MISC_VM_CG,
+ VM_L2_CG,
+};
+
+static void cik_enable_mc_ls(struct radeon_device *rdev,
+ bool enable)
+{
+ int i;
+ u32 orig, data;
+
+ for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
+ orig = data = RREG32(mc_cg_registers[i]);
+ if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
+ data |= MC_LS_ENABLE;
+ else
+ data &= ~MC_LS_ENABLE;
+ if (data != orig)
+ WREG32(mc_cg_registers[i], data);
+ }
+}
+
+static void cik_enable_mc_mgcg(struct radeon_device *rdev,
+ bool enable)
+{
+ int i;
+ u32 orig, data;
+
+ for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
+ orig = data = RREG32(mc_cg_registers[i]);
+ if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
+ data |= MC_CG_ENABLE;
+ else
+ data &= ~MC_CG_ENABLE;
+ if (data != orig)
+ WREG32(mc_cg_registers[i], data);
+ }
+}
+
+static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 orig, data;
+
+ if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
+ WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
+ WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
+ } else {
+ orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
+ data |= 0xff000000;
+ if (data != orig)
+ WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
+
+ orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
+ data |= 0xff000000;
+ if (data != orig)
+ WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
+ }
+}
+
+static void cik_enable_sdma_mgls(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 orig, data;
+
+ if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
+ orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
+ data |= 0x100;
+ if (orig != data)
+ WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
+
+ orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
+ data |= 0x100;
+ if (orig != data)
+ WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
+ } else {
+ orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
+ data &= ~0x100;
+ if (orig != data)
+ WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
+
+ orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
+ data &= ~0x100;
+ if (orig != data)
+ WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
+ }
+}
+
+static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 orig, data;
+
+ if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
+ data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
+ data = 0xfff;
+ WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(UVD_CGC_CTRL);
+ data |= DCM;
+ if (orig != data)
+ WREG32(UVD_CGC_CTRL, data);
+ } else {
+ data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
+ data &= ~0xfff;
+ WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(UVD_CGC_CTRL);
+ data &= ~DCM;
+ if (orig != data)
+ WREG32(UVD_CGC_CTRL, data);
+ }
+}
+
+static void cik_enable_bif_mgls(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 orig, data;
+
+ orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
+
+ if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
+ data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
+ REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
+ else
+ data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
+ REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
+
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_CNTL2, data);
+}
+
+static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 orig, data;
+
+ orig = data = RREG32(HDP_HOST_PATH_CNTL);
+
+ if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
+ data &= ~CLOCK_GATING_DIS;
+ else
+ data |= CLOCK_GATING_DIS;
+
+ if (orig != data)
+ WREG32(HDP_HOST_PATH_CNTL, data);
+}
+
+static void cik_enable_hdp_ls(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 orig, data;
+
+ orig = data = RREG32(HDP_MEM_POWER_LS);
+
+ if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
+ data |= HDP_LS_ENABLE;
+ else
+ data &= ~HDP_LS_ENABLE;
+
+ if (orig != data)
+ WREG32(HDP_MEM_POWER_LS, data);
+}
+
+void cik_update_cg(struct radeon_device *rdev,
+ u32 block, bool enable)
+{
+ if (block & RADEON_CG_BLOCK_GFX) {
+ /* order matters! */
+ if (enable) {
+ cik_enable_mgcg(rdev, true);
+ cik_enable_cgcg(rdev, true);
+ } else {
+ cik_enable_cgcg(rdev, false);
+ cik_enable_mgcg(rdev, false);
+ }
+ }
+
+ if (block & RADEON_CG_BLOCK_MC) {
+ if (!(rdev->flags & RADEON_IS_IGP)) {
+ cik_enable_mc_mgcg(rdev, enable);
+ cik_enable_mc_ls(rdev, enable);
+ }
+ }
+
+ if (block & RADEON_CG_BLOCK_SDMA) {
+ cik_enable_sdma_mgcg(rdev, enable);
+ cik_enable_sdma_mgls(rdev, enable);
+ }
+
+ if (block & RADEON_CG_BLOCK_BIF) {
+ cik_enable_bif_mgls(rdev, enable);
+ }
+
+ if (block & RADEON_CG_BLOCK_UVD) {
+ if (rdev->has_uvd)
+ cik_enable_uvd_mgcg(rdev, enable);
+ }
+
+ if (block & RADEON_CG_BLOCK_HDP) {
+ cik_enable_hdp_mgcg(rdev, enable);
+ cik_enable_hdp_ls(rdev, enable);
+ }
+}
+
+static void cik_init_cg(struct radeon_device *rdev)
+{
+
+ cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
+
+ if (rdev->has_uvd)
+ si_init_uvd_internal_cg(rdev);
+
+ cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
+ RADEON_CG_BLOCK_SDMA |
+ RADEON_CG_BLOCK_BIF |
+ RADEON_CG_BLOCK_UVD |
+ RADEON_CG_BLOCK_HDP), true);
+}
+
+static void cik_fini_cg(struct radeon_device *rdev)
+{
+ cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
+ RADEON_CG_BLOCK_SDMA |
+ RADEON_CG_BLOCK_BIF |
+ RADEON_CG_BLOCK_UVD |
+ RADEON_CG_BLOCK_HDP), false);
+
+ cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
+}
+
+static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(RLC_PG_CNTL);
+ if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
+ data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
+ else
+ data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
+ if (orig != data)
+ WREG32(RLC_PG_CNTL, data);
+}
+
+static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(RLC_PG_CNTL);
+ if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
+ data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
+ else
+ data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
+ if (orig != data)
+ WREG32(RLC_PG_CNTL, data);
+}
+
+static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(RLC_PG_CNTL);
+ if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
+ data &= ~DISABLE_CP_PG;
+ else
+ data |= DISABLE_CP_PG;
+ if (orig != data)
+ WREG32(RLC_PG_CNTL, data);
+}
+
+static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(RLC_PG_CNTL);
+ if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
+ data &= ~DISABLE_GDS_PG;
+ else
+ data |= DISABLE_GDS_PG;
+ if (orig != data)
+ WREG32(RLC_PG_CNTL, data);
+}
+
+#define CP_ME_TABLE_SIZE 96
+#define CP_ME_TABLE_OFFSET 2048
+#define CP_MEC_TABLE_OFFSET 4096
+
+void cik_init_cp_pg_table(struct radeon_device *rdev)
+{
+ const __be32 *fw_data;
+ volatile u32 *dst_ptr;
+ int me, i, max_me = 4;
+ u32 bo_offset = 0;
+ u32 table_offset;
+
+ if (rdev->family == CHIP_KAVERI)
+ max_me = 5;
+
+ if (rdev->rlc.cp_table_ptr == NULL)
+ return;
+
+ /* write the cp table buffer */
+ dst_ptr = rdev->rlc.cp_table_ptr;
+ for (me = 0; me < max_me; me++) {
+ if (me == 0) {
+ fw_data = (const __be32 *)rdev->ce_fw->data;
+ table_offset = CP_ME_TABLE_OFFSET;
+ } else if (me == 1) {
+ fw_data = (const __be32 *)rdev->pfp_fw->data;
+ table_offset = CP_ME_TABLE_OFFSET;
+ } else if (me == 2) {
+ fw_data = (const __be32 *)rdev->me_fw->data;
+ table_offset = CP_ME_TABLE_OFFSET;
+ } else {
+ fw_data = (const __be32 *)rdev->mec_fw->data;
+ table_offset = CP_MEC_TABLE_OFFSET;
+ }
+
+ for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
+ dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
+ }
+ bo_offset += CP_ME_TABLE_SIZE;
+ }
+}
+
+static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 data, orig;
+
+ if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG)) {
+ orig = data = RREG32(RLC_PG_CNTL);
+ data |= GFX_PG_ENABLE;
+ if (orig != data)
+ WREG32(RLC_PG_CNTL, data);
+
+ orig = data = RREG32(RLC_AUTO_PG_CTRL);
+ data |= AUTO_PG_EN;
+ if (orig != data)
+ WREG32(RLC_AUTO_PG_CTRL, data);
+ } else {
+ orig = data = RREG32(RLC_PG_CNTL);
+ data &= ~GFX_PG_ENABLE;
+ if (orig != data)
+ WREG32(RLC_PG_CNTL, data);
+
+ orig = data = RREG32(RLC_AUTO_PG_CTRL);
+ data &= ~AUTO_PG_EN;
+ if (orig != data)
+ WREG32(RLC_AUTO_PG_CTRL, data);
+
+ data = RREG32(DB_RENDER_CONTROL);
+ }
+}
+
+static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
+{
+ u32 mask = 0, tmp, tmp1;
+ int i;
+
+ cik_select_se_sh(rdev, se, sh);
+ tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
+ tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
+ cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+
+ tmp &= 0xffff0000;
+
+ tmp |= tmp1;
+ tmp >>= 16;
+
+ for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
+ mask <<= 1;
+ mask |= 1;
+ }
+
+ return (~tmp) & mask;
+}
+
+static void cik_init_ao_cu_mask(struct radeon_device *rdev)
+{
+ u32 i, j, k, active_cu_number = 0;
+ u32 mask, counter, cu_bitmap;
+ u32 tmp = 0;
+
+ for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
+ for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
+ mask = 1;
+ cu_bitmap = 0;
+ counter = 0;
+ for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
+ if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
+ if (counter < 2)
+ cu_bitmap |= mask;
+ counter ++;
+ }
+ mask <<= 1;
+ }
+
+ active_cu_number += counter;
+ tmp |= (cu_bitmap << (i * 16 + j * 8));
+ }
+ }
+
+ WREG32(RLC_PG_AO_CU_MASK, tmp);
+
+ tmp = RREG32(RLC_MAX_PG_CU);
+ tmp &= ~MAX_PU_CU_MASK;
+ tmp |= MAX_PU_CU(active_cu_number);
+ WREG32(RLC_MAX_PG_CU, tmp);
+}
+
+static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(RLC_PG_CNTL);
+ if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
+ data |= STATIC_PER_CU_PG_ENABLE;
+ else
+ data &= ~STATIC_PER_CU_PG_ENABLE;
+ if (orig != data)
+ WREG32(RLC_PG_CNTL, data);
+}
+
+static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(RLC_PG_CNTL);
+ if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
+ data |= DYN_PER_CU_PG_ENABLE;
+ else
+ data &= ~DYN_PER_CU_PG_ENABLE;
+ if (orig != data)
+ WREG32(RLC_PG_CNTL, data);
+}
+
+#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
+#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
+
+static void cik_init_gfx_cgpg(struct radeon_device *rdev)
+{
+ u32 data, orig;
+ u32 i;
+
+ if (rdev->rlc.cs_data) {
+ WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
+ WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
+ WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
+ WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
+ } else {
+ WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
+ for (i = 0; i < 3; i++)
+ WREG32(RLC_GPM_SCRATCH_DATA, 0);
+ }
+ if (rdev->rlc.reg_list) {
+ WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
+ for (i = 0; i < rdev->rlc.reg_list_size; i++)
+ WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
+ }
+
+ orig = data = RREG32(RLC_PG_CNTL);
+ data |= GFX_PG_SRC;
+ if (orig != data)
+ WREG32(RLC_PG_CNTL, data);
+
+ WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
+ WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
+
+ data = RREG32(CP_RB_WPTR_POLL_CNTL);
+ data &= ~IDLE_POLL_COUNT_MASK;
+ data |= IDLE_POLL_COUNT(0x60);
+ WREG32(CP_RB_WPTR_POLL_CNTL, data);
+
+ data = 0x10101010;
+ WREG32(RLC_PG_DELAY, data);
+
+ data = RREG32(RLC_PG_DELAY_2);
+ data &= ~0xff;
+ data |= 0x3;
+ WREG32(RLC_PG_DELAY_2, data);
+
+ data = RREG32(RLC_AUTO_PG_CTRL);
+ data &= ~GRBM_REG_SGIT_MASK;
+ data |= GRBM_REG_SGIT(0x700);
+ WREG32(RLC_AUTO_PG_CTRL, data);
+
+}
+
+static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
+{
+ cik_enable_gfx_cgpg(rdev, enable);
+ cik_enable_gfx_static_mgpg(rdev, enable);
+ cik_enable_gfx_dynamic_mgpg(rdev, enable);
+}
+
+u32 cik_get_csb_size(struct radeon_device *rdev)
+{
+ u32 count = 0;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ if (rdev->rlc.cs_data == NULL)
+ return 0;
+
+ /* begin clear state */
+ count += 2;
+ /* context control state */
+ count += 3;
+
+ for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT)
+ count += 2 + ext->reg_count;
+ else
+ return 0;
+ }
+ }
+ /* pa_sc_raster_config/pa_sc_raster_config1 */
+ count += 4;
+ /* end clear state */
+ count += 2;
+ /* clear state */
+ count += 2;
+
+ return count;
+}
+
+void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
+{
+ u32 count = 0, i;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ if (rdev->rlc.cs_data == NULL)
+ return;
+ if (buffer == NULL)
+ return;
+
+ buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
+ buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
+
+ buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
+ buffer[count++] = 0x80000000;
+ buffer[count++] = 0x80000000;
+
+ for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
+ buffer[count++] = ext->reg_index - 0xa000;
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = ext->extent[i];
+ } else {
+ return;
+ }
+ }
+ }
+
+ buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
+ buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
+ switch (rdev->family) {
+ case CHIP_BONAIRE:
+ buffer[count++] = 0x16000012;
+ buffer[count++] = 0x00000000;
+ break;
+ case CHIP_KAVERI:
+ buffer[count++] = 0x00000000; /* XXX */
+ buffer[count++] = 0x00000000;
+ break;
+ case CHIP_KABINI:
+ buffer[count++] = 0x00000000; /* XXX */
+ buffer[count++] = 0x00000000;
+ break;
+ default:
+ buffer[count++] = 0x00000000;
+ buffer[count++] = 0x00000000;
+ break;
+ }
+
+ buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
+ buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
+
+ buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
+ buffer[count++] = 0;
+}
+
+static void cik_init_pg(struct radeon_device *rdev)
+{
+ if (rdev->pg_flags) {
+ cik_enable_sck_slowdown_on_pu(rdev, true);
+ cik_enable_sck_slowdown_on_pd(rdev, true);
+ if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
+ cik_init_gfx_cgpg(rdev);
+ cik_enable_cp_pg(rdev, true);
+ cik_enable_gds_pg(rdev, true);
+ }
+ cik_init_ao_cu_mask(rdev);
+ cik_update_gfx_pg(rdev, true);
+ }
+}
+
+static void cik_fini_pg(struct radeon_device *rdev)
+{
+ if (rdev->pg_flags) {
+ cik_update_gfx_pg(rdev, false);
+ if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
+ cik_enable_cp_pg(rdev, false);
+ cik_enable_gds_pg(rdev, false);
+ }
+ }
+}
+
/*
* Interrupts
* Starting with r6xx, interrupts are handled via a ring buffer.
@@ -5018,7 +5988,7 @@ static int cik_irq_init(struct radeon_device *rdev)
WREG32(INTERRUPT_CNTL, interrupt_cntl);
WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
- rb_bufsz = drm_order(rdev->ih.ring_size / 4);
+ rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
IH_WPTR_OVERFLOW_CLEAR |
@@ -5074,6 +6044,7 @@ int cik_irq_set(struct radeon_device *rdev)
u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
u32 grbm_int_cntl = 0;
u32 dma_cntl, dma_cntl1;
+ u32 thermal_int;
if (!rdev->irq.installed) {
WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -5106,6 +6077,13 @@ int cik_irq_set(struct radeon_device *rdev)
cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
+ if (rdev->flags & RADEON_IS_IGP)
+ thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
+ ~(THERM_INTH_MASK | THERM_INTL_MASK);
+ else
+ thermal_int = RREG32_SMC(CG_THERMAL_INT) &
+ ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
+
/* enable CP interrupts on all rings */
if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
DRM_DEBUG("cik_irq_set: sw int gfx\n");
@@ -5263,6 +6241,14 @@ int cik_irq_set(struct radeon_device *rdev)
hpd6 |= DC_HPDx_INT_EN;
}
+ if (rdev->irq.dpm_thermal) {
+ DRM_DEBUG("dpm thermal\n");
+ if (rdev->flags & RADEON_IS_IGP)
+ thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
+ else
+ thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
+ }
+
WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
@@ -5297,6 +6283,11 @@ int cik_irq_set(struct radeon_device *rdev)
WREG32(DC_HPD5_INT_CONTROL, hpd5);
WREG32(DC_HPD6_INT_CONTROL, hpd6);
+ if (rdev->flags & RADEON_IS_IGP)
+ WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
+ else
+ WREG32_SMC(CG_THERMAL_INT, thermal_int);
+
return 0;
}
@@ -5507,6 +6498,8 @@ int cik_irq_process(struct radeon_device *rdev)
u32 ring_index;
bool queue_hotplug = false;
bool queue_reset = false;
+ u32 addr, status, mc_client;
+ bool queue_thermal = false;
if (!rdev->ih.enabled || rdev->shutdown)
return IRQ_NONE;
@@ -5740,13 +6733,21 @@ restart_ih:
break;
}
break;
+ case 124: /* UVD */
+ DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
+ radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
+ break;
case 146:
case 147:
+ addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
+ status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
+ mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
- RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+ addr);
dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
- RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+ status);
+ cik_vm_decode_fault(rdev, status, addr, mc_client);
/* reset addr and status */
WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
break;
@@ -5853,6 +6854,19 @@ restart_ih:
break;
}
break;
+ case 230: /* thermal low to high */
+ DRM_DEBUG("IH: thermal low to high\n");
+ rdev->pm.dpm.thermal.high_to_low = false;
+ queue_thermal = true;
+ break;
+ case 231: /* thermal high to low */
+ DRM_DEBUG("IH: thermal high to low\n");
+ rdev->pm.dpm.thermal.high_to_low = true;
+ queue_thermal = true;
+ break;
+ case 233: /* GUI IDLE */
+ DRM_DEBUG("IH: GUI idle\n");
+ break;
case 241: /* SDMA Privileged inst */
case 247: /* SDMA Privileged inst */
DRM_ERROR("Illegal instruction in SDMA command stream\n");
@@ -5892,9 +6906,6 @@ restart_ih:
break;
}
break;
- case 233: /* GUI IDLE */
- DRM_DEBUG("IH: GUI idle\n");
- break;
default:
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
break;
@@ -5908,6 +6919,8 @@ restart_ih:
schedule_work(&rdev->hotplug_work);
if (queue_reset)
schedule_work(&rdev->reset_work);
+ if (queue_thermal)
+ schedule_work(&rdev->pm.dpm.thermal.work);
rdev->ih.rptr = rptr;
WREG32(IH_RB_RPTR, rdev->ih.rptr);
atomic_set(&rdev->ih.lock, 0);
@@ -5937,6 +6950,18 @@ static int cik_startup(struct radeon_device *rdev)
struct radeon_ring *ring;
int r;
+ /* enable pcie gen2/3 link */
+ cik_pcie_gen3_enable(rdev);
+ /* enable aspm */
+ cik_program_aspm(rdev);
+
+ /* scratch needs to be initialized before MC */
+ r = r600_vram_scratch_init(rdev);
+ if (r)
+ return r;
+
+ cik_mc_program(rdev);
+
if (rdev->flags & RADEON_IS_IGP) {
if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
!rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
@@ -5964,18 +6989,26 @@ static int cik_startup(struct radeon_device *rdev)
}
}
- r = r600_vram_scratch_init(rdev);
- if (r)
- return r;
-
- cik_mc_program(rdev);
r = cik_pcie_gart_enable(rdev);
if (r)
return r;
cik_gpu_init(rdev);
/* allocate rlc buffers */
- r = si_rlc_init(rdev);
+ if (rdev->flags & RADEON_IS_IGP) {
+ if (rdev->family == CHIP_KAVERI) {
+ rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
+ rdev->rlc.reg_list_size =
+ (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
+ } else {
+ rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
+ rdev->rlc.reg_list_size =
+ (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
+ }
+ }
+ rdev->rlc.cs_data = ci_cs_data;
+ rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
+ r = sumo_rlc_init(rdev);
if (r) {
DRM_ERROR("Failed to init rlc BOs!\n");
return r;
@@ -6023,12 +7056,15 @@ static int cik_startup(struct radeon_device *rdev)
return r;
}
- r = cik_uvd_resume(rdev);
+ r = radeon_uvd_resume(rdev);
if (!r) {
- r = radeon_fence_driver_start_ring(rdev,
- R600_RING_TYPE_UVD_INDEX);
- if (r)
- dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+ r = uvd_v4_2_resume(rdev);
+ if (!r) {
+ r = radeon_fence_driver_start_ring(rdev,
+ R600_RING_TYPE_UVD_INDEX);
+ if (r)
+ dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+ }
}
if (r)
rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
@@ -6051,7 +7087,7 @@ static int cik_startup(struct radeon_device *rdev)
ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
CP_RB0_RPTR, CP_RB0_WPTR,
- 0, 0xfffff, RADEON_CP_PACKET2);
+ RADEON_CP_PACKET2);
if (r)
return r;
@@ -6060,7 +7096,7 @@ static int cik_startup(struct radeon_device *rdev)
ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
- 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
+ PACKET3(PACKET3_NOP, 0x3FFF));
if (r)
return r;
ring->me = 1; /* first MEC */
@@ -6072,7 +7108,7 @@ static int cik_startup(struct radeon_device *rdev)
ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
- 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
+ PACKET3(PACKET3_NOP, 0x3FFF));
if (r)
return r;
/* dGPU only have 1 MEC */
@@ -6085,7 +7121,7 @@ static int cik_startup(struct radeon_device *rdev)
r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
- 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+ SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
if (r)
return r;
@@ -6093,7 +7129,7 @@ static int cik_startup(struct radeon_device *rdev)
r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
- 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+ SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
if (r)
return r;
@@ -6107,12 +7143,11 @@ static int cik_startup(struct radeon_device *rdev)
ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
if (ring->ring_size) {
- r = radeon_ring_init(rdev, ring, ring->ring_size,
- R600_WB_UVD_RPTR_OFFSET,
+ r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
- 0, 0xfffff, RADEON_CP_PACKET2);
+ RADEON_CP_PACKET2);
if (!r)
- r = r600_uvd_init(rdev);
+ r = uvd_v1_0_init(rdev);
if (r)
DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
}
@@ -6129,6 +7164,10 @@ static int cik_startup(struct radeon_device *rdev)
return r;
}
+ r = dce6_audio_init(rdev);
+ if (r)
+ return r;
+
return 0;
}
@@ -6174,11 +7213,14 @@ int cik_resume(struct radeon_device *rdev)
*/
int cik_suspend(struct radeon_device *rdev)
{
+ dce6_audio_fini(rdev);
radeon_vm_manager_fini(rdev);
cik_cp_enable(rdev, false);
cik_sdma_enable(rdev, false);
- r600_uvd_rbc_stop(rdev);
+ uvd_v1_0_fini(rdev);
radeon_uvd_suspend(rdev);
+ cik_fini_pg(rdev);
+ cik_fini_cg(rdev);
cik_irq_suspend(rdev);
radeon_wb_disable(rdev);
cik_pcie_gart_disable(rdev);
@@ -6299,7 +7341,7 @@ int cik_init(struct radeon_device *rdev)
cik_cp_fini(rdev);
cik_sdma_fini(rdev);
cik_irq_fini(rdev);
- si_rlc_fini(rdev);
+ sumo_rlc_fini(rdev);
cik_mec_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
@@ -6334,13 +7376,16 @@ void cik_fini(struct radeon_device *rdev)
{
cik_cp_fini(rdev);
cik_sdma_fini(rdev);
+ cik_fini_pg(rdev);
+ cik_fini_cg(rdev);
cik_irq_fini(rdev);
- si_rlc_fini(rdev);
+ sumo_rlc_fini(rdev);
cik_mec_fini(rdev);
radeon_wb_fini(rdev);
radeon_vm_manager_fini(rdev);
radeon_ib_pool_fini(rdev);
radeon_irq_kms_fini(rdev);
+ uvd_v1_0_fini(rdev);
radeon_uvd_fini(rdev);
cik_pcie_gart_fini(rdev);
r600_vram_scratch_fini(rdev);
@@ -6369,8 +7414,8 @@ static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
struct radeon_crtc *radeon_crtc,
struct drm_display_mode *mode)
{
- u32 tmp;
-
+ u32 tmp, buffer_alloc, i;
+ u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
/*
* Line Buffer Setup
* There are 6 line buffers, one for each display controllers.
@@ -6380,22 +7425,37 @@ static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
* them using the stereo blender.
*/
if (radeon_crtc->base.enabled && mode) {
- if (mode->crtc_hdisplay < 1920)
+ if (mode->crtc_hdisplay < 1920) {
tmp = 1;
- else if (mode->crtc_hdisplay < 2560)
+ buffer_alloc = 2;
+ } else if (mode->crtc_hdisplay < 2560) {
tmp = 2;
- else if (mode->crtc_hdisplay < 4096)
+ buffer_alloc = 2;
+ } else if (mode->crtc_hdisplay < 4096) {
tmp = 0;
- else {
+ buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
+ } else {
DRM_DEBUG_KMS("Mode too big for LB!\n");
tmp = 0;
+ buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
}
- } else
+ } else {
tmp = 1;
+ buffer_alloc = 0;
+ }
WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
+ WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
+ DMIF_BUFFERS_ALLOCATED(buffer_alloc));
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
+ DMIF_BUFFERS_ALLOCATED_COMPLETED)
+ break;
+ udelay(1);
+ }
+
if (radeon_crtc->base.enabled && mode) {
switch (tmp) {
case 0:
@@ -6797,7 +7857,7 @@ static void dce8_program_watermarks(struct radeon_device *rdev,
u32 lb_size, u32 num_heads)
{
struct drm_display_mode *mode = &radeon_crtc->base.mode;
- struct dce8_wm_params wm;
+ struct dce8_wm_params wm_low, wm_high;
u32 pixel_period;
u32 line_time = 0;
u32 latency_watermark_a = 0, latency_watermark_b = 0;
@@ -6807,35 +7867,82 @@ static void dce8_program_watermarks(struct radeon_device *rdev,
pixel_period = 1000000 / (u32)mode->clock;
line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
- wm.yclk = rdev->pm.current_mclk * 10;
- wm.sclk = rdev->pm.current_sclk * 10;
- wm.disp_clk = mode->clock;
- wm.src_width = mode->crtc_hdisplay;
- wm.active_time = mode->crtc_hdisplay * pixel_period;
- wm.blank_time = line_time - wm.active_time;
- wm.interlaced = false;
+ /* watermark for high clocks */
+ if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
+ rdev->pm.dpm_enabled) {
+ wm_high.yclk =
+ radeon_dpm_get_mclk(rdev, false) * 10;
+ wm_high.sclk =
+ radeon_dpm_get_sclk(rdev, false) * 10;
+ } else {
+ wm_high.yclk = rdev->pm.current_mclk * 10;
+ wm_high.sclk = rdev->pm.current_sclk * 10;
+ }
+
+ wm_high.disp_clk = mode->clock;
+ wm_high.src_width = mode->crtc_hdisplay;
+ wm_high.active_time = mode->crtc_hdisplay * pixel_period;
+ wm_high.blank_time = line_time - wm_high.active_time;
+ wm_high.interlaced = false;
if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- wm.interlaced = true;
- wm.vsc = radeon_crtc->vsc;
- wm.vtaps = 1;
+ wm_high.interlaced = true;
+ wm_high.vsc = radeon_crtc->vsc;
+ wm_high.vtaps = 1;
if (radeon_crtc->rmx_type != RMX_OFF)
- wm.vtaps = 2;
- wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
- wm.lb_size = lb_size;
- wm.dram_channels = cik_get_number_of_dram_channels(rdev);
- wm.num_heads = num_heads;
+ wm_high.vtaps = 2;
+ wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_high.lb_size = lb_size;
+ wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
+ wm_high.num_heads = num_heads;
/* set for high clocks */
- latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
+ latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
+
+ /* possibly force display priority to high */
+ /* should really do this at mode validation time... */
+ if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
+ !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
+ !dce8_check_latency_hiding(&wm_high) ||
+ (rdev->disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ }
+
+ /* watermark for low clocks */
+ if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
+ rdev->pm.dpm_enabled) {
+ wm_low.yclk =
+ radeon_dpm_get_mclk(rdev, true) * 10;
+ wm_low.sclk =
+ radeon_dpm_get_sclk(rdev, true) * 10;
+ } else {
+ wm_low.yclk = rdev->pm.current_mclk * 10;
+ wm_low.sclk = rdev->pm.current_sclk * 10;
+ }
+
+ wm_low.disp_clk = mode->clock;
+ wm_low.src_width = mode->crtc_hdisplay;
+ wm_low.active_time = mode->crtc_hdisplay * pixel_period;
+ wm_low.blank_time = line_time - wm_low.active_time;
+ wm_low.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_low.interlaced = true;
+ wm_low.vsc = radeon_crtc->vsc;
+ wm_low.vtaps = 1;
+ if (radeon_crtc->rmx_type != RMX_OFF)
+ wm_low.vtaps = 2;
+ wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_low.lb_size = lb_size;
+ wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
+ wm_low.num_heads = num_heads;
+
/* set for low clocks */
- /* wm.yclk = low clk; wm.sclk = low clk */
- latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
+ latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
- if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
- !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
- !dce8_check_latency_hiding(&wm) ||
+ if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
+ !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
+ !dce8_check_latency_hiding(&wm_low) ||
(rdev->disp_priority == 2)) {
DRM_DEBUG_KMS("force priority to high\n");
}
@@ -6860,6 +7967,11 @@ static void dce8_program_watermarks(struct radeon_device *rdev,
LATENCY_HIGH_WATERMARK(line_time)));
/* restore original selection */
WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
+
+ /* save values for DPM */
+ radeon_crtc->line_time = line_time;
+ radeon_crtc->wm_high = latency_watermark_a;
+ radeon_crtc->wm_low = latency_watermark_b;
}
/**
@@ -6949,39 +8061,307 @@ int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
return r;
}
-int cik_uvd_resume(struct radeon_device *rdev)
+static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
- uint64_t addr;
- uint32_t size;
- int r;
+ struct pci_dev *root = rdev->pdev->bus->self;
+ int bridge_pos, gpu_pos;
+ u32 speed_cntl, mask, current_data_rate;
+ int ret, i;
+ u16 tmp16;
- r = radeon_uvd_resume(rdev);
- if (r)
- return r;
+ if (radeon_pcie_gen2 == 0)
+ return;
+
+ if (rdev->flags & RADEON_IS_IGP)
+ return;
+
+ if (!(rdev->flags & RADEON_IS_PCIE))
+ return;
- /* programm the VCPU memory controller bits 0-27 */
- addr = rdev->uvd.gpu_addr >> 3;
- size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
- WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
- WREG32(UVD_VCPU_CACHE_SIZE0, size);
+ ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
+ if (ret != 0)
+ return;
- addr += size;
- size = RADEON_UVD_STACK_SIZE >> 3;
- WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
- WREG32(UVD_VCPU_CACHE_SIZE1, size);
+ if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
+ return;
+
+ speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+ current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
+ LC_CURRENT_DATA_RATE_SHIFT;
+ if (mask & DRM_PCIE_SPEED_80) {
+ if (current_data_rate == 2) {
+ DRM_INFO("PCIE gen 3 link speeds already enabled\n");
+ return;
+ }
+ DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
+ } else if (mask & DRM_PCIE_SPEED_50) {
+ if (current_data_rate == 1) {
+ DRM_INFO("PCIE gen 2 link speeds already enabled\n");
+ return;
+ }
+ DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
+ }
- addr += size;
- size = RADEON_UVD_HEAP_SIZE >> 3;
- WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
- WREG32(UVD_VCPU_CACHE_SIZE2, size);
+ bridge_pos = pci_pcie_cap(root);
+ if (!bridge_pos)
+ return;
- /* bits 28-31 */
- addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
- WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
+ gpu_pos = pci_pcie_cap(rdev->pdev);
+ if (!gpu_pos)
+ return;
- /* bits 32-39 */
- addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
- WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+ if (mask & DRM_PCIE_SPEED_80) {
+ /* re-try equalization if gen3 is not already enabled */
+ if (current_data_rate != 2) {
+ u16 bridge_cfg, gpu_cfg;
+ u16 bridge_cfg2, gpu_cfg2;
+ u32 max_lw, current_lw, tmp;
+
+ pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
+ pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+
+ tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
+ pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+
+ tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
+ pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+
+ tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
+ max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
+ current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
+
+ if (current_lw < max_lw) {
+ tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+ if (tmp & LC_RENEGOTIATION_SUPPORT) {
+ tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
+ tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
+ tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
+ WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
+ }
+ }
- return 0;
+ for (i = 0; i < 10; i++) {
+ /* check status */
+ pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ if (tmp16 & PCI_EXP_DEVSTA_TRPND)
+ break;
+
+ pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
+ pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+
+ pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
+ pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+
+ tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+ tmp |= LC_SET_QUIESCE;
+ WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+
+ tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+ tmp |= LC_REDO_EQ;
+ WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+
+ mdelay(100);
+
+ /* linkctl */
+ pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+ tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
+ pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+
+ pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+ tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
+ pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+
+ /* linkctl2 */
+ pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~((1 << 4) | (7 << 9));
+ tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
+ pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
+
+ pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~((1 << 4) | (7 << 9));
+ tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
+ pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+
+ tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+ tmp &= ~LC_SET_QUIESCE;
+ WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+ }
+ }
+ }
+
+ /* set the link speed */
+ speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
+ speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
+ WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+
+ pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~0xf;
+ if (mask & DRM_PCIE_SPEED_80)
+ tmp16 |= 3; /* gen3 */
+ else if (mask & DRM_PCIE_SPEED_50)
+ tmp16 |= 2; /* gen2 */
+ else
+ tmp16 |= 1; /* gen1 */
+ pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+
+ speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+ speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
+ WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+ if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
+ break;
+ udelay(1);
+ }
+}
+
+static void cik_program_aspm(struct radeon_device *rdev)
+{
+ u32 data, orig;
+ bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
+ bool disable_clkreq = false;
+
+ if (radeon_aspm == 0)
+ return;
+
+ /* XXX double check IGPs */
+ if (rdev->flags & RADEON_IS_IGP)
+ return;
+
+ if (!(rdev->flags & RADEON_IS_PCIE))
+ return;
+
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
+ data &= ~LC_XMIT_N_FTS_MASK;
+ data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
+
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
+ data |= LC_GO_TO_RECOVERY;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
+
+ orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
+ data |= P_IGNORE_EDB_ERR;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_P_CNTL, data);
+
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
+ data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
+ data |= LC_PMI_TO_L1_DIS;
+ if (!disable_l0s)
+ data |= LC_L0S_INACTIVITY(7);
+
+ if (!disable_l1) {
+ data |= LC_L1_INACTIVITY(7);
+ data &= ~LC_PMI_TO_L1_DIS;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+
+ if (!disable_plloff_in_l1) {
+ bool clk_req_support;
+
+ orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
+ data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+ if (orig != data)
+ WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
+
+ orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
+ data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+ if (orig != data)
+ WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
+
+ orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
+ data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+ if (orig != data)
+ WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
+
+ orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
+ data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+ if (orig != data)
+ WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
+
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+ data &= ~LC_DYN_LANES_PWR_STATE_MASK;
+ data |= LC_DYN_LANES_PWR_STATE(3);
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
+
+ if (!disable_clkreq) {
+ struct pci_dev *root = rdev->pdev->bus->self;
+ u32 lnkcap;
+
+ clk_req_support = false;
+ pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
+ if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
+ clk_req_support = true;
+ } else {
+ clk_req_support = false;
+ }
+
+ if (clk_req_support) {
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
+ data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
+
+ orig = data = RREG32_SMC(THM_CLK_CNTL);
+ data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
+ data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
+ if (orig != data)
+ WREG32_SMC(THM_CLK_CNTL, data);
+
+ orig = data = RREG32_SMC(MISC_CLK_CTRL);
+ data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
+ data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
+ if (orig != data)
+ WREG32_SMC(MISC_CLK_CTRL, data);
+
+ orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
+ data &= ~BCLK_AS_XCLK;
+ if (orig != data)
+ WREG32_SMC(CG_CLKPIN_CNTL, data);
+
+ orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
+ data &= ~FORCE_BIF_REFCLK_EN;
+ if (orig != data)
+ WREG32_SMC(CG_CLKPIN_CNTL_2, data);
+
+ orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
+ data &= ~MPLL_CLKOUT_SEL_MASK;
+ data |= MPLL_CLKOUT_SEL(4);
+ if (orig != data)
+ WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
+ }
+ }
+ } else {
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ }
+
+ orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
+ data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_CNTL2, data);
+
+ if (!disable_l0s) {
+ data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
+ if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
+ data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
+ if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
+ data &= ~LC_L0S_INACTIVITY_MASK;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ }
+ }
+ }
}