From d84c4d194ebad0f5d327da72404c37c7de2c1714 Mon Sep 17 00:00:00 2001 From: Jimmy Kizito Date: Thu, 14 Apr 2022 09:49:37 -0400 Subject: drm/amd/display: Update link training fallback behaviour. [Why] Some displays may need several link training attempts before link training succeeds. [How] If training succeeds after falling back to lower link bandwidth, retry at original link bandwidth instead of abandoning link training whenever link bandwidth is less than stream bandwidth. Reviewed-by: Jun Lei Acked-by: Qingqing Zhuo Signed-off-by: Jimmy Kizito Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 77 ++++++++++++++++-------- 1 file changed, 53 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 975d631534b5..d8de8dbf3676 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2783,31 +2783,37 @@ bool perform_link_training_with_retries( struct dc_link *link = stream->link; enum dp_panel_mode panel_mode = dp_get_panel_mode(link); enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0; - struct dc_link_settings current_setting = *link_setting; + struct dc_link_settings cur_link_settings = *link_setting; const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); int fail_count = 0; + bool is_link_bw_low = false; /* link bandwidth < stream bandwidth */ + bool is_link_bw_min = /* RBR x 1 */ + (cur_link_settings.link_rate <= LINK_RATE_LOW) && + (cur_link_settings.lane_count <= LANE_COUNT_ONE); dp_trace_commit_lt_init(link); - if (dp_get_link_encoding_format(¤t_setting) == DP_8b_10b_ENCODING) + if (dp_get_link_encoding_format(&cur_link_settings) == DP_8b_10b_ENCODING) /* We need to do this before the link training to ensure the idle * pattern in SST mode will be sent right after the link training */ link_hwss->setup_stream_encoder(pipe_ctx); dp_trace_set_lt_start_timestamp(link, false); - for (j = 0; j < attempts; ++j) { + j = 0; + while (j < attempts && fail_count < (attempts * 10)) { - DC_LOG_HW_LINK_TRAINING("%s: Beginning link training attempt %u of %d\n", - __func__, (unsigned int)j + 1, attempts); + DC_LOG_HW_LINK_TRAINING("%s: Beginning link training attempt %u of %d @ rate(%d) x lane(%d)\n", + __func__, (unsigned int)j + 1, attempts, cur_link_settings.link_rate, + cur_link_settings.lane_count); dp_enable_link_phy( link, &pipe_ctx->link_res, signal, pipe_ctx->clock_source->id, - ¤t_setting); + &cur_link_settings); if (stream->sink_patches.dppowerup_delay > 0) { int delay_dp_power_up_in_ms = stream->sink_patches.dppowerup_delay; @@ -2832,30 +2838,30 @@ bool perform_link_training_with_retries( dp_set_panel_mode(link, panel_mode); if (link->aux_access_disabled) { - dc_link_dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, ¤t_setting); + dc_link_dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, &cur_link_settings); return true; } else { /** @todo Consolidate USB4 DP and DPx.x training. */ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { status = dc_link_dpia_perform_link_training(link, &pipe_ctx->link_res, - ¤t_setting, + &cur_link_settings, skip_video_pattern); /* Transmit idle pattern once training successful. */ - if (status == LINK_TRAINING_SUCCESS) + if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low) dp_set_hw_test_pattern(link, &pipe_ctx->link_res, DP_TEST_PATTERN_VIDEO_MODE, NULL, 0); } else { status = dc_link_dp_perform_link_training(link, &pipe_ctx->link_res, - ¤t_setting, + &cur_link_settings, skip_video_pattern); } dp_trace_lt_total_count_increment(link, false); dp_trace_lt_result_update(link, status, false); dp_trace_set_lt_end_timestamp(link, false); - if (status == LINK_TRAINING_SUCCESS) + if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low) return true; } @@ -2866,8 +2872,9 @@ bool perform_link_training_with_retries( if (j == (attempts - 1) && link->ep_type == DISPLAY_ENDPOINT_PHY) break; - DC_LOG_WARNING("%s: Link training attempt %u of %d failed\n", - __func__, (unsigned int)j + 1, attempts); + DC_LOG_WARNING("%s: Link training attempt %u of %d failed @ rate(%d) x lane(%d)\n", + __func__, (unsigned int)j + 1, attempts, cur_link_settings.link_rate, + cur_link_settings.lane_count); dp_disable_link_phy(link, &pipe_ctx->link_res, signal); @@ -2876,27 +2883,49 @@ bool perform_link_training_with_retries( enum dc_connection_type type = dc_connection_none; dc_link_detect_sink(link, &type); - if (type == dc_connection_none) + if (type == dc_connection_none) { + DC_LOG_HW_LINK_TRAINING("%s: Aborting training because sink unplugged\n", __func__); break; - } else if (do_fallback) { + } + } + + /* Try to train again at original settings if: + * - not falling back between training attempts; + * - aborted previous attempt due to reasons other than sink unplug; + * - successfully trained but at a link rate lower than that required by stream; + * - reached minimum link bandwidth. + */ + if (!do_fallback || (status == LINK_TRAINING_ABORT) || + (status == LINK_TRAINING_SUCCESS && is_link_bw_low) || + is_link_bw_min) { + j++; + cur_link_settings = *link_setting; + delay_between_attempts += LINK_TRAINING_RETRY_DELAY; + is_link_bw_low = false; + is_link_bw_min = (cur_link_settings.link_rate <= LINK_RATE_LOW) && + (cur_link_settings.lane_count <= LANE_COUNT_ONE); + + } else if (do_fallback) { /* Try training at lower link bandwidth if doing fallback. */ uint32_t req_bw; uint32_t link_bw; - decide_fallback_link_setting(link, *link_setting, ¤t_setting, status); - /* Fail link training if reduced link bandwidth no longer meets - * stream requirements. + decide_fallback_link_setting(link, *link_setting, &cur_link_settings, status); + /* Flag if reduced link bandwidth no longer meets stream requirements or fallen back to + * minimum link bandwidth. */ req_bw = dc_bandwidth_in_kbps_from_timing(&stream->timing); - link_bw = dc_link_bandwidth_kbps(link, ¤t_setting); - if (req_bw > link_bw) - break; + link_bw = dc_link_bandwidth_kbps(link, &cur_link_settings); + is_link_bw_low = (req_bw > link_bw); + is_link_bw_min = ((cur_link_settings.link_rate <= LINK_RATE_LOW) && + (cur_link_settings.lane_count <= LANE_COUNT_ONE)); + + if (is_link_bw_low) + DC_LOG_WARNING("%s: Link bandwidth too low after fallback req_bw(%d) > link_bw(%d)\n", + __func__, req_bw, link_bw); } msleep(delay_between_attempts); - - delay_between_attempts += LINK_TRAINING_RETRY_DELAY; } - return false; } -- cgit v1.2.3 From fc0b067df7ed973addbba8e136d9a729df86ccdc Mon Sep 17 00:00:00 2001 From: Jimmy Kizito Date: Fri, 1 Apr 2022 15:24:47 -0400 Subject: drm/amd/display: Query DPIA HPD status. [Why] Driver needs up to date DPIA HPD status. [How] Use HPD query command to get DPIA HPD status. Reviewed-by: Meenakshikumar Somasundaram Reviewed-by: Jun Lei Acked-by: Qingqing Zhuo Signed-off-by: Jimmy Kizito Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 3 ++- drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c | 19 +++++++++++++++++++ drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h | 5 +++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 67ef357e5798..b40abd2bf7f6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -33,6 +33,7 @@ #include "gpio_service_interface.h" #include "core_status.h" #include "dc_link_dp.h" +#include "dc_link_dpia.h" #include "dc_link_ddc.h" #include "link_hwss.h" #include "opp.h" @@ -240,7 +241,7 @@ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type) /* Link may not have physical HPD pin. */ if (link->ep_type != DISPLAY_ENDPOINT_PHY) { - if (link->is_hpd_pending || !link->hpd_status) + if (link->is_hpd_pending || !dc_link_dpia_query_hpd_status(link)) *type = dc_connection_none; else *type = dc_connection_single; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c index a5765f36d86f..1b7a8774b0c9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c @@ -34,6 +34,7 @@ #include "dm_helpers.h" #include "dmub/inc/dmub_cmd.h" #include "inc/link_dpcd.h" +#include "dc_dmub_srv.h" #define DC_LOGGER \ link->ctx->logger @@ -69,6 +70,24 @@ enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link) return status; } +bool dc_link_dpia_query_hpd_status(struct dc_link *link) +{ + union dmub_rb_cmd cmd = {0}; + struct dc_dmub_srv *dmub_srv = link->ctx->dmub_srv; + bool is_hpd_high = false; + + /* prepare QUERY_HPD command */ + cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE; + cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1; + cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA; + + /* Return HPD status reported by DMUB if query successfully executed. */ + if (dc_dmub_srv_cmd_with_reply_data(dmub_srv, &cmd) && cmd.query_hpd.data.status == AUX_RET_SUCCESS) + is_hpd_high = cmd.query_hpd.data.result; + + return is_hpd_high; +} + /* Configure link as prescribed in link_setting; set LTTPR mode; and * Initialize link training settings. * Abort link training if sink unplug detected. diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h index 74dafd0f9d3d..39c1d1d07357 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h @@ -87,6 +87,11 @@ union dpia_set_config_data { */ enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link); +/* Query hot plug status of USB4 DP tunnel. + * Returns true if HPD high. + */ +bool dc_link_dpia_query_hpd_status(struct dc_link *link); + /* Train DP tunneling link for USB4 DPIA display endpoint. * DPIA equivalent of dc_link_dp_perfrorm_link_training. * Aborts link training upon detection of sink unplug. -- cgit v1.2.3 From 903940b0b7c7f48e9743c65ae7cd65267083539f Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Mon, 2 May 2022 15:04:31 -0400 Subject: drm/amd/display: Clean up code in dc [Why & How] Code clean up in dc. Reviewed-by: Jun Lei Acked-by: Qingqing Zhuo Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 15 +++++++++------ drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 1 - 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index e41a48f596a3..f14449401188 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2901,14 +2901,15 @@ static void commit_planes_for_stream(struct dc *dc, top_pipe_to_program->stream_res.tg); } - if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { dc->hwss.interdependent_update_lock(dc, context, true); - else + } else { /* Lock the top pipe while updating plane addrs, since freesync requires * plane addr update event triggers to be synchronized. * top_pipe_to_program is expected to never be NULL */ dc->hwss.pipe_control_lock(dc, top_pipe_to_program, true); + } // Stream updates if (stream_update) @@ -2924,10 +2925,11 @@ static void commit_planes_for_stream(struct dc *dc, if (dc->hwss.program_front_end_for_ctx) dc->hwss.program_front_end_for_ctx(dc, context); - if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { dc->hwss.interdependent_update_lock(dc, context, false); - else + } else { dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false); + } dc->hwss.post_unlock_program_front_end(dc, context); return; } @@ -3052,10 +3054,11 @@ static void commit_planes_for_stream(struct dc *dc, } - if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { dc->hwss.interdependent_update_lock(dc, context, false); - else + } else { dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false); + } if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index e1f87bd72e4a..0da024912dbe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1773,7 +1773,6 @@ void dcn20_post_unlock_program_front_end( */ for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable) { struct hubp *hubp = pipe->plane_res.hubp; int j = 0; -- cgit v1.2.3 From 3f69ee66f507a9e1180fd3a67b43807fae9b0e37 Mon Sep 17 00:00:00 2001 From: Paul Hsieh Date: Tue, 3 May 2022 14:26:41 +0800 Subject: drm/amd/display: clear request when release aux engine [Why] when driver and dmub request aux engine at the same time, dmub grant the aux engine but driver fail. Then driver release aux engine but doesn't clear the request bit. Then aux engine will be occupied by driver forever. [How] When driver release aux engine, clear request bit as well. Reviewed-by: Anthony Koo Acked-by: Qingqing Zhuo Signed-off-by: Paul Hsieh Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index 29e20d92b0bb..9e39cd7b203e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -87,7 +87,8 @@ static void release_engine( engine->ddc = NULL; - REG_UPDATE(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, 1); + REG_UPDATE_2(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, 1, + AUX_SW_USE_AUX_REG_REQ, 0); } #define SW_CAN_ACCESS_AUX 1 -- cgit v1.2.3 From 49947b906a6bd9668eaf4f9cf691973c25c26955 Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Tue, 3 May 2022 18:30:25 -0400 Subject: drm/amd/display: Check if modulo is 0 before dividing. [How & Why] If a value of 0 is read, then this will cause a divide-by-0 panic. Reviewed-by: Martin Leung Acked-by: Qingqing Zhuo Signed-off-by: David Galiffi Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 5e6fea85a7b5..845aa8a1027d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -1101,9 +1101,12 @@ static bool get_pixel_clk_frequency_100hz( * not be programmed equal to DPREFCLK */ modulo_hz = REG_READ(MODULO[inst]); - *pixel_clk_khz = div_u64((uint64_t)clock_hz* - clock_source->ctx->dc->clk_mgr->dprefclk_khz*10, - modulo_hz); + if (modulo_hz) + *pixel_clk_khz = div_u64((uint64_t)clock_hz* + clock_source->ctx->dc->clk_mgr->dprefclk_khz*10, + modulo_hz); + else + *pixel_clk_khz = 0; } else { /* NOTE: There is agreement with VBIOS here that MODULO is * programmed equal to DPREFCLK, in which case PHASE will be -- cgit v1.2.3 From ab144f0b4ad615b86934ce9cbdd27b23f65ba3a4 Mon Sep 17 00:00:00 2001 From: Derek Lai Date: Thu, 5 May 2022 17:59:49 +0800 Subject: drm/amd/display: Allow individual control of eDP hotplug support [Why] Second eDP can send display off notification through HPD but DC isn't hooked up to handle. Some primary eDP panels will toggle on/off incorrectly if it's enabled generically. [How] Extend the debug option to allow individually enabling hotplug either the first eDP or the second eDP in a dual eDP system. Reviewed-by: Nicholas Kazlauskas Acked-by: Qingqing Zhuo Signed-off-by: Derek Lai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 21 +++++++++++++++++++-- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index b40abd2bf7f6..a789ea8af27f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1605,8 +1605,25 @@ static bool dc_link_construct_legacy(struct dc_link *link, if (link->hpd_gpio) { if (!link->dc->config.allow_edp_hotplug_detection) link->irq_source_hpd = DC_IRQ_SOURCE_INVALID; - link->irq_source_hpd_rx = - dal_irq_get_rx_source(link->hpd_gpio); + + switch (link->dc->config.allow_edp_hotplug_detection) { + case 1: // only the 1st eDP handles hotplug + if (link->link_index == 0) + link->irq_source_hpd_rx = + dal_irq_get_rx_source(link->hpd_gpio); + else + link->irq_source_hpd = DC_IRQ_SOURCE_INVALID; + break; + case 2: // only the 2nd eDP handles hotplug + if (link->link_index == 1) + link->irq_source_hpd_rx = + dal_irq_get_rx_source(link->hpd_gpio); + else + link->irq_source_hpd = DC_IRQ_SOURCE_INVALID; + break; + default: + break; + } } break; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 26c24db8f1da..7cfc04a8ef15 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -329,7 +329,7 @@ struct dc_config { bool disable_dmcu; bool enable_4to1MPC; bool enable_windowed_mpo_odm; - bool allow_edp_hotplug_detection; + uint32_t allow_edp_hotplug_detection; bool clamp_min_dcfclk; uint64_t vblank_alignment_dto_params; uint8_t vblank_alignment_max_frame_time_diff; -- cgit v1.2.3 From 66a197203794339b028eedfa880bff9367fce783 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 5 May 2022 16:50:42 -0400 Subject: drm/amd/display: Check zero planes for OTG disable W/A on clock change [Why] A display clock change hang can occur when switching between DIO and HPO enabled modes during the optimize_bandwidth in dc_commit_state_no_check call. This happens when going from 4k120 8bpc 420 to 4k144 10bpc 444. Display clock in the DIO case is 1200MHz, but pixel rate is 600MHz because the pixel format is 420. Display clock in the HPO case is less (800MHz?) because of ODM combine which results in a smaller divider. The DIO is still active in prepare but not active in the optimize which results in the hang occuring. During this change there are no planes on the stream so it's safe to apply the workaround, but dpms_off = false and signal type is not virtual. [How] Check for plane_count == 0, no planes on the stream. It's easiest to check pipe->plane_state == NULL as an equivalent check rather than trying to search for the stream status in the context associated with the stream, so let's do that. The primary, non MPO pipe should not have a NULL plane state. Reviewed-by: Dmytro Laktyushkin Acked-by: Qingqing Zhuo Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 3 ++- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index 27501b735a9c..a2ade6e93f5e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -91,7 +91,8 @@ static void dcn315_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable) if (pipe->top_pipe || pipe->prev_odm_pipe) continue; - if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { + if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL || + dc_is_virtual_signal(pipe->stream->signal))) { if (disable) pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); else diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index 3121dd2d2a91..fc3af81ed6c6 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -122,7 +122,8 @@ static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable) if (pipe->top_pipe || pipe->prev_odm_pipe) continue; - if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { + if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL || + dc_is_virtual_signal(pipe->stream->signal))) { if (disable) pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); else -- cgit v1.2.3 From e4b0eac3e6242abf5f5ebcffdeb852e7ffa1c3d0 Mon Sep 17 00:00:00 2001 From: Jasdeep Dhillon Date: Fri, 6 May 2022 13:03:45 -0400 Subject: drm/amd/display: Move FPU associated DCN30 code to DML folder [why & how] As part of the FPU isolation work documented in https://patchwork.freedesktop.org/series/93042/, isolate code that uses FPU in DCN30 to DML, where all FPU code should locate. Reviewed-by: Rodrigo Siqueira Acked-by: Qingqing Zhuo Signed-off-by: Jasdeep Dhillon Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c | 10 + .../gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 516 ++--------------- .../gpu/drm/amd/display/dc/dcn30/dcn30_resource.h | 5 + .../drm/amd/display/dc/dcn301/dcn301_resource.c | 2 + .../drm/amd/display/dc/dcn302/dcn302_resource.c | 2 + .../drm/amd/display/dc/dcn303/dcn303_resource.c | 2 + .../gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 2 + drivers/gpu/drm/amd/display/dc/dml/Makefile | 3 +- .../gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c | 617 +++++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h | 67 +++ drivers/gpu/drm/amd/display/dc/inc/core_types.h | 7 + 11 files changed, 756 insertions(+), 477 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c index f5e8916601d3..b604fb26f288 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c @@ -28,6 +28,8 @@ #include "dc.h" #include "dcn_calc_math.h" +#include "dml/dcn30/dcn30_fpu.h" + #define REG(reg)\ optc1->tg_regs->reg @@ -184,6 +186,14 @@ void optc3_set_dsc_config(struct timing_generator *optc, } +void optc3_set_vrr_m_const(struct timing_generator *optc, + double vtotal_avg) +{ + DC_FP_START(); + optc3_fpu_set_vrr_m_const(optc, vtotal_avg); + DC_FP_END(); +} + void optc3_set_odm_bypass(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 336b2ce6a636..1c1a67c4cec1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -84,6 +84,7 @@ #include "dce/dce_aux.h" #include "dce/dce_i2c.h" +#include "dml/dcn30/dcn30_fpu.h" #include "dml/dcn30/display_mode_vba_30.h" #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" @@ -91,137 +92,6 @@ #define DC_LOGGER_INIT(logger) -struct _vcs_dpi_ip_params_st dcn3_0_ip = { - .use_min_dcfclk = 0, - .clamp_min_dcfclk = 0, - .odm_capable = 1, - .gpuvm_enable = 0, - .hostvm_enable = 0, - .gpuvm_max_page_table_levels = 4, - .hostvm_max_page_table_levels = 4, - .hostvm_cached_page_table_levels = 0, - .pte_group_size_bytes = 2048, - .num_dsc = 6, - .rob_buffer_size_kbytes = 184, - .det_buffer_size_kbytes = 184, - .dpte_buffer_size_in_pte_reqs_luma = 84, - .pde_proc_buffer_size_64k_reqs = 48, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .pte_enable = 1, - .max_page_table_levels = 2, - .pte_chunk_size_kbytes = 2, // ? - .meta_chunk_size_kbytes = 2, - .writeback_chunk_size_kbytes = 8, - .line_buffer_size_bits = 789504, - .is_line_buffer_bpp_fixed = 0, // ? - .line_buffer_fixed_bpp = 0, // ? - .dcc_supported = true, - .writeback_interface_buffer_size_kbytes = 90, - .writeback_line_buffer_buffer_size = 0, - .max_line_buffer_lines = 12, - .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640 - .writeback_chroma_buffer_size_kbytes = 8, - .writeback_chroma_line_buffer_width_pixels = 4, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .writeback_line_buffer_luma_buffer_size = 0, - .writeback_line_buffer_chroma_buffer_size = 14643, - .cursor_buffer_size = 8, - .cursor_chunk_size = 2, - .max_num_otg = 6, - .max_num_dpp = 6, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .hscl_mults = 4, - .vscl_mults = 4, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dispclk_ramp_margin_percent = 1, - .underscan_factor = 1.11, - .min_vblank_lines = 32, - .dppclk_delay_subtotal = 46, - .dynamic_metadata_vm_enabled = true, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_scl = 50, - .dppclk_delay_cnvc_formatter = 27, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 119, - .dcfclk_cstate_latency = 5.2, // SRExitTime - .max_inter_dcn_tile_repeaters = 8, - .odm_combine_4to1_supported = true, - - .xfc_supported = false, - .xfc_fill_bw_overhead_percent = 10.0, - .xfc_fill_constant_bytes = 0, - .gfx7_compat_tiling_supported = 0, - .number_of_cursors = 1, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = { - .clock_limits = { - { - .state = 0, - .dispclk_mhz = 562.0, - .dppclk_mhz = 300.0, - .phyclk_mhz = 300.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 405.6, - }, - }, - .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */ - .num_states = 1, - .sr_exit_time_us = 15.5, - .sr_enter_plus_exit_time_us = 20, - .urgent_latency_us = 4.0, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, - .max_avg_sdp_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_percent = 40.0, - .writeback_latency_us = 12.0, - .max_request_size_bytes = 256, - .fabric_datapath_to_dcn_data_return_bytes = 64, - .dcn_downspread_percent = 0.5, - .downspread_percent = 0.38, - .dram_page_open_time_ns = 50.0, - .dram_rw_turnaround_time_ns = 17.5, - .dram_return_buffer_per_channel_bytes = 8192, - .round_trip_ping_latency_dcfclk_cycles = 191, - .urgent_out_of_order_return_per_channel_bytes = 4096, - .channel_interleave_bytes = 256, - .num_banks = 8, - .gpuvm_min_page_size_bytes = 4096, - .hostvm_min_page_size_bytes = 4096, - .dram_clock_change_latency_us = 404, - .dummy_pstate_latency_us = 5, - .writeback_dram_clock_change_latency_us = 23.0, - .return_bus_width_bytes = 64, - .dispclk_dppclk_vco_speed_mhz = 3650, - .xfc_bus_transport_time_us = 20, // ? - .xfc_xbuf_latency_tolerance_us = 4, // ? - .use_urgent_burst_bw = 1, // ? - .do_urgent_latency_adjustment = true, - .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, -}; - enum dcn30_clk_src_array_id { DCN30_CLK_SRC_PLL0, DCN30_CLK_SRC_PLL1, @@ -1480,90 +1350,9 @@ int dcn30_populate_dml_pipes_from_context( void dcn30_populate_dml_writeback_from_context( struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) { - int pipe_cnt, i, j; - double max_calc_writeback_dispclk; - double writeback_dispclk; - struct writeback_st dout_wb; - - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream; - - if (!stream) - continue; - max_calc_writeback_dispclk = 0; - - /* Set writeback information */ - pipes[pipe_cnt].dout.wb_enable = 0; - pipes[pipe_cnt].dout.num_active_wb = 0; - for (j = 0; j < stream->num_wb_info; j++) { - struct dc_writeback_info *wb_info = &stream->writeback_info[j]; - - if (wb_info->wb_enabled && wb_info->writeback_source_plane && - (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) { - pipes[pipe_cnt].dout.wb_enable = 1; - pipes[pipe_cnt].dout.num_active_wb++; - dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ? - wb_info->dwb_params.cnv_params.crop_height : - wb_info->dwb_params.cnv_params.src_height; - dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ? - wb_info->dwb_params.cnv_params.crop_width : - wb_info->dwb_params.cnv_params.src_width; - dout_wb.wb_dst_width = wb_info->dwb_params.dest_width; - dout_wb.wb_dst_height = wb_info->dwb_params.dest_height; - - /* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */ - if (dc->dml.ip.writeback_max_hscl_taps > 1) { - dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps; - dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps; - } else { - dout_wb.wb_htaps_luma = 1; - dout_wb.wb_vtaps_luma = 1; - } - dout_wb.wb_htaps_chroma = 0; - dout_wb.wb_vtaps_chroma = 0; - dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ? - (double)wb_info->dwb_params.cnv_params.crop_width / - (double)wb_info->dwb_params.dest_width : - (double)wb_info->dwb_params.cnv_params.src_width / - (double)wb_info->dwb_params.dest_width; - dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ? - (double)wb_info->dwb_params.cnv_params.crop_height / - (double)wb_info->dwb_params.dest_height : - (double)wb_info->dwb_params.cnv_params.src_height / - (double)wb_info->dwb_params.dest_height; - if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB || - wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA) - dout_wb.wb_pixel_format = dm_444_64; - else - dout_wb.wb_pixel_format = dm_444_32; - - /* Workaround for cases where multiple writebacks are connected to same plane - * In which case, need to compute worst case and set the associated writeback parameters - * This workaround is necessary due to DML computation assuming only 1 set of writeback - * parameters per pipe - */ - writeback_dispclk = dml30_CalculateWriteBackDISPCLK( - dout_wb.wb_pixel_format, - pipes[pipe_cnt].pipe.dest.pixel_rate_mhz, - dout_wb.wb_hratio, - dout_wb.wb_vratio, - dout_wb.wb_htaps_luma, - dout_wb.wb_vtaps_luma, - dout_wb.wb_src_width, - dout_wb.wb_dst_width, - pipes[pipe_cnt].pipe.dest.htotal, - dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size); - - if (writeback_dispclk > max_calc_writeback_dispclk) { - max_calc_writeback_dispclk = writeback_dispclk; - pipes[pipe_cnt].dout.wb = dout_wb; - } - } - } - - pipe_cnt++; - } - + DC_FP_START(); + dcn30_fpu_populate_dml_writeback_from_context(dc, res_ctx, pipes); + DC_FP_END(); } unsigned int dcn30_calc_max_scaled_time( @@ -1598,7 +1387,7 @@ void dcn30_set_mcif_arb_params( enum mmhubbub_wbif_mode wbif_mode; struct display_mode_lib *dml = &context->bw_ctx.dml; struct mcif_arb_params *wb_arb_params; - int i, j, k, dwb_pipe; + int i, j, dwb_pipe; /* Writeback MCIF_WB arbitration parameters */ dwb_pipe = 0; @@ -1622,17 +1411,15 @@ void dcn30_set_mcif_arb_params( else wbif_mode = PACKED_444; - for (k = 0; k < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); k++) { - wb_arb_params->cli_watermark[k] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000; - wb_arb_params->pstate_watermark[k] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000; - } + DC_FP_START(); + dcn30_fpu_set_mcif_arb_params(wb_arb_params, dml, pipes, pipe_cnt, j); + DC_FP_END(); wb_arb_params->time_per_pixel = (1000000 << 6) / context->res_ctx.pipe_ctx[i].stream->phy_pix_clk; /* time_per_pixel should be in u6.6 format */ wb_arb_params->slice_lines = 32; wb_arb_params->arbitration_slice = 2; /* irrelevant since there is no YUV output */ wb_arb_params->max_scaled_time = dcn30_calc_max_scaled_time(wb_arb_params->time_per_pixel, wbif_mode, wb_arb_params->cli_watermark[0]); /* assume 4 watermark sets have the same value */ - wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[j] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */ dwb_pipe++; @@ -2111,178 +1898,11 @@ validate_out: return out; } -/* - * This must be noinline to ensure anything that deals with FP registers - * is contained within this call; previously our compiling with hard-float - * would result in fp instructions being emitted outside of the boundaries - * of the DC_FP_START/END macros, which makes sense as the compiler has no - * idea about what is wrapped and what is not - * - * This is largely just a workaround to avoid breakage introduced with 5.6, - * ideally all fp-using code should be moved into its own file, only that - * should be compiled with hard-float, and all code exported from there - * should be strictly wrapped with DC_FP_START/END - */ -static noinline void dcn30_calculate_wm_and_dlg_fp( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) +void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) { - int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; - int i, pipe_idx; - double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb]; - bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported; - - if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk) - dcfclk = context->bw_ctx.dml.soc.min_dcfclk; - - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; - - /* Set B: - * DCFCLK: 1GHz or min required above 1GHz - * FCLK/UCLK: Max - */ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { - if (vlevel == 0) { - pipes[0].clks_cfg.voltage = 1; - pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; - } - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - - /* Set D: - * DCFCLK: Min Required - * FCLK(proportional to UCLK): 1GHz or Max - * MALL stutter, sr_enter_exit = 4, sr_exit = 2us - */ - /* - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - */ - - /* Set C: - * DCFCLK: Min Required - * FCLK(proportional to UCLK): 1GHz or Max - * pstate latency overridden to 5us - */ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; - unsigned int min_dram_speed_mts_margin = 160; - - if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported) - min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; - - /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ - for (i = 3; i > 0; i--) - if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) - break; - - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; - } - - context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - if (!pstate_en) { - /* The only difference between A and C is p-state latency, if p-state is not supported we want to - * calculate DLG based on dummy p-state latency, and max out the set A p-state watermark - */ - context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; - } else { - /* Set A: - * DCFCLK: Min Required - * FCLK(proportional to UCLK): 1GHz or Max - * - * Set A calculated last so that following calculations are based on Set A - */ - dc->res_pool->funcs->update_soc_for_wm_a(dc, context); - context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - } - - context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; - - /* Make set D = set A until set D is enabled */ - context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); - pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - if (dc->config.forced_clocks) { - pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; - pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; - } - if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; - if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; - - pipe_idx++; - } - DC_FP_START(); - dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); + dcn30_fpu_update_soc_for_wm_a(dc, context); DC_FP_END(); - - if (!pstate_en) - /* Restore full p-state latency */ - context->bw_ctx.dml.soc.dram_clock_change_latency_us = - dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; -} - -void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) -{ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; - } } void dcn30_calculate_wm_and_dlg( @@ -2292,7 +1912,7 @@ void dcn30_calculate_wm_and_dlg( int vlevel) { DC_FP_START(); - dcn30_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel); + dcn30_fpu_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); DC_FP_END(); } @@ -2351,40 +1971,6 @@ validate_out: return out; } -/* - * This must be noinline to ensure anything that deals with FP registers - * is contained within this call; previously our compiling with hard-float - * would result in fp instructions being emitted outside of the boundaries - * of the DC_FP_START/END macros, which makes sense as the compiler has no - * idea about what is wrapped and what is not - * - * This is largely just a workaround to avoid breakage introduced with 5.6, - * ideally all fp-using code should be moved into its own file, only that - * should be compiled with hard-float, and all code exported from there - * should be strictly wrapped with DC_FP_START/END - */ -static noinline void dcn30_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, - unsigned int *optimal_dcfclk, - unsigned int *optimal_fclk) -{ - double bw_from_dram, bw_from_dram1, bw_from_dram2; - - bw_from_dram1 = uclk_mts * dcn3_0_soc.num_chans * - dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100); - bw_from_dram2 = uclk_mts * dcn3_0_soc.num_chans * - dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100); - - bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; - - if (optimal_fclk) - *optimal_fclk = bw_from_dram / - (dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100)); - - if (optimal_dcfclk) - *optimal_dcfclk = bw_from_dram / - (dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100)); -} - void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { unsigned int i, j; @@ -2399,47 +1985,43 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params unsigned int num_dcfclk_sta_targets = 4; unsigned int num_uclk_states; + struct dc_bounding_box_max_clk dcn30_bb_max_clk; + + memset(&dcn30_bb_max_clk, 0, sizeof(dcn30_bb_max_clk)); + if (dc->ctx->dc_bios->vram_info.num_chans) dcn3_0_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; - if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) - dcn3_0_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; - - dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + DC_FP_START(); + dcn30_fpu_update_dram_channel_width_bytes(dc); + DC_FP_END(); if (bw_params->clk_table.entries[0].memclk_mhz) { - int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + if (bw_params->clk_table.entries[i].dcfclk_mhz > dcn30_bb_max_clk.max_dcfclk_mhz) + dcn30_bb_max_clk.max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > dcn30_bb_max_clk.max_dispclk_mhz) + dcn30_bb_max_clk.max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > dcn30_bb_max_clk.max_dppclk_mhz) + dcn30_bb_max_clk.max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > dcn30_bb_max_clk.max_phyclk_mhz) + dcn30_bb_max_clk.max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; } - if (!max_dcfclk_mhz) - max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz; - if (!max_dispclk_mhz) - max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz; - if (!max_dppclk_mhz) - max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz; - if (!max_phyclk_mhz) - max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz; + DC_FP_START(); + dcn30_fpu_update_max_clk(&dcn30_bb_max_clk); + DC_FP_END(); - if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + if (dcn30_bb_max_clk.max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array - dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; + dcfclk_sta_targets[num_dcfclk_sta_targets] = dcn30_bb_max_clk.max_dcfclk_mhz; num_dcfclk_sta_targets++; - } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + } else if (dcn30_bb_max_clk.max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates for (i = 0; i < num_dcfclk_sta_targets; i++) { - if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { - dcfclk_sta_targets[i] = max_dcfclk_mhz; + if (dcfclk_sta_targets[i] > dcn30_bb_max_clk.max_dcfclk_mhz) { + dcfclk_sta_targets[i] = dcn30_bb_max_clk.max_dcfclk_mhz; break; } } @@ -2452,7 +2034,7 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params // Calculate optimal dcfclk for each uclk for (i = 0; i < num_uclk_states; i++) { DC_FP_START(); - dcn30_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, + dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, &optimal_dcfclk_for_uclk[i], NULL); DC_FP_END(); if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { @@ -2479,7 +2061,7 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; } else { - if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) { dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; } else { @@ -2494,33 +2076,15 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params } while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && - optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) { dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; } dcn3_0_soc.num_states = num_states; - for (i = 0; i < dcn3_0_soc.num_states; i++) { - dcn3_0_soc.clock_limits[i].state = i; - dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; - dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; - dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; - - /* Fill all states with max values of all other clocks */ - dcn3_0_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; - dcn3_0_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; - dcn3_0_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz; - /* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */ - /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ - dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz; - dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz; - } - /* re-init DML with updated bb */ - dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); - if (dc->current_state) - dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); + DC_FP_START(); + dcn30_fpu_update_bw_bounding_box(dc, bw_params, &dcn30_bb_max_clk, dcfclk_mhz, dram_speed_mts); + DC_FP_END(); } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h index b92e4cc0232f..3330a1026fa5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h @@ -35,6 +35,9 @@ struct dc; struct resource_pool; struct _vcs_dpi_display_pipe_params_st; +extern struct _vcs_dpi_ip_params_st dcn3_0_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc; + struct dcn30_resource_pool { struct resource_pool base; }; @@ -96,4 +99,6 @@ enum dc_status dcn30_add_stream_to_ctx( void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context); + #endif /* _DCN30_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index 4daf8931aa7c..a5df74110284 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -81,6 +81,8 @@ #include "dce/dce_aux.h" #include "dce/dce_i2c.h" +#include "dml/dcn30/dcn30_fpu.h" + #include "dml/dcn30/display_mode_vba_30.h" #include "dml/dcn301/dcn301_fpu.h" #include "vm_helper.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index f0938653bb88..f537888f4fa6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -43,6 +43,8 @@ #include "dcn20/dcn20_dsc.h" #include "dcn20/dcn20_resource.h" +#include "dml/dcn30/dcn30_fpu.h" + #include "dcn10/dcn10_resource.h" #include "dce/dce_abm.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 4fcbc0502808..76f863eb86ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -25,6 +25,8 @@ #include "dcn20/dcn20_dsc.h" #include "dcn20/dcn20_resource.h" +#include "dml/dcn30/dcn30_fpu.h" + #include "dcn10/dcn10_resource.h" #include "dc_link_ddc.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index ccf1b71a8269..3d9f07d4770b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -36,6 +36,8 @@ #include "dcn20/dcn20_resource.h" #include "dcn30/dcn30_resource.h" +#include "dml/dcn30/dcn30_fpu.h" + #include "dcn10/dcn10_ipp.h" #include "dcn30/dcn30_hubbub.h" #include "dcn31/dcn31_hubbub.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index ee911452c048..a64b88ca01a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -71,6 +71,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags) @@ -113,7 +114,7 @@ DML += dcn20/dcn20_fpu.o DML += display_mode_vba.o dcn20/display_rq_dlg_calc_20.o dcn20/display_mode_vba_20.o DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o -DML += dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o +DML += dcn30/dcn30_fpu.o dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o DML += dcn31/dcn31_fpu.o DML += dcn301/dcn301_fpu.o diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c new file mode 100644 index 000000000000..574676a0711a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c @@ -0,0 +1,617 @@ +/* + * Copyright 2020-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ +#include "resource.h" +#include "clk_mgr.h" +#include "reg_helper.h" +#include "dcn_calc_math.h" +#include "dcn20/dcn20_resource.h" +#include "dcn30/dcn30_resource.h" + + +#include "display_mode_vba_30.h" +#include "dcn30_fpu.h" + +#define REG(reg)\ + optc1->tg_regs->reg + +#define CTX \ + optc1->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + optc1->tg_shift->field_name, optc1->tg_mask->field_name + + +struct _vcs_dpi_ip_params_st dcn3_0_ip = { + .use_min_dcfclk = 0, + .clamp_min_dcfclk = 0, + .odm_capable = 1, + .gpuvm_enable = 0, + .hostvm_enable = 0, + .gpuvm_max_page_table_levels = 4, + .hostvm_max_page_table_levels = 4, + .hostvm_cached_page_table_levels = 0, + .pte_group_size_bytes = 2048, + .num_dsc = 6, + .rob_buffer_size_kbytes = 184, + .det_buffer_size_kbytes = 184, + .dpte_buffer_size_in_pte_reqs_luma = 84, + .pde_proc_buffer_size_64k_reqs = 48, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .pte_enable = 1, + .max_page_table_levels = 2, + .pte_chunk_size_kbytes = 2, // ? + .meta_chunk_size_kbytes = 2, + .writeback_chunk_size_kbytes = 8, + .line_buffer_size_bits = 789504, + .is_line_buffer_bpp_fixed = 0, // ? + .line_buffer_fixed_bpp = 0, // ? + .dcc_supported = true, + .writeback_interface_buffer_size_kbytes = 90, + .writeback_line_buffer_buffer_size = 0, + .max_line_buffer_lines = 12, + .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640 + .writeback_chroma_buffer_size_kbytes = 8, + .writeback_chroma_line_buffer_width_pixels = 4, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .writeback_line_buffer_luma_buffer_size = 0, + .writeback_line_buffer_chroma_buffer_size = 14643, + .cursor_buffer_size = 8, + .cursor_chunk_size = 2, + .max_num_otg = 6, + .max_num_dpp = 6, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .hscl_mults = 4, + .vscl_mults = 4, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dispclk_ramp_margin_percent = 1, + .underscan_factor = 1.11, + .min_vblank_lines = 32, + .dppclk_delay_subtotal = 46, + .dynamic_metadata_vm_enabled = true, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_scl = 50, + .dppclk_delay_cnvc_formatter = 27, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 119, + .dcfclk_cstate_latency = 5.2, // SRExitTime + .max_inter_dcn_tile_repeaters = 8, + .max_num_hdmi_frl_outputs = 1, + .odm_combine_4to1_supported = true, + + .xfc_supported = false, + .xfc_fill_bw_overhead_percent = 10.0, + .xfc_fill_constant_bytes = 0, + .gfx7_compat_tiling_supported = 0, + .number_of_cursors = 1, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = { + .clock_limits = { + { + .state = 0, + .dispclk_mhz = 562.0, + .dppclk_mhz = 300.0, + .phyclk_mhz = 300.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 405.6, + }, + }, + + .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */ + .num_states = 1, + .sr_exit_time_us = 15.5, + .sr_enter_plus_exit_time_us = 20, + .urgent_latency_us = 4.0, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, + .max_avg_sdp_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_percent = 40.0, + .writeback_latency_us = 12.0, + .max_request_size_bytes = 256, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .dcn_downspread_percent = 0.5, + .downspread_percent = 0.38, + .dram_page_open_time_ns = 50.0, + .dram_rw_turnaround_time_ns = 17.5, + .dram_return_buffer_per_channel_bytes = 8192, + .round_trip_ping_latency_dcfclk_cycles = 191, + .urgent_out_of_order_return_per_channel_bytes = 4096, + .channel_interleave_bytes = 256, + .num_banks = 8, + .gpuvm_min_page_size_bytes = 4096, + .hostvm_min_page_size_bytes = 4096, + .dram_clock_change_latency_us = 404, + .dummy_pstate_latency_us = 5, + .writeback_dram_clock_change_latency_us = 23.0, + .return_bus_width_bytes = 64, + .dispclk_dppclk_vco_speed_mhz = 3650, + .xfc_bus_transport_time_us = 20, // ? + .xfc_xbuf_latency_tolerance_us = 4, // ? + .use_urgent_burst_bw = 1, // ? + .do_urgent_latency_adjustment = true, + .urgent_latency_adjustment_fabric_clock_component_us = 1.0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, +}; + + +void optc3_fpu_set_vrr_m_const(struct timing_generator *optc, + double vtotal_avg) +{ +struct optc *optc1 = DCN10TG_FROM_TG(optc); + double vtotal_min, vtotal_max; + double ratio, modulo, phase; + uint32_t vblank_start; + uint32_t v_total_mask_value = 0; + + dc_assert_fp_enabled(); + + /* Compute VTOTAL_MIN and VTOTAL_MAX, so that + * VOTAL_MAX - VTOTAL_MIN = 1 + */ + v_total_mask_value = 16; + vtotal_min = dcn_bw_floor(vtotal_avg); + vtotal_max = dcn_bw_ceil(vtotal_avg); + + /* Check that bottom VBLANK is at least 2 lines tall when running with + * VTOTAL_MIN. Note that VTOTAL registers are defined as 'total number + * of lines in a frame - 1'. + */ + REG_GET(OTG_V_BLANK_START_END, OTG_V_BLANK_START, + &vblank_start); + ASSERT(vtotal_min >= vblank_start + 1); + + /* Special case where the average frame rate can be achieved + * without using the DTO + */ + if (vtotal_min == vtotal_max) { + REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min); + + optc->funcs->set_vtotal_min_max(optc, 0, 0); + REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0); + REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0); + REG_UPDATE_3(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 0, + OTG_V_TOTAL_MAX_SEL, 0, + OTG_SET_V_TOTAL_MIN_MASK_EN, 0); + return; + } + + ratio = vtotal_max - vtotal_avg; + modulo = 65536.0 * 65536.0 - 1.0; /* 2^32 - 1 */ + phase = ratio * modulo; + + /* Special cases where the DTO phase gets rounded to 0 or + * to DTO modulo + */ + if (phase <= 0 || phase >= modulo) { + REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, + phase <= 0 ? + (uint32_t)vtotal_max : (uint32_t)vtotal_min); + REG_SET(OTG_V_TOTAL_MIN, 0, OTG_V_TOTAL_MIN, 0); + REG_SET(OTG_V_TOTAL_MAX, 0, OTG_V_TOTAL_MAX, 0); + REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0); + REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0); + REG_UPDATE_3(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 0, + OTG_V_TOTAL_MAX_SEL, 0, + OTG_SET_V_TOTAL_MIN_MASK_EN, 0); + return; + } + REG_UPDATE_6(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 1, + OTG_V_TOTAL_MAX_SEL, 1, + OTG_SET_V_TOTAL_MIN_MASK_EN, 1, + OTG_SET_V_TOTAL_MIN_MASK, v_total_mask_value, + OTG_VTOTAL_MID_REPLACING_MIN_EN, 0, + OTG_VTOTAL_MID_REPLACING_MAX_EN, 0); + REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min); + optc->funcs->set_vtotal_min_max(optc, vtotal_min, vtotal_max); + REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, (uint32_t)phase); + REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, (uint32_t)modulo); +} + +void dcn30_fpu_populate_dml_writeback_from_context( + struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) +{ + int pipe_cnt, i, j; + double max_calc_writeback_dispclk; + double writeback_dispclk; + struct writeback_st dout_wb; + + dc_assert_fp_enabled(); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream; + + if (!stream) + continue; + max_calc_writeback_dispclk = 0; + + /* Set writeback information */ + pipes[pipe_cnt].dout.wb_enable = 0; + pipes[pipe_cnt].dout.num_active_wb = 0; + for (j = 0; j < stream->num_wb_info; j++) { + struct dc_writeback_info *wb_info = &stream->writeback_info[j]; + + if (wb_info->wb_enabled && wb_info->writeback_source_plane && + (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) { + pipes[pipe_cnt].dout.wb_enable = 1; + pipes[pipe_cnt].dout.num_active_wb++; + dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ? + wb_info->dwb_params.cnv_params.crop_height : + wb_info->dwb_params.cnv_params.src_height; + dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ? + wb_info->dwb_params.cnv_params.crop_width : + wb_info->dwb_params.cnv_params.src_width; + dout_wb.wb_dst_width = wb_info->dwb_params.dest_width; + dout_wb.wb_dst_height = wb_info->dwb_params.dest_height; + + /* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */ + if (dc->dml.ip.writeback_max_hscl_taps > 1) { + dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps; + dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps; + } else { + dout_wb.wb_htaps_luma = 1; + dout_wb.wb_vtaps_luma = 1; + } + dout_wb.wb_htaps_chroma = 0; + dout_wb.wb_vtaps_chroma = 0; + dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ? + (double)wb_info->dwb_params.cnv_params.crop_width / + (double)wb_info->dwb_params.dest_width : + (double)wb_info->dwb_params.cnv_params.src_width / + (double)wb_info->dwb_params.dest_width; + dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ? + (double)wb_info->dwb_params.cnv_params.crop_height / + (double)wb_info->dwb_params.dest_height : + (double)wb_info->dwb_params.cnv_params.src_height / + (double)wb_info->dwb_params.dest_height; + if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB || + wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA) + dout_wb.wb_pixel_format = dm_444_64; + else + dout_wb.wb_pixel_format = dm_444_32; + + /* Workaround for cases where multiple writebacks are connected to same plane + * In which case, need to compute worst case and set the associated writeback parameters + * This workaround is necessary due to DML computation assuming only 1 set of writeback + * parameters per pipe + */ + writeback_dispclk = dml30_CalculateWriteBackDISPCLK( + dout_wb.wb_pixel_format, + pipes[pipe_cnt].pipe.dest.pixel_rate_mhz, + dout_wb.wb_hratio, + dout_wb.wb_vratio, + dout_wb.wb_htaps_luma, + dout_wb.wb_vtaps_luma, + dout_wb.wb_src_width, + dout_wb.wb_dst_width, + pipes[pipe_cnt].pipe.dest.htotal, + dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size); + + if (writeback_dispclk > max_calc_writeback_dispclk) { + max_calc_writeback_dispclk = writeback_dispclk; + pipes[pipe_cnt].dout.wb = dout_wb; + } + } + } + + pipe_cnt++; + } +} + +void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params, + struct display_mode_lib *dml, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int cur_pipe) +{ + int i; + + dc_assert_fp_enabled(); + + for (i = 0; i < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); i++) { + wb_arb_params->cli_watermark[i] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000; + wb_arb_params->pstate_watermark[i] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000; + } + + wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[cur_pipe] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */ +} + +void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) +{ + +dc_assert_fp_enabled(); + +if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; + } +} + +void dcn30_fpu_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ +int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; + int i, pipe_idx; + double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb]; + bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported; + +dc_assert_fp_enabled(); + + if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk) + dcfclk = context->bw_ctx.dml.soc.min_dcfclk; + + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; + + /* Set B: + * DCFCLK: 1GHz or min required above 1GHz + * FCLK/UCLK: Max + */ + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { + if (vlevel == 0) { + pipes[0].clks_cfg.voltage = 1; + pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; + } + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + + /* Set D: + * DCFCLK: Min Required + * FCLK(proportional to UCLK): 1GHz or Max + * MALL stutter, sr_enter_exit = 4, sr_exit = 2us + */ + /* + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + */ + + /* Set C: + * DCFCLK: Min Required + * FCLK(proportional to UCLK): 1GHz or Max + * pstate latency overridden to 5us + */ + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { + unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + unsigned int min_dram_speed_mts_margin = 160; + + if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported) + min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; + + /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ + for (i = 3; i > 0; i--) + if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) + break; + + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; + + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; + } + + context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + if (!pstate_en) { + /* The only difference between A and C is p-state latency, if p-state is not supported we want to + * calculate DLG based on dummy p-state latency, and max out the set A p-state watermark + */ + context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; + } else { + /* Set A: + * DCFCLK: Min Required + * FCLK(proportional to UCLK): 1GHz or Max + * + * Set A calculated last so that following calculations are based on Set A + */ + dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + } + + context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; + + /* Make set D = set A until set D is enabled */ + context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); + pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + if (dc->config.forced_clocks) { + pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; + pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; + } + if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; + if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; + + pipe_idx++; + } + + DC_FP_START(); + dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); + DC_FP_END(); + + if (!pstate_en) + /* Restore full p-state latency */ + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + +} + +void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc) +{ + dc_assert_fp_enabled(); + + if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) + dcn3_0_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; +} + +void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk) +{ + dc_assert_fp_enabled(); + + if (!dcn30_bb_max_clk->max_dcfclk_mhz) + dcn30_bb_max_clk->max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz; + if (!dcn30_bb_max_clk->max_dispclk_mhz) + dcn30_bb_max_clk->max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz; + if (!dcn30_bb_max_clk->max_dppclk_mhz) + dcn30_bb_max_clk->max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz; + if (!dcn30_bb_max_clk->max_phyclk_mhz) + dcn30_bb_max_clk->max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz; +} + +void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, + unsigned int *optimal_dcfclk, + unsigned int *optimal_fclk) +{ + double bw_from_dram, bw_from_dram1, bw_from_dram2; + + dc_assert_fp_enabled(); + + bw_from_dram1 = uclk_mts * dcn3_0_soc.num_chans * + dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100); + bw_from_dram2 = uclk_mts * dcn3_0_soc.num_chans * + dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100); + + bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; + + if (optimal_fclk) + *optimal_fclk = bw_from_dram / + (dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100)); + + if (optimal_dcfclk) + *optimal_dcfclk = bw_from_dram / + (dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100)); +} + +void dcn30_fpu_update_bw_bounding_box(struct dc *dc, + struct clk_bw_params *bw_params, + struct dc_bounding_box_max_clk *dcn30_bb_max_clk, + unsigned int *dcfclk_mhz, + unsigned int *dram_speed_mts) +{ + unsigned int i; + + dc_assert_fp_enabled(); + + dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + + for (i = 0; i < dcn3_0_soc.num_states; i++) { + dcn3_0_soc.clock_limits[i].state = i; + dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; + dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; + dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; + + /* Fill all states with max values of all other clocks */ + dcn3_0_soc.clock_limits[i].dispclk_mhz = dcn30_bb_max_clk->max_dispclk_mhz; + dcn3_0_soc.clock_limits[i].dppclk_mhz = dcn30_bb_max_clk->max_dppclk_mhz; + dcn3_0_soc.clock_limits[i].phyclk_mhz = dcn30_bb_max_clk->max_phyclk_mhz; + dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz; + /* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */ + /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ + dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz; + dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz; + dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz; + } + /* re-init DML with updated bb */ + dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); + if (dc->current_state) + dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); + +} + + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h new file mode 100644 index 000000000000..dedfe7b5f173 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h @@ -0,0 +1,67 @@ +/* + * Copyright 2020-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN30_FPU_H__ +#define __DCN30_FPU_H__ + +#include "core_types.h" +#include "dcn20/dcn20_optc.h" + +void optc3_fpu_set_vrr_m_const(struct timing_generator *optc, + double vtotal_avg); + +void dcn30_fpu_populate_dml_writeback_from_context( + struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes); + +void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params, + struct display_mode_lib *dml, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int cur_pipe); + +void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); + +void dcn30_fpu_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); + +void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc); + +void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk); + +void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, + unsigned int *optimal_dcfclk, + unsigned int *optimal_fclk); + +void dcn30_fpu_update_bw_bounding_box(struct dc *dc, + struct clk_bw_params *bw_params, + struct dc_bounding_box_max_clk *dcn30_bb_max_clk, + unsigned int *dcfclk_mhz, + unsigned int *dram_speed_mts); + + +#endif /* __DCN30_FPU_H__*/ diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 26f3a55c35d7..555d4d9e1454 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -486,4 +486,11 @@ struct dc_state { } perf_params; }; +struct dc_bounding_box_max_clk { + int max_dcfclk_mhz; + int max_dispclk_mhz; + int max_dppclk_mhz; + int max_phyclk_mhz; +}; + #endif /* _CORE_TYPES_H_ */ -- cgit v1.2.3 From a32cc8177eabcd3497721836241f3d456342be62 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Thu, 5 May 2022 16:16:47 -0400 Subject: drm/amd/display: Fic incorrect pipe being used for clk update [Why] we save the prev_dppclk value using "dpp_inst" but when reading this value we use the index "i". In a case where a pipe is fused off we can end up reading the incorrect instance because i != dpp_inst in this case. [How] read the prev_dppclk using dpp_inst instead of i Reviewed-by: Dmytro Laktyushkin Acked-by: Qingqing Zhuo Signed-off-by: Bhawanpreet Lakha Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 02943ca65807..cf1b5f354ae9 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -122,7 +122,7 @@ static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr, dpp_inst = clk_mgr->base.ctx->dc->res_pool->dpps[i]->inst; dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; - prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[i]; + prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[dpp_inst]; if (safe_to_lower || prev_dppclk_khz < dppclk_khz) clk_mgr->dccg->funcs->update_dpp_dto( -- cgit v1.2.3 From 4d1d699f479dc8e01fbb6b7b5a8f2116de3a3883 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Fri, 6 May 2022 17:17:55 -0400 Subject: Revert "drm/amd/display: Refactor LTTPR cap retrieval" This reverts commit 3b90318d44f87a3582f876802253a7748d270385. [WHY] Regressions unintentionally caused by change, reverting until this can be resolved. Reviewed-by: Aric Cyr Acked-by: Qingqing Zhuo Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 171 ++++++++------------- drivers/gpu/drm/amd/display/dc/dc_link.h | 2 - drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h | 3 +- .../drm/amd/display/include/link_service_types.h | 6 - 4 files changed, 69 insertions(+), 113 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index d8de8dbf3676..3c9523218c19 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -5126,13 +5126,16 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link) return true; } -void dp_retrieve_lttpr_cap(struct dc_link *link) +bool dp_retrieve_lttpr_cap(struct dc_link *link) { + uint8_t lttpr_dpcd_data[8]; bool allow_lttpr_non_transparent_mode = 0; + bool vbios_lttpr_enable = link->dc->caps.vbios_lttpr_enable; bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware; enum dc_status status = DC_ERROR_UNEXPECTED; + bool is_lttpr_present = false; - memset(link->lttpr_dpcd_data, '\0', sizeof(link->lttpr_dpcd_data)); + memset(lttpr_dpcd_data, '\0', sizeof(lttpr_dpcd_data)); if ((link->dc->config.allow_lttpr_non_transparent_mode.bits.DP2_0 && link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED)) { @@ -5142,116 +5145,82 @@ void dp_retrieve_lttpr_cap(struct dc_link *link) allow_lttpr_non_transparent_mode = 1; } - link->lttpr_mode = LTTPR_MODE_NON_LTTPR; - link->lttpr_support = LTTPR_UNSUPPORTED; - /* - * Logic to determine LTTPR support + * Logic to determine LTTPR mode */ - if (vbios_lttpr_interop) - link->lttpr_support = LTTPR_SUPPORTED; - else if (link->dc->config.allow_lttpr_non_transparent_mode.raw == 0 - || !link->dc->caps.extended_aux_timeout_support) - link->lttpr_support = LTTPR_UNSUPPORTED; - else - link->lttpr_support = LTTPR_CHECK_EXT_SUPPORT; + link->lttpr_mode = LTTPR_MODE_NON_LTTPR; + if (vbios_lttpr_enable && vbios_lttpr_interop) + link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT; + else if (!vbios_lttpr_enable && vbios_lttpr_interop) { + if (allow_lttpr_non_transparent_mode) + link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT; + else + link->lttpr_mode = LTTPR_MODE_TRANSPARENT; + } else if (!vbios_lttpr_enable && !vbios_lttpr_interop) { + if (!allow_lttpr_non_transparent_mode || !link->dc->caps.extended_aux_timeout_support) + link->lttpr_mode = LTTPR_MODE_NON_LTTPR; + else + link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT; + } +#if defined(CONFIG_DRM_AMD_DC_DCN) /* Check DP tunnel LTTPR mode debug option. */ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && link->dc->debug.dpia_debug.bits.force_non_lttpr) - link->lttpr_support = LTTPR_UNSUPPORTED; + link->lttpr_mode = LTTPR_MODE_NON_LTTPR; +#endif - if (link->lttpr_support > LTTPR_UNSUPPORTED) { + if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT || link->lttpr_mode == LTTPR_MODE_TRANSPARENT) { /* By reading LTTPR capability, RX assumes that we will enable * LTTPR extended aux timeout if LTTPR is present. */ status = core_link_read_dpcd( link, DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV, - link->lttpr_dpcd_data, - sizeof(link->lttpr_dpcd_data)); - } -} - -bool dp_parse_lttpr_mode(struct dc_link *link) -{ - bool dpcd_allow_lttpr_non_transparent_mode = false; - bool is_lttpr_present = false; - - bool vbios_lttpr_enable = link->dc->caps.vbios_lttpr_enable; - - if ((link->dc->config.allow_lttpr_non_transparent_mode.bits.DP2_0 && - link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED)) { - dpcd_allow_lttpr_non_transparent_mode = true; - } else if (link->dc->config.allow_lttpr_non_transparent_mode.bits.DP1_4A && - !link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) { - dpcd_allow_lttpr_non_transparent_mode = true; + lttpr_dpcd_data, + sizeof(lttpr_dpcd_data)); + + link->dpcd_caps.lttpr_caps.revision.raw = + lttpr_dpcd_data[DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + + link->dpcd_caps.lttpr_caps.max_link_rate = + lttpr_dpcd_data[DP_MAX_LINK_RATE_PHY_REPEATER - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + + link->dpcd_caps.lttpr_caps.phy_repeater_cnt = + lttpr_dpcd_data[DP_PHY_REPEATER_CNT - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + + link->dpcd_caps.lttpr_caps.max_lane_count = + lttpr_dpcd_data[DP_MAX_LANE_COUNT_PHY_REPEATER - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + + link->dpcd_caps.lttpr_caps.mode = + lttpr_dpcd_data[DP_PHY_REPEATER_MODE - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + + link->dpcd_caps.lttpr_caps.max_ext_timeout = + lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + link->dpcd_caps.lttpr_caps.main_link_channel_coding.raw = + lttpr_dpcd_data[DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + + link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.raw = + lttpr_dpcd_data[DP_PHY_REPEATER_128B132B_RATES - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + + /* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */ + is_lttpr_present = (link->dpcd_caps.lttpr_caps.max_lane_count > 0 && + link->dpcd_caps.lttpr_caps.max_lane_count <= 4 && + link->dpcd_caps.lttpr_caps.revision.raw >= 0x14); + if (is_lttpr_present) { + CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: "); + configure_lttpr_mode_transparent(link); + } else + link->lttpr_mode = LTTPR_MODE_NON_LTTPR; } - - /* - * Logic to determine LTTPR mode - */ - if (link->lttpr_support == LTTPR_SUPPORTED) - if (vbios_lttpr_enable) - link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT; - else if (dpcd_allow_lttpr_non_transparent_mode) - link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT; - else - link->lttpr_mode = LTTPR_MODE_TRANSPARENT; - else // lttpr_support == LTTPR_CHECK_EXT_SUPPORT - if (dpcd_allow_lttpr_non_transparent_mode) { - link->lttpr_support = LTTPR_SUPPORTED; - link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT; - } else { - link->lttpr_support = LTTPR_UNSUPPORTED; - } - - if (link->lttpr_support == LTTPR_UNSUPPORTED) - return false; - - link->dpcd_caps.lttpr_caps.revision.raw = - link->lttpr_dpcd_data[DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - link->dpcd_caps.lttpr_caps.max_link_rate = - link->lttpr_dpcd_data[DP_MAX_LINK_RATE_PHY_REPEATER - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - link->dpcd_caps.lttpr_caps.phy_repeater_cnt = - link->lttpr_dpcd_data[DP_PHY_REPEATER_CNT - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - link->dpcd_caps.lttpr_caps.max_lane_count = - link->lttpr_dpcd_data[DP_MAX_LANE_COUNT_PHY_REPEATER - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - link->dpcd_caps.lttpr_caps.mode = - link->lttpr_dpcd_data[DP_PHY_REPEATER_MODE - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - link->dpcd_caps.lttpr_caps.max_ext_timeout = - link->lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - link->dpcd_caps.lttpr_caps.main_link_channel_coding.raw = - link->lttpr_dpcd_data[DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.raw = - link->lttpr_dpcd_data[DP_PHY_REPEATER_128B132B_RATES - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - - /* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */ - is_lttpr_present = (link->dpcd_caps.lttpr_caps.max_lane_count > 0 && - link->dpcd_caps.lttpr_caps.max_lane_count <= 4 && - link->dpcd_caps.lttpr_caps.revision.raw >= 0x14); - if (is_lttpr_present) { - CONN_DATA_DETECT(link, link->lttpr_dpcd_data, sizeof(link->lttpr_dpcd_data), "LTTPR Caps: "); - configure_lttpr_mode_transparent(link); - } else - link->lttpr_mode = LTTPR_MODE_NON_LTTPR; - return is_lttpr_present; } @@ -5403,8 +5372,7 @@ static bool retrieve_link_cap(struct dc_link *link) status = wa_try_to_wake_dprx(link, timeout_ms); } - dp_retrieve_lttpr_cap(link); - + is_lttpr_present = dp_retrieve_lttpr_cap(link); /* Read DP tunneling information. */ status = dpcd_get_tunneling_device_data(link); @@ -5440,9 +5408,6 @@ static bool retrieve_link_cap(struct dc_link *link) return false; } - if (link->lttpr_support > LTTPR_UNSUPPORTED) - is_lttpr_present = dp_parse_lttpr_mode(link); - if (!is_lttpr_present) dc_link_aux_try_to_configure_timeout(link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD); diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 251f2bbc96b9..a3c37ee3f849 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -129,8 +129,6 @@ struct dc_link { bool link_state_valid; bool aux_access_disabled; bool sync_lt_in_progress; - uint8_t lttpr_dpcd_data[8]; - enum lttpr_support lttpr_support; enum lttpr_mode lttpr_mode; bool is_internal_display; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index 78f09893c118..44f167d2584f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -217,8 +217,7 @@ void disable_dp_hpo_output(struct dc_link *link, void setup_dp_hpo_stream(struct pipe_ctx *pipe_ctx, bool enable); bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx); -void dp_retrieve_lttpr_cap(struct dc_link *link); -bool dp_apply_lttpr_mode(struct dc_link *link); +bool dp_retrieve_lttpr_cap(struct dc_link *link); void edp_panel_backlight_power_on(struct dc_link *link); void dp_receiver_power_ctrl(struct dc_link *link, bool on); void dp_source_sequence_trace(struct dc_link *link, uint8_t dp_test_mode); diff --git a/drivers/gpu/drm/amd/display/include/link_service_types.h b/drivers/gpu/drm/amd/display/include/link_service_types.h index 9f465b4d626e..447a56286dd0 100644 --- a/drivers/gpu/drm/amd/display/include/link_service_types.h +++ b/drivers/gpu/drm/amd/display/include/link_service_types.h @@ -80,12 +80,6 @@ enum link_training_result { DP_128b_132b_CDS_DONE_TIMEOUT, }; -enum lttpr_support { - LTTPR_UNSUPPORTED, - LTTPR_CHECK_EXT_SUPPORT, - LTTPR_SUPPORTED, -}; - enum lttpr_mode { LTTPR_MODE_NON_LTTPR, LTTPR_MODE_TRANSPARENT, -- cgit v1.2.3 From c51bdd1a9c34936c1a6b2f6eb79703c730cc0e6e Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 8 May 2022 23:31:34 -0400 Subject: drm/amd/display: 3.2.186 This version brings along the following: - Improvements in link training fallback - Adding individual edp hotplug support - Fixes in DPIA HPD status, display clock change hang, etc. - FPU isolation work for DCN30 Acked-by: Qingqing Zhuo Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7cfc04a8ef15..a31ea3644ec2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.185" +#define DC_VER "3.2.186" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- cgit v1.2.3 From 2c2dd0555fd6bcea6d43ab3224c6af718b910e22 Mon Sep 17 00:00:00 2001 From: Haohui Mai Date: Mon, 16 May 2022 05:00:53 -0700 Subject: drm/amdgpu: Clean up of initializing doorbells for gfx_v9 and gfx_v10 Clean up redundant, copy-paste code blocks during the initialization of the doorbells in mqd_init(). Signed-off-by: Haohui Mai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 17 ----------------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 17 ----------------- 2 files changed, 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 65a4126135b0..63fbe03283bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6919,23 +6919,6 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - tmp = 0; - /* enable the doorbell if requested */ - if (prop->use_doorbell) { - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_OFFSET, prop->doorbell_index); - - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_EN, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_SOURCE, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_HIT, 0); - } - - mqd->cp_hqd_pq_doorbell_control = tmp; - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 83639b5ea6a9..f49a2dd89ee7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3535,23 +3535,6 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - tmp = 0; - /* enable the doorbell if requested */ - if (ring->use_doorbell) { - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_EN, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_SOURCE, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_HIT, 0); - } - - mqd->cp_hqd_pq_doorbell_control = tmp; - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ ring->wptr = 0; mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); -- cgit v1.2.3 From a0af5dbdc914eae667fba8322cb02afc4ce3967b Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 12 May 2022 20:38:18 -0400 Subject: drm/amdkfd: simplify cpu hive assignment CPU hive assignment currently assumes when a GPU hive is connected_to_cpu, there is only one hive in the system. Only assign CPUs to the hive if they are explicitly directly connected to the GPU hive to get rid of the need for this assumption. It's more efficient to do this when querying IO links since other non-CRAT info has to be filled in anyways. Also, stop re-assigning the same CPU to the same GPU hive if it has already been done before. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 2e20f54bb147..8d50d207cf66 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1271,6 +1271,12 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) if (!peer_dev) continue; + /* Include the CPU peer in GPU hive if connected over xGMI. */ + if (!peer_dev->gpu && !peer_dev->node_props.hive_id && + dev->node_props.hive_id && + dev->gpu->adev->gmc.xgmi.connected_to_cpu) + peer_dev->node_props.hive_id = dev->node_props.hive_id; + list_for_each_entry(inbound_link, &peer_dev->io_link_props, list) { if (inbound_link->node_to != link->node_from) @@ -1302,22 +1308,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu) pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); - /* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */ - if (gpu->hive_id && gpu->adev->gmc.xgmi.connected_to_cpu) { - struct kfd_topology_device *top_dev; - - down_read(&topology_lock); - - list_for_each_entry(top_dev, &topology_device_list, list) { - if (top_dev->gpu) - break; - - top_dev->node_props.hive_id = gpu->hive_id; - } - - up_read(&topology_lock); - } - /* Check to see if this gpu device exists in the topology_device_list. * If so, assign the gpu to that device, * else create a Virtual CRAT for this gpu device and then parse that -- cgit v1.2.3 From 69493c034d2455204dfcd370de8c4dc204374a94 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 13 May 2022 13:54:02 +0200 Subject: drm/amdgpu: cleanup ctx implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let each context have a pointer to the ctx manager and properly initialize the adev pointer inside the context manager. Reduce the BUG_ON() in amdgpu_ctx_add_fence() into a WARN_ON() and directly return the sequence number instead of writing into a parmeter. Signed-off-by: Christian König Reviewed-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 46 ++++++++++++++++----------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 11 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e552a2004868..84caab5e4d22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1252,7 +1252,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, p->fence = dma_fence_get(&job->base.s_fence->finished); - amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq); + seq = amdgpu_ctx_add_fence(p->ctx, entity, p->fence); amdgpu_cs_post_dependencies(p); if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c317078d1afd..a61e4c83a545 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -135,9 +135,9 @@ static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip) { - struct amdgpu_device *adev = ctx->adev; - int32_t ctx_prio; + struct amdgpu_device *adev = ctx->mgr->adev; unsigned int hw_prio; + int32_t ctx_prio; ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ? ctx->init_priority : ctx->override_priority; @@ -166,7 +166,7 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip) static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, const u32 ring) { - struct amdgpu_device *adev = ctx->adev; + struct amdgpu_device *adev = ctx->mgr->adev; struct amdgpu_ctx_entity *entity; struct drm_gpu_scheduler **scheds = NULL, *sched = NULL; unsigned num_scheds = 0; @@ -220,10 +220,8 @@ error_free_entity: return r; } -static int amdgpu_ctx_init(struct amdgpu_device *adev, - int32_t priority, - struct drm_file *filp, - struct amdgpu_ctx *ctx) +static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, + struct drm_file *filp, struct amdgpu_ctx *ctx) { int r; @@ -233,15 +231,14 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, memset(ctx, 0, sizeof(*ctx)); - ctx->adev = adev; - kref_init(&ctx->refcount); + ctx->mgr = mgr; spin_lock_init(&ctx->ring_lock); mutex_init(&ctx->lock); - ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); + ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); ctx->reset_counter_query = ctx->reset_counter; - ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); + ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); ctx->init_priority = priority; ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; @@ -266,7 +263,7 @@ static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, u32 *stable_pstate) { - struct amdgpu_device *adev = ctx->adev; + struct amdgpu_device *adev = ctx->mgr->adev; enum amd_dpm_forced_level current_level; current_level = amdgpu_dpm_get_performance_level(adev); @@ -294,7 +291,7 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, u32 stable_pstate) { - struct amdgpu_device *adev = ctx->adev; + struct amdgpu_device *adev = ctx->mgr->adev; enum amd_dpm_forced_level level; u32 current_stable_pstate; int r; @@ -345,7 +342,8 @@ done: static void amdgpu_ctx_fini(struct kref *ref) { struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); - struct amdgpu_device *adev = ctx->adev; + struct amdgpu_ctx_mgr *mgr = ctx->mgr; + struct amdgpu_device *adev = mgr->adev; unsigned i, j, idx; if (!adev) @@ -421,7 +419,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, } *id = (uint32_t)r; - r = amdgpu_ctx_init(adev, priority, filp, ctx); + r = amdgpu_ctx_init(mgr, priority, filp, ctx); if (r) { idr_remove(&mgr->ctx_handles, *id); *id = 0; @@ -671,9 +669,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) return 0; } -void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, - struct drm_sched_entity *entity, - struct dma_fence *fence, uint64_t *handle) +uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, + struct drm_sched_entity *entity, + struct dma_fence *fence) { struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); uint64_t seq = centity->sequence; @@ -682,8 +680,7 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, idx = seq & (amdgpu_sched_jobs - 1); other = centity->fences[idx]; - if (other) - BUG_ON(!dma_fence_is_signaled(other)); + WARN_ON(other && !dma_fence_is_signaled(other)); dma_fence_get(fence); @@ -693,8 +690,7 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, spin_unlock(&ctx->ring_lock); dma_fence_put(other); - if (handle) - *handle = seq; + return seq; } struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, @@ -731,7 +727,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx, int hw_ip, int32_t priority) { - struct amdgpu_device *adev = ctx->adev; + struct amdgpu_device *adev = ctx->mgr->adev; unsigned int hw_prio; struct drm_gpu_scheduler **scheds = NULL; unsigned num_scheds; @@ -796,8 +792,10 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, return r; } -void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) +void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr, + struct amdgpu_device *adev) { + mgr->adev = adev; mutex_init(&mgr->lock); idr_init(&mgr->ctx_handles); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index 142f2f87d44c..681050bc828c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -40,7 +40,7 @@ struct amdgpu_ctx_entity { struct amdgpu_ctx { struct kref refcount; - struct amdgpu_device *adev; + struct amdgpu_ctx_mgr *mgr; unsigned reset_counter; unsigned reset_counter_query; uint32_t vram_lost_counter; @@ -70,9 +70,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx); int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, u32 ring, struct drm_sched_entity **entity); -void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, - struct drm_sched_entity *entity, - struct dma_fence *fence, uint64_t *seq); +uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, + struct drm_sched_entity *entity, + struct dma_fence *fence); struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity, uint64_t seq); @@ -85,7 +85,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity); -void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr, + struct amdgpu_device *adev); void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 497478f8a5d3..801f6fa692e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1152,7 +1152,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) mutex_init(&fpriv->bo_list_lock); idr_init(&fpriv->bo_list_handles); - amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); + amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev); file_priv->driver_priv = fpriv; goto out_suspend; -- cgit v1.2.3 From 842035543c0bfa35b1471e74094a107673815b01 Mon Sep 17 00:00:00 2001 From: Haohui Mai Date: Mon, 16 May 2022 23:06:35 -0700 Subject: drm/amdgpu: Set CP_HQD_PQ_CONTROL.RPTR_BLOCK_SIZE correctly Remove the accidental shifts on the values of RPTR_BLOCK_SIZE in gfx_v8-v11. The bug essentially always programs the corresponding fields to zero instead of the correct value. The hardware clamps the min value to 5 so this resulted in a value of 5 being programmed. Signed-off-by: Haohui Mai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 63fbe03283bf..dada7ab5d43f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6898,7 +6898,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); + (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 8773cbd1f03b..8c0a3fc7aaa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4082,7 +4082,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); + (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 90f64219d291..fb9302910742 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4490,7 +4490,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(ring->ring_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); + (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f49a2dd89ee7..f12ae6e2359a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3514,7 +3514,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(ring->ring_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); + (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif -- cgit v1.2.3 From 10784fec9cbddad2ca6031b28c1ca1d041444dc5 Mon Sep 17 00:00:00 2001 From: Haohui Mai Date: Tue, 17 May 2022 04:24:38 -0700 Subject: drm/amdgpu/gfx10: rework KIQ programming Make sure the queue is not longer active before programming the kiq EOP registers. Signed-off-by: Haohui Mai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index dada7ab5d43f..02754ee86c81 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6956,20 +6956,6 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring) /* disable wptr polling */ WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); - /* write the EOP addr */ - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, - mqd->cp_hqd_eop_base_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, - mqd->cp_hqd_eop_base_addr_hi); - - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, - mqd->cp_hqd_eop_control); - - /* enable doorbell? */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->cp_hqd_pq_doorbell_control); - /* disable the queue if it's active */ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); @@ -6988,6 +6974,19 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring) mqd->cp_hqd_pq_wptr_hi); } + /* disable doorbells */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); + + /* write the EOP addr */ + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, + mqd->cp_hqd_eop_base_addr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, + mqd->cp_hqd_eop_base_addr_hi); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, + mqd->cp_hqd_eop_control); + /* set the pointer to the MQD */ WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); -- cgit v1.2.3 From a2b28708b645c5632dc93669ab06e97874c8244f Mon Sep 17 00:00:00 2001 From: Gong Yuanjun Date: Tue, 17 May 2022 17:57:00 +0800 Subject: drm/radeon: fix a possible null pointer dereference In radeon_fp_native_mode(), the return value of drm_mode_duplicate() is assigned to mode, which will lead to a NULL pointer dereference on failure of drm_mode_duplicate(). Add a check to avoid npd. The failure status of drm_cvt_mode() on the other path is checked too. Signed-off-by: Gong Yuanjun Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_connectors.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index a16892c16f60..58db79921cd3 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -473,6 +473,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encode native_mode->vdisplay != 0 && native_mode->clock != 0) { mode = drm_mode_duplicate(dev, native_mode); + if (!mode) + return NULL; mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; drm_mode_set_name(mode); @@ -487,6 +489,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encode * simpler. */ mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false); + if (!mode) + return NULL; mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name); } -- cgit v1.2.3 From d2f4460a3d9502513419f06cc376c7ade49d5753 Mon Sep 17 00:00:00 2001 From: Gong Yuanjun Date: Tue, 17 May 2022 17:57:46 +0800 Subject: drm/amd/pm: fix a potential gpu_metrics_table memory leak gpu_metrics_table is allocated in yellow_carp_init_smc_tables() but not freed in yellow_carp_fini_smc_tables(). Signed-off-by: Gong Yuanjun Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index 87257b1b028f..feff4f8c927c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -190,6 +190,9 @@ static int yellow_carp_fini_smc_tables(struct smu_context *smu) kfree(smu_table->watermarks_table); smu_table->watermarks_table = NULL; + kfree(smu_table->gpu_metrics_table); + smu_table->gpu_metrics_table = NULL; + return 0; } -- cgit v1.2.3 From 72063c71c39162d897c7c6f47fdc26425cfba03b Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 5 May 2022 11:49:14 +0800 Subject: drm/amd/pm: enable more dpm features for SMU 13.0.0 Enable MP0CLK DPM and FW Dstate since they are already supported by latest 78.36.0 PMFW. Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 197a0e2ff063..7bfceca246ae 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -296,6 +296,9 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu, *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_BIT); + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_MP0CLK_BIT); + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_FW_DSTATE_BIT); + return 0; } -- cgit v1.2.3 From 704d6bf605faf65555438c2fa0282c02dca1a7b6 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 6 May 2022 17:36:06 +0800 Subject: drm/amd/pm: skip dpm disablement on suspend for SMU 13.0.0 Since PMFW will handle this properly. Driver involvement is unnecessary. Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 6016b325b6b5..a601024ba4de 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1436,6 +1436,7 @@ static int smu_disable_dpms(struct smu_context *smu) case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 5): case IP_VERSION(11, 0, 9): + case IP_VERSION(13, 0, 0): return 0; default: break; -- cgit v1.2.3 From 1c65e54881f3a56f16783b0b772501a8ddeb8c10 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 9 May 2022 11:42:23 +0800 Subject: drm/amd/pm: update SMU 13.0.0 driver_if header To align with 78.37.0 and later PMFWs. Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 22 ++++++++++------------ drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h index ecc6411dfc8d..c1f76236da26 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h @@ -671,8 +671,8 @@ typedef struct { uint16_t reserved[2]; //Frequency changes - uint16_t GfxclkFmin; // MHz - uint16_t GfxclkFmax; // MHz + int16_t GfxclkFmin; // MHz + int16_t GfxclkFmax; // MHz uint16_t UclkFmin; // MHz uint16_t UclkFmax; // MHz @@ -683,15 +683,14 @@ typedef struct { //Fan control uint8_t FanLinearPwmPoints[NUM_OD_FAN_MAX_POINTS]; uint8_t FanLinearTempPoints[NUM_OD_FAN_MAX_POINTS]; - uint16_t FanMaximumRpm; uint16_t FanMinimumPwm; - uint16_t FanAcousticLimitRpm; + uint16_t AcousticTargetRpmThreshold; + uint16_t AcousticLimitRpmThreshold; uint16_t FanTargetTemperature; // Degree Celcius uint8_t FanZeroRpmEnable; uint8_t FanZeroRpmStopTemp; uint8_t FanMode; - uint8_t Padding[1]; - + uint8_t MaxOpTemp; uint32_t Spare[13]; uint32_t MmHubPadding[8]; // SMU internal use. Adding here instead of external as a workaround @@ -719,15 +718,14 @@ typedef struct { uint8_t FanLinearPwmPoints; uint8_t FanLinearTempPoints; - uint16_t FanMaximumRpm; uint16_t FanMinimumPwm; - uint16_t FanAcousticLimitRpm; + uint16_t AcousticTargetRpmThreshold; + uint16_t AcousticLimitRpmThreshold; uint16_t FanTargetTemperature; // Degree Celcius uint8_t FanZeroRpmEnable; uint8_t FanZeroRpmStopTemp; uint8_t FanMode; - uint8_t Padding[1]; - + uint8_t MaxOpTemp; uint32_t Spare[13]; @@ -997,7 +995,8 @@ typedef struct { uint16_t SocketPowerLimitAcTau[PPT_THROTTLER_COUNT]; // Time constant of LPF in ms uint16_t SocketPowerLimitDcTau[PPT_THROTTLER_COUNT]; // Time constant of LPF in ms - uint32_t SpareVmin[12]; + QuadraticInt_t Vmin_droop; + uint32_t SpareVmin[9]; //SECTION: DPM Configuration 1 @@ -1286,7 +1285,6 @@ typedef struct { uint32_t PostVoltageSetBacoDelay; // in microseconds. Amount of time FW will wait after power good is established or PSI0 command is issued uint32_t BacoEntryDelay; // in milliseconds. Amount of time FW will wait to trigger BACO entry after receiving entry notification from OS - // SECTION: Board Reserved uint32_t BoardSpare[64]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 2b44d41a5157..afa1991e26f9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -30,7 +30,7 @@ #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x27 +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x28 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x28 #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms -- cgit v1.2.3 From 6fd693817dcf07aed021b4196993822fad225664 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 10 May 2022 11:04:06 +0800 Subject: drm/amd/pm: correct the softpptable ids used for SMU 13.0.0 To better match with the pptable_id settings from VBIOS. Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 47 +++++++++++++++++++------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index ae6321af9d88..7be4f6875a7b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -218,13 +218,25 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu) pptable_id == 3688) pptable_id = 36881; /* - * Temporary solution for SMU V13.0.0: - * - use 99991 signed pptable when SCPM enabled - * TODO: drop this when the pptable carried in vbios - * is ready. + * Temporary solution for SMU V13.0.0 with SCPM enabled: + * - use 36831 signed pptable when pp_table_id is 3683 + * - use 36641 signed pptable when pp_table_id is 3664 or 0 + * TODO: drop these when the pptable carried in vbios is ready. */ - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) - pptable_id = 99991; + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) { + switch (pptable_id) { + case 0: + case 3664: + pptable_id = 36641; + break; + case 3683: + pptable_id = 36831; + break; + default: + dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id); + return -EINVAL; + } + } } /* "pptable_id == 0" means vbios carries the pptable. */ @@ -448,13 +460,24 @@ int smu_v13_0_setup_pptable(struct smu_context *smu) pptable_id = smu->smu_table.boot_values.pp_table_id; /* - * Temporary solution for SMU V13.0.0: - * - use 9999 unsigned pptable when SCPM disabled - * TODO: drop this when the pptable carried in vbios - * is ready. + * Temporary solution for SMU V13.0.0 with SCPM disabled: + * - use 3664 or 3683 on request + * - use 3664 when pptable_id is 0 + * TODO: drop these when the pptable carried in vbios is ready. */ - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) - pptable_id = 9999; + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) { + switch (pptable_id) { + case 0: + pptable_id = 3664; + break; + case 3664: + case 3683: + break; + default: + dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id); + return -EINVAL; + } + } } /* force using vbios pptable in sriov mode */ -- cgit v1.2.3 From 0aceb728f4e4790d80ce64e83bb8fad40693db13 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 10 May 2022 15:00:43 +0800 Subject: drm/amd/pm: enable more dpm features for SMU 13.0.0 Enable OOB Monitor and SOC CG which are ready since 78.38.0. Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 7bfceca246ae..7bb2923eb819 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -299,6 +299,9 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu, *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_MP0CLK_BIT); *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_FW_DSTATE_BIT); + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT); + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_SOC_CG_BIT); + return 0; } -- cgit v1.2.3 From 3670c46f07d13ba42bb66948fde68495078457ec Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 13 May 2022 11:03:07 +0800 Subject: drm/amd/pm: enable memory temp reading for SMU 13.0.0 With the latest vbios, the memory temp reading is working. Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 7bb2923eb819..7432b3e76d3d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -275,9 +275,7 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu, *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_VDDIO_MEM_SCALING_BIT); } -#if 0 *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_MEM_TEMP_READ_BIT); -#endif if (adev->pm.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_GFXCLK_BIT); -- cgit v1.2.3 From 2c270d3e71ed0b68b2f75c0b15645fb023b4032c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 18 May 2022 20:38:36 +0300 Subject: drm/amdgpu/pm: smu_v13_0_4: delete duplicate condition There is no need to check if "clock_ranges' is non-NULL. It is checked already on the line before. Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 62 +++++++++++----------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index 7d6ff141b43f..5a17b51aa0f9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -644,42 +644,40 @@ static int smu_v13_0_4_set_watermarks_table(struct smu_context *smu, if (!table || !clock_ranges) return -EINVAL; - if (clock_ranges) { - if (clock_ranges->num_reader_wm_sets > NUM_WM_RANGES || - clock_ranges->num_writer_wm_sets > NUM_WM_RANGES) - return -EINVAL; - - for (i = 0; i < clock_ranges->num_reader_wm_sets; i++) { - table->WatermarkRow[WM_DCFCLK][i].MinClock = - clock_ranges->reader_wm_sets[i].min_drain_clk_mhz; - table->WatermarkRow[WM_DCFCLK][i].MaxClock = - clock_ranges->reader_wm_sets[i].max_drain_clk_mhz; - table->WatermarkRow[WM_DCFCLK][i].MinMclk = - clock_ranges->reader_wm_sets[i].min_fill_clk_mhz; - table->WatermarkRow[WM_DCFCLK][i].MaxMclk = - clock_ranges->reader_wm_sets[i].max_fill_clk_mhz; - - table->WatermarkRow[WM_DCFCLK][i].WmSetting = - clock_ranges->reader_wm_sets[i].wm_inst; - } + if (clock_ranges->num_reader_wm_sets > NUM_WM_RANGES || + clock_ranges->num_writer_wm_sets > NUM_WM_RANGES) + return -EINVAL; - for (i = 0; i < clock_ranges->num_writer_wm_sets; i++) { - table->WatermarkRow[WM_SOCCLK][i].MinClock = - clock_ranges->writer_wm_sets[i].min_fill_clk_mhz; - table->WatermarkRow[WM_SOCCLK][i].MaxClock = - clock_ranges->writer_wm_sets[i].max_fill_clk_mhz; - table->WatermarkRow[WM_SOCCLK][i].MinMclk = - clock_ranges->writer_wm_sets[i].min_drain_clk_mhz; - table->WatermarkRow[WM_SOCCLK][i].MaxMclk = - clock_ranges->writer_wm_sets[i].max_drain_clk_mhz; - - table->WatermarkRow[WM_SOCCLK][i].WmSetting = - clock_ranges->writer_wm_sets[i].wm_inst; - } + for (i = 0; i < clock_ranges->num_reader_wm_sets; i++) { + table->WatermarkRow[WM_DCFCLK][i].MinClock = + clock_ranges->reader_wm_sets[i].min_drain_clk_mhz; + table->WatermarkRow[WM_DCFCLK][i].MaxClock = + clock_ranges->reader_wm_sets[i].max_drain_clk_mhz; + table->WatermarkRow[WM_DCFCLK][i].MinMclk = + clock_ranges->reader_wm_sets[i].min_fill_clk_mhz; + table->WatermarkRow[WM_DCFCLK][i].MaxMclk = + clock_ranges->reader_wm_sets[i].max_fill_clk_mhz; + + table->WatermarkRow[WM_DCFCLK][i].WmSetting = + clock_ranges->reader_wm_sets[i].wm_inst; + } - smu->watermarks_bitmap |= WATERMARKS_EXIST; + for (i = 0; i < clock_ranges->num_writer_wm_sets; i++) { + table->WatermarkRow[WM_SOCCLK][i].MinClock = + clock_ranges->writer_wm_sets[i].min_fill_clk_mhz; + table->WatermarkRow[WM_SOCCLK][i].MaxClock = + clock_ranges->writer_wm_sets[i].max_fill_clk_mhz; + table->WatermarkRow[WM_SOCCLK][i].MinMclk = + clock_ranges->writer_wm_sets[i].min_drain_clk_mhz; + table->WatermarkRow[WM_SOCCLK][i].MaxMclk = + clock_ranges->writer_wm_sets[i].max_drain_clk_mhz; + + table->WatermarkRow[WM_SOCCLK][i].WmSetting = + clock_ranges->writer_wm_sets[i].wm_inst; } + smu->watermarks_bitmap |= WATERMARKS_EXIST; + /* pass data to smu controller */ if ((smu->watermarks_bitmap & WATERMARKS_EXIST) && !(smu->watermarks_bitmap & WATERMARKS_LOADED)) { -- cgit v1.2.3 From 950d64250fba5fbb8c290f692de8ffdee380726c Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Wed, 27 Apr 2022 12:16:51 +0800 Subject: drm/amdgpu: support ras on SRIOV support umc/gfx/sdma ras on guest side Changed from V1: move sriov judgment in amdgpu_ras_interrupt_fatal_error_handler Signed-off-by: Stanley.Yang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 42 +++++++++++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 4 +++ drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 9 ++++--- 4 files changed, 44 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9af8d7a1d011..c45736a902fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5219,6 +5219,10 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ r = amdgpu_device_reset_sriov(adev, job ? false : true); if (r) adev->asic_reset_res = r; + + /* Aldebaran supports ras in SRIOV, so need resume ras during reset */ + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + amdgpu_ras_resume(adev); } else { r = amdgpu_do_asic_reset(device_list_handle, &reset_context); if (r && r == -EAGAIN) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 035891ec59d5..2de9309a4193 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -726,7 +726,9 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, /* Do not enable if it is not allowed. */ WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head)); - if (!amdgpu_ras_intr_triggered()) { + /* Only enable ras feature operation handle on host side */ + if (!amdgpu_sriov_vf(adev) && + !amdgpu_ras_intr_triggered()) { ret = psp_ras_enable_features(&adev->psp, info, enable); if (ret) { dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n", @@ -1523,7 +1525,9 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev) */ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) { - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) + /* Fatal error events are handled on host side */ + if (amdgpu_sriov_vf(adev) || + !amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) return; if (adev->nbio.ras && @@ -2270,10 +2274,14 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) { adev->ras_hw_enabled = adev->ras_enabled = 0; - if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw || + if (!adev->is_atom_fw || !amdgpu_ras_asic_supported(adev)) return; + if (!(amdgpu_sriov_vf(adev) && + (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2)))) + return; + if (!adev->gmc.xgmi.connected_to_cpu) { if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { dev_info(adev->dev, "MEM ECC is active.\n"); @@ -2285,15 +2293,21 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) if (amdgpu_atomfirmware_sram_ecc_supported(adev)) { dev_info(adev->dev, "SRAM ECC is active.\n"); - adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | - 1 << AMDGPU_RAS_BLOCK__DF); - - if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0)) - adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | - 1 << AMDGPU_RAS_BLOCK__JPEG); - else - adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | - 1 << AMDGPU_RAS_BLOCK__JPEG); + if (!amdgpu_sriov_vf(adev)) { + adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + + if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0)) + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + else + adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + } else { + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF | + 1 << AMDGPU_RAS_BLOCK__SDMA | + 1 << AMDGPU_RAS_BLOCK__GFX); + } } else { dev_info(adev->dev, "SRAM ECC is not presented.\n"); } @@ -2637,6 +2651,10 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev) struct amdgpu_ras_block_object *obj; int r; + /* Guest side doesn't need init ras feature */ + if (amdgpu_sriov_vf(adev)) + return 0; + list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { if (!node->ras_obj) { dev_warn(adev->dev, "Warning: abnormal ras list node.\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 8e221a1ba937..42c1f050542f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -124,6 +124,10 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + + if (amdgpu_sriov_vf(adev)) + return AMDGPU_RAS_SUCCESS; + amdgpu_ras_reset_gpu(adev); return AMDGPU_RAS_SUCCESS; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index d6d79e97def9..18014ed0e853 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -85,9 +85,12 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) err = psp_init_sos_microcode(psp, chip_name); if (err) return err; - err = psp_init_ta_microcode(&adev->psp, chip_name); - if (err) - return err; + /* It's not necessary to load ras ta on Guest side */ + if (!amdgpu_sriov_vf(adev)) { + err = psp_init_ta_microcode(&adev->psp, chip_name); + if (err) + return err; + } break; case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 3): -- cgit v1.2.3 From 4d33e7040d70b50f1fb564f7020644ec5b45d6b7 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 17 May 2022 11:27:11 +0530 Subject: drm/amdgpu: move amdgpu_gmc_tmz_set after ip_version populated To enable TMZ feature based on IP version needs adev->ip_version populated but its empty. Move amdgpu_gmc_tmz_set to a place where ip_version is populated. Signed-off-by: Sunil Khatri Reviewed-by: Alexander Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c45736a902fb..625424f3082b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1556,9 +1556,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - amdgpu_gmc_tmz_set(adev); - - return 0; } @@ -3701,6 +3698,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + /* Enable TMZ based on IP_VERSION */ + amdgpu_gmc_tmz_set(adev); + amdgpu_gmc_noretry_set(adev); /* Need to get xgmi info early to decide the reset behavior*/ if (adev->gmc.xgmi.supported) { -- cgit v1.2.3 From 0ef3dc7e97884a861db4cb3dfd721db71edb0236 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 17 May 2022 11:28:46 +0530 Subject: drm/amdgpu: change code name to ip version for tmz set Use IP version rather then code name of IPs for tmz set. Signed-off-by: Sunil Khatri Reviewed-by: Alexander Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 88b852b3a2cb..7e55ee61f84c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -512,9 +512,12 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) */ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) { - switch (adev->asic_type) { - case CHIP_RAVEN: - case CHIP_RENOIR: + switch (adev->ip_versions[GC_HWIP][0]) { + /* RAVEN */ + case IP_VERSION(9, 2, 2): + case IP_VERSION(9, 1, 0): + /* RENOIR looks like RAVEN */ + case IP_VERSION(9, 3, 0): if (amdgpu_tmz == 0) { adev->gmc.tmz_enabled = false; dev_info(adev->dev, @@ -525,12 +528,18 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) "Trusted Memory Zone (TMZ) feature enabled\n"); } break; - case CHIP_NAVI10: - case CHIP_NAVI14: - case CHIP_NAVI12: - case CHIP_VANGOGH: - case CHIP_YELLOW_CARP: - case CHIP_IP_DISCOVERY: + case IP_VERSION(10, 1, 10): + case IP_VERSION(10, 1, 1): + case IP_VERSION(10, 1, 2): + case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 3, 0): + case IP_VERSION(10, 3, 2): + case IP_VERSION(10, 3, 4): + case IP_VERSION(10, 3, 5): + /* VANGOGH */ + case IP_VERSION(10, 3, 1): + /* YELLOW_CARP*/ + case IP_VERSION(10, 3, 3): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { -- cgit v1.2.3 From 49b74d12d1e02fc67b2854a593e589372d894e62 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 17 May 2022 11:33:45 +0530 Subject: drm/amdgpu: add support of tmz for GC 10.3.7 Add support of IP GC 10.3.7 in amdgpu_gmc_tmz_set. Signed-off-by: Sunil Khatri Reviewed-by: Alexander Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 7e55ee61f84c..798c56214a23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -540,6 +540,8 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): /* YELLOW_CARP*/ case IP_VERSION(10, 3, 3): + /* GC 10.3.7 */ + case IP_VERSION(10, 3, 7): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { -- cgit v1.2.3 From b0f4d663fce6a4232d3c20ce820f919111b1c60b Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 19 May 2022 10:50:25 +0530 Subject: drm/amd/pm: Fix missing thermal throttler status On aldebaran, when thermal throttling happens due to excessive GPU temperature, the reason for throttling event is missed in warning message. This patch fixes it. Signed-off-by: Lijo Lazar Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 38af648cb857..fb130409309c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1666,6 +1666,7 @@ static const struct throttling_logging_label { uint32_t feature_mask; const char *label; } logging_label[] = { + {(1U << THROTTLER_TEMP_GPU_BIT), "GPU"}, {(1U << THROTTLER_TEMP_MEM_BIT), "HBM"}, {(1U << THROTTLER_TEMP_VR_GFX_BIT), "VR of GFX rail"}, {(1U << THROTTLER_TEMP_VR_MEM_BIT), "VR of HBM rail"}, -- cgit v1.2.3 From 6880ed280edf292c542aa87567547ffb9c222597 Mon Sep 17 00:00:00 2001 From: Alan Liu Date: Tue, 17 May 2022 22:30:00 +0800 Subject: drm/amd/display: Add HDMI_ACP_SEND register Define HDMI_ACP_SEND register shift/mask. Signed-off-by: Alan Liu Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h | 4 ++-- drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h | 2 ++ 9 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h index c755f43aaaf8..7a2c6b12c249 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h @@ -6070,6 +6070,8 @@ #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h index 14a3bacfcfd1..fa1f4374fafe 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h @@ -6058,6 +6058,8 @@ #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h index 106094ed0661..39f6fde6db1d 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h @@ -7142,6 +7142,8 @@ #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h index bcd190a3fcdd..c5f4afac3b39 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h @@ -37285,12 +37285,14 @@ #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L //DIG0_HDMI_INFOFRAME_CONTROL0 #define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND__SHIFT 0x0 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h index 9b6825b74cc1..23580907663b 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h @@ -5584,6 +5584,8 @@ #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h index e7c0cad41081..a788ff3b68c0 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h @@ -30357,12 +30357,14 @@ #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L //DIG0_HDMI_INFOFRAME_CONTROL0 #define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT 0x4 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h index dc8ce7aaa0cf..c70f7ba94d8f 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h @@ -39439,12 +39439,14 @@ #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L //DIG0_HDMI_INFOFRAME_CONTROL0 #define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT 0x4 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h index 91969554e36a..ca1e1eb39256 100755 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h @@ -16956,7 +16956,7 @@ #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 - +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L @@ -16964,7 +16964,7 @@ #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L - +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L //DIG0_HDMI_INFOFRAME_CONTROL0 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h index 2f780aefc722..6104ae304099 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h @@ -35487,12 +35487,14 @@ #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L //DIG0_HDMI_INFOFRAME_CONTROL0 #define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT 0x4 -- cgit v1.2.3 From 5e613723f804658feb689be1b3cb88ceeed234d3 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 16 May 2022 14:22:38 -0400 Subject: drm/amdkfd: port cwsr trap handler from dkms branch Most of changes are for debugger feature, and it is to simplify trap handler support for new asics in the future. Signed-off-by: Eric Huang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 2527 ++++++++++---------- .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm | 325 ++- .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 244 +- 3 files changed, 1596 insertions(+), 1500 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 475f89700c74..8cbdc7f519c6 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -166,7 +166,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { 0x807c847c, 0x806eff6e, 0x00000400, 0xbf0a757c, 0xbf85ffef, 0xbf9c0000, - 0xbf8200cd, 0xbef8007e, + 0xbf8200ce, 0xbef8007e, 0x8679ff7f, 0x0000ffff, 0x8779ff79, 0x00040000, 0xbefa0080, 0xbefb00ff, @@ -212,304 +212,310 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { 0x761e0000, 0xe0524100, 0x761e0100, 0xe0524200, 0x761e0200, 0xe0524300, - 0x761e0300, 0xb8f22a05, - 0x80728172, 0x8e728a72, - 0xb8f61605, 0x80768176, - 0x8e768676, 0x80727672, - 0x80f2c072, 0xb8f31605, - 0x80738173, 0x8e738473, - 0x8e7a8273, 0xbefa00ff, - 0x01000000, 0xbefc0073, - 0xc031003c, 0x00000072, - 0x80f2c072, 0xbf8c007f, - 0x80fc907c, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff1, 0xb8f22a05, - 0x80728172, 0x8e728a72, - 0xb8f61605, 0x80768176, - 0x8e768676, 0x80727672, - 0xbefa0084, 0xbefa00ff, - 0x01000000, 0xc0211cfc, + 0x761e0300, 0xbf8c0f70, + 0xb8f22a05, 0x80728172, + 0x8e728a72, 0xb8f61605, + 0x80768176, 0x8e768676, + 0x80727672, 0x80f2c072, + 0xb8f31605, 0x80738173, + 0x8e738473, 0x8e7a8273, + 0xbefa00ff, 0x01000000, + 0xbefc0073, 0xc031003c, + 0x00000072, 0x80f2c072, + 0xbf8c007f, 0x80fc907c, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff1, + 0xb8f22a05, 0x80728172, + 0x8e728a72, 0xb8f61605, + 0x80768176, 0x8e768676, + 0x80727672, 0xbefa0084, + 0xbefa00ff, 0x01000000, + 0xc0211cfc, 0x00000072, + 0x80728472, 0xc0211c3c, 0x00000072, 0x80728472, - 0xc0211c3c, 0x00000072, - 0x80728472, 0xc0211c7c, + 0xc0211c7c, 0x00000072, + 0x80728472, 0xc0211bbc, 0x00000072, 0x80728472, - 0xc0211bbc, 0x00000072, - 0x80728472, 0xc0211bfc, + 0xc0211bfc, 0x00000072, + 0x80728472, 0xc0211d3c, 0x00000072, 0x80728472, - 0xc0211d3c, 0x00000072, - 0x80728472, 0xc0211d7c, + 0xc0211d7c, 0x00000072, + 0x80728472, 0xc0211a3c, 0x00000072, 0x80728472, - 0xc0211a3c, 0x00000072, - 0x80728472, 0xc0211a7c, + 0xc0211a7c, 0x00000072, + 0x80728472, 0xc0211dfc, 0x00000072, 0x80728472, - 0xc0211dfc, 0x00000072, - 0x80728472, 0xc0211b3c, + 0xc0211b3c, 0x00000072, + 0x80728472, 0xc0211b7c, 0x00000072, 0x80728472, - 0xc0211b7c, 0x00000072, - 0x80728472, 0xbf8c007f, - 0xbefc0073, 0xbefe006e, - 0xbeff006f, 0x867375ff, - 0x000003ff, 0xb9734803, - 0x867375ff, 0xfffff800, - 0x8f738b73, 0xb973a2c3, - 0xb977f801, 0x8673ff71, - 0xf0000000, 0x8f739c73, - 0x8e739073, 0xbef60080, - 0x87767376, 0x8673ff71, - 0x08000000, 0x8f739b73, - 0x8e738f73, 0x87767376, - 0x8673ff74, 0x00800000, - 0x8f739773, 0xb976f807, - 0x8671ff71, 0x0000ffff, - 0x86fe7e7e, 0x86ea6a6a, - 0x8f768374, 0xb976e0c2, - 0xbf800002, 0xb9740002, - 0xbf8a0000, 0x95807370, - 0xbf810000, 0x00000000, + 0xbf8c007f, 0xbefc0073, + 0xbefe006e, 0xbeff006f, + 0x867375ff, 0x000003ff, + 0xb9734803, 0x867375ff, + 0xfffff800, 0x8f738b73, + 0xb973a2c3, 0xb977f801, + 0x8673ff71, 0xf0000000, + 0x8f739c73, 0x8e739073, + 0xbef60080, 0x87767376, + 0x8673ff71, 0x08000000, + 0x8f739b73, 0x8e738f73, + 0x87767376, 0x8673ff74, + 0x00800000, 0x8f739773, + 0xb976f807, 0x8671ff71, + 0x0000ffff, 0x86fe7e7e, + 0x86ea6a6a, 0x8f768374, + 0xb976e0c2, 0xbf800002, + 0xb9740002, 0xbf8a0000, + 0x95807370, 0xbf810000, }; static const uint32_t cwsr_trap_gfx9_hex[] = { - 0xbf820001, 0xbf820248, - 0xb8f8f802, 0x89788678, - 0xb8eef801, 0x866eff6e, - 0x00000800, 0xbf840003, + 0xbf820001, 0xbf820254, + 0xb8f8f802, 0x8978ff78, + 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, - 0xbf840016, 0xb8fbf803, + 0xbf840009, 0x866eff6d, + 0x00ff0000, 0xbf85001e, 0x866eff7b, 0x00000400, - 0xbf85003b, 0x866eff7b, - 0x00000800, 0xbf850003, - 0x866eff7b, 0x00000100, - 0xbf84000c, 0x866eff78, - 0x00002000, 0xbf840005, - 0xbf8e0010, 0xb8eef803, - 0x866eff6e, 0x00000400, - 0xbf84fffb, 0x8778ff78, - 0x00002000, 0x80ec886c, - 0x82ed806d, 0xb8eef807, - 0x866fff6e, 0x001f8000, - 0x8e6f8b6f, 0x8977ff77, - 0xfc000000, 0x87776f77, - 0x896eff6e, 0x001f8000, - 0xb96ef807, 0xb8faf812, + 0xbf850051, 0xbf8e0010, + 0xb8fbf803, 0xbf82fffa, + 0x866eff7b, 0x00000900, + 0xbf850015, 0x866eff7b, + 0x000071ff, 0xbf840008, + 0x866fff7b, 0x00007080, + 0xbf840001, 0xbeee1a87, + 0xb8eff801, 0x8e6e8c6e, + 0x866e6f6e, 0xbf85000a, + 0x866eff6d, 0x00ff0000, + 0xbf850007, 0xb8eef801, + 0x866eff6e, 0x00000800, + 0xbf850003, 0x866eff7b, + 0x00000400, 0xbf850036, + 0xb8faf807, 0x867aff7a, + 0x001f8000, 0x8e7a8b7a, + 0x8977ff77, 0xfc000000, + 0x87777a77, 0xba7ff807, + 0x00000000, 0xb8faf812, 0xb8fbf813, 0x8efa887a, - 0xc0071bbd, 0x00000000, - 0xbf8cc07f, 0xc0071ebd, - 0x00000008, 0xbf8cc07f, - 0x86ee6e6e, 0xbf840001, - 0xbe801d6e, 0xb8fbf803, - 0x867bff7b, 0x000001ff, + 0xc0031bbd, 0x00000010, + 0xbf8cc07f, 0x8e6e976e, + 0x8977ff77, 0x00800000, + 0x87776e77, 0xc0071bbd, + 0x00000000, 0xbf8cc07f, + 0xc0071ebd, 0x00000008, + 0xbf8cc07f, 0x86ee6e6e, + 0xbf840001, 0xbe801d6e, + 0x866eff6d, 0x01ff0000, + 0xbf850005, 0x8778ff78, + 0x00002000, 0x80ec886c, + 0x82ed806d, 0xbf820005, + 0x866eff6d, 0x01000000, 0xbf850002, 0x806c846c, 0x826d806d, 0x866dff6d, - 0x0000ffff, 0x8f6e8b77, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x86fe7e7e, + 0x0000ffff, 0x8f7a8b77, + 0x867aff7a, 0x001f8000, + 0xb97af807, 0x86fe7e7e, 0x86ea6a6a, 0x8f6e8378, 0xb96ee0c2, 0xbf800002, 0xb9780002, 0xbe801f6c, 0x866dff6d, 0x0000ffff, 0xbefa0080, 0xb97a0283, - 0xb8fa2407, 0x8e7a9b7a, - 0x876d7a6d, 0xb8fa03c7, - 0x8e7a9a7a, 0x876d7a6d, 0xb8faf807, 0x867aff7a, - 0x00007fff, 0xb97af807, - 0xbeee007e, 0xbeef007f, - 0xbefe0180, 0xbf900004, - 0x877a8478, 0xb97af802, - 0xbf8e0002, 0xbf88fffe, - 0xb8fa2a05, 0x807a817a, - 0x8e7a8a7a, 0xb8fb1605, - 0x807b817b, 0x8e7b867b, - 0x807a7b7a, 0x807a7e7a, - 0x827b807f, 0x867bff7b, - 0x0000ffff, 0xc04b1c3d, - 0x00000050, 0xbf8cc07f, - 0xc04b1d3d, 0x00000060, - 0xbf8cc07f, 0xc0431e7d, - 0x00000074, 0xbf8cc07f, - 0xbef4007e, 0x8675ff7f, - 0x0000ffff, 0x8775ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x00807fac, - 0x867aff7f, 0x08000000, - 0x8f7a837a, 0x87777a77, - 0x867aff7f, 0x70000000, - 0x8f7a817a, 0x87777a77, - 0xbef1007c, 0xbef00080, - 0xb8f02a05, 0x80708170, - 0x8e708a70, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xbefe007c, 0xbefc0070, - 0xc0611c7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611b3a, + 0x001f8000, 0x8e7a8b7a, + 0x8977ff77, 0xfc000000, + 0x87777a77, 0xba7ff807, + 0x00000000, 0xbeee007e, + 0xbeef007f, 0xbefe0180, + 0xbf900004, 0x877a8478, + 0xb97af802, 0xbf8e0002, + 0xbf88fffe, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0xb8fb1605, 0x807b817b, + 0x8e7b867b, 0x807a7b7a, + 0x807a7e7a, 0x827b807f, + 0x867bff7b, 0x0000ffff, + 0xc04b1c3d, 0x00000050, + 0xbf8cc07f, 0xc04b1d3d, + 0x00000060, 0xbf8cc07f, + 0xc0431e7d, 0x00000074, + 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0xbef1007c, + 0xbef00080, 0xb8f02a05, + 0x80708170, 0x8e708a70, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611b7a, 0x0000007c, + 0xc0611b3a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611bba, + 0xbefc0070, 0xc0611b7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611bfa, 0x0000007c, + 0xc0611bba, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611e3a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xb8fbf803, 0xbefe007c, - 0xbefc0070, 0xc0611efa, + 0xbefc0070, 0xc0611bfa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611a3a, 0x0000007c, + 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8fbf803, + 0xbefe007c, 0xbefc0070, + 0xc0611efa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611a7a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xb8f1f801, 0xbefe007c, - 0xbefc0070, 0xc0611c7a, + 0xbefc0070, 0xc0611a3a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, - 0x867aff7f, 0x04000000, - 0xbeef0080, 0x876f6f7a, - 0xb8f02a05, 0x80708170, - 0x8e708a70, 0xb8fb1605, - 0x807b817b, 0x8e7b847b, - 0x8e76827b, 0xbef600ff, - 0x01000000, 0xbef20174, - 0x80747074, 0x82758075, - 0xbefc0080, 0xbf800000, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003a, 0x00000000, - 0xbf8cc07f, 0xc06b013a, - 0x00000010, 0xbf8cc07f, - 0xc06b023a, 0x00000020, - 0xbf8cc07f, 0xc06b033a, - 0x00000030, 0xbf8cc07f, - 0x8074c074, 0x82758075, - 0x807c907c, 0xbf0a7b7c, - 0xbf85ffe7, 0xbef40172, - 0xbef00080, 0xbefe00c1, - 0xbeff00c1, 0xbee80080, - 0xbee90080, 0xbef600ff, - 0x01000000, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf85004d, - 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, - 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, + 0xbefe007c, 0xbefc0070, + 0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8f1f801, + 0xbefe007c, 0xbefc0070, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0x867aff7f, + 0x04000000, 0xbeef0080, + 0x876f6f7a, 0xb8f02a05, + 0x80708170, 0x8e708a70, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf85004d, 0xbe840080, + 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, + 0x00000900, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, + 0xd2890000, 0x00000902, 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, + 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0xbf820008, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0xbefe00c1, - 0xbeff00c1, 0xb8fb4306, - 0x867bc17b, 0xbf840063, - 0xbf8a0000, 0x867aff6f, - 0x04000000, 0xbf84005f, - 0x8e7b867b, 0x8e7b827b, - 0xbef6007b, 0xb8f02a05, - 0x80708170, 0x8e708a70, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x80707a70, - 0x8070ff70, 0x00000080, - 0xbef600ff, 0x01000000, - 0xbefc0080, 0xd28c0002, - 0x000100c1, 0xd28d0003, - 0x000204c1, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850030, - 0x24040682, 0xd86e4000, - 0x00000002, 0xbf8cc07f, 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, + 0xbf84ffee, 0xbf820008, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0xbefe00c1, 0xbeff00c1, + 0xb8fb4306, 0x867bc17b, + 0xbf840063, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf84005f, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0x8070ff70, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xd28c0002, 0x000100c1, + 0xd28d0003, 0x000204c1, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850030, 0x24040682, + 0xd86e4000, 0x00000002, + 0xbf8cc07f, 0xbe840080, + 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, + 0x00000900, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0x680404ff, 0x00000200, + 0xbe840080, 0xd2890000, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, + 0x80048104, 0xd2890002, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x680404ff, + 0x00000200, 0xd0c9006a, + 0x0000f702, 0xbf87ffd2, + 0xbf820015, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe800077, + 0x8677ff77, 0xff7fffff, + 0x8777ff77, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8cc07f, 0xe0765000, + 0x701d0002, 0x68040702, 0xd0c9006a, 0x0000f702, - 0xbf87ffd2, 0xbf820015, - 0xd1060002, 0x00011103, - 0x7e0602ff, 0x00000200, - 0xbefc00ff, 0x00010000, - 0xbe800077, 0x8677ff77, - 0xff7fffff, 0x8777ff77, - 0x00058000, 0xd8ec0000, - 0x00000002, 0xbf8cc07f, - 0xe0765000, 0x701d0002, - 0x68040702, 0xd0c9006a, - 0x0000f702, 0xbf87fff7, - 0xbef70000, 0xbef000ff, - 0x00000400, 0xbefe00c1, - 0xbeff00c1, 0xb8fb2a05, - 0x807b817b, 0x8e7b827b, - 0x8e76887b, 0xbef600ff, + 0xbf87fff7, 0xbef70000, + 0xbef000ff, 0x00000400, + 0xbefe00c1, 0xbeff00c1, + 0xb8fb2a05, 0x807b817b, + 0x8e7b827b, 0xbef600ff, 0x01000000, 0xbefc0084, 0xbf0a7b7c, 0xbf84006d, 0xbf11017c, 0x807bff7b, @@ -566,15 +572,11 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x701d0300, 0x807c847c, 0x8070ff70, 0x00000400, 0xbf0a7b7c, 0xbf85ffef, - 0xbf9c0000, 0xbf8200da, + 0xbf9c0000, 0xbf8200c7, 0xbef4007e, 0x8675ff7f, 0x0000ffff, 0x8775ff75, 0x00040000, 0xbef60080, 0xbef700ff, 0x00807fac, - 0x866eff7f, 0x08000000, - 0x8f6e836e, 0x87776e77, - 0x866eff7f, 0x70000000, - 0x8f6e816e, 0x87776e77, 0x866eff7f, 0x04000000, 0xbf84001e, 0xbefe00c1, 0xbeff00c1, 0xb8ef4306, @@ -591,34 +593,34 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x781d0000, 0x807cff7c, 0x00000200, 0x8078ff78, 0x00000200, 0xbf0a6f7c, - 0xbf85fff6, 0xbef80080, - 0xbefe00c1, 0xbeff00c1, - 0xb8ef2a05, 0x806f816f, - 0x8e6f826f, 0x8e76886f, - 0xbef600ff, 0x01000000, - 0xbeee0078, 0x8078ff78, - 0x00000400, 0xbefc0084, - 0xbf11087c, 0x806fff6f, - 0x00008000, 0xe0524000, - 0x781d0000, 0xe0524100, - 0x781d0100, 0xe0524200, - 0x781d0200, 0xe0524300, - 0x781d0300, 0xbf8c0f70, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, - 0x807c847c, 0x8078ff78, - 0x00000400, 0xbf0a6f7c, - 0xbf85ffee, 0xbf9c0000, - 0xe0524000, 0x6e1d0000, - 0xe0524100, 0x6e1d0100, - 0xe0524200, 0x6e1d0200, - 0xe0524300, 0x6e1d0300, - 0xb8f82a05, 0x80788178, - 0x8e788a78, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x80f8c078, - 0xb8ef1605, 0x806f816f, - 0x8e6f846f, 0x8e76826f, + 0xbf85fff6, 0xbefe00c1, + 0xbeff00c1, 0xbef600ff, + 0x01000000, 0xb8ef2a05, + 0x806f816f, 0x8e6f826f, + 0x806fff6f, 0x00008000, + 0xbef80080, 0xbeee0078, + 0x8078ff78, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffee, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xbf8c0f70, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x80f8c078, + 0xb8ef1605, 0x806f816f, + 0x8e6f846f, 0x8e76826f, 0xbef600ff, 0x01000000, 0xbefc006f, 0xc031003a, 0x00000078, 0x80f8c078, @@ -663,90 +665,101 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0xc00b1c37, 0x00000050, 0xc00b1d37, 0x00000060, 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x866fff6d, - 0xf8000000, 0x8f6f9b6f, - 0x8e6f906f, 0xbeee0080, - 0x876e6f6e, 0x866fff6d, - 0x04000000, 0x8f6f9a6f, - 0x8e6f8f6f, 0x876e6f6e, - 0x866fff7a, 0x00800000, - 0x8f6f976f, 0xb96ef807, - 0x866dff6d, 0x0000ffff, - 0x86fe7e7e, 0x86ea6a6a, - 0x8f6e837a, 0xb96ee0c2, - 0xbf800002, 0xb97a0002, - 0xbf8a0000, 0x95806f6c, - 0xbf810000, 0x00000000, + 0xbf8cc07f, 0x8f6e8b77, + 0x866eff6e, 0x001f8000, + 0xb96ef807, 0x866dff6d, + 0x0000ffff, 0x86fe7e7e, + 0x86ea6a6a, 0x8f6e837a, + 0xb96ee0c2, 0xbf800002, + 0xb97a0002, 0xbf8a0000, + 0xbe801f6c, 0xbf810000, }; static const uint32_t cwsr_trap_nv1x_hex[] = { - 0xbf820001, 0xbf8201cd, + 0xbf820001, 0xbf8201f1, 0xb0804004, 0xb978f802, - 0x8a788678, 0xb96ef801, - 0x876eff6e, 0x00000800, - 0xbf840003, 0x876eff78, + 0x8a78ff78, 0x00020006, + 0xb97bf803, 0x876eff78, 0x00002000, 0xbf840009, - 0xb97bf803, 0x876eff7b, - 0x00000400, 0xbf850033, - 0x876eff7b, 0x00000100, - 0xbf840002, 0x8878ff78, - 0x00002000, 0x8a77ff77, - 0xff000000, 0xb96ef807, - 0x876fff6e, 0x02000000, - 0x8f6f866f, 0x88776f77, - 0x876fff6e, 0x003f8000, - 0x8f6f896f, 0x88776f77, - 0x8a6eff6e, 0x023f8000, - 0xb9eef807, 0xb97af812, + 0x876eff6d, 0x00ff0000, + 0xbf85001e, 0x876eff7b, + 0x00000400, 0xbf850057, + 0xbf8e0010, 0xb97bf803, + 0xbf82fffa, 0x876eff7b, + 0x00000900, 0xbf850015, + 0x876eff7b, 0x000071ff, + 0xbf840008, 0x876fff7b, + 0x00007080, 0xbf840001, + 0xbeee1d87, 0xb96ff801, + 0x8f6e8c6e, 0x876e6f6e, + 0xbf85000a, 0x876eff6d, + 0x00ff0000, 0xbf850007, + 0xb96ef801, 0x876eff6e, + 0x00000800, 0xbf850003, + 0x876eff7b, 0x00000400, + 0xbf85003c, 0x8a77ff77, + 0xff000000, 0xb97af807, + 0x877bff7a, 0x02000000, + 0x8f7b867b, 0x88777b77, + 0x877bff7a, 0x003f8000, + 0x8f7b897b, 0x88777b77, + 0x8a7aff7a, 0x023f8000, + 0xb9faf807, 0xb97af812, 0xb97bf813, 0x8ffa887a, - 0xf4051bbd, 0xfa000000, - 0xbf8cc07f, 0xf4051ebd, - 0xfa000008, 0xbf8cc07f, - 0x87ee6e6e, 0xbf840001, - 0xbe80206e, 0xb97bf803, - 0x877bff7b, 0x000001ff, + 0xf4011bbd, 0xfa000010, + 0xbf8cc07f, 0x8f6e976e, + 0x8a77ff77, 0x00800000, + 0x88776e77, 0xf4051bbd, + 0xfa000000, 0xbf8cc07f, + 0xf4051ebd, 0xfa000008, + 0xbf8cc07f, 0x87ee6e6e, + 0xbf840001, 0xbe80206e, + 0x876eff6d, 0x01ff0000, + 0xbf850005, 0x8878ff78, + 0x00002000, 0x80ec886c, + 0x82ed806d, 0xbf820005, + 0x876eff6d, 0x01000000, 0xbf850002, 0x806c846c, 0x826d806d, 0x876dff6d, - 0x0000ffff, 0x906e8977, - 0x876fff6e, 0x003f8000, - 0x906e8677, 0x876eff6e, - 0x02000000, 0x886e6f6e, - 0xb9eef807, 0x87fe7e7e, + 0x0000ffff, 0x907a8977, + 0x877bff7a, 0x003f8000, + 0x907a8677, 0x877aff7a, + 0x02000000, 0x887a7b7a, + 0xb9faf807, 0x87fe7e7e, 0x87ea6a6a, 0xb9f8f802, 0xbe80226c, 0x876dff6d, 0x0000ffff, 0xbefa0380, - 0xb9fa0283, 0xb97a2c07, - 0x8f7a9a7a, 0x886d7a6d, - 0xb97a03c7, 0x8f7a997a, - 0x886d7a6d, 0xb97a0647, - 0x8f7a987a, 0x886d7a6d, - 0xb97af807, 0x877aff7a, - 0x00007fff, 0xb9faf807, - 0xbeee037e, 0xbeef037f, - 0xbefe0480, 0xbf900004, - 0xbf8e0002, 0xbf88fffe, - 0xb97b02dc, 0x8f7b997b, - 0x887b7b7f, 0xb97a2a05, + 0xb9fa0283, 0x8a77ff77, + 0xff000000, 0xb97af807, + 0x877bff7a, 0x02000000, + 0x8f7b867b, 0x88777b77, + 0x877bff7a, 0x003f8000, + 0x8f7b897b, 0x88777b77, + 0x8a7aff7a, 0x023f8000, + 0xb9faf807, 0xbeee037e, + 0xbeef037f, 0xbefe0480, + 0xbf900004, 0xbf8e0002, + 0xbf88fffe, 0x877aff7f, + 0x04000000, 0x8f7a857a, + 0x886d7a6d, 0xb97b02dc, + 0x8f7b997b, 0xb97a2a05, 0x807a817a, 0xbf0d997b, 0xbf850002, 0x8f7a897a, 0xbf820001, 0x8f7a8a7a, - 0x877bff7f, 0x0000ffff, - 0x807aff7a, 0x00000200, - 0x807a7e7a, 0x827b807b, - 0xf4491c3d, 0xfa000050, - 0xf4491d3d, 0xfa000060, - 0xf4411e7d, 0xfa000074, - 0xbef4037e, 0x8775ff7f, - 0x0000ffff, 0x8875ff75, - 0x00040000, 0xbef60380, - 0xbef703ff, 0x10807fac, - 0x877aff7f, 0x08000000, - 0x907a837a, 0x88777a77, - 0x877aff7f, 0x70000000, - 0x907a817a, 0x88777a77, - 0xbef1037c, 0xbef00380, - 0xb97302dc, 0x8f739973, - 0x8873737f, 0xb97bf816, + 0xb97b1e06, 0x8f7b8a7b, + 0x807a7b7a, 0x877bff7f, + 0x0000ffff, 0x807aff7a, + 0x00000200, 0x807a7e7a, + 0x827b807b, 0xf4491c3d, + 0xfa000050, 0xf4491d3d, + 0xfa000060, 0xf4411e7d, + 0xfa000074, 0xbef4037e, + 0x8775ff7f, 0x0000ffff, + 0x8875ff75, 0x00040000, + 0xbef60380, 0xbef703ff, + 0x10807fac, 0xbef1037c, + 0xbef00380, 0xb97302dc, + 0x8f739973, 0xb97bf816, 0xba80f816, 0x00000000, 0xbefe03c1, 0x907c9973, 0x877c817c, 0xbf06817c, @@ -776,8 +789,9 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbefe037c, 0xbefc0370, 0xf4611b3a, 0xf8000000, 0x80708470, 0xbefc037e, + 0x8a7aff6d, 0x80000000, 0xbefe037c, 0xbefc0370, - 0xf4611b7a, 0xf8000000, + 0xf4611eba, 0xf8000000, 0x80708470, 0xbefc037e, 0xbefe037c, 0xbefc0370, 0xf4611bba, 0xf8000000, @@ -838,7 +852,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbf820001, 0xbeff03c1, 0xb97b4306, 0x877bc17b, 0xbf840044, 0xbf8a0000, - 0x877aff73, 0x04000000, + 0x877aff6d, 0x80000000, 0xbf840040, 0x8f7b867b, 0x8f7b827b, 0xbef6037b, 0xb9702a05, 0x80708170, @@ -894,7 +908,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbf0a7b7c, 0xbf85ffef, 0xbf820025, 0xbef603ff, 0x01000000, 0xbefc0384, - 0xbf0a7b7c, 0xbf840020, + 0xbf0a7b7c, 0xbf840011, 0x7e008700, 0x7e028701, 0x7e048702, 0x7e068703, 0xe0704000, 0x705d0000, @@ -911,71 +925,69 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0x705d0000, 0x807c817c, 0x8070ff70, 0x00000080, 0xbf0a7b7c, 0xbf85fff8, - 0xbf820151, 0xbef4037e, + 0xbf820144, 0xbef4037e, 0x8775ff7f, 0x0000ffff, 0x8875ff75, 0x00040000, 0xbef60380, 0xbef703ff, - 0x10807fac, 0x876eff7f, - 0x08000000, 0x906e836e, - 0x88776e77, 0x876eff7f, - 0x70000000, 0x906e816e, - 0x88776e77, 0xb97202dc, - 0x8f729972, 0x8872727f, - 0x876eff7f, 0x04000000, - 0xbf840034, 0xbefe03c1, - 0x907c9972, 0x877c817c, - 0xbf06817c, 0xbf850002, - 0xbeff0380, 0xbf820001, - 0xbeff03c1, 0xb96f4306, - 0x876fc16f, 0xbf840029, - 0x8f6f866f, 0x8f6f826f, - 0xbef6036f, 0xb9782a05, - 0x80788178, 0xbf0d9972, - 0xbf850002, 0x8f788978, - 0xbf820001, 0x8f788a78, - 0xb96e1e06, 0x8f6e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0x8078ff78, - 0x00000080, 0xbef603ff, - 0x01000000, 0x907c9972, - 0x877c817c, 0xbf06817c, - 0xbefc0380, 0xbf850009, - 0xe0310000, 0x781d0000, - 0x807cff7c, 0x00000080, - 0x8078ff78, 0x00000080, - 0xbf0a6f7c, 0xbf85fff8, - 0xbf820008, 0xe0310000, - 0x781d0000, 0x807cff7c, - 0x00000100, 0x8078ff78, - 0x00000100, 0xbf0a6f7c, - 0xbf85fff8, 0xbef80380, + 0x10807fac, 0xb97202dc, + 0x8f729972, 0x876eff7f, + 0x04000000, 0xbf840034, 0xbefe03c1, 0x907c9972, 0x877c817c, 0xbf06817c, 0xbf850002, 0xbeff0380, 0xbf820001, 0xbeff03c1, - 0xb96f2a05, 0x806f816f, - 0x8f6f826f, 0x907c9972, - 0x877c817c, 0xbf06817c, - 0xbf850021, 0xbef603ff, - 0x01000000, 0xbeee0378, + 0xb96f4306, 0x876fc16f, + 0xbf840029, 0x8f6f866f, + 0x8f6f826f, 0xbef6036f, + 0xb9782a05, 0x80788178, + 0xbf0d9972, 0xbf850002, + 0x8f788978, 0xbf820001, + 0x8f788a78, 0xb96e1e06, + 0x8f6e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, - 0xbefc0384, 0xe0304000, - 0x785d0000, 0xe0304080, - 0x785d0100, 0xe0304100, - 0x785d0200, 0xe0304180, - 0x785d0300, 0xbf8c3f70, - 0x7e008500, 0x7e028501, - 0x7e048502, 0x7e068503, - 0x807c847c, 0x8078ff78, - 0x00000200, 0xbf0a6f7c, - 0xbf85ffee, 0xe0304000, - 0x6e5d0000, 0xe0304080, - 0x6e5d0100, 0xe0304100, - 0x6e5d0200, 0xe0304180, - 0x6e5d0300, 0xbf820032, + 0x8078ff78, 0x00000080, + 0xbef603ff, 0x01000000, + 0x907c9972, 0x877c817c, + 0xbf06817c, 0xbefc0380, + 0xbf850009, 0xe0310000, + 0x781d0000, 0x807cff7c, + 0x00000080, 0x8078ff78, + 0x00000080, 0xbf0a6f7c, + 0xbf85fff8, 0xbf820008, + 0xe0310000, 0x781d0000, + 0x807cff7c, 0x00000100, + 0x8078ff78, 0x00000100, + 0xbf0a6f7c, 0xbf85fff8, + 0xbef80380, 0xbefe03c1, + 0x907c9972, 0x877c817c, + 0xbf06817c, 0xbf850002, + 0xbeff0380, 0xbf820001, + 0xbeff03c1, 0xb96f2a05, + 0x806f816f, 0x8f6f826f, + 0x907c9972, 0x877c817c, + 0xbf06817c, 0xbf850024, + 0xbef603ff, 0x01000000, + 0xbeee0378, 0x8078ff78, + 0x00000200, 0xbefc0384, + 0xbf0a6f7c, 0xbf840050, + 0xe0304000, 0x785d0000, + 0xe0304080, 0x785d0100, + 0xe0304100, 0x785d0200, + 0xe0304180, 0x785d0300, + 0xbf8c3f70, 0x7e008500, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807c847c, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85ffee, + 0xe0304000, 0x6e5d0000, + 0xe0304080, 0x6e5d0100, + 0xe0304100, 0x6e5d0200, + 0xe0304180, 0x6e5d0300, + 0xbf8c3f70, 0xbf820034, 0xbef603ff, 0x01000000, 0xbeee0378, 0x8078ff78, 0x00000400, 0xbefc0384, + 0xbf0a6f7c, 0xbf840012, 0xe0304000, 0x785d0000, 0xe0304100, 0x785d0100, 0xe0304200, 0x785d0200, @@ -1060,270 +1072,272 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xb96e2a05, 0x806e816e, 0xbf0d9972, 0xbf850002, 0x8f6e896e, 0xbf820001, - 0x8f6e8a6e, 0x806eff6e, - 0x00000200, 0x806e746e, - 0x826f8075, 0x876fff6f, - 0x0000ffff, 0xf4091c37, - 0xfa000050, 0xf4091d37, - 0xfa000060, 0xf4011e77, - 0xfa000074, 0xbf8cc07f, - 0x876fff6d, 0xfc000000, - 0x906f9a6f, 0x8f6f906f, - 0xbeee0380, 0x886e6f6e, - 0x876fff6d, 0x02000000, - 0x906f996f, 0x8f6f8f6f, - 0x886e6f6e, 0x876fff6d, - 0x01000000, 0x906f986f, - 0x8f6f996f, 0x886e6f6e, - 0x876fff7a, 0x00800000, - 0x906f976f, 0xb9eef807, - 0x876dff6d, 0x0000ffff, - 0x87fe7e7e, 0x87ea6a6a, - 0xb9faf802, 0xbe80226c, - 0xbf810000, 0xbf9f0000, + 0x8f6e8a6e, 0xb96f1e06, + 0x8f6f8a6f, 0x806e6f6e, + 0x806eff6e, 0x00000200, + 0x806e746e, 0x826f8075, + 0x876fff6f, 0x0000ffff, + 0xf4091c37, 0xfa000050, + 0xf4091d37, 0xfa000060, + 0xf4011e77, 0xfa000074, + 0xbf8cc07f, 0x906e8977, + 0x876fff6e, 0x003f8000, + 0x906e8677, 0x876eff6e, + 0x02000000, 0x886e6f6e, + 0xb9eef807, 0x876dff6d, + 0x0000ffff, 0x87fe7e7e, + 0x87ea6a6a, 0xb9faf802, + 0xbe80226c, 0xbf810000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0x00000000, }; static const uint32_t cwsr_trap_arcturus_hex[] = { - 0xbf820001, 0xbf8202c4, - 0xb8f8f802, 0x89788678, - 0xb8eef801, 0x866eff6e, - 0x00000800, 0xbf840003, + 0xbf820001, 0xbf8202d0, + 0xb8f8f802, 0x8978ff78, + 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, - 0xbf840016, 0xb8fbf803, + 0xbf840009, 0x866eff6d, + 0x00ff0000, 0xbf85001e, 0x866eff7b, 0x00000400, - 0xbf85003b, 0x866eff7b, - 0x00000800, 0xbf850003, - 0x866eff7b, 0x00000100, - 0xbf84000c, 0x866eff78, - 0x00002000, 0xbf840005, - 0xbf8e0010, 0xb8eef803, - 0x866eff6e, 0x00000400, - 0xbf84fffb, 0x8778ff78, - 0x00002000, 0x80ec886c, - 0x82ed806d, 0xb8eef807, - 0x866fff6e, 0x001f8000, - 0x8e6f8b6f, 0x8977ff77, - 0xfc000000, 0x87776f77, - 0x896eff6e, 0x001f8000, - 0xb96ef807, 0xb8faf812, + 0xbf850051, 0xbf8e0010, + 0xb8fbf803, 0xbf82fffa, + 0x866eff7b, 0x00000900, + 0xbf850015, 0x866eff7b, + 0x000071ff, 0xbf840008, + 0x866fff7b, 0x00007080, + 0xbf840001, 0xbeee1a87, + 0xb8eff801, 0x8e6e8c6e, + 0x866e6f6e, 0xbf85000a, + 0x866eff6d, 0x00ff0000, + 0xbf850007, 0xb8eef801, + 0x866eff6e, 0x00000800, + 0xbf850003, 0x866eff7b, + 0x00000400, 0xbf850036, + 0xb8faf807, 0x867aff7a, + 0x001f8000, 0x8e7a8b7a, + 0x8977ff77, 0xfc000000, + 0x87777a77, 0xba7ff807, + 0x00000000, 0xb8faf812, 0xb8fbf813, 0x8efa887a, - 0xc0071bbd, 0x00000000, - 0xbf8cc07f, 0xc0071ebd, - 0x00000008, 0xbf8cc07f, - 0x86ee6e6e, 0xbf840001, - 0xbe801d6e, 0xb8fbf803, - 0x867bff7b, 0x000001ff, + 0xc0031bbd, 0x00000010, + 0xbf8cc07f, 0x8e6e976e, + 0x8977ff77, 0x00800000, + 0x87776e77, 0xc0071bbd, + 0x00000000, 0xbf8cc07f, + 0xc0071ebd, 0x00000008, + 0xbf8cc07f, 0x86ee6e6e, + 0xbf840001, 0xbe801d6e, + 0x866eff6d, 0x01ff0000, + 0xbf850005, 0x8778ff78, + 0x00002000, 0x80ec886c, + 0x82ed806d, 0xbf820005, + 0x866eff6d, 0x01000000, 0xbf850002, 0x806c846c, 0x826d806d, 0x866dff6d, - 0x0000ffff, 0x8f6e8b77, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x86fe7e7e, + 0x0000ffff, 0x8f7a8b77, + 0x867aff7a, 0x001f8000, + 0xb97af807, 0x86fe7e7e, 0x86ea6a6a, 0x8f6e8378, 0xb96ee0c2, 0xbf800002, 0xb9780002, 0xbe801f6c, 0x866dff6d, 0x0000ffff, 0xbefa0080, 0xb97a0283, - 0xb8fa2407, 0x8e7a9b7a, - 0x876d7a6d, 0xb8fa03c7, - 0x8e7a9a7a, 0x876d7a6d, 0xb8faf807, 0x867aff7a, - 0x00007fff, 0xb97af807, - 0xbeee007e, 0xbeef007f, - 0xbefe0180, 0xbf900004, - 0x877a8478, 0xb97af802, - 0xbf8e0002, 0xbf88fffe, - 0xb8fa2a05, 0x807a817a, - 0x8e7a8a7a, 0x8e7a817a, - 0xb8fb1605, 0x807b817b, - 0x8e7b867b, 0x807a7b7a, - 0x807a7e7a, 0x827b807f, - 0x867bff7b, 0x0000ffff, - 0xc04b1c3d, 0x00000050, - 0xbf8cc07f, 0xc04b1d3d, - 0x00000060, 0xbf8cc07f, - 0xc0431e7d, 0x00000074, - 0xbf8cc07f, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x867aff7f, - 0x08000000, 0x8f7a837a, - 0x87777a77, 0x867aff7f, - 0x70000000, 0x8f7a817a, - 0x87777a77, 0xbef1007c, - 0xbef00080, 0xb8f02a05, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xbefe007c, 0xbefc0070, - 0xc0611c7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611b3a, + 0x001f8000, 0x8e7a8b7a, + 0x8977ff77, 0xfc000000, + 0x87777a77, 0xba7ff807, + 0x00000000, 0xbeee007e, + 0xbeef007f, 0xbefe0180, + 0xbf900004, 0x877a8478, + 0xb97af802, 0xbf8e0002, + 0xbf88fffe, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0x8e7a817a, 0xb8fb1605, + 0x807b817b, 0x8e7b867b, + 0x807a7b7a, 0x807a7e7a, + 0x827b807f, 0x867bff7b, + 0x0000ffff, 0xc04b1c3d, + 0x00000050, 0xbf8cc07f, + 0xc04b1d3d, 0x00000060, + 0xbf8cc07f, 0xc0431e7d, + 0x00000074, 0xbf8cc07f, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0xbef1007c, 0xbef00080, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611b7a, 0x0000007c, + 0xc0611b3a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611bba, + 0xbefc0070, 0xc0611b7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611bfa, 0x0000007c, + 0xc0611bba, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611e3a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xb8fbf803, 0xbefe007c, - 0xbefc0070, 0xc0611efa, + 0xbefc0070, 0xc0611bfa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611a3a, 0x0000007c, + 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8fbf803, + 0xbefe007c, 0xbefc0070, + 0xc0611efa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611a7a, + 0xbefc0070, 0xc0611a3a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, - 0xb8f1f801, 0xbefe007c, - 0xbefc0070, 0xc0611c7a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0x867aff7f, 0x04000000, - 0xbeef0080, 0x876f6f7a, - 0xb8f02a05, 0x80708170, - 0x8e708a70, 0x8e708170, - 0xb8fb1605, 0x807b817b, - 0x8e7b847b, 0x8e76827b, - 0xbef600ff, 0x01000000, - 0xbef20174, 0x80747074, - 0x82758075, 0xbefc0080, - 0xbf800000, 0xbe802b00, - 0xbe822b02, 0xbe842b04, - 0xbe862b06, 0xbe882b08, - 0xbe8a2b0a, 0xbe8c2b0c, - 0xbe8e2b0e, 0xc06b003a, - 0x00000000, 0xbf8cc07f, - 0xc06b013a, 0x00000010, - 0xbf8cc07f, 0xc06b023a, - 0x00000020, 0xbf8cc07f, - 0xc06b033a, 0x00000030, - 0xbf8cc07f, 0x8074c074, - 0x82758075, 0x807c907c, - 0xbf0a7b7c, 0xbf85ffe7, - 0xbef40172, 0xbef00080, - 0xbefe00c1, 0xbeff00c1, - 0xbee80080, 0xbee90080, - 0xbef600ff, 0x01000000, - 0x867aff78, 0x00400000, - 0xbf850003, 0xb8faf803, - 0x897a7aff, 0x10000000, - 0xbf85004d, 0xbe840080, - 0xd2890000, 0x00000900, - 0x80048104, 0xd2890001, + 0xbefe007c, 0xbefc0070, + 0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8f1f801, + 0xbefe007c, 0xbefc0070, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0x867aff7f, + 0x04000000, 0xbeef0080, + 0x876f6f7a, 0xb8f02a05, + 0x80708170, 0x8e708a70, + 0x8e708170, 0xb8fb1605, + 0x807b817b, 0x8e7b847b, + 0x8e76827b, 0xbef600ff, + 0x01000000, 0xbef20174, + 0x80747074, 0x82758075, + 0xbefc0080, 0xbf800000, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003a, 0x00000000, + 0xbf8cc07f, 0xc06b013a, + 0x00000010, 0xbf8cc07f, + 0xc06b023a, 0x00000020, + 0xbf8cc07f, 0xc06b033a, + 0x00000030, 0xbf8cc07f, + 0x8074c074, 0x82758075, + 0x807c907c, 0xbf0a7b7c, + 0xbf85ffe7, 0xbef40172, + 0xbef00080, 0xbefe00c1, + 0xbeff00c1, 0xbee80080, + 0xbee90080, 0xbef600ff, + 0x01000000, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf85004d, + 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, - 0xd2890002, 0x00000900, - 0x80048104, 0xd2890003, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, 0x00000900, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000901, 0x80048104, - 0xd2890001, 0x00000901, - 0x80048104, 0xd2890002, - 0x00000901, 0x80048104, - 0xd2890003, 0x00000901, + 0xd2890003, 0x00000900, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000902, + 0xd2890000, 0x00000901, 0x80048104, 0xd2890001, - 0x00000902, 0x80048104, - 0xd2890002, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, 0x80048104, 0xd2890003, - 0x00000902, 0x80048104, + 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000903, 0x80048104, - 0xd2890001, 0x00000903, - 0x80048104, 0xd2890002, - 0x00000903, 0x80048104, - 0xd2890003, 0x00000903, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbf820008, - 0xe0724000, 0x701d0000, - 0xe0724100, 0x701d0100, - 0xe0724200, 0x701d0200, - 0xe0724300, 0x701d0300, - 0xbefe00c1, 0xbeff00c1, - 0xb8fb4306, 0x867bc17b, - 0xbf840064, 0xbf8a0000, - 0x867aff6f, 0x04000000, - 0xbf840060, 0x8e7b867b, - 0x8e7b827b, 0xbef6007b, - 0xb8f02a05, 0x80708170, - 0x8e708a70, 0x8e708170, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x80707a70, - 0x8070ff70, 0x00000080, - 0xbef600ff, 0x01000000, - 0xbefc0080, 0xd28c0002, - 0x000100c1, 0xd28d0003, - 0x000204c1, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850030, - 0x24040682, 0xd86e4000, - 0x00000002, 0xbf8cc07f, - 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, + 0xd2890000, 0x00000903, 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbf820008, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0xbefe00c1, + 0xbeff00c1, 0xb8fb4306, + 0x867bc17b, 0xbf840064, + 0xbf8a0000, 0x867aff6f, + 0x04000000, 0xbf840060, + 0x8e7b867b, 0x8e7b827b, + 0xbef6007b, 0xb8f02a05, + 0x80708170, 0x8e708a70, + 0x8e708170, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0x8070ff70, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xd28c0002, 0x000100c1, + 0xd28d0003, 0x000204c1, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850030, 0x24040682, + 0xd86e4000, 0x00000002, + 0xbf8cc07f, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0x680404ff, 0x00000200, + 0xbe840080, 0xd2890000, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, + 0x80048104, 0xd2890002, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x680404ff, + 0x00000200, 0xd0c9006a, + 0x0000f702, 0xbf87ffd2, + 0xbf820015, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe800077, + 0x8677ff77, 0xff7fffff, + 0x8777ff77, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8cc07f, 0xe0765000, + 0x701d0002, 0x68040702, 0xd0c9006a, 0x0000f702, - 0xbf87ffd2, 0xbf820015, - 0xd1060002, 0x00011103, - 0x7e0602ff, 0x00000200, - 0xbefc00ff, 0x00010000, - 0xbe800077, 0x8677ff77, - 0xff7fffff, 0x8777ff77, - 0x00058000, 0xd8ec0000, - 0x00000002, 0xbf8cc07f, - 0xe0765000, 0x701d0002, - 0x68040702, 0xd0c9006a, - 0x0000f702, 0xbf87fff7, - 0xbef70000, 0xbef000ff, - 0x00000400, 0xbefe00c1, - 0xbeff00c1, 0xb8fb2a05, - 0x807b817b, 0x8e7b827b, - 0x8e76887b, 0xbef600ff, + 0xbf87fff7, 0xbef70000, + 0xbef000ff, 0x00000400, + 0xbefe00c1, 0xbeff00c1, + 0xb8fb2a05, 0x807b817b, + 0x8e7b827b, 0xbef600ff, 0x01000000, 0xbefc0084, 0xbf0a7b7c, 0xbf84006d, 0xbf11017c, 0x807bff7b, @@ -1440,15 +1454,11 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0x701d0300, 0x807c847c, 0x8070ff70, 0x00000400, 0xbf0a7b7c, 0xbf85ffeb, - 0xbf9c0000, 0xbf820106, + 0xbf9c0000, 0xbf8200e3, 0xbef4007e, 0x8675ff7f, 0x0000ffff, 0x8775ff75, 0x00040000, 0xbef60080, 0xbef700ff, 0x00807fac, - 0x866eff7f, 0x08000000, - 0x8f6e836e, 0x87776e77, - 0x866eff7f, 0x70000000, - 0x8f6e816e, 0x87776e77, 0x866eff7f, 0x04000000, 0xbf84001f, 0xbefe00c1, 0xbeff00c1, 0xb8ef4306, @@ -1466,26 +1476,14 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0x807cff7c, 0x00000200, 0x8078ff78, 0x00000200, 0xbf0a6f7c, 0xbf85fff6, - 0xbef80080, 0xbefe00c1, - 0xbeff00c1, 0xb8ef2a05, - 0x806f816f, 0x8e6f826f, - 0x8e76886f, 0xbef90076, + 0xbefe00c1, 0xbeff00c1, 0xbef600ff, 0x01000000, + 0xb8ef2a05, 0x806f816f, + 0x8e6f826f, 0x806fff6f, + 0x00008000, 0xbef80080, 0xbeee0078, 0x8078ff78, - 0x00000400, 0xbef30079, - 0x8079ff79, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0x806fff6f, 0x00008000, - 0xe0524000, 0x791d0000, - 0xe0524100, 0x791d0100, - 0xe0524200, 0x791d0200, - 0xe0524300, 0x791d0300, - 0x8079ff79, 0x00000400, - 0xbf8c0f70, 0xd3d94000, - 0x18000100, 0xd3d94001, - 0x18000101, 0xd3d94002, - 0x18000102, 0xd3d94003, - 0x18000103, 0xe0524000, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0xe0524000, 0x781d0000, 0xe0524100, 0x781d0100, 0xe0524200, 0x781d0200, 0xe0524300, @@ -1494,20 +1492,24 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0x7e040302, 0x7e060303, 0x807c847c, 0x8078ff78, 0x00000400, 0xbf0a6f7c, - 0xbf85ffdb, 0xbf9c0000, - 0xe0524000, 0x731d0000, - 0xe0524100, 0x731d0100, - 0xe0524200, 0x731d0200, - 0xe0524300, 0x731d0300, - 0xbf8c0f70, 0xd3d94000, - 0x18000100, 0xd3d94001, - 0x18000101, 0xd3d94002, - 0x18000102, 0xd3d94003, - 0x18000103, 0xe0524000, - 0x6e1d0000, 0xe0524100, - 0x6e1d0100, 0xe0524200, - 0x6e1d0200, 0xe0524300, - 0x6e1d0300, 0xb8f82a05, + 0xbf85ffee, 0xbefc0080, + 0xbf11087c, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0xd3d94000, 0x18000100, + 0xd3d94001, 0x18000101, + 0xd3d94002, 0x18000102, + 0xd3d94003, 0x18000103, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffea, 0xbf9c0000, + 0xe0524000, 0x6e1d0000, + 0xe0524100, 0x6e1d0100, + 0xe0524200, 0x6e1d0200, + 0xe0524300, 0x6e1d0300, + 0xbf8c0f70, 0xb8f82a05, 0x80788178, 0x8e788a78, 0x8e788178, 0xb8ee1605, 0x806e816e, 0x8e6e866e, @@ -1559,268 +1561,225 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0xc00b1c37, 0x00000050, 0xc00b1d37, 0x00000060, 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x866fff6d, - 0xf8000000, 0x8f6f9b6f, - 0x8e6f906f, 0xbeee0080, - 0x876e6f6e, 0x866fff6d, - 0x04000000, 0x8f6f9a6f, - 0x8e6f8f6f, 0x876e6f6e, - 0x866fff7a, 0x00800000, - 0x8f6f976f, 0xb96ef807, - 0x866dff6d, 0x0000ffff, - 0x86fe7e7e, 0x86ea6a6a, - 0x8f6e837a, 0xb96ee0c2, - 0xbf800002, 0xb97a0002, - 0xbf8a0000, 0x95806f6c, - 0xbf810000, 0x00000000, + 0xbf8cc07f, 0x8f6e8b77, + 0x866eff6e, 0x001f8000, + 0xb96ef807, 0x866dff6d, + 0x0000ffff, 0x86fe7e7e, + 0x86ea6a6a, 0x8f6e837a, + 0xb96ee0c2, 0xbf800002, + 0xb97a0002, 0xbf8a0000, + 0xbe801f6c, 0xbf810000, }; static const uint32_t cwsr_trap_aldebaran_hex[] = { - 0xbf820001, 0xbf8202ce, - 0xb8f8f802, 0x89788678, - 0xb8eef801, 0x866eff6e, - 0x00000800, 0xbf840003, + 0xbf820001, 0xbf8202db, + 0xb8f8f802, 0x8978ff78, + 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, - 0xbf840016, 0xb8fbf803, + 0xbf840009, 0x866eff6d, + 0x00ff0000, 0xbf85001e, 0x866eff7b, 0x00000400, - 0xbf85003b, 0x866eff7b, - 0x00000800, 0xbf850003, - 0x866eff7b, 0x00000100, - 0xbf84000c, 0x866eff78, - 0x00002000, 0xbf840005, - 0xbf8e0010, 0xb8eef803, - 0x866eff6e, 0x00000400, - 0xbf84fffb, 0x8778ff78, - 0x00002000, 0x80ec886c, - 0x82ed806d, 0xb8eef807, - 0x866fff6e, 0x001f8000, - 0x8e6f8b6f, 0x8977ff77, - 0xfc000000, 0x87776f77, - 0x896eff6e, 0x001f8000, - 0xb96ef807, 0xb8faf812, + 0xbf850051, 0xbf8e0010, + 0xb8fbf803, 0xbf82fffa, + 0x866eff7b, 0x00000900, + 0xbf850015, 0x866eff7b, + 0x000071ff, 0xbf840008, + 0x866fff7b, 0x00007080, + 0xbf840001, 0xbeee1a87, + 0xb8eff801, 0x8e6e8c6e, + 0x866e6f6e, 0xbf85000a, + 0x866eff6d, 0x00ff0000, + 0xbf850007, 0xb8eef801, + 0x866eff6e, 0x00000800, + 0xbf850003, 0x866eff7b, + 0x00000400, 0xbf850036, + 0xb8faf807, 0x867aff7a, + 0x001f8000, 0x8e7a8b7a, + 0x8977ff77, 0xfc000000, + 0x87777a77, 0xba7ff807, + 0x00000000, 0xb8faf812, 0xb8fbf813, 0x8efa887a, - 0xc0071bbd, 0x00000000, - 0xbf8cc07f, 0xc0071ebd, - 0x00000008, 0xbf8cc07f, - 0x86ee6e6e, 0xbf840001, - 0xbe801d6e, 0xb8fbf803, - 0x867bff7b, 0x000001ff, + 0xc0031bbd, 0x00000010, + 0xbf8cc07f, 0x8e6e976e, + 0x8977ff77, 0x00800000, + 0x87776e77, 0xc0071bbd, + 0x00000000, 0xbf8cc07f, + 0xc0071ebd, 0x00000008, + 0xbf8cc07f, 0x86ee6e6e, + 0xbf840001, 0xbe801d6e, + 0x866eff6d, 0x01ff0000, + 0xbf850005, 0x8778ff78, + 0x00002000, 0x80ec886c, + 0x82ed806d, 0xbf820005, + 0x866eff6d, 0x01000000, 0xbf850002, 0x806c846c, 0x826d806d, 0x866dff6d, - 0x0000ffff, 0x8f6e8b77, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x86fe7e7e, + 0x0000ffff, 0x8f7a8b77, + 0x867aff7a, 0x001f8000, + 0xb97af807, 0x86fe7e7e, 0x86ea6a6a, 0x8f6e8378, 0xb96ee0c2, 0xbf800002, 0xb9780002, 0xbe801f6c, 0x866dff6d, 0x0000ffff, 0xbefa0080, 0xb97a0283, - 0xb8fa2407, 0x8e7a9b7a, - 0x876d7a6d, 0xb8fa03c7, - 0x8e7a9a7a, 0x876d7a6d, 0xb8faf807, 0x867aff7a, - 0x00007fff, 0xb97af807, - 0xbeee007e, 0xbeef007f, - 0xbefe0180, 0xbf900004, - 0x877a8478, 0xb97af802, - 0xbf8e0002, 0xbf88fffe, - 0xb8fa2985, 0x807a817a, - 0x8e7a8a7a, 0x8e7a817a, - 0xb8fb1605, 0x807b817b, - 0x8e7b867b, 0x807a7b7a, - 0x807a7e7a, 0x827b807f, - 0x867bff7b, 0x0000ffff, - 0xc04b1c3d, 0x00000050, - 0xbf8cc07f, 0xc04b1d3d, - 0x00000060, 0xbf8cc07f, - 0xc0431e7d, 0x00000074, - 0xbf8cc07f, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x867aff7f, - 0x08000000, 0x8f7a837a, - 0x87777a77, 0x867aff7f, - 0x70000000, 0x8f7a817a, - 0x87777a77, 0xbef1007c, - 0xbef00080, 0xb8f02985, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xbefe007c, 0xbefc0070, - 0xc0611c7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611b3a, + 0x001f8000, 0x8e7a8b7a, + 0x8977ff77, 0xfc000000, + 0x87777a77, 0xba7ff807, + 0x00000000, 0xbeee007e, + 0xbeef007f, 0xbefe0180, + 0xbf900004, 0x877a8478, + 0xb97af802, 0xbf8e0002, + 0xbf88fffe, 0xb8fa2985, + 0x807a817a, 0x8e7a8a7a, + 0x8e7a817a, 0xb8fb1605, + 0x807b817b, 0x8e7b867b, + 0x807a7b7a, 0x807a7e7a, + 0x827b807f, 0x867bff7b, + 0x0000ffff, 0xc04b1c3d, + 0x00000050, 0xbf8cc07f, + 0xc04b1d3d, 0x00000060, + 0xbf8cc07f, 0xc0431e7d, + 0x00000074, 0xbf8cc07f, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0xbef1007c, 0xbef00080, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611b7a, 0x0000007c, + 0xc0611b3a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611bba, + 0xbefc0070, 0xc0611b7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611bfa, 0x0000007c, + 0xc0611bba, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611e3a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xb8fbf803, 0xbefe007c, - 0xbefc0070, 0xc0611efa, + 0xbefc0070, 0xc0611bfa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611a3a, 0x0000007c, + 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8fbf803, + 0xbefe007c, 0xbefc0070, + 0xc0611efa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611a7a, + 0xbefc0070, 0xc0611a3a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, - 0xb8f1f801, 0xbefe007c, - 0xbefc0070, 0xc0611c7a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0x867aff7f, 0x04000000, - 0xbeef0080, 0x876f6f7a, - 0xb8f02985, 0x80708170, - 0x8e708a70, 0x8e708170, - 0xb8fb1605, 0x807b817b, - 0x8e7b847b, 0x8e76827b, - 0xbef600ff, 0x01000000, - 0xbef20174, 0x80747074, - 0x82758075, 0xbefc0080, - 0xbf800000, 0xbe802b00, - 0xbe822b02, 0xbe842b04, - 0xbe862b06, 0xbe882b08, - 0xbe8a2b0a, 0xbe8c2b0c, - 0xbe8e2b0e, 0xc06b003a, - 0x00000000, 0xbf8cc07f, - 0xc06b013a, 0x00000010, - 0xbf8cc07f, 0xc06b023a, - 0x00000020, 0xbf8cc07f, - 0xc06b033a, 0x00000030, - 0xbf8cc07f, 0x8074c074, - 0x82758075, 0x807c907c, - 0xbf0a7b7c, 0xbf85ffe7, - 0xbef40172, 0xbef00080, - 0xbefe00c1, 0xbeff00c1, - 0xbee80080, 0xbee90080, - 0xbef600ff, 0x01000000, - 0x867aff78, 0x00400000, - 0xbf850003, 0xb8faf803, - 0x897a7aff, 0x10000000, - 0xbf85004d, 0xbe840080, - 0xd2890000, 0x00000900, - 0x80048104, 0xd2890001, + 0xbefe007c, 0xbefc0070, + 0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8f1f801, + 0xbefe007c, 0xbefc0070, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0x867aff7f, + 0x04000000, 0xbeef0080, + 0x876f6f7a, 0xb8f02985, + 0x80708170, 0x8e708a70, + 0x8e708170, 0xb8fb1605, + 0x807b817b, 0x8e7b847b, + 0x8e76827b, 0xbef600ff, + 0x01000000, 0xbef20174, + 0x80747074, 0x82758075, + 0xbefc0080, 0xbf800000, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003a, 0x00000000, + 0xbf8cc07f, 0xc06b013a, + 0x00000010, 0xbf8cc07f, + 0xc06b023a, 0x00000020, + 0xbf8cc07f, 0xc06b033a, + 0x00000030, 0xbf8cc07f, + 0x8074c074, 0x82758075, + 0x807c907c, 0xbf0a7b7c, + 0xbf85ffe7, 0xbef40172, + 0xbef00080, 0xbefe00c1, + 0xbeff00c1, 0xbee80080, + 0xbee90080, 0xbef600ff, + 0x01000000, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf85004d, + 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, - 0xd2890002, 0x00000900, - 0x80048104, 0xd2890003, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, 0x00000900, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000901, 0x80048104, - 0xd2890001, 0x00000901, - 0x80048104, 0xd2890002, - 0x00000901, 0x80048104, - 0xd2890003, 0x00000901, + 0xd2890003, 0x00000900, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000902, + 0xd2890000, 0x00000901, 0x80048104, 0xd2890001, - 0x00000902, 0x80048104, - 0xd2890002, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, 0x80048104, 0xd2890003, - 0x00000902, 0x80048104, + 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000903, 0x80048104, - 0xd2890001, 0x00000903, - 0x80048104, 0xd2890002, - 0x00000903, 0x80048104, - 0xd2890003, 0x00000903, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbf820008, - 0xe0724000, 0x701d0000, - 0xe0724100, 0x701d0100, - 0xe0724200, 0x701d0200, - 0xe0724300, 0x701d0300, - 0xbefe00c1, 0xbeff00c1, - 0xb8fb4306, 0x867bc17b, - 0xbf840064, 0xbf8a0000, - 0x867aff6f, 0x04000000, - 0xbf840060, 0x8e7b867b, - 0x8e7b827b, 0xbef6007b, - 0xb8f02985, 0x80708170, - 0x8e708a70, 0x8e708170, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x80707a70, - 0x8070ff70, 0x00000080, - 0xbef600ff, 0x01000000, - 0xbefc0080, 0xd28c0002, - 0x000100c1, 0xd28d0003, - 0x000204c1, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850030, - 0x24040682, 0xd86e4000, - 0x00000002, 0xbf8cc07f, - 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, + 0xd2890000, 0x00000903, 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, + 0x00000903, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0x680404ff, 0x00000200, - 0xd0c9006a, 0x0000f702, - 0xbf87ffd2, 0xbf820015, - 0xd1060002, 0x00011103, - 0x7e0602ff, 0x00000200, - 0xbefc00ff, 0x00010000, - 0xbe800077, 0x8677ff77, - 0xff7fffff, 0x8777ff77, - 0x00058000, 0xd8ec0000, - 0x00000002, 0xbf8cc07f, - 0xe0765000, 0x701d0002, - 0x68040702, 0xd0c9006a, - 0x0000f702, 0xbf87fff7, - 0xbef70000, 0xbef000ff, - 0x00000400, 0xbefe00c1, - 0xbeff00c1, 0xb8fb2b05, - 0x807b817b, 0x8e7b827b, - 0xbef600ff, 0x01000000, - 0xbefc0084, 0xbf0a7b7c, - 0xbf84006d, 0xbf11017c, - 0x807bff7b, 0x00001000, + 0xbf820008, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0xbefe00c1, + 0xbeff00c1, 0xb8fb4306, + 0x867bc17b, 0xbf840064, + 0xbf8a0000, 0x867aff6f, + 0x04000000, 0xbf840060, + 0x8e7b867b, 0x8e7b827b, + 0xbef6007b, 0xb8f02985, + 0x80708170, 0x8e708a70, + 0x8e708170, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0x8070ff70, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xd28c0002, 0x000100c1, + 0xd28d0003, 0x000204c1, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850051, 0xbe840080, + 0xbf850030, 0x24040682, + 0xd86e4000, 0x00000002, + 0xbf8cc07f, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -1839,51 +1798,31 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000902, - 0x80048104, 0xd2890001, - 0x00000902, 0x80048104, - 0xd2890002, 0x00000902, - 0x80048104, 0xd2890003, - 0x00000902, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000903, 0x80048104, - 0xd2890001, 0x00000903, - 0x80048104, 0xd2890002, - 0x00000903, 0x80048104, - 0xd2890003, 0x00000903, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0x807c847c, - 0xbf0a7b7c, 0xbf85ffb1, - 0xbf9c0000, 0xbf820012, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, - 0xe0724000, 0x701d0000, - 0xe0724100, 0x701d0100, - 0xe0724200, 0x701d0200, - 0xe0724300, 0x701d0300, - 0x807c847c, 0x8070ff70, - 0x00000400, 0xbf0a7b7c, - 0xbf85ffef, 0xbf9c0000, - 0xb8fb2985, 0x807b817b, - 0x8e7b837b, 0xb8fa2b05, - 0x807a817a, 0x8e7a827a, - 0x80fb7a7b, 0x867b7b7b, - 0xbf84007a, 0x807bff7b, - 0x00001000, 0xbefc0080, - 0xbf11017c, 0x867aff78, + 0xbf84ffee, 0x680404ff, + 0x00000200, 0xd0c9006a, + 0x0000f702, 0xbf87ffd2, + 0xbf820015, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe800077, + 0x8677ff77, 0xff7fffff, + 0x8777ff77, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8cc07f, 0xe0765000, + 0x701d0002, 0x68040702, + 0xd0c9006a, 0x0000f702, + 0xbf87fff7, 0xbef70000, + 0xbef000ff, 0x00000400, + 0xbefe00c1, 0xbeff00c1, + 0xb8fb2b05, 0x807b817b, + 0x8e7b827b, 0xbef600ff, + 0x01000000, 0xbefc0084, + 0xbf0a7b7c, 0xbf84006d, + 0xbf11017c, 0x807bff7b, + 0x00001000, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850059, - 0xd3d84000, 0x18000100, - 0xd3d84001, 0x18000101, - 0xd3d84002, 0x18000102, - 0xd3d84003, 0x18000103, + 0x10000000, 0xbf850051, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -1923,169 +1862,241 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0x807c847c, 0xbf0a7b7c, - 0xbf85ffa9, 0xbf9c0000, - 0xbf820016, 0xd3d84000, - 0x18000100, 0xd3d84001, - 0x18000101, 0xd3d84002, - 0x18000102, 0xd3d84003, - 0x18000103, 0xe0724000, + 0xbf85ffb1, 0xbf9c0000, + 0xbf820012, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0xe0724000, 0x701d0000, 0xe0724100, 0x701d0100, 0xe0724200, 0x701d0200, 0xe0724300, 0x701d0300, 0x807c847c, 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffeb, - 0xbf9c0000, 0xbf820101, - 0xbef4007e, 0x8675ff7f, - 0x0000ffff, 0x8775ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x00807fac, - 0x866eff7f, 0x08000000, - 0x8f6e836e, 0x87776e77, - 0x866eff7f, 0x70000000, - 0x8f6e816e, 0x87776e77, - 0x866eff7f, 0x04000000, - 0xbf84001f, 0xbefe00c1, - 0xbeff00c1, 0xb8ef4306, - 0x866fc16f, 0xbf84001a, - 0x8e6f866f, 0x8e6f826f, - 0xbef6006f, 0xb8f82985, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x8078ff78, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xe0510000, 0x781d0000, - 0xe0510100, 0x781d0000, - 0x807cff7c, 0x00000200, - 0x8078ff78, 0x00000200, - 0xbf0a6f7c, 0xbf85fff6, + 0xbf0a7b7c, 0xbf85ffef, + 0xbf9c0000, 0xb8fb2985, + 0x807b817b, 0x8e7b837b, + 0xb8fa2b05, 0x807a817a, + 0x8e7a827a, 0x80fb7a7b, + 0x867b7b7b, 0xbf84007a, + 0x807bff7b, 0x00001000, + 0xbefc0080, 0xbf11017c, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850059, 0xd3d84000, + 0x18000100, 0xd3d84001, + 0x18000101, 0xd3d84002, + 0x18000102, 0xd3d84003, + 0x18000103, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, + 0x80048104, 0xd2890002, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x807c847c, + 0xbf0a7b7c, 0xbf85ffa9, + 0xbf9c0000, 0xbf820016, + 0xd3d84000, 0x18000100, + 0xd3d84001, 0x18000101, + 0xd3d84002, 0x18000102, + 0xd3d84003, 0x18000103, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffeb, 0xbf9c0000, + 0xbf8200ee, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x866eff7f, + 0x04000000, 0xbf84001f, 0xbefe00c1, 0xbeff00c1, + 0xb8ef4306, 0x866fc16f, + 0xbf84001a, 0x8e6f866f, + 0x8e6f826f, 0xbef6006f, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x8078ff78, 0x00000080, 0xbef600ff, 0x01000000, - 0xb8ef2b05, 0x806f816f, - 0x8e6f826f, 0x806fff6f, - 0x00008000, 0xbef80080, - 0xbeee0078, 0x8078ff78, - 0x00000400, 0xbefc0084, + 0xbefc0080, 0xe0510000, + 0x781d0000, 0xe0510100, + 0x781d0000, 0x807cff7c, + 0x00000200, 0x8078ff78, + 0x00000200, 0xbf0a6f7c, + 0xbf85fff6, 0xbefe00c1, + 0xbeff00c1, 0xbef600ff, + 0x01000000, 0xb8ef2b05, + 0x806f816f, 0x8e6f826f, + 0x806fff6f, 0x00008000, + 0xbef80080, 0xbeee0078, + 0x8078ff78, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffee, + 0xb8ef2985, 0x806f816f, + 0x8e6f836f, 0xb8f92b05, + 0x80798179, 0x8e798279, + 0x80ef796f, 0x866f6f6f, + 0xbf84001a, 0x806fff6f, + 0x00008000, 0xbefc0080, 0xbf11087c, 0xe0524000, 0x781d0000, 0xe0524100, 0x781d0100, 0xe0524200, 0x781d0200, 0xe0524300, 0x781d0300, 0xbf8c0f70, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, + 0xd3d94000, 0x18000100, + 0xd3d94001, 0x18000101, + 0xd3d94002, 0x18000102, + 0xd3d94003, 0x18000103, 0x807c847c, 0x8078ff78, 0x00000400, 0xbf0a6f7c, - 0xbf85ffee, 0xb8ef2985, - 0x806f816f, 0x8e6f836f, - 0xb8f92b05, 0x80798179, - 0x8e798279, 0x80ef796f, - 0x866f6f6f, 0xbf84001a, - 0x806fff6f, 0x00008000, - 0xbefc0080, 0xbf11087c, - 0xe0524000, 0x781d0000, - 0xe0524100, 0x781d0100, - 0xe0524200, 0x781d0200, - 0xe0524300, 0x781d0300, - 0xbf8c0f70, 0xd3d94000, - 0x18000100, 0xd3d94001, - 0x18000101, 0xd3d94002, - 0x18000102, 0xd3d94003, - 0x18000103, 0x807c847c, - 0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffea, - 0xbf9c0000, 0xe0524000, - 0x6e1d0000, 0xe0524100, - 0x6e1d0100, 0xe0524200, - 0x6e1d0200, 0xe0524300, - 0x6e1d0300, 0xbf8c0f70, - 0xb8f82985, 0x80788178, - 0x8e788a78, 0x8e788178, - 0xb8ee1605, 0x806e816e, - 0x8e6e866e, 0x80786e78, - 0x80f8c078, 0xb8ef1605, - 0x806f816f, 0x8e6f846f, - 0x8e76826f, 0xbef600ff, - 0x01000000, 0xbefc006f, - 0xc031003a, 0x00000078, - 0x80f8c078, 0xbf8cc07f, - 0x80fc907c, 0xbf800000, - 0xbe802d00, 0xbe822d02, - 0xbe842d04, 0xbe862d06, - 0xbe882d08, 0xbe8a2d0a, - 0xbe8c2d0c, 0xbe8e2d0e, - 0xbf06807c, 0xbf84fff0, - 0xb8f82985, 0x80788178, - 0x8e788a78, 0x8e788178, - 0xb8ee1605, 0x806e816e, - 0x8e6e866e, 0x80786e78, - 0xbef60084, 0xbef600ff, - 0x01000000, 0xc0211bfa, + 0xbf85ffea, 0xbf9c0000, + 0xe0524000, 0x6e1d0000, + 0xe0524100, 0x6e1d0100, + 0xe0524200, 0x6e1d0200, + 0xe0524300, 0x6e1d0300, + 0xbf8c0f70, 0xb8f82985, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x80f8c078, + 0xb8ef1605, 0x806f816f, + 0x8e6f846f, 0x8e76826f, + 0xbef600ff, 0x01000000, + 0xbefc006f, 0xc031003a, + 0x00000078, 0x80f8c078, + 0xbf8cc07f, 0x80fc907c, + 0xbf800000, 0xbe802d00, + 0xbe822d02, 0xbe842d04, + 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff0, 0xb8f82985, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0xbef60084, + 0xbef600ff, 0x01000000, + 0xc0211bfa, 0x00000078, + 0x80788478, 0xc0211b3a, 0x00000078, 0x80788478, - 0xc0211b3a, 0x00000078, - 0x80788478, 0xc0211b7a, + 0xc0211b7a, 0x00000078, + 0x80788478, 0xc0211c3a, 0x00000078, 0x80788478, - 0xc0211c3a, 0x00000078, - 0x80788478, 0xc0211c7a, + 0xc0211c7a, 0x00000078, + 0x80788478, 0xc0211eba, 0x00000078, 0x80788478, - 0xc0211eba, 0x00000078, - 0x80788478, 0xc0211efa, + 0xc0211efa, 0x00000078, + 0x80788478, 0xc0211a3a, 0x00000078, 0x80788478, - 0xc0211a3a, 0x00000078, - 0x80788478, 0xc0211a7a, + 0xc0211a7a, 0x00000078, + 0x80788478, 0xc0211cfa, 0x00000078, 0x80788478, - 0xc0211cfa, 0x00000078, - 0x80788478, 0xbf8cc07f, - 0xbefc006f, 0xbefe0070, - 0xbeff0071, 0x866f7bff, - 0x000003ff, 0xb96f4803, - 0x866f7bff, 0xfffff800, - 0x8f6f8b6f, 0xb96fa2c3, - 0xb973f801, 0xb8ee2985, - 0x806e816e, 0x8e6e8a6e, - 0x8e6e816e, 0xb8ef1605, - 0x806f816f, 0x8e6f866f, - 0x806e6f6e, 0x806e746e, - 0x826f8075, 0x866fff6f, - 0x0000ffff, 0xc00b1c37, - 0x00000050, 0xc00b1d37, - 0x00000060, 0xc0031e77, - 0x00000074, 0xbf8cc07f, - 0x866fff6d, 0xf8000000, - 0x8f6f9b6f, 0x8e6f906f, - 0xbeee0080, 0x876e6f6e, - 0x866fff6d, 0x04000000, - 0x8f6f9a6f, 0x8e6f8f6f, - 0x876e6f6e, 0x866fff7a, - 0x00800000, 0x8f6f976f, + 0xbf8cc07f, 0xbefc006f, + 0xbefe0070, 0xbeff0071, + 0x866f7bff, 0x000003ff, + 0xb96f4803, 0x866f7bff, + 0xfffff800, 0x8f6f8b6f, + 0xb96fa2c3, 0xb973f801, + 0xb8ee2985, 0x806e816e, + 0x8e6e8a6e, 0x8e6e816e, + 0xb8ef1605, 0x806f816f, + 0x8e6f866f, 0x806e6f6e, + 0x806e746e, 0x826f8075, + 0x866fff6f, 0x0000ffff, + 0xc00b1c37, 0x00000050, + 0xc00b1d37, 0x00000060, + 0xc0031e77, 0x00000074, + 0xbf8cc07f, 0x8f6e8b77, + 0x866eff6e, 0x001f8000, 0xb96ef807, 0x866dff6d, 0x0000ffff, 0x86fe7e7e, 0x86ea6a6a, 0x8f6e837a, 0xb96ee0c2, 0xbf800002, 0xb97a0002, 0xbf8a0000, - 0x95806f6c, 0xbf810000, + 0xbe801f6c, 0xbf810000, }; static const uint32_t cwsr_trap_gfx10_hex[] = { - 0xbf820001, 0xbf8201cf, + 0xbf820001, 0xbf82021c, 0xb0804004, 0xb978f802, - 0x8a788678, 0xb96ef801, - 0x876eff6e, 0x00000800, - 0xbf840003, 0x876eff78, + 0x8a78ff78, 0x00020006, + 0xb97bf803, 0x876eff78, 0x00002000, 0xbf840009, - 0xb97bf803, 0x876eff7b, - 0x00000400, 0xbf85001d, - 0x876eff7b, 0x00000100, - 0xbf840002, 0x8878ff78, - 0x00002000, 0xb97af812, + 0x876eff6d, 0x00ff0000, + 0xbf85001e, 0x876eff7b, + 0x00000400, 0xbf850041, + 0xbf8e0010, 0xb97bf803, + 0xbf82fffa, 0x876eff7b, + 0x00000900, 0xbf850015, + 0x876eff7b, 0x000071ff, + 0xbf840008, 0x876fff7b, + 0x00007080, 0xbf840001, + 0xbeee1d87, 0xb96ff801, + 0x8f6e8c6e, 0x876e6f6e, + 0xbf85000a, 0x876eff6d, + 0x00ff0000, 0xbf850007, + 0xb96ef801, 0x876eff6e, + 0x00000800, 0xbf850003, + 0x876eff7b, 0x00000400, + 0xbf850026, 0xb97af812, 0xb97bf813, 0x8ffa887a, - 0xf4051bbd, 0xfa000000, - 0xbf8cc07f, 0xf4051ebd, - 0xfa000008, 0xbf8cc07f, - 0x87ee6e6e, 0xbf840001, - 0xbe80206e, 0xb97bf803, - 0x877bff7b, 0x000001ff, + 0xf4011bbd, 0xfa000010, + 0xbf8cc07f, 0x8f6e976e, + 0x8a77ff77, 0x00800000, + 0x88776e77, 0xf4051bbd, + 0xfa000000, 0xbf8cc07f, + 0xf4051ebd, 0xfa000008, + 0xbf8cc07f, 0x87ee6e6e, + 0xbf840001, 0xbe80206e, + 0x876eff6d, 0x01ff0000, + 0xbf850005, 0x8878ff78, + 0x00002000, 0x80ec886c, + 0x82ed806d, 0xbf820005, + 0x876eff6d, 0x01000000, 0xbf850002, 0x806c846c, 0x826d806d, 0x876dff6d, 0x0000ffff, 0x87fe7e7e, @@ -2095,37 +2106,55 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xb9fa0283, 0xbeee037e, 0xbeef037f, 0xbefe0480, 0xbf900004, 0xbf8cc07f, + 0x877aff7f, 0x04000000, + 0x8f7a857a, 0x886d7a6d, + 0xbefa037e, 0x877bff7f, + 0x0000ffff, 0xbefe03c1, + 0xbeff03c1, 0xdc5f8000, + 0x007a0000, 0x7e000280, + 0xbefe037a, 0xbeff037b, 0xb97b02dc, 0x8f7b997b, - 0x887b7b7f, 0xb97a2a05, - 0x807a817a, 0xbf0d997b, - 0xbf850002, 0x8f7a897a, - 0xbf820001, 0x8f7a8a7a, + 0xb97a2a05, 0x807a817a, + 0xbf0d997b, 0xbf850002, + 0x8f7a897a, 0xbf820001, + 0x8f7a8a7a, 0xb97b1e06, + 0x8f7b8a7b, 0x807a7b7a, 0x877bff7f, 0x0000ffff, 0x807aff7a, 0x00000200, 0x807a7e7a, 0x827b807b, - 0xbef4037e, 0x8775ff7f, - 0x0000ffff, 0x8875ff75, - 0x00040000, 0xbef60380, - 0xbef703ff, 0x10807fac, - 0x877aff7f, 0x08000000, - 0x907a837a, 0x88777a77, - 0x877aff7f, 0x70000000, - 0x907a817a, 0x88777a77, - 0xbef1037c, 0xbef00380, - 0xb97302dc, 0x8f739973, - 0x8873737f, 0xbefe03c1, + 0xd7610000, 0x00010870, + 0xd7610000, 0x00010a71, + 0xd7610000, 0x00010c72, + 0xd7610000, 0x00010e73, + 0xd7610000, 0x00011074, + 0xd7610000, 0x00011275, + 0xd7610000, 0x00011476, + 0xd7610000, 0x00011677, + 0xd7610000, 0x00011a79, + 0xd7610000, 0x00011c7e, + 0xd7610000, 0x00011e7f, + 0xbefe03ff, 0x00003fff, + 0xbeff0380, 0xdc5f8040, + 0x007a0000, 0xd760007a, + 0x00011d00, 0xd760007b, + 0x00011f00, 0xbefe037a, + 0xbeff037b, 0xbef4037e, + 0x8775ff7f, 0x0000ffff, + 0x8875ff75, 0x00040000, + 0xbef60380, 0xbef703ff, + 0x10807fac, 0xbef1037c, + 0xbef00380, 0xb97302dc, + 0x8f739973, 0xbefe03c1, 0x907c9973, 0x877c817c, 0xbf06817c, 0xbf850002, 0xbeff0380, 0xbf820002, - 0xbeff03c1, 0xbf82000b, + 0xbeff03c1, 0xbf820009, 0xbef603ff, 0x01000000, - 0xe0704000, 0x705d0000, 0xe0704080, 0x705d0100, 0xe0704100, 0x705d0200, 0xe0704180, 0x705d0300, - 0xbf82000a, 0xbef603ff, - 0x01000000, 0xe0704000, - 0x705d0000, 0xe0704100, + 0xbf820008, 0xbef603ff, + 0x01000000, 0xe0704100, 0x705d0100, 0xe0704200, 0x705d0200, 0xe0704300, 0x705d0300, 0xb9702a05, @@ -2140,8 +2169,9 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbefc0380, 0xd7610002, 0x0000f871, 0x807c817c, 0xd7610002, 0x0000f86c, - 0x807c817c, 0xd7610002, - 0x0000f86d, 0x807c817c, + 0x807c817c, 0x8a7aff6d, + 0x80000000, 0xd7610002, + 0x0000f87a, 0x807c817c, 0xd7610002, 0x0000f86e, 0x807c817c, 0xd7610002, 0x0000f86f, 0x807c817c, @@ -2156,160 +2186,157 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x0000f871, 0x807c817c, 0xb971f815, 0xd7610002, 0x0000f871, 0x807c817c, + 0xbefe03ff, 0x0000ffff, 0xbeff0380, 0xe0704000, - 0x705d0200, 0xb9702a05, - 0x80708170, 0xbf0d9973, - 0xbf850002, 0x8f708970, - 0xbf820001, 0x8f708a70, - 0xb97a1e06, 0x8f7a8a7a, - 0x80707a70, 0xbef603ff, - 0x01000000, 0xbef90380, - 0xbefc0380, 0xbf800000, - 0xbe802f00, 0xbe822f02, - 0xbe842f04, 0xbe862f06, - 0xbe882f08, 0xbe8a2f0a, - 0xbe8c2f0c, 0xbe8e2f0e, - 0xd7610002, 0x0000f200, - 0x80798179, 0xd7610002, - 0x0000f201, 0x80798179, - 0xd7610002, 0x0000f202, - 0x80798179, 0xd7610002, - 0x0000f203, 0x80798179, - 0xd7610002, 0x0000f204, + 0x705d0200, 0xbefe03c1, + 0xb9702a05, 0x80708170, + 0xbf0d9973, 0xbf850002, + 0x8f708970, 0xbf820001, + 0x8f708a70, 0xb97a1e06, + 0x8f7a8a7a, 0x80707a70, + 0xbef603ff, 0x01000000, + 0xbef90380, 0xbefc0380, + 0xbf800000, 0xbe802f00, + 0xbe822f02, 0xbe842f04, + 0xbe862f06, 0xbe882f08, + 0xbe8a2f0a, 0xbe8c2f0c, + 0xbe8e2f0e, 0xd7610002, + 0x0000f200, 0x80798179, + 0xd7610002, 0x0000f201, 0x80798179, 0xd7610002, - 0x0000f205, 0x80798179, - 0xd7610002, 0x0000f206, + 0x0000f202, 0x80798179, + 0xd7610002, 0x0000f203, 0x80798179, 0xd7610002, - 0x0000f207, 0x80798179, - 0xd7610002, 0x0000f208, + 0x0000f204, 0x80798179, + 0xd7610002, 0x0000f205, 0x80798179, 0xd7610002, - 0x0000f209, 0x80798179, - 0xd7610002, 0x0000f20a, + 0x0000f206, 0x80798179, + 0xd7610002, 0x0000f207, 0x80798179, 0xd7610002, - 0x0000f20b, 0x80798179, - 0xd7610002, 0x0000f20c, + 0x0000f208, 0x80798179, + 0xd7610002, 0x0000f209, 0x80798179, 0xd7610002, - 0x0000f20d, 0x80798179, - 0xd7610002, 0x0000f20e, + 0x0000f20a, 0x80798179, + 0xd7610002, 0x0000f20b, 0x80798179, 0xd7610002, - 0x0000f20f, 0x80798179, - 0xbf06a079, 0xbf840006, - 0xe0704000, 0x705d0200, - 0x8070ff70, 0x00000080, - 0xbef90380, 0x7e040280, - 0x807c907c, 0xbf0aff7c, - 0x00000060, 0xbf85ffbc, - 0xbe802f00, 0xbe822f02, - 0xbe842f04, 0xbe862f06, - 0xbe882f08, 0xbe8a2f0a, - 0xd7610002, 0x0000f200, + 0x0000f20c, 0x80798179, + 0xd7610002, 0x0000f20d, 0x80798179, 0xd7610002, - 0x0000f201, 0x80798179, - 0xd7610002, 0x0000f202, + 0x0000f20e, 0x80798179, + 0xd7610002, 0x0000f20f, + 0x80798179, 0xbf06a079, + 0xbf840006, 0xe0704000, + 0x705d0200, 0x8070ff70, + 0x00000080, 0xbef90380, + 0x7e040280, 0x807c907c, + 0xbf0aff7c, 0x00000060, + 0xbf85ffbc, 0xbe802f00, + 0xbe822f02, 0xbe842f04, + 0xbe862f06, 0xbe882f08, + 0xbe8a2f0a, 0xd7610002, + 0x0000f200, 0x80798179, + 0xd7610002, 0x0000f201, 0x80798179, 0xd7610002, - 0x0000f203, 0x80798179, - 0xd7610002, 0x0000f204, + 0x0000f202, 0x80798179, + 0xd7610002, 0x0000f203, 0x80798179, 0xd7610002, - 0x0000f205, 0x80798179, - 0xd7610002, 0x0000f206, + 0x0000f204, 0x80798179, + 0xd7610002, 0x0000f205, 0x80798179, 0xd7610002, - 0x0000f207, 0x80798179, - 0xd7610002, 0x0000f208, + 0x0000f206, 0x80798179, + 0xd7610002, 0x0000f207, 0x80798179, 0xd7610002, - 0x0000f209, 0x80798179, - 0xd7610002, 0x0000f20a, + 0x0000f208, 0x80798179, + 0xd7610002, 0x0000f209, 0x80798179, 0xd7610002, - 0x0000f20b, 0x80798179, - 0xe0704000, 0x705d0200, + 0x0000f20a, 0x80798179, + 0xd7610002, 0x0000f20b, + 0x80798179, 0xe0704000, + 0x705d0200, 0xbefe03c1, + 0x907c9973, 0x877c817c, + 0xbf06817c, 0xbf850002, + 0xbeff0380, 0xbf820001, + 0xbeff03c1, 0xb97b4306, + 0x877bc17b, 0xbf840044, + 0xbf8a0000, 0x877aff6d, + 0x80000000, 0xbf840040, + 0x8f7b867b, 0x8f7b827b, + 0xbef6037b, 0xb9702a05, + 0x80708170, 0xbf0d9973, + 0xbf850002, 0x8f708970, + 0xbf820001, 0x8f708a70, + 0xb97a1e06, 0x8f7a8a7a, + 0x80707a70, 0x8070ff70, + 0x00000200, 0x8070ff70, + 0x00000080, 0xbef603ff, + 0x01000000, 0xd7650000, + 0x000100c1, 0xd7660000, + 0x000200c1, 0x16000084, + 0x907c9973, 0x877c817c, + 0xbf06817c, 0xbefc0380, + 0xbf850012, 0xbe8303ff, + 0x00000080, 0xbf800000, + 0xbf800000, 0xbf800000, + 0xd8d80000, 0x01000000, + 0xbf8c0000, 0xe0704000, + 0x705d0100, 0x807c037c, + 0x80700370, 0xd5250000, + 0x0001ff00, 0x00000080, + 0xbf0a7b7c, 0xbf85fff4, + 0xbf820011, 0xbe8303ff, + 0x00000100, 0xbf800000, + 0xbf800000, 0xbf800000, + 0xd8d80000, 0x01000000, + 0xbf8c0000, 0xe0704000, + 0x705d0100, 0x807c037c, + 0x80700370, 0xd5250000, + 0x0001ff00, 0x00000100, + 0xbf0a7b7c, 0xbf85fff4, 0xbefe03c1, 0x907c9973, 0x877c817c, 0xbf06817c, - 0xbf850002, 0xbeff0380, - 0xbf820001, 0xbeff03c1, - 0xb97b4306, 0x877bc17b, - 0xbf840044, 0xbf8a0000, - 0x877aff73, 0x04000000, - 0xbf840040, 0x8f7b867b, - 0x8f7b827b, 0xbef6037b, - 0xb9702a05, 0x80708170, - 0xbf0d9973, 0xbf850002, - 0x8f708970, 0xbf820001, - 0x8f708a70, 0xb97a1e06, - 0x8f7a8a7a, 0x80707a70, - 0x8070ff70, 0x00000200, - 0x8070ff70, 0x00000080, - 0xbef603ff, 0x01000000, - 0xd7650000, 0x000100c1, - 0xd7660000, 0x000200c1, - 0x16000084, 0x907c9973, + 0xbf850004, 0xbef003ff, + 0x00000200, 0xbeff0380, + 0xbf820003, 0xbef003ff, + 0x00000400, 0xbeff03c1, + 0xb97b2a05, 0x807b817b, + 0x8f7b827b, 0x907c9973, 0x877c817c, 0xbf06817c, - 0xbefc0380, 0xbf850012, - 0xbe8303ff, 0x00000080, - 0xbf800000, 0xbf800000, - 0xbf800000, 0xd8d80000, - 0x01000000, 0xbf8c0000, - 0xe0704000, 0x705d0100, - 0x807c037c, 0x80700370, - 0xd5250000, 0x0001ff00, - 0x00000080, 0xbf0a7b7c, - 0xbf85fff4, 0xbf820011, - 0xbe8303ff, 0x00000100, - 0xbf800000, 0xbf800000, - 0xbf800000, 0xd8d80000, - 0x01000000, 0xbf8c0000, - 0xe0704000, 0x705d0100, - 0x807c037c, 0x80700370, - 0xd5250000, 0x0001ff00, - 0x00000100, 0xbf0a7b7c, - 0xbf85fff4, 0xbefe03c1, - 0x907c9973, 0x877c817c, - 0xbf06817c, 0xbf850004, - 0xbef003ff, 0x00000200, - 0xbeff0380, 0xbf820003, - 0xbef003ff, 0x00000400, - 0xbeff03c1, 0xb97b2a05, - 0x807b817b, 0x8f7b827b, - 0x907c9973, 0x877c817c, - 0xbf06817c, 0xbf850017, + 0xbf850017, 0xbef603ff, + 0x01000000, 0xbefc0384, + 0xbf0a7b7c, 0xbf840037, + 0x7e008700, 0x7e028701, + 0x7e048702, 0x7e068703, + 0xe0704000, 0x705d0000, + 0xe0704080, 0x705d0100, + 0xe0704100, 0x705d0200, + 0xe0704180, 0x705d0300, + 0x807c847c, 0x8070ff70, + 0x00000200, 0xbf0a7b7c, + 0xbf85ffef, 0xbf820025, 0xbef603ff, 0x01000000, 0xbefc0384, 0xbf0a7b7c, - 0xbf840037, 0x7e008700, + 0xbf840011, 0x7e008700, 0x7e028701, 0x7e048702, 0x7e068703, 0xe0704000, - 0x705d0000, 0xe0704080, - 0x705d0100, 0xe0704100, - 0x705d0200, 0xe0704180, + 0x705d0000, 0xe0704100, + 0x705d0100, 0xe0704200, + 0x705d0200, 0xe0704300, 0x705d0300, 0x807c847c, - 0x8070ff70, 0x00000200, + 0x8070ff70, 0x00000400, 0xbf0a7b7c, 0xbf85ffef, - 0xbf820025, 0xbef603ff, - 0x01000000, 0xbefc0384, - 0xbf0a7b7c, 0xbf840020, - 0x7e008700, 0x7e028701, - 0x7e048702, 0x7e068703, + 0xb97b1e06, 0x877bc17b, + 0xbf84000c, 0x8f7b837b, + 0x807b7c7b, 0xbefe03c1, + 0xbeff0380, 0x7e008700, 0xe0704000, 0x705d0000, - 0xe0704100, 0x705d0100, - 0xe0704200, 0x705d0200, - 0xe0704300, 0x705d0300, - 0x807c847c, 0x8070ff70, - 0x00000400, 0xbf0a7b7c, - 0xbf85ffef, 0xb97b1e06, - 0x877bc17b, 0xbf84000c, - 0x8f7b837b, 0x807b7c7b, - 0xbefe03c1, 0xbeff0380, - 0x7e008700, 0xe0704000, - 0x705d0000, 0x807c817c, - 0x8070ff70, 0x00000080, - 0xbf0a7b7c, 0xbf85fff8, - 0xbf82013c, 0xbef4037e, - 0x8775ff7f, 0x0000ffff, - 0x8875ff75, 0x00040000, - 0xbef60380, 0xbef703ff, - 0x10807fac, 0x876eff7f, - 0x08000000, 0x906e836e, - 0x88776e77, 0x876eff7f, - 0x70000000, 0x906e816e, - 0x88776e77, 0xb97202dc, - 0x8f729972, 0x8872727f, + 0x807c817c, 0x8070ff70, + 0x00000080, 0xbf0a7b7c, + 0xbf85fff8, 0xbf82013b, + 0xbef4037e, 0x8775ff7f, + 0x0000ffff, 0x8875ff75, + 0x00040000, 0xbef60380, + 0xbef703ff, 0x10807fac, + 0xb97202dc, 0x8f729972, 0x876eff7f, 0x04000000, 0xbf840034, 0xbefe03c1, 0x907c9972, 0x877c817c, @@ -2345,10 +2372,11 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xb96f2a05, 0x806f816f, 0x8f6f826f, 0x907c9972, 0x877c817c, 0xbf06817c, - 0xbf850021, 0xbef603ff, + 0xbf850024, 0xbef603ff, 0x01000000, 0xbeee0378, 0x8078ff78, 0x00000200, - 0xbefc0384, 0xe0304000, + 0xbefc0384, 0xbf0a6f7c, + 0xbf840050, 0xe0304000, 0x785d0000, 0xe0304080, 0x785d0100, 0xe0304100, 0x785d0200, 0xe0304180, @@ -2361,94 +2389,97 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x6e5d0000, 0xe0304080, 0x6e5d0100, 0xe0304100, 0x6e5d0200, 0xe0304180, - 0x6e5d0300, 0xbf820032, - 0xbef603ff, 0x01000000, - 0xbeee0378, 0x8078ff78, - 0x00000400, 0xbefc0384, + 0x6e5d0300, 0xbf8c3f70, + 0xbf820034, 0xbef603ff, + 0x01000000, 0xbeee0378, + 0x8078ff78, 0x00000400, + 0xbefc0384, 0xbf0a6f7c, + 0xbf840012, 0xe0304000, + 0x785d0000, 0xe0304100, + 0x785d0100, 0xe0304200, + 0x785d0200, 0xe0304300, + 0x785d0300, 0xbf8c3f70, + 0x7e008500, 0x7e028501, + 0x7e048502, 0x7e068503, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xb96f1e06, + 0x876fc16f, 0xbf84000e, + 0x8f6f836f, 0x806f7c6f, + 0xbefe03c1, 0xbeff0380, 0xe0304000, 0x785d0000, - 0xe0304100, 0x785d0100, - 0xe0304200, 0x785d0200, - 0xe0304300, 0x785d0300, 0xbf8c3f70, 0x7e008500, - 0x7e028501, 0x7e048502, - 0x7e068503, 0x807c847c, - 0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffee, - 0xb96f1e06, 0x876fc16f, - 0xbf84000e, 0x8f6f836f, - 0x806f7c6f, 0xbefe03c1, - 0xbeff0380, 0xe0304000, - 0x785d0000, 0xbf8c3f70, - 0x7e008500, 0x807c817c, - 0x8078ff78, 0x00000080, - 0xbf0a6f7c, 0xbf85fff7, - 0xbeff03c1, 0xe0304000, - 0x6e5d0000, 0xe0304100, - 0x6e5d0100, 0xe0304200, - 0x6e5d0200, 0xe0304300, - 0x6e5d0300, 0xbf8c3f70, + 0x807c817c, 0x8078ff78, + 0x00000080, 0xbf0a6f7c, + 0xbf85fff7, 0xbeff03c1, + 0xe0304000, 0x6e5d0000, + 0xe0304100, 0x6e5d0100, + 0xe0304200, 0x6e5d0200, + 0xe0304300, 0x6e5d0300, + 0xbf8c3f70, 0xb9782a05, + 0x80788178, 0xbf0d9972, + 0xbf850002, 0x8f788978, + 0xbf820001, 0x8f788a78, + 0xb96e1e06, 0x8f6e8a6e, + 0x80786e78, 0x8078ff78, + 0x00000200, 0x80f8ff78, + 0x00000050, 0xbef603ff, + 0x01000000, 0xbefc03ff, + 0x0000006c, 0x80f89078, + 0xf429003a, 0xf0000000, + 0xbf8cc07f, 0x80fc847c, + 0xbf800000, 0xbe803100, + 0xbe823102, 0x80f8a078, + 0xf42d003a, 0xf0000000, + 0xbf8cc07f, 0x80fc887c, + 0xbf800000, 0xbe803100, + 0xbe823102, 0xbe843104, + 0xbe863106, 0x80f8c078, + 0xf431003a, 0xf0000000, + 0xbf8cc07f, 0x80fc907c, + 0xbf800000, 0xbe803100, + 0xbe823102, 0xbe843104, + 0xbe863106, 0xbe883108, + 0xbe8a310a, 0xbe8c310c, + 0xbe8e310e, 0xbf06807c, + 0xbf84fff0, 0xba80f801, + 0x00000000, 0xbf8a0000, 0xb9782a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, 0xb96e1e06, 0x8f6e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, - 0x80f8ff78, 0x00000050, 0xbef603ff, 0x01000000, - 0xbefc03ff, 0x0000006c, - 0x80f89078, 0xf429003a, - 0xf0000000, 0xbf8cc07f, - 0x80fc847c, 0xbf800000, - 0xbe803100, 0xbe823102, - 0x80f8a078, 0xf42d003a, - 0xf0000000, 0xbf8cc07f, - 0x80fc887c, 0xbf800000, - 0xbe803100, 0xbe823102, - 0xbe843104, 0xbe863106, - 0x80f8c078, 0xf431003a, - 0xf0000000, 0xbf8cc07f, - 0x80fc907c, 0xbf800000, - 0xbe803100, 0xbe823102, - 0xbe843104, 0xbe863106, - 0xbe883108, 0xbe8a310a, - 0xbe8c310c, 0xbe8e310e, - 0xbf06807c, 0xbf84fff0, - 0xba80f801, 0x00000000, - 0xbf8a0000, 0xb9782a05, - 0x80788178, 0xbf0d9972, - 0xbf850002, 0x8f788978, - 0xbf820001, 0x8f788a78, - 0xb96e1e06, 0x8f6e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0xbef603ff, - 0x01000000, 0xf4211bfa, + 0xf4211bfa, 0xf0000000, + 0x80788478, 0xf4211b3a, 0xf0000000, 0x80788478, - 0xf4211b3a, 0xf0000000, - 0x80788478, 0xf4211b7a, + 0xf4211b7a, 0xf0000000, + 0x80788478, 0xf4211c3a, 0xf0000000, 0x80788478, - 0xf4211c3a, 0xf0000000, - 0x80788478, 0xf4211c7a, + 0xf4211c7a, 0xf0000000, + 0x80788478, 0xf4211eba, 0xf0000000, 0x80788478, - 0xf4211eba, 0xf0000000, - 0x80788478, 0xf4211efa, + 0xf4211efa, 0xf0000000, + 0x80788478, 0xf4211e7a, 0xf0000000, 0x80788478, - 0xf4211e7a, 0xf0000000, - 0x80788478, 0xf4211cfa, + 0xf4211cfa, 0xf0000000, + 0x80788478, 0xf4211bba, 0xf0000000, 0x80788478, + 0xbf8cc07f, 0xb9eef814, 0xf4211bba, 0xf0000000, 0x80788478, 0xbf8cc07f, - 0xb9eef814, 0xf4211bba, - 0xf0000000, 0x80788478, - 0xbf8cc07f, 0xb9eef815, - 0xbefc036f, 0xbefe0370, - 0xbeff0371, 0x876f7bff, - 0x000003ff, 0xb9ef4803, - 0x876f7bff, 0xfffff800, - 0x906f8b6f, 0xb9efa2c3, - 0xb9f3f801, 0xb96e2a05, - 0x806e816e, 0xbf0d9972, - 0xbf850002, 0x8f6e896e, - 0xbf820001, 0x8f6e8a6e, + 0xb9eef815, 0xbefc036f, + 0xbefe0370, 0xbeff0371, + 0x876f7bff, 0x000003ff, + 0xb9ef4803, 0x876f7bff, + 0xfffff800, 0x906f8b6f, + 0xb9efa2c3, 0xb9f3f801, + 0xb96e2a05, 0x806e816e, + 0xbf0d9972, 0xbf850002, + 0x8f6e896e, 0xbf820001, + 0x8f6e8a6e, 0xb96f1e06, + 0x8f6f8a6f, 0x806e6f6e, 0x806eff6e, 0x00000200, 0x806e746e, 0x826f8075, 0x876fff6f, 0x0000ffff, diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index 5081f91190b8..0348191e8592 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -35,10 +35,9 @@ var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised -var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 -var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_STATUS_HALT_MASK = 0x2000 +var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 @@ -52,8 +51,10 @@ var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11 var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1 var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 -var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF +var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 +var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80 +var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7 var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF @@ -63,46 +64,37 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 +var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000 + +var SQ_WAVE_MODE_EXCP_EN_SHIFT = 12 +var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT = 19 -var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 var SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT = 25 -var SQ_WAVE_IB_STS_REPLAY_W64H_SIZE = 1 var SQ_WAVE_IB_STS_REPLAY_W64H_MASK = 0x02000000 -var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE = 1 -var SQ_WAVE_IB_STS_RCNT_SIZE = 6 var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000 -var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800 -var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 -var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 - // bits [31:24] unused by SPI debug data var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31 var TTMP11_SAVE_REPLAY_W64H_MASK = 0x80000000 var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 24 var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0x7F000000 +var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23 +var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000 // SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] // when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC - -var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 -var S_SAVE_SPI_INIT_ATC_SHIFT = 27 -var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 -var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 +var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000 +var S_SAVE_PC_HI_HT_MASK = 0x01000000 var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 -var S_SAVE_PC_HI_RCNT_SHIFT = 26 -var S_SAVE_PC_HI_RCNT_MASK = 0xFC000000 -var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 25 -var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x02000000 -var S_SAVE_PC_HI_REPLAY_W64H_SHIFT = 24 -var S_SAVE_PC_HI_REPLAY_W64H_MASK = 0x01000000 +var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000 +var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31 var s_sgpr_save_num = 108 @@ -130,19 +122,10 @@ var s_save_ttmps_hi = s_save_trapsts var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC -var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 -var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 -var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 -var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 var S_WAVE_SIZE = 25 -var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT -var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK -var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT -var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK - var s_restore_spi_init_lo = exec_lo var s_restore_spi_init_hi = exec_hi var s_restore_mem_offset = ttmp12 @@ -179,51 +162,77 @@ L_JUMP_TO_RESTORE: L_SKIP_RESTORE: s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC - s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK -if SINGLE_STEP_MISSED_WORKAROUND - // No single step exceptions if MODE.DEBUG_EN=0. - s_getreg_b32 ttmp2, hwreg(HW_REG_MODE) - s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK - s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND + // Clear SPI_PRIO: do not save with elevated priority. + // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd. + s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK + + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - // Second-level trap already handled exception if STATUS.HALT=1. s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK + s_cbranch_scc0 L_NOT_HALTED + +L_HALTED: + // Host trap may occur while wave is halted. + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP +L_CHECK_SAVE: + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK + s_cbranch_scc1 L_SAVE + + // Wave is halted but neither host trap nor SAVECTX is raised. + // Caused by instruction fetch memory violation. + // Spin wait until context saved to prevent interrupt storm. + s_sleep 0x10 + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_branch L_CHECK_SAVE + +L_NOT_HALTED: + // Let second-level handle non-SAVECTX exception or trap. + // Any concurrent SAVECTX will be handled upon re-entry once halted. + + // Check non-maskable exceptions. memory_violation, illegal_instruction + // and xnack_error exceptions always cause the wave to enter the trap + // handler. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP + + // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi. + // Maskable exceptions only cause the wave to enter the trap handler if + // their respective bit in mode.excp_en is set. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK + s_cbranch_scc0 L_CHECK_TRAP_ID + + s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK + s_cbranch_scc0 L_NOT_ADDR_WATCH + s_bitset1_b32 ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch + +L_NOT_ADDR_WATCH: + s_getreg_b32 ttmp3, hwreg(HW_REG_MODE) + s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT + s_and_b32 ttmp2, ttmp2, ttmp3 + s_cbranch_scc1 L_FETCH_2ND_TRAP + +L_CHECK_TRAP_ID: + // Check trap_id != 0 + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP + +if SINGLE_STEP_MISSED_WORKAROUND // Prioritize single step exception over context save. // Second-level trap will halt wave and RFE, re-entering for SAVECTX. - s_cbranch_scc0 L_FETCH_2ND_TRAP - -L_NO_SINGLE_STEP_WORKAROUND: + s_getreg_b32 ttmp2, hwreg(HW_REG_MODE) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP end - - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK s_cbranch_scc1 L_SAVE - // If STATUS.MEM_VIOL is asserted then halt the wave to prevent - // the exception raising again and blocking context save. - s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK - s_cbranch_scc0 L_FETCH_2ND_TRAP - s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK - L_FETCH_2ND_TRAP: - #if ASIC_TARGET_NAVI1X - // Preserve and clear scalar XNACK state before issuing scalar loads. - // Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into - // unused space ttmp11[31:24]. - s_andn2_b32 ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK) - s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS) - s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK - s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT) - s_or_b32 ttmp11, ttmp11, ttmp3 - s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK - s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) - s_or_b32 ttmp11, ttmp11, ttmp3 - s_andn2_b32 ttmp2, ttmp2, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK) - s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + save_and_clear_ib_sts(ttmp14, ttmp15) #endif // Read second-level TBA/TMA from first-level TMA and jump if available. @@ -232,31 +241,49 @@ L_FETCH_2ND_TRAP: s_getreg_b32 ttmp14, hwreg(HW_REG_SHADER_TMA_LO) s_getreg_b32 ttmp15, hwreg(HW_REG_SHADER_TMA_HI) s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 + + s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag + s_waitcnt lgkmcnt(0) + s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT + s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK + s_or_b32 ttmp11, ttmp11, ttmp2 + s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA s_waitcnt lgkmcnt(0) s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA s_waitcnt lgkmcnt(0) + s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler L_NO_NEXT_TRAP: - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK - s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly. - s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0 - s_addc_u32 ttmp1, ttmp1, 0 -L_EXCP_CASE: + // If not caused by trap then halt wave to prevent re-entry. + s_and_b32 ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK) + s_cbranch_scc1 L_TRAP_CASE + s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK + + // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set. + // Rewind the PC to prevent this from occurring. + s_sub_u32 ttmp0, ttmp0, 0x8 + s_subb_u32 ttmp1, ttmp1, 0x0 + + s_branch L_EXIT_TRAP + +L_TRAP_CASE: + // Host trap will not cause trap re-entry. + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK + s_cbranch_scc1 L_EXIT_TRAP + + // Advance past trap instruction to prevent re-entry. + s_add_u32 ttmp0, ttmp0, 0x4 + s_addc_u32 ttmp1, ttmp1, 0x0 + +L_EXIT_TRAP: s_and_b32 ttmp1, ttmp1, 0xFFFF #if ASIC_TARGET_NAVI1X - // Restore SQ_WAVE_IB_STS. - s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) - s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK - s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT) - s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK - s_or_b32 ttmp2, ttmp2, ttmp3 - s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + restore_ib_sts(ttmp14, ttmp15) #endif // Restore SQ_WAVE_STATUS. @@ -272,19 +299,7 @@ L_SAVE: s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit #if ASIC_TARGET_NAVI1X - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) - s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT - s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) - s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT - s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT, SQ_WAVE_IB_STS_REPLAY_W64H_SIZE) - s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_REPLAY_W64H_SHIFT - s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY and REPLAY_W64H in IB_STS - s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG - - s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp + save_and_clear_ib_sts(s_save_tmp, s_save_trapsts) #endif /* inform SPI the readiness and wait for SPI's go signal */ @@ -305,16 +320,57 @@ L_SLEEP: s_waitcnt lgkmcnt(0) #endif + // Save first_wave flag so we can clear high bits of save address. + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK + s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT) + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp + +#if NO_SQC_STORE + // Trap temporaries must be saved via VGPR but all VGPRs are in use. + // There is no ttmp space to hold the resource constant for VGPR save. + // Save v0 by itself since it requires only two SGPRs. + s_mov_b32 s_save_ttmps_lo, exec_lo + s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF + s_mov_b32 exec_lo, 0xFFFFFFFF + s_mov_b32 exec_hi, 0xFFFFFFFF + global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] slc:1 glc:1 + v_mov_b32 v0, 0x0 + s_mov_b32 exec_lo, s_save_ttmps_lo + s_mov_b32 exec_hi, s_save_ttmps_hi +#endif + // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic - // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 + // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40 get_wave_size(s_save_ttmps_hi) get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi) + get_svgpr_size_bytes(s_save_ttmps_hi) + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes() s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0 -#if ASIC_TARGET_NAVI1X +#if NO_SQC_STORE + v_writelane_b32 v0, ttmp4, 0x4 + v_writelane_b32 v0, ttmp5, 0x5 + v_writelane_b32 v0, ttmp6, 0x6 + v_writelane_b32 v0, ttmp7, 0x7 + v_writelane_b32 v0, ttmp8, 0x8 + v_writelane_b32 v0, ttmp9, 0x9 + v_writelane_b32 v0, ttmp10, 0xA + v_writelane_b32 v0, ttmp11, 0xB + v_writelane_b32 v0, ttmp13, 0xD + v_writelane_b32 v0, exec_lo, 0xE + v_writelane_b32 v0, exec_hi, 0xF + + s_mov_b32 exec_lo, 0x3FFF + s_mov_b32 exec_hi, 0x0 + global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] inst_offset:0x40 slc:1 glc:1 + v_readlane_b32 ttmp14, v0, 0xE + v_readlane_b32 ttmp15, v0, 0xF + s_mov_b32 exec_lo, ttmp14 + s_mov_b32 exec_hi, ttmp15 +#else s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1 s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1 s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1 @@ -326,12 +382,6 @@ L_SLEEP: s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC - s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK - s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) - s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC - s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK - s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) - s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE s_mov_b32 s_save_m0, m0 @@ -361,7 +411,9 @@ L_SAVE_4VGPR_WAVE32: // VGPR Allocated in 4-GPR granularity +#if !NO_SQC_STORE buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 +#endif buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2 buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3 @@ -372,7 +424,9 @@ L_SAVE_4VGPR_WAVE64: // VGPR Allocated in 4-GPR granularity +#if !NO_SQC_STORE buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 +#endif buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 @@ -397,7 +451,8 @@ L_SAVE_HWREG: write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) - write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset) + s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK + write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset) write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset) write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset) @@ -418,9 +473,13 @@ L_SAVE_HWREG: write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) #if NO_SQC_STORE - // Write HWREG/SGPRs with 32 VGPR lanes, wave32 is common case. + // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this. + s_mov_b32 exec_lo, 0xFFFF s_mov_b32 exec_hi, 0x0 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + + // Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode. + s_mov_b32 exec_lo, 0xFFFFFFFF #endif /* save SGPRs */ @@ -506,7 +565,7 @@ L_SAVE_LDS_NORMAL: s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE s_barrier //LDS is used? wait for other waves in the same TG - s_and_b32 s_save_tmp, s_wave_size, S_SAVE_SPI_INIT_FIRST_WAVE_MASK + s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK s_cbranch_scc0 L_SAVE_LDS_DONE // first wave do LDS save; @@ -628,7 +687,7 @@ L_SAVE_VGPR_WAVE64: // VGPR store using dw burst s_mov_b32 m0, 0x4 //VGPR initial index value =4 s_cmp_lt_u32 m0, s_save_alloc_size - s_cbranch_scc0 L_SAVE_VGPR_END + s_cbranch_scc0 L_SAVE_SHARED_VGPR L_SAVE_VGPR_W64_LOOP: v_movrels_b32 v0, v0 //v0 = v[0+m0] @@ -646,6 +705,7 @@ L_SAVE_VGPR_W64_LOOP: s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete? +L_SAVE_SHARED_VGPR: //Below part will be the save shared vgpr part (new for gfx10) s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? @@ -674,12 +734,7 @@ L_RESTORE: s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK - s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) - s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK - s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) - s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE + //determine it is wave32 or wave64 get_wave_size(s_restore_size) @@ -765,6 +820,8 @@ L_RESTORE_VGPR_NORMAL: s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 s_mov_b32 m0, 4 //VGPR initial index value = 4 + s_cmp_lt_u32 m0, s_restore_alloc_size + s_cbranch_scc0 L_RESTORE_SGPR L_RESTORE_VGPR_WAVE32_LOOP: buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 @@ -786,6 +843,7 @@ L_RESTORE_VGPR_WAVE32_LOOP: buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*2 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*3 + s_waitcnt vmcnt(0) s_branch L_RESTORE_SGPR @@ -796,6 +854,8 @@ L_RESTORE_VGPR_WAVE64: s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 s_mov_b32 m0, 4 //VGPR initial index value = 4 + s_cmp_lt_u32 m0, s_restore_alloc_size + s_cbranch_scc0 L_RESTORE_SHARED_VGPR L_RESTORE_VGPR_WAVE64_LOOP: buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 @@ -812,6 +872,7 @@ L_RESTORE_VGPR_WAVE64_LOOP: s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? +L_RESTORE_SHARED_VGPR: //Below part will be the restore shared vgpr part (new for gfx10) s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? @@ -945,8 +1006,10 @@ L_RESTORE_HWREG: s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic - // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 + // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40 get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size) + get_svgpr_size_bytes(s_restore_ttmps_hi) + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes() s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 @@ -957,23 +1020,7 @@ L_RESTORE_HWREG: s_waitcnt lgkmcnt(0) #if ASIC_TARGET_NAVI1X - s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT - s_mov_b32 s_restore_tmp, 0x0 - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_REPLAY_W64H_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_REPLAY_W64H_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - - s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT - s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + restore_ib_sts(s_restore_tmp, s_restore_m0) #endif s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS @@ -1089,5 +1136,29 @@ end function get_wave_size(s_reg) s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE) s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE - s_or_b32 s_reg, s_save_spi_init_hi, s_reg //share with exec_hi, it's at bit25 +end + +function save_and_clear_ib_sts(tmp1, tmp2) + // Preserve and clear scalar XNACK state before issuing scalar loads. + // Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into + // unused space ttmp11[31:24]. + s_andn2_b32 ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK) + s_getreg_b32 tmp1, hwreg(HW_REG_IB_STS) + s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK + s_lshl_b32 tmp2, tmp2, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT) + s_or_b32 ttmp11, ttmp11, tmp2 + s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_lshl_b32 tmp2, tmp2, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_or_b32 ttmp11, ttmp11, tmp2 + s_andn2_b32 tmp1, tmp1, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK) + s_setreg_b32 hwreg(HW_REG_IB_STS), tmp1 +end + +function restore_ib_sts(tmp1, tmp2) + s_lshr_b32 tmp1, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_lshr_b32 tmp1, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT) + s_and_b32 tmp1, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK + s_or_b32 tmp1, tmp1, tmp2 + s_setreg_b32 hwreg(HW_REG_IB_STS), tmp1 end diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index eed78a04e7c7..6770cbe3250a 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -46,8 +46,6 @@ var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN /**************************************************************************/ /* variables */ /**************************************************************************/ -var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 -var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_STATUS_HALT_MASK = 0x2000 @@ -56,6 +54,7 @@ var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1 var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3 var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29 var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000 +var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 @@ -72,8 +71,10 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 #endif var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 -var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask +var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 +var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80 +var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7 var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF @@ -83,37 +84,30 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 +var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000 var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK = 0x10000000 -var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME +var SQ_WAVE_MODE_EXCP_EN_SHIFT = 12 +var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT = 19 + var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000 -var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800 -var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 -var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 - var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000 +var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23 +var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000 /* Save */ var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE - -var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit -var S_SAVE_SPI_INIT_ATC_SHIFT = 27 -var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype -var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 +var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000 +var S_SAVE_PC_HI_HT_MASK = 0x01000000 var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 -var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used -var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME - var s_save_spi_init_lo = exec_lo var s_save_spi_init_hi = exec_hi @@ -140,18 +134,9 @@ var s_save_ttmps_hi = s_save_trapsts //no conflict var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC -var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit -var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 -var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype -var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 -var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT -var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK -var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT -var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK - var s_restore_spi_init_lo = exec_lo var s_restore_spi_init_hi = exec_hi @@ -199,71 +184,77 @@ L_JUMP_TO_RESTORE: L_SKIP_RESTORE: s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC - s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save -if SINGLE_STEP_MISSED_WORKAROUND - // No single step exceptions if MODE.DEBUG_EN=0. - s_getreg_b32 ttmp2, hwreg(HW_REG_MODE) - s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK - s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND + // Clear SPI_PRIO: do not save with elevated priority. + // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd. + s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK - // Second-level trap already handled exception if STATUS.HALT=1. - s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - // Prioritize single step exception over context save. - // Second-level trap will halt wave and RFE, re-entering for SAVECTX. - s_cbranch_scc0 L_FETCH_2ND_TRAP + s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK + s_cbranch_scc0 L_NOT_HALTED -L_NO_SINGLE_STEP_WORKAROUND: -end +L_HALTED: + // Host trap may occur while wave is halted. + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) +L_CHECK_SAVE: s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save s_cbranch_scc1 L_SAVE //this is the operation for save - // ********* Handle non-CWSR traps ******************* - - // Illegal instruction is a non-maskable exception which blocks context save. - // Halt the wavefront and return from the trap. - s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK - s_cbranch_scc1 L_HALT_WAVE - - // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA. - // Instead, halt the wavefront and return from the trap. - s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK - s_cbranch_scc0 L_FETCH_2ND_TRAP - -L_HALT_WAVE: - // If STATUS.HALT is set then this fault must come from SQC instruction fetch. - // We cannot prevent further faults. Spin wait until context saved. - s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK - s_cbranch_scc0 L_NOT_ALREADY_HALTED - -L_WAIT_CTX_SAVE: + // Wave is halted but neither host trap nor SAVECTX is raised. + // Caused by instruction fetch memory violation. + // Spin wait until context saved to prevent interrupt storm. s_sleep 0x10 - s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS) - s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK - s_cbranch_scc0 L_WAIT_CTX_SAVE + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_branch L_CHECK_SAVE + +L_NOT_HALTED: + // Let second-level handle non-SAVECTX exception or trap. + // Any concurrent SAVECTX will be handled upon re-entry once halted. + + // Check non-maskable exceptions. memory_violation, illegal_instruction + // and xnack_error exceptions always cause the wave to enter the trap + // handler. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP + + // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi. + // Maskable exceptions only cause the wave to enter the trap handler if + // their respective bit in mode.excp_en is set. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK + s_cbranch_scc0 L_CHECK_TRAP_ID + + s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK + s_cbranch_scc0 L_NOT_ADDR_WATCH + s_bitset1_b32 ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch + +L_NOT_ADDR_WATCH: + s_getreg_b32 ttmp3, hwreg(HW_REG_MODE) + s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT + s_and_b32 ttmp2, ttmp2, ttmp3 + s_cbranch_scc1 L_FETCH_2ND_TRAP + +L_CHECK_TRAP_ID: + // Check trap_id != 0 + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP -L_NOT_ALREADY_HALTED: - s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK +if SINGLE_STEP_MISSED_WORKAROUND + // Prioritize single step exception over context save. + // Second-level trap will halt wave and RFE, re-entering for SAVECTX. + s_getreg_b32 ttmp2, hwreg(HW_REG_MODE) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP +end - // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set. - // Rewind the PC to prevent this from occurring. The debugger compensates for this. - s_sub_u32 ttmp0, ttmp0, 0x8 - s_subb_u32 ttmp1, ttmp1, 0x0 + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK + s_cbranch_scc1 L_SAVE L_FETCH_2ND_TRAP: // Preserve and clear scalar XNACK state before issuing scalar reads. - // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26]. - s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS) - s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK - s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) - s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK - s_or_b32 ttmp11, ttmp11, ttmp3 - - s_andn2_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK - s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + save_and_clear_ib_sts(ttmp14) // Read second-level TBA/TMA from first-level TMA and jump if available. // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) @@ -271,27 +262,48 @@ L_FETCH_2ND_TRAP: s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO) s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI) s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 + + s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag + s_waitcnt lgkmcnt(0) + s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT + s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK + s_or_b32 ttmp11, ttmp11, ttmp2 + s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA s_waitcnt lgkmcnt(0) s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA s_waitcnt lgkmcnt(0) + s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler L_NO_NEXT_TRAP: - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception - s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly. - s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0 - s_addc_u32 ttmp1, ttmp1, 0 -L_EXCP_CASE: + // If not caused by trap then halt wave to prevent re-entry. + s_and_b32 ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK) + s_cbranch_scc1 L_TRAP_CASE + s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK + + // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set. + // Rewind the PC to prevent this from occurring. + s_sub_u32 ttmp0, ttmp0, 0x8 + s_subb_u32 ttmp1, ttmp1, 0x0 + + s_branch L_EXIT_TRAP + +L_TRAP_CASE: + // Host trap will not cause trap re-entry. + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK + s_cbranch_scc1 L_EXIT_TRAP + + // Advance past trap instruction to prevent re-entry. + s_add_u32 ttmp0, ttmp0, 0x4 + s_addc_u32 ttmp1, ttmp1, 0x0 + +L_EXIT_TRAP: s_and_b32 ttmp1, ttmp1, 0xFFFF - // Restore SQ_WAVE_IB_STS. - s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) - s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK - s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + restore_ib_sts(ttmp14) // Restore SQ_WAVE_STATUS. s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 @@ -312,16 +324,7 @@ L_SAVE: s_mov_b32 s_save_tmp, 0 //clear saveCtx bit s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT - s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT - s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY - s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT - s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS - s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG - - s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp + save_and_clear_ib_sts(s_save_tmp) /* inform SPI the readiness and wait for SPI's go signal */ s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI @@ -360,12 +363,6 @@ L_SAVE: s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC - s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK - s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position - s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC - s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK - s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position - s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE //FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?) s_mov_b32 s_save_m0, m0 //save M0 @@ -690,12 +687,6 @@ L_RESTORE: s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK - s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position - s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK - s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position - s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE /* global mem offset */ // s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0 @@ -889,19 +880,7 @@ L_RESTORE: s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1 s_waitcnt lgkmcnt(0) - //reuse s_restore_m0 as a temp register - s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT - s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT - s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + restore_ib_sts(s_restore_tmp) s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 @@ -910,8 +889,7 @@ L_RESTORE: s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time -// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution - s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc + s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution /**************************************************************************/ @@ -1078,3 +1056,19 @@ function set_status_without_spi_prio(status, tmp) s_nop 0x2 // avoid S_SETREG => S_SETREG hazard s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status end + +function save_and_clear_ib_sts(tmp) + // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26]. + s_getreg_b32 tmp, hwreg(HW_REG_IB_STS) + s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_lshl_b32 tmp, tmp, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK + s_or_b32 ttmp11, ttmp11, tmp + s_setreg_imm32_b32 hwreg(HW_REG_IB_STS), 0x0 +end + +function restore_ib_sts(tmp) + s_lshr_b32 tmp, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_setreg_b32 hwreg(HW_REG_IB_STS), tmp +end -- cgit v1.2.3 From 6a8170383c7acdf6fb8da1f3774fa2bc9191d628 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Thu, 30 Dec 2021 21:32:06 +0800 Subject: drm/amdkfd: Add gfx11 trap handler Based on gfx10 with following changes: - GPR_ALLOC.VGPR_SIZE field moved (and size corrected in gfx10) - s_sendmsg_rtn_b64 replaces some s_sendmsg/s_getreg - Buffer instructions no longer have direct-to-LDS modifier Signed-off-by: Jay Cornwall Reviewed-by: Laurent Morichetti Signed-off-by: Eric Huang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 463 ++++++++++++++++++++- .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm | 69 ++- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 6 +- 3 files changed, 507 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 8cbdc7f519c6..60a81649cf12 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -776,7 +776,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xe0704100, 0x705d0100, 0xe0704200, 0x705d0200, 0xe0704300, 0x705d0300, - 0xb9702a05, 0x80708170, + 0xb9703a05, 0x80708170, 0xbf0d9973, 0xbf850002, 0x8f708970, 0xbf820001, 0x8f708a70, 0xb97a1e06, @@ -855,7 +855,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0x877aff6d, 0x80000000, 0xbf840040, 0x8f7b867b, 0x8f7b827b, 0xbef6037b, - 0xb9702a05, 0x80708170, + 0xb9703a05, 0x80708170, 0xbf0d9973, 0xbf850002, 0x8f708970, 0xbf820001, 0x8f708a70, 0xb97a1e06, @@ -891,7 +891,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbef003ff, 0x00000200, 0xbeff0380, 0xbf820003, 0xbef003ff, 0x00000400, - 0xbeff03c1, 0xb97b2a05, + 0xbeff03c1, 0xb97b3a05, 0x807b817b, 0x8f7b827b, 0x907c9973, 0x877c817c, 0xbf06817c, 0xbf850017, @@ -939,7 +939,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xb96f4306, 0x876fc16f, 0xbf840029, 0x8f6f866f, 0x8f6f826f, 0xbef6036f, - 0xb9782a05, 0x80788178, + 0xb9783a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, 0xb96e1e06, @@ -962,7 +962,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0x907c9972, 0x877c817c, 0xbf06817c, 0xbf850002, 0xbeff0380, 0xbf820001, - 0xbeff03c1, 0xb96f2a05, + 0xbeff03c1, 0xb96f3a05, 0x806f816f, 0x8f6f826f, 0x907c9972, 0x877c817c, 0xbf06817c, 0xbf850024, @@ -1010,7 +1010,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0x6e5d0100, 0xe0304200, 0x6e5d0200, 0xe0304300, 0x6e5d0300, 0xbf8c3f70, - 0xb9782a05, 0x80788178, + 0xb9783a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, 0xb96e1e06, @@ -1037,7 +1037,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbe8c310c, 0xbe8e310e, 0xbf06807c, 0xbf84fff0, 0xba80f801, 0x00000000, - 0xbf8a0000, 0xb9782a05, + 0xbf8a0000, 0xb9783a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, @@ -2261,7 +2261,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbf8a0000, 0x877aff6d, 0x80000000, 0xbf840040, 0x8f7b867b, 0x8f7b827b, - 0xbef6037b, 0xb9702a05, + 0xbef6037b, 0xb9703a05, 0x80708170, 0xbf0d9973, 0xbf850002, 0x8f708970, 0xbf820001, 0x8f708a70, @@ -2298,7 +2298,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x00000200, 0xbeff0380, 0xbf820003, 0xbef003ff, 0x00000400, 0xbeff03c1, - 0xb97b2a05, 0x807b817b, + 0xb97b3a05, 0x807b817b, 0x8f7b827b, 0x907c9973, 0x877c817c, 0xbf06817c, 0xbf850017, 0xbef603ff, @@ -2345,7 +2345,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbeff03c1, 0xb96f4306, 0x876fc16f, 0xbf840029, 0x8f6f866f, 0x8f6f826f, - 0xbef6036f, 0xb9782a05, + 0xbef6036f, 0xb9783a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, @@ -2369,7 +2369,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x877c817c, 0xbf06817c, 0xbf850002, 0xbeff0380, 0xbf820001, 0xbeff03c1, - 0xb96f2a05, 0x806f816f, + 0xb96f3a05, 0x806f816f, 0x8f6f826f, 0x907c9972, 0x877c817c, 0xbf06817c, 0xbf850024, 0xbef603ff, @@ -2416,7 +2416,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xe0304100, 0x6e5d0100, 0xe0304200, 0x6e5d0200, 0xe0304300, 0x6e5d0300, - 0xbf8c3f70, 0xb9782a05, + 0xbf8c3f70, 0xb9783a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, @@ -2444,7 +2444,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbe8e310e, 0xbf06807c, 0xbf84fff0, 0xba80f801, 0x00000000, 0xbf8a0000, - 0xb9782a05, 0x80788178, + 0xb9783a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, 0xb96e1e06, @@ -2494,3 +2494,440 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0x00000000, }; + +static const uint32_t cwsr_trap_gfx11_hex[] = { + 0xbfa00001, 0xbfa0021b, + 0xb0804006, 0xb8f8f802, + 0x91788678, 0xb8fbf803, + 0x8b6eff78, 0x00002000, + 0xbfa10009, 0x8b6eff6d, + 0x00ff0000, 0xbfa2001e, + 0x8b6eff7b, 0x00000400, + 0xbfa20041, 0xbf830010, + 0xb8fbf803, 0xbfa0fffa, + 0x8b6eff7b, 0x00000900, + 0xbfa20015, 0x8b6eff7b, + 0x000071ff, 0xbfa10008, + 0x8b6fff7b, 0x00007080, + 0xbfa10001, 0xbeee1287, + 0xb8eff801, 0x846e8c6e, + 0x8b6e6f6e, 0xbfa2000a, + 0x8b6eff6d, 0x00ff0000, + 0xbfa20007, 0xb8eef801, + 0x8b6eff6e, 0x00000800, + 0xbfa20003, 0x8b6eff7b, + 0x00000400, 0xbfa20026, + 0xbefa4d82, 0xbf89fc07, + 0x84fa887a, 0xf4005bbd, + 0xf8000010, 0xbf89fc07, + 0x846e976e, 0x9177ff77, + 0x00800000, 0x8c776e77, + 0xf4045bbd, 0xf8000000, + 0xbf89fc07, 0xf4045ebd, + 0xf8000008, 0xbf89fc07, + 0x8bee6e6e, 0xbfa10001, + 0xbe80486e, 0x8b6eff6d, + 0x01ff0000, 0xbfa20005, + 0x8c78ff78, 0x00002000, + 0x80ec886c, 0x82ed806d, + 0xbfa00005, 0x8b6eff6d, + 0x01000000, 0xbfa20002, + 0x806c846c, 0x826d806d, + 0x8b6dff6d, 0x0000ffff, + 0x8bfe7e7e, 0x8bea6a6a, + 0xb978f802, 0xbe804a6c, + 0x8b6dff6d, 0x0000ffff, + 0xbefa0080, 0xb97a0283, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbefe4d84, + 0xbf89fc07, 0x8b7aff7f, + 0x04000000, 0x847a857a, + 0x8c6d7a6d, 0xbefa007e, + 0x8b7bff7f, 0x0000ffff, + 0xbefe00c1, 0xbeff00c1, + 0xdca6c000, 0x007a0000, + 0x7e000280, 0xbefe007a, + 0xbeff007b, 0xb8fb02dc, + 0x847b997b, 0xb8fa3b05, + 0x807a817a, 0xbf0d997b, + 0xbfa20002, 0x847a897a, + 0xbfa00001, 0x847a8a7a, + 0xb8fb1e06, 0x847b8a7b, + 0x807a7b7a, 0x8b7bff7f, + 0x0000ffff, 0x807aff7a, + 0x00000200, 0x807a7e7a, + 0x827b807b, 0xd7610000, + 0x00010870, 0xd7610000, + 0x00010a71, 0xd7610000, + 0x00010c72, 0xd7610000, + 0x00010e73, 0xd7610000, + 0x00011074, 0xd7610000, + 0x00011275, 0xd7610000, + 0x00011476, 0xd7610000, + 0x00011677, 0xd7610000, + 0x00011a79, 0xd7610000, + 0x00011c7e, 0xd7610000, + 0x00011e7f, 0xbefe00ff, + 0x00003fff, 0xbeff0080, + 0xdca6c040, 0x007a0000, + 0xd760007a, 0x00011d00, + 0xd760007b, 0x00011f00, + 0xbefe007a, 0xbeff007b, + 0xbef4007e, 0x8b75ff7f, + 0x0000ffff, 0x8c75ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x10807fac, + 0xbef1007d, 0xbef00080, + 0xb8f302dc, 0x84739973, + 0xbefe00c1, 0x857d9973, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00002, 0xbeff00c1, + 0xbfa00009, 0xbef600ff, + 0x01000000, 0xe0685080, + 0x701d0100, 0xe0685100, + 0x701d0200, 0xe0685180, + 0x701d0300, 0xbfa00008, + 0xbef600ff, 0x01000000, + 0xe0685100, 0x701d0100, + 0xe0685200, 0x701d0200, + 0xe0685300, 0x701d0300, + 0xb8f03b05, 0x80708170, + 0xbf0d9973, 0xbfa20002, + 0x84708970, 0xbfa00001, + 0x84708a70, 0xb8fa1e06, + 0x847a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, + 0xbef600ff, 0x01000000, + 0x7e000280, 0x7e020280, + 0x7e040280, 0xbefd0080, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xd7610002, + 0x0000fa6c, 0x807d817d, + 0x917aff6d, 0x80000000, + 0xd7610002, 0x0000fa7a, + 0x807d817d, 0xd7610002, + 0x0000fa6e, 0x807d817d, + 0xd7610002, 0x0000fa6f, + 0x807d817d, 0xd7610002, + 0x0000fa78, 0x807d817d, + 0xb8faf803, 0xd7610002, + 0x0000fa7a, 0x807d817d, + 0xd7610002, 0x0000fa7b, + 0x807d817d, 0xb8f1f801, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xb8f1f814, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xb8f1f815, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xbefe00ff, + 0x0000ffff, 0xbeff0080, + 0xe0685000, 0x701d0200, + 0xbefe00c1, 0xb8f03b05, + 0x80708170, 0xbf0d9973, + 0xbfa20002, 0x84708970, + 0xbfa00001, 0x84708a70, + 0xb8fa1e06, 0x847a8a7a, + 0x80707a70, 0xbef600ff, + 0x01000000, 0xbef90080, + 0xbefd0080, 0xbf800000, + 0xbe804100, 0xbe824102, + 0xbe844104, 0xbe864106, + 0xbe884108, 0xbe8a410a, + 0xbe8c410c, 0xbe8e410e, + 0xd7610002, 0x0000f200, + 0x80798179, 0xd7610002, + 0x0000f201, 0x80798179, + 0xd7610002, 0x0000f202, + 0x80798179, 0xd7610002, + 0x0000f203, 0x80798179, + 0xd7610002, 0x0000f204, + 0x80798179, 0xd7610002, + 0x0000f205, 0x80798179, + 0xd7610002, 0x0000f206, + 0x80798179, 0xd7610002, + 0x0000f207, 0x80798179, + 0xd7610002, 0x0000f208, + 0x80798179, 0xd7610002, + 0x0000f209, 0x80798179, + 0xd7610002, 0x0000f20a, + 0x80798179, 0xd7610002, + 0x0000f20b, 0x80798179, + 0xd7610002, 0x0000f20c, + 0x80798179, 0xd7610002, + 0x0000f20d, 0x80798179, + 0xd7610002, 0x0000f20e, + 0x80798179, 0xd7610002, + 0x0000f20f, 0x80798179, + 0xbf06a079, 0xbfa10006, + 0xe0685000, 0x701d0200, + 0x8070ff70, 0x00000080, + 0xbef90080, 0x7e040280, + 0x807d907d, 0xbf0aff7d, + 0x00000060, 0xbfa2ffbc, + 0xbe804100, 0xbe824102, + 0xbe844104, 0xbe864106, + 0xbe884108, 0xbe8a410a, + 0xd7610002, 0x0000f200, + 0x80798179, 0xd7610002, + 0x0000f201, 0x80798179, + 0xd7610002, 0x0000f202, + 0x80798179, 0xd7610002, + 0x0000f203, 0x80798179, + 0xd7610002, 0x0000f204, + 0x80798179, 0xd7610002, + 0x0000f205, 0x80798179, + 0xd7610002, 0x0000f206, + 0x80798179, 0xd7610002, + 0x0000f207, 0x80798179, + 0xd7610002, 0x0000f208, + 0x80798179, 0xd7610002, + 0x0000f209, 0x80798179, + 0xd7610002, 0x0000f20a, + 0x80798179, 0xd7610002, + 0x0000f20b, 0x80798179, + 0xe0685000, 0x701d0200, + 0xbefe00c1, 0x857d9973, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8fb4306, 0x8b7bc17b, + 0xbfa10044, 0xbfbd0000, + 0x8b7aff6d, 0x80000000, + 0xbfa10040, 0x847b867b, + 0x847b827b, 0xbef6007b, + 0xb8f03b05, 0x80708170, + 0xbf0d9973, 0xbfa20002, + 0x84708970, 0xbfa00001, + 0x84708a70, 0xb8fa1e06, + 0x847a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xd71f0000, 0x000100c1, + 0xd7200000, 0x000200c1, + 0x16000084, 0x857d9973, + 0x8b7d817d, 0xbf06817d, + 0xbefd0080, 0xbfa20012, + 0xbe8300ff, 0x00000080, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf890000, + 0xe0685000, 0x701d0100, + 0x807d037d, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000080, 0xbf0a7b7d, + 0xbfa2fff4, 0xbfa00011, + 0xbe8300ff, 0x00000100, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf890000, + 0xe0685000, 0x701d0100, + 0x807d037d, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000100, 0xbf0a7b7d, + 0xbfa2fff4, 0xbefe00c1, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa20004, + 0xbef000ff, 0x00000200, + 0xbeff0080, 0xbfa00003, + 0xbef000ff, 0x00000400, + 0xbeff00c1, 0xb8fb3b05, + 0x807b817b, 0x847b827b, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa20017, + 0xbef600ff, 0x01000000, + 0xbefd0084, 0xbf0a7b7d, + 0xbfa10037, 0x7e008700, + 0x7e028701, 0x7e048702, + 0x7e068703, 0xe0685000, + 0x701d0000, 0xe0685080, + 0x701d0100, 0xe0685100, + 0x701d0200, 0xe0685180, + 0x701d0300, 0x807d847d, + 0x8070ff70, 0x00000200, + 0xbf0a7b7d, 0xbfa2ffef, + 0xbfa00025, 0xbef600ff, + 0x01000000, 0xbefd0084, + 0xbf0a7b7d, 0xbfa10011, + 0x7e008700, 0x7e028701, + 0x7e048702, 0x7e068703, + 0xe0685000, 0x701d0000, + 0xe0685100, 0x701d0100, + 0xe0685200, 0x701d0200, + 0xe0685300, 0x701d0300, + 0x807d847d, 0x8070ff70, + 0x00000400, 0xbf0a7b7d, + 0xbfa2ffef, 0xb8fb1e06, + 0x8b7bc17b, 0xbfa1000c, + 0x847b837b, 0x807b7d7b, + 0xbefe00c1, 0xbeff0080, + 0x7e008700, 0xe0685000, + 0x701d0000, 0x807d817d, + 0x8070ff70, 0x00000080, + 0xbf0a7b7d, 0xbfa2fff8, + 0xbfa00141, 0xbef4007e, + 0x8b75ff7f, 0x0000ffff, + 0x8c75ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x10807fac, 0xb8f202dc, + 0x84729972, 0x8b6eff7f, + 0x04000000, 0xbfa1003a, + 0xbefe00c1, 0x857d9972, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8ef4306, 0x8b6fc16f, + 0xbfa1002f, 0x846f866f, + 0x846f826f, 0xbef6006f, + 0xb8f83b05, 0x80788178, + 0xbf0d9972, 0xbfa20002, + 0x84788978, 0xbfa00001, + 0x84788a78, 0xb8ee1e06, + 0x846e8a6e, 0x80786e78, + 0x8078ff78, 0x00000200, + 0x8078ff78, 0x00000080, + 0xbef600ff, 0x01000000, + 0x857d9972, 0x8b7d817d, + 0xbf06817d, 0xbefd0080, + 0xbfa2000c, 0xe0500000, + 0x781d0000, 0xbf8903f7, + 0xdac00000, 0x00000000, + 0x807dff7d, 0x00000080, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff5, + 0xbfa0000b, 0xe0500000, + 0x781d0000, 0xbf8903f7, + 0xdac00000, 0x00000000, + 0x807dff7d, 0x00000100, + 0x8078ff78, 0x00000100, + 0xbf0a6f7d, 0xbfa2fff5, + 0xbef80080, 0xbefe00c1, + 0x857d9972, 0x8b7d817d, + 0xbf06817d, 0xbfa20002, + 0xbeff0080, 0xbfa00001, + 0xbeff00c1, 0xb8ef3b05, + 0x806f816f, 0x846f826f, + 0x857d9972, 0x8b7d817d, + 0xbf06817d, 0xbfa20024, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000200, 0xbefd0084, + 0xbf0a6f7d, 0xbfa10050, + 0xe0505000, 0x781d0000, + 0xe0505080, 0x781d0100, + 0xe0505100, 0x781d0200, + 0xe0505180, 0x781d0300, + 0xbf8903f7, 0x7e008500, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807d847d, + 0x8078ff78, 0x00000200, + 0xbf0a6f7d, 0xbfa2ffee, + 0xe0505000, 0x6e1d0000, + 0xe0505080, 0x6e1d0100, + 0xe0505100, 0x6e1d0200, + 0xe0505180, 0x6e1d0300, + 0xbf8903f7, 0xbfa00034, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefd0084, + 0xbf0a6f7d, 0xbfa10012, + 0xe0505000, 0x781d0000, + 0xe0505100, 0x781d0100, + 0xe0505200, 0x781d0200, + 0xe0505300, 0x781d0300, + 0xbf8903f7, 0x7e008500, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807d847d, + 0x8078ff78, 0x00000400, + 0xbf0a6f7d, 0xbfa2ffee, + 0xb8ef1e06, 0x8b6fc16f, + 0xbfa1000e, 0x846f836f, + 0x806f7d6f, 0xbefe00c1, + 0xbeff0080, 0xe0505000, + 0x781d0000, 0xbf8903f7, + 0x7e008500, 0x807d817d, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff7, + 0xbeff00c1, 0xe0505000, + 0x6e1d0000, 0xe0505100, + 0x6e1d0100, 0xe0505200, + 0x6e1d0200, 0xe0505300, + 0x6e1d0300, 0xbf8903f7, + 0xb8f83b05, 0x80788178, + 0xbf0d9972, 0xbfa20002, + 0x84788978, 0xbfa00001, + 0x84788a78, 0xb8ee1e06, + 0x846e8a6e, 0x80786e78, + 0x8078ff78, 0x00000200, + 0x80f8ff78, 0x00000050, + 0xbef600ff, 0x01000000, + 0xbefd00ff, 0x0000006c, + 0x80f89078, 0xf428403a, + 0xf0000000, 0xbf89fc07, + 0x80fd847d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0x80f8a078, 0xf42c403a, + 0xf0000000, 0xbf89fc07, + 0x80fd887d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0x80f8c078, 0xf430403a, + 0xf0000000, 0xbf89fc07, + 0x80fd907d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0xbe884308, 0xbe8a430a, + 0xbe8c430c, 0xbe8e430e, + 0xbf06807d, 0xbfa1fff0, + 0xb980f801, 0x00000000, + 0xbfbd0000, 0xb8f83b05, + 0x80788178, 0xbf0d9972, + 0xbfa20002, 0x84788978, + 0xbfa00001, 0x84788a78, + 0xb8ee1e06, 0x846e8a6e, + 0x80786e78, 0x8078ff78, + 0x00000200, 0xbef600ff, + 0x01000000, 0xf4205bfa, + 0xf0000000, 0x80788478, + 0xf4205b3a, 0xf0000000, + 0x80788478, 0xf4205b7a, + 0xf0000000, 0x80788478, + 0xf4205c3a, 0xf0000000, + 0x80788478, 0xf4205c7a, + 0xf0000000, 0x80788478, + 0xf4205eba, 0xf0000000, + 0x80788478, 0xf4205efa, + 0xf0000000, 0x80788478, + 0xf4205e7a, 0xf0000000, + 0x80788478, 0xf4205cfa, + 0xf0000000, 0x80788478, + 0xf4205bba, 0xf0000000, + 0x80788478, 0xbf89fc07, + 0xb96ef814, 0xf4205bba, + 0xf0000000, 0x80788478, + 0xbf89fc07, 0xb96ef815, + 0xbefd006f, 0xbefe0070, + 0xbeff0071, 0x8b6f7bff, + 0x000003ff, 0xb96f4803, + 0x8b6f7bff, 0xfffff800, + 0x856f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee3b05, + 0x806e816e, 0xbf0d9972, + 0xbfa20002, 0x846e896e, + 0xbfa00001, 0x846e8a6e, + 0xb8ef1e06, 0x846f8a6f, + 0x806e6f6e, 0x806eff6e, + 0x00000200, 0x806e746e, + 0x826f8075, 0x8b6fff6f, + 0x0000ffff, 0xf4085c37, + 0xf8000050, 0xf4085d37, + 0xf8000060, 0xf4005e77, + 0xf8000074, 0xbf89fc07, + 0x8b6dff6d, 0x0000ffff, + 0x8bfe7e7e, 0x8bea6a6a, + 0xb97af802, 0xbe804a6c, + 0xbfb00000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index 0348191e8592..250ab007399b 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -23,15 +23,26 @@ /* To compile this assembly code: * * Navi1x: - * cpp -DASIC_TARGET_NAVI1X=1 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3 - * sp3-nv1x nv1x.sp3 -hex nv1x.hex + * cpp -DASIC_FAMILY=CHIP_NAVI10 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3 + * sp3 nv1x.sp3 -hex nv1x.hex * - * Others: - * cpp -DASIC_TARGET_NAVI1X=0 cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3 - * sp3-gfx10 gfx10.sp3 -hex gfx10.hex + * gfx10: + * cpp -DASIC_FAMILY=CHIP_SIENNA_CICHLID cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3 + * sp3 gfx10.sp3 -hex gfx10.hex + * + * gfx11: + * cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3 + * sp3 gfx11.sp3 -hex gfx11.hex */ -#define NO_SQC_STORE !ASIC_TARGET_NAVI1X +#define CHIP_NAVI10 26 +#define CHIP_SIENNA_CICHLID 30 +#define CHIP_PLUM_BONITO 36 + +#define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID) +#define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID) +#define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO) +#define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO) var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised @@ -41,15 +52,18 @@ var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 -var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 -var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 -var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 -var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 4 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8 var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24 var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4 var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11 var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1 +#if ASIC_FAMILY < CHIP_PLUM_BONITO +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 +#else +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12 +#endif + var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 @@ -231,15 +245,20 @@ end s_cbranch_scc1 L_SAVE L_FETCH_2ND_TRAP: -#if ASIC_TARGET_NAVI1X +#if HAVE_XNACK save_and_clear_ib_sts(ttmp14, ttmp15) #endif // Read second-level TBA/TMA from first-level TMA and jump if available. // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) // ttmp12 holds SQ_WAVE_STATUS +#if HAVE_SENDMSG_RTN + s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA) + s_waitcnt lgkmcnt(0) +#else s_getreg_b32 ttmp14, hwreg(HW_REG_SHADER_TMA_LO) s_getreg_b32 ttmp15, hwreg(HW_REG_SHADER_TMA_HI) +#endif s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag @@ -282,7 +301,7 @@ L_TRAP_CASE: L_EXIT_TRAP: s_and_b32 ttmp1, ttmp1, 0xFFFF -#if ASIC_TARGET_NAVI1X +#if HAVE_XNACK restore_ib_sts(ttmp14, ttmp15) #endif @@ -298,7 +317,7 @@ L_SAVE: s_mov_b32 s_save_tmp, 0 s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit -#if ASIC_TARGET_NAVI1X +#if HAVE_XNACK save_and_clear_ib_sts(s_save_tmp, s_save_trapsts) #endif @@ -307,9 +326,13 @@ L_SAVE: s_mov_b32 s_save_exec_hi, exec_hi s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive +#if HAVE_SENDMSG_RTN + s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE) +#else s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC +#endif -#if ASIC_TARGET_NAVI1X +#if ASIC_FAMILY < CHIP_SIENNA_CICHLID L_SLEEP: // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause // SQ hang, since the 7,8th wave could not get arbit to exec inst, while @@ -389,7 +412,7 @@ L_SLEEP: s_mov_b32 s_save_mem_offset, 0x0 get_wave_size(s_wave_size) -#if ASIC_TARGET_NAVI1X +#if HAVE_XNACK // Save and clear vector XNACK state late to free up SGPRs. s_getreg_b32 s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK) s_setreg_imm32_b32 hwreg(HW_REG_SHADER_XNACK_MASK), 0x0 @@ -777,7 +800,13 @@ L_RESTORE_LDS_NORMAL: s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 L_RESTORE_LDS_LOOP_W32: +#if HAVE_BUFFER_LDS_LOAD buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW +#else + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset + s_waitcnt vmcnt(0) + ds_store_addtid_b32 v0 +#endif s_add_u32 m0, m0, 128 // 128 DW s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128DW s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 @@ -785,7 +814,13 @@ L_RESTORE_LDS_LOOP_W32: s_branch L_RESTORE_VGPR L_RESTORE_LDS_LOOP_W64: +#if HAVE_BUFFER_LDS_LOAD buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW +#else + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset + s_waitcnt vmcnt(0) + ds_store_addtid_b32 v0 +#endif s_add_u32 m0, m0, 256 // 256 DW s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256DW s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 @@ -996,7 +1031,7 @@ L_RESTORE_HWREG: s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0 -#if ASIC_TARGET_NAVI1X +#if HAVE_XNACK s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask #endif @@ -1019,7 +1054,7 @@ L_RESTORE_HWREG: s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1 s_waitcnt lgkmcnt(0) -#if ASIC_TARGET_NAVI1X +#if HAVE_XNACK restore_ib_sts(s_restore_tmp, s_restore_m0) #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index f1a225a20719..8667e3df2d0b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -441,10 +441,14 @@ static void kfd_cwsr_init(struct kfd_dev *kfd) BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_nv1x_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex); - } else { + } else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) { BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_gfx10_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex); + } else { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_gfx11_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex); } kfd->cwsr_enabled = true; -- cgit v1.2.3 From 396beb91a9eb86cbfa404e4220cca8f3ada70777 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 6 Apr 2022 14:14:50 +0800 Subject: drm/amd/pm: correct the metrics version for SMU 11.0.11/12/13 Correct the metrics version used for SMU 11.0.11/12/13. Fixes misreported GPU metrics (e.g., fan speed, etc.) depending on which version of SMU firmware is loaded. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1925 Signed-off-by: Evan Quan Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 57 +++++++++++++++++----- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index d68be8f8850e..78f3d9e722bb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -697,12 +697,28 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu, uint32_t apu_percent = 0; uint32_t dgpu_percent = 0; - if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && - (smu->smc_fw_version >= 0x3A4900)) - use_metrics_v3 = true; - else if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && - (smu->smc_fw_version >= 0x3A4300)) - use_metrics_v2 = true; + switch (smu->adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(11, 0, 7): + if (smu->smc_fw_version >= 0x3A4900) + use_metrics_v3 = true; + else if (smu->smc_fw_version >= 0x3A4300) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 11): + if (smu->smc_fw_version >= 0x412D00) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 12): + if (smu->smc_fw_version >= 0x3B2300) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 13): + if (smu->smc_fw_version >= 0x491100) + use_metrics_v2 = true; + break; + default: + break; + } ret = smu_cmn_get_metrics_table(smu, NULL, @@ -3833,13 +3849,28 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu, uint16_t average_gfx_activity; int ret = 0; - if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && - (smu->smc_fw_version >= 0x3A4900)) - use_metrics_v3 = true; - else if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && - (smu->smc_fw_version >= 0x3A4300)) - use_metrics_v2 = true; - + switch (smu->adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(11, 0, 7): + if (smu->smc_fw_version >= 0x3A4900) + use_metrics_v3 = true; + else if (smu->smc_fw_version >= 0x3A4300) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 11): + if (smu->smc_fw_version >= 0x412D00) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 12): + if (smu->smc_fw_version >= 0x3B2300) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 13): + if (smu->smc_fw_version >= 0x491100) + use_metrics_v2 = true; + break; + default: + break; + } ret = smu_cmn_get_metrics_table(smu, &metrics_external, -- cgit v1.2.3 From caa5eadc140ca3748b2ae187da36383edc779300 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 19 May 2022 17:28:12 +0800 Subject: drm/amdgpu: suppress some compile warnings Suppress two compile warnings about "no previous prototype". Reported-by: kernel test robot Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 +- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 881570dced41..16cdfb30b013 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1361,7 +1361,7 @@ union mall_info { struct mall_info_v1_0 v1; }; -int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev) +static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev) { struct binary_header *bhdr; union mall_info *mall_info; diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index c6a8520053bb..9e18a2b22607 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -42,6 +42,7 @@ #include "soc15.h" #include "soc15_common.h" +#include "soc21.h" static const struct amd_ip_funcs soc21_common_ip_funcs; -- cgit v1.2.3 From a35faec3db0e13aac8ea720bc1a3503081dd5a3d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 16 May 2022 10:05:48 +0300 Subject: drm/amdgpu: Off by one in dm_dmub_outbox1_low_irq() The > ARRAY_SIZE() should be >= ARRAY_SIZE() to prevent an out of bounds access. Fixes: e27c41d5b068 ("drm/amd/display: Support for DMUB HPD interrupt handling") Reviewed-by: Harry Wentland Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a92cfb055c15..5ea5e14b83c8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -769,7 +769,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) do { dc_stat_get_dmub_notification(adev->dm.dc, ¬ify); - if (notify.type > ARRAY_SIZE(dm->dmub_thread_offload)) { + if (notify.type >= ARRAY_SIZE(dm->dmub_thread_offload)) { DRM_ERROR("DM: notify type %d invalid!", notify.type); continue; } -- cgit v1.2.3 From 76818cdd11a25ac6cb1d98875719935d8d0e2e51 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Thu, 19 May 2022 17:46:36 -0400 Subject: drm/amd/display: add Coverage blend mode for overlay plane According to the KMS man page, there is a "Coverage" alpha blend mode that assumes the pixel color values have NOT been pre-multiplied and will be done when the actual blending to the background color values happens. Previously, this mode hasn't been enabled in our driver and it was assumed that all normal overlay planes are pre-multiplied by default. When a 3rd party app is used to input a image in a specific format, e.g. PNG, as a source of a overlay plane to blend with the background primary plane, the pixel color values are not pre-multiplied. So by adding "Coverage" blend mode, our driver will support those cases. Issue fixed: Overlay plane alpha channel blending is incorrect Issue tracker: https://gitlab.freedesktop.org/drm/amd/-/issues/1769 Reference: https://dri.freedesktop.org/docs/drm/gpu/drm-kms.html#plane-composition-properties Adding Coverage support also enables IGT kms_plane_alpha_blend Coverage subtests: 1. coverage-7efc 2. coverage-vs-premult-vs-constant Changes 1. Add DRM_MODE_BLEND_COVERAGE blend mode capability 2. Add "pre_multiplied_alpha" flag for Coverage case 3. Read the correct flag and set the DCN MPCC pre_multiplied register bit (only on overlay plane) Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1769 Signed-off-by: Sung Joon Kim Reviewed-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 17 ++++++++++---- drivers/gpu/drm/amd/display/dc/core/dc_surface.c | 2 ++ drivers/gpu/drm/amd/display/dc/dc.h | 2 ++ .../drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 27 +++++++++++----------- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 16 ++++++++----- 5 files changed, 40 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5ea5e14b83c8..70be67a56673 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5381,17 +5381,19 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev, static void fill_blending_from_plane_state(const struct drm_plane_state *plane_state, - bool *per_pixel_alpha, bool *global_alpha, - int *global_alpha_value) + bool *per_pixel_alpha, bool *pre_multiplied_alpha, + bool *global_alpha, int *global_alpha_value) { *per_pixel_alpha = false; + *pre_multiplied_alpha = true; *global_alpha = false; *global_alpha_value = 0xff; if (plane_state->plane->type != DRM_PLANE_TYPE_OVERLAY) return; - if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI) { + if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI || + plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) { static const uint32_t alpha_formats[] = { DRM_FORMAT_ARGB8888, DRM_FORMAT_RGBA8888, @@ -5406,6 +5408,9 @@ fill_blending_from_plane_state(const struct drm_plane_state *plane_state, break; } } + + if (per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) + *pre_multiplied_alpha = false; } if (plane_state->alpha < 0xffff) { @@ -5568,7 +5573,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, return ret; fill_blending_from_plane_state( - plane_state, &plane_info->per_pixel_alpha, + plane_state, &plane_info->per_pixel_alpha, &plane_info->pre_multiplied_alpha, &plane_info->global_alpha, &plane_info->global_alpha_value); return 0; @@ -5615,6 +5620,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, dc_plane_state->tiling_info = plane_info.tiling_info; dc_plane_state->visible = plane_info.visible; dc_plane_state->per_pixel_alpha = plane_info.per_pixel_alpha; + dc_plane_state->pre_multiplied_alpha = plane_info.pre_multiplied_alpha; dc_plane_state->global_alpha = plane_info.global_alpha; dc_plane_state->global_alpha_value = plane_info.global_alpha_value; dc_plane_state->dcc = plane_info.dcc; @@ -7911,7 +7917,8 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, if (plane->type == DRM_PLANE_TYPE_OVERLAY && plane_cap && plane_cap->per_pixel_alpha) { unsigned int blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) | - BIT(DRM_MODE_BLEND_PREMULTI); + BIT(DRM_MODE_BLEND_PREMULTI) | + BIT(DRM_MODE_BLEND_COVERAGE); drm_plane_create_alpha_property(plane); drm_plane_create_blend_mode_property(plane, blend_caps); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index e6b9c6a71841..5bc6ff2fa73e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -61,6 +61,8 @@ static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *pl plane_state->blend_tf->type = TF_TYPE_BYPASS; } + plane_state->pre_multiplied_alpha = true; + } static void dc_plane_destruct(struct dc_plane_state *plane_state) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index a31ea3644ec2..3960c74482be 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1011,6 +1011,7 @@ struct dc_plane_state { bool is_tiling_rotated; bool per_pixel_alpha; + bool pre_multiplied_alpha; bool global_alpha; int global_alpha_value; bool visible; @@ -1045,6 +1046,7 @@ struct dc_plane_info { bool horizontal_mirror; bool visible; bool per_pixel_alpha; + bool pre_multiplied_alpha; bool global_alpha; int global_alpha_value; bool input_csc_enabled; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index e02ac75afbf7..e3a62873c0e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2550,12 +2550,21 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; - if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; - blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; - } else if (per_pixel_alpha) { - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + if (per_pixel_alpha) { + /* DCN1.0 has output CM before MPC which seems to screw with + * pre-multiplied alpha. + */ + blnd_cfg.pre_multiplied_alpha = (is_rgb_cspace( + pipe_ctx->stream->output_color_space) + && pipe_ctx->plane_state->pre_multiplied_alpha); + if (pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } } else { + blnd_cfg.pre_multiplied_alpha = false; blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; } @@ -2564,14 +2573,6 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) else blnd_cfg.global_alpha = 0xff; - /* DCN1.0 has output CM before MPC which seems to screw with - * pre-multiplied alpha. - */ - blnd_cfg.pre_multiplied_alpha = is_rgb_cspace( - pipe_ctx->stream->output_color_space) - && per_pixel_alpha; - - /* * TODO: remove hack * Note: currently there is a bug in init_hw such that diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 0da024912dbe..ec6aa8d8b251 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2345,12 +2345,16 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; - if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; - blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; - } else if (per_pixel_alpha) { - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + if (per_pixel_alpha) { + blnd_cfg.pre_multiplied_alpha = pipe_ctx->plane_state->pre_multiplied_alpha; + if (pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } } else { + blnd_cfg.pre_multiplied_alpha = false; blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; } @@ -2364,7 +2368,7 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) blnd_cfg.top_gain = 0x1f000; blnd_cfg.bottom_inside_gain = 0x1f000; blnd_cfg.bottom_outside_gain = 0x1f000; - blnd_cfg.pre_multiplied_alpha = per_pixel_alpha; + if (pipe_ctx->plane_state->format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA) blnd_cfg.pre_multiplied_alpha = false; -- cgit v1.2.3 From a0ccc717c4ab3ef572f023fdceffb4b6df496a0d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 16 May 2022 14:12:33 -0400 Subject: drm/amdgpu/discovery: validate VCN and SDMA instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Validate the VCN and SDMA instances against the driver structure sizes to make sure we don't get into a situation where the firmware reports more instances than the driver supports. Reviewed-by: Guchun Chen Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 16cdfb30b013..47f0344205ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1130,13 +1130,24 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) adev->vcn.vcn_config[adev->vcn.num_vcn_inst] = ip->revision & 0xc0; ip->revision &= ~0xc0; - adev->vcn.num_vcn_inst++; + if (adev->vcn.num_vcn_inst < AMDGPU_MAX_VCN_INSTANCES) + adev->vcn.num_vcn_inst++; + else + dev_err(adev->dev, "Too many VCN instances: %d vs %d\n", + adev->vcn.num_vcn_inst + 1, + AMDGPU_MAX_VCN_INSTANCES); } if (le16_to_cpu(ip->hw_id) == SDMA0_HWID || le16_to_cpu(ip->hw_id) == SDMA1_HWID || le16_to_cpu(ip->hw_id) == SDMA2_HWID || - le16_to_cpu(ip->hw_id) == SDMA3_HWID) - adev->sdma.num_instances++; + le16_to_cpu(ip->hw_id) == SDMA3_HWID) { + if (adev->sdma.num_instances < AMDGPU_MAX_SDMA_INSTANCES) + adev->sdma.num_instances++; + else + dev_err(adev->dev, "Too many SDMA instances: %d vs %d\n", + adev->sdma.num_instances + 1, + AMDGPU_MAX_SDMA_INSTANCES); + } if (le16_to_cpu(ip->hw_id) == UMC_HWID) adev->gmc.num_umc++; -- cgit v1.2.3 From 1c755241463bab5d90404a782abf3baf7b7a3217 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Fri, 20 May 2022 11:04:35 +0800 Subject: drm/amdgpu: clean up asd on the ta_firmware_header_v2_0 On the psp13 series use ta_firmware_header_v2_0 and the asd firmware was buildin ta, so needn't request asd firmware separately. Signed-off-by: Prike Liang Reviewed-by: Yifan Zhang Acked-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 18014ed0e853..9e1ef81933ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -32,13 +32,10 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin"); -MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin"); -MODULE_FIRMWARE("amdgpu/psp_13_0_5_asd.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_5_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin"); -MODULE_FIRMWARE("amdgpu/psp_13_0_8_asd.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin"); @@ -96,9 +93,6 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 3): case IP_VERSION(13, 0, 5): case IP_VERSION(13, 0, 8): - err = psp_init_asd_microcode(psp, chip_name); - if (err) - return err; err = psp_init_toc_microcode(psp, chip_name); if (err) return err; -- cgit v1.2.3 From a5457087eb10322864dedb7768b7a95332393efe Mon Sep 17 00:00:00 2001 From: Candice Li Date: Fri, 20 May 2022 20:51:53 +0800 Subject: drm/amdgpu: Resolve pcie_bif RAS recovery bug Check shared buf instead of init flag for xgmi ta shared buf init during xgmi ta initialization. Signed-off-by: Candice Li Reviewed-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 214e4e89a028..e9411c28d88b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1177,7 +1177,7 @@ int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool lo psp->xgmi_context.context.mem_context.shared_mem_size = PSP_XGMI_SHARED_MEM_SIZE; psp->xgmi_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA; - if (!psp->xgmi_context.context.initialized) { + if (!psp->xgmi_context.context.mem_context.shared_buf) { ret = psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context); if (ret) return ret; -- cgit v1.2.3 From d534ca7128d7bf681ed6d462c09b9d6ffb3bed91 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 23 May 2022 11:24:31 -0400 Subject: drm/amdgpu: differentiate between LP and non-LP DDR memory Some applications want to know whether the memory is LP or not. Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 8 ++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +++- include/uapi/drm/amdgpu_drm.h | 2 ++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 63e0293edc5f..fd8f3731758e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -188,13 +188,17 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev, vram_type = AMDGPU_VRAM_TYPE_DDR3; break; case Ddr4MemType: - case LpDdr4MemType: vram_type = AMDGPU_VRAM_TYPE_DDR4; break; + case LpDdr4MemType: + vram_type = AMDGPU_VRAM_TYPE_LPDDR4; + break; case Ddr5MemType: - case LpDdr5MemType: vram_type = AMDGPU_VRAM_TYPE_DDR5; break; + case LpDdr5MemType: + vram_type = AMDGPU_VRAM_TYPE_LPDDR5; + break; default: vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5444515c1476..e1e6441c475f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1018,7 +1018,9 @@ static const char *amdgpu_vram_names[] = { "DDR3", "DDR4", "GDDR6", - "DDR5" + "DDR5", + "LPDDR4", + "LPDDR5" }; /** diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 9a1d210d135d..d9d475d65c76 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -988,6 +988,8 @@ struct drm_amdgpu_info_vbios { #define AMDGPU_VRAM_TYPE_DDR4 8 #define AMDGPU_VRAM_TYPE_GDDR6 9 #define AMDGPU_VRAM_TYPE_DDR5 10 +#define AMDGPU_VRAM_TYPE_LPDDR4 11 +#define AMDGPU_VRAM_TYPE_LPDDR5 12 struct drm_amdgpu_info_device { /** PCI Device ID */ -- cgit v1.2.3 From 31ab27b14daaa75541a415c6794d6f3567fea44a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 23 May 2022 10:24:18 +1000 Subject: drm/amdgpu/cs: make commands with 0 chunks illegal behaviour. Submitting a cs with 0 chunks, causes an oops later, found trying to execute the wrong userspace driver. MESA_LOADER_DRIVER_OVERRIDE=v3d glxinfo [172536.665184] BUG: kernel NULL pointer dereference, address: 00000000000001d8 [172536.665188] #PF: supervisor read access in kernel mode [172536.665189] #PF: error_code(0x0000) - not-present page [172536.665191] PGD 6712a0067 P4D 6712a0067 PUD 5af9ff067 PMD 0 [172536.665195] Oops: 0000 [#1] SMP NOPTI [172536.665197] CPU: 7 PID: 2769838 Comm: glxinfo Tainted: P O 5.10.81 #1-NixOS [172536.665199] Hardware name: To be filled by O.E.M. To be filled by O.E.M./CROSSHAIR V FORMULA-Z, BIOS 2201 03/23/2015 [172536.665272] RIP: 0010:amdgpu_cs_ioctl+0x96/0x1ce0 [amdgpu] [172536.665274] Code: 75 18 00 00 4c 8b b2 88 00 00 00 8b 46 08 48 89 54 24 68 49 89 f7 4c 89 5c 24 60 31 d2 4c 89 74 24 30 85 c0 0f 85 c0 01 00 00 <48> 83 ba d8 01 00 00 00 48 8b b4 24 90 00 00 00 74 16 48 8b 46 10 [172536.665276] RSP: 0018:ffffb47c0e81bbe0 EFLAGS: 00010246 [172536.665277] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [172536.665278] RDX: 0000000000000000 RSI: ffffb47c0e81be28 RDI: ffffb47c0e81bd68 [172536.665279] RBP: ffff936524080010 R08: 0000000000000000 R09: ffffb47c0e81be38 [172536.665281] R10: ffff936524080010 R11: ffff936524080000 R12: ffffb47c0e81bc40 [172536.665282] R13: ffffb47c0e81be28 R14: ffff9367bc410000 R15: ffffb47c0e81be28 [172536.665283] FS: 00007fe35e05d740(0000) GS:ffff936c1edc0000(0000) knlGS:0000000000000000 [172536.665284] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [172536.665286] CR2: 00000000000001d8 CR3: 0000000532e46000 CR4: 00000000000406e0 [172536.665287] Call Trace: [172536.665322] ? amdgpu_cs_find_mapping+0x110/0x110 [amdgpu] [172536.665332] drm_ioctl_kernel+0xaa/0xf0 [drm] [172536.665338] drm_ioctl+0x201/0x3b0 [drm] [172536.665369] ? amdgpu_cs_find_mapping+0x110/0x110 [amdgpu] [172536.665372] ? selinux_file_ioctl+0x135/0x230 [172536.665399] amdgpu_drm_ioctl+0x49/0x80 [amdgpu] [172536.665403] __x64_sys_ioctl+0x83/0xb0 [172536.665406] do_syscall_64+0x33/0x40 [172536.665409] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2018 Signed-off-by: Dave Airlie Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 84caab5e4d22..b28af04b0c3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -116,7 +116,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs int ret; if (cs->in.num_chunks == 0) - return 0; + return -EINVAL; chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL); if (!chunk_array) -- cgit v1.2.3 From ab5a7fb6d2296b9486d17d1e24f4bde90822e644 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 May 2022 13:11:14 +0200 Subject: drm/amdgpu/gfx: fix typos in comments Spelling mistakes (triple letters) in comments. Detected with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 02754ee86c81..c5f46d264b23 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -5111,7 +5111,7 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); /* Initialize all compute VMIDs to have no GDS, GWS, or OA - acccess. These should be enabled by FW for target VMIDs. */ + access. These should be enabled by FW for target VMIDs. */ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index fb9302910742..7f0b18b0d4c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3714,7 +3714,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); /* Initialize all compute VMIDs to have no GDS, GWS, or OA - acccess. These should be enabled by FW for target VMIDs. */ + access. These should be enabled by FW for target VMIDs. */ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); @@ -5815,7 +5815,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); - /* write cmd to Set CGCG Overrride */ + /* write cmd to Set CGCG Override */ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f12ae6e2359a..5349ca4d19e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2535,7 +2535,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); /* Initialize all compute VMIDs to have no GDS, GWS, or OA - acccess. These should be enabled by FW for target VMIDs. */ + access. These should be enabled by FW for target VMIDs. */ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); -- cgit v1.2.3 From 6bd8d4b7d511f00a9e02f89b250fba3013200843 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 May 2022 13:11:24 +0200 Subject: drm/amdkfd: fix typo in comment Spelling mistake (triple letters) in comment. Detected with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 8b5452a8d330..67abf8dcd30a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1621,7 +1621,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( mutex_lock(&mem->lock); - /* Unpin MMIO/DOORBELL BO's that were pinnned during allocation */ + /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */ if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { -- cgit v1.2.3 From 39dbde650f9377f97ad985bfa16af93381766232 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 24 May 2022 17:35:55 +0530 Subject: drm/amd/pm: Return auto perf level, if unsupported When powerplay is not enabled, return AUTO as default level. Signed-off-by: Lijo Lazar Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 5472f9936feb..d1bf073adf54 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -770,6 +770,9 @@ enum amd_dpm_forced_level amdgpu_dpm_get_performance_level(struct amdgpu_device const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; enum amd_dpm_forced_level level; + if (!pp_funcs) + return AMD_DPM_FORCED_LEVEL_AUTO; + mutex_lock(&adev->pm.mutex); if (pp_funcs->get_performance_level) level = pp_funcs->get_performance_level(adev->powerplay.pp_handle); -- cgit v1.2.3 From 62e9bd20035b53ff6c679499c08546d96c6c60a7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 24 May 2022 23:23:59 -0400 Subject: drm/amdgpu: add beige goby PCI ID Add a beige goby PCI ID. Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8592d43a79b0..afabdbbb22c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1940,6 +1940,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7424, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, { PCI_DEVICE(0x1002, PCI_ANY_ID), -- cgit v1.2.3 From fab2cc8335839867a3db38f195441b9c7c6460f6 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 6 May 2022 13:08:34 +0200 Subject: drm/amdgpu: add AMDGPU_GEM_CREATE_DISCARDABLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a AMDGPU_GEM_CREATE_DISCARDABLE flag to note that the content of a BO doesn't needs to be preserved during eviction. KFD was already using a similar functionality for SVM BOs so replace the internal flag with the new UAPI. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +- include/uapi/drm/amdgpu_drm.h | 4 ++++ 6 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 652571267077..56f4c9aa87c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -296,8 +296,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_VRAM_CLEARED | AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | AMDGPU_GEM_CREATE_EXPLICIT_SYNC | - AMDGPU_GEM_CREATE_ENCRYPTED)) - + AMDGPU_GEM_CREATE_ENCRYPTED | + AMDGPU_GEM_CREATE_DISCARDABLE)) return -EINVAL; /* reject invalid gem domains */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e1e6441c475f..2c82b1d5a0d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -567,6 +567,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bp->domain; bo->allowed_domains = bo->preferred_domains; if (bp->type != ttm_bo_type_kernel && + !(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE) && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 4c9cbdc66995..147b79c10cbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -41,7 +41,6 @@ /* BO flag to indicate a KFD userptr BO */ #define AMDGPU_AMDKFD_CREATE_USERPTR_BO (1ULL << 63) -#define AMDGPU_AMDKFD_CREATE_SVM_BO (1ULL << 62) #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo) #define to_amdgpu_bo_vm(abo) container_of((abo), struct amdgpu_bo_vm, bo) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index ec26edd4f4d8..be6f76a30ac6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -117,7 +117,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, } abo = ttm_to_amdgpu_bo(bo); - if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) { + if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) { placement->num_placement = 0; placement->num_busy_placement = 0; return; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 29e9ebf6d8d5..2ebf0132c25b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -531,7 +531,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, bp.domain = AMDGPU_GEM_DOMAIN_VRAM; bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; bp.flags |= clear ? AMDGPU_GEM_CREATE_VRAM_CLEARED : 0; - bp.flags |= AMDGPU_AMDKFD_CREATE_SVM_BO; + bp.flags |= AMDGPU_GEM_CREATE_DISCARDABLE; bp.type = ttm_bo_type_device; bp.resv = NULL; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index d9d475d65c76..a81bef5cfeaa 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -140,6 +140,10 @@ extern "C" { * not require GTT memory accounting */ #define AMDGPU_GEM_CREATE_PREEMPTIBLE (1 << 11) +/* Flag that BO can be discarded under memory pressure without keeping the + * content. + */ +#define AMDGPU_GEM_CREATE_DISCARDABLE (1 << 12) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- cgit v1.2.3 From b6c65a2c92aa880e8050a91ca83288b85fc32575 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 6 May 2022 13:11:41 +0200 Subject: drm/amdgpu: add AMDGPU_VM_NOALLOC v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the AMDGPU_VM_NOALLOC flag to let userspace control MALL allocation. v2: also add the flag to the allowed flags. Signed-off-by: Christian König Reviewed-by: Alex Deucher (v1) Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 5 ++++- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 +++ drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 3 +++ include/uapi/drm/amdgpu_drm.h | 2 ++ 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 56f4c9aa87c6..8ef31d687ef3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -645,6 +645,8 @@ uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags) pte_flag |= AMDGPU_PTE_WRITEABLE; if (flags & AMDGPU_VM_PAGE_PRT) pte_flag |= AMDGPU_PTE_PRT; + if (flags & AMDGPU_VM_PAGE_NOALLOC) + pte_flag |= AMDGPU_PTE_NOALLOC; if (adev->gmc.gmc_funcs->map_mtype) pte_flag |= amdgpu_gmc_map_mtype(adev, @@ -658,7 +660,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, { const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | - AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK; + AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK | + AMDGPU_VM_PAGE_NOALLOC; const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_PRT; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index b8c79789e1e4..9077dfccaf3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -613,6 +613,9 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); + *flags &= ~AMDGPU_PTE_NOALLOC; + *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC); + if (mapping->flags & AMDGPU_PTE_PRT) { *flags |= AMDGPU_PTE_PRT; *flags |= AMDGPU_PTE_SNOOPED; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 477f67d9b07c..a0c0b7d9f444 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -500,6 +500,9 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev, *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); + *flags &= ~AMDGPU_PTE_NOALLOC; + *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC); + if (mapping->flags & AMDGPU_PTE_PRT) { *flags |= AMDGPU_PTE_PRT; *flags |= AMDGPU_PTE_SNOOPED; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index a81bef5cfeaa..18d3246d636e 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -533,6 +533,8 @@ struct drm_amdgpu_gem_op { #define AMDGPU_VM_MTYPE_UC (4 << 5) /* Use Read Write MTYPE instead of default MTYPE */ #define AMDGPU_VM_MTYPE_RW (5 << 5) +/* don't allocate MALL */ +#define AMDGPU_VM_PAGE_NOALLOC (1 << 9) struct drm_amdgpu_gem_va { /** GEM object handle */ -- cgit v1.2.3 From 08cffb3eb731fefd0dea12424cedbfa63c356ee0 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 6 May 2022 13:21:28 +0200 Subject: drm/amdgpu: bump minor version number MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Increase the minor version number to indicate that the new flags are available. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index afabdbbb22c6..8890300766a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -99,10 +99,11 @@ * - 3.43.0 - Add device hot plug/unplug support * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B * - 3.45.0 - Add context ioctl stable pstate interface - * * 3.46.0 - To enable hot plug amdgpu tests in libdrm + * - 3.46.0 - To enable hot plug amdgpu tests in libdrm + * * 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 46 +#define KMS_DRIVER_MINOR 47 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit; -- cgit v1.2.3 From af0b541670090e87996e0894bd0e457edf617541 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 11 May 2022 11:06:26 +0200 Subject: drm/amdgpu: Convert to common fdinfo format v5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert fdinfo format to one documented in drm-usage-stats.rst. It turned out that the existing implementation was actually completely nonsense. The calculated percentages indeed represented the usage of the engine, but with varying time slices. So 10% usage for application A could mean something completely different than 10% usage for application B. Completely nuke that and just use the now standardized nanosecond interface. v2: drop the documentation change for now, nuke percentage calculation v3: only account for each hw_ip, move the time_spend to the ctx mgr. v4: move general ctx changes into separate patch, rework the fdinfo to ctx_mgr interface so that all usages are calculated at once, drop some unecessary and dangerous refcount dance. v5: add one more comment how we calculate the time spend Signed-off-by: Tvrtko Ursulin Signed-off-by: Christian König Reviewed-by: Shashank Sharma Cc: Daniel Vetter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 177 +++++++++++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 12 +- drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 57 ++++------ 3 files changed, 125 insertions(+), 121 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index a61e4c83a545..7dc92ef36b2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -162,17 +162,50 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip) return hw_prio; } +/* Calculate the time spend on the hw */ +static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence) +{ + struct drm_sched_fence *s_fence; + + if (!fence) + return ns_to_ktime(0); + + /* When the fence is not even scheduled it can't have spend time */ + s_fence = to_drm_sched_fence(fence); + if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags)) + return ns_to_ktime(0); + + /* When it is still running account how much already spend */ + if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags)) + return ktime_sub(ktime_get(), s_fence->scheduled.timestamp); + + return ktime_sub(s_fence->finished.timestamp, + s_fence->scheduled.timestamp); +} + +static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx, + struct amdgpu_ctx_entity *centity) +{ + ktime_t res = ns_to_ktime(0); + uint32_t i; + + spin_lock(&ctx->ring_lock); + for (i = 0; i < amdgpu_sched_jobs; i++) { + res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i])); + } + spin_unlock(&ctx->ring_lock); + return res; +} static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, const u32 ring) { + struct drm_gpu_scheduler **scheds = NULL, *sched = NULL; struct amdgpu_device *adev = ctx->mgr->adev; struct amdgpu_ctx_entity *entity; - struct drm_gpu_scheduler **scheds = NULL, *sched = NULL; - unsigned num_scheds = 0; - int32_t ctx_prio; - unsigned int hw_prio; enum drm_sched_priority drm_prio; + unsigned int hw_prio, num_scheds; + int32_t ctx_prio; int r; entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs), @@ -182,6 +215,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ? ctx->init_priority : ctx->override_priority; + entity->hw_ip = hw_ip; entity->sequence = 1; hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip); drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio); @@ -220,6 +254,23 @@ error_free_entity: return r; } +static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) +{ + ktime_t res = ns_to_ktime(0); + int i; + + if (!entity) + return res; + + for (i = 0; i < amdgpu_sched_jobs; ++i) { + res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i])); + dma_fence_put(entity->fences[i]); + } + + kfree(entity); + return res; +} + static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, struct drm_file *filp, struct amdgpu_ctx *ctx) { @@ -246,20 +297,6 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, return 0; } -static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) -{ - - int i; - - if (!entity) - return; - - for (i = 0; i < amdgpu_sched_jobs; ++i) - dma_fence_put(entity->fences[i]); - - kfree(entity); -} - static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, u32 *stable_pstate) { @@ -351,8 +388,10 @@ static void amdgpu_ctx_fini(struct kref *ref) for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) { - amdgpu_ctx_fini_entity(ctx->entities[i][j]); - ctx->entities[i][j] = NULL; + ktime_t spend; + + spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]); + atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]); } } @@ -689,6 +728,9 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, centity->sequence++; spin_unlock(&ctx->ring_lock); + atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)), + &ctx->mgr->time_spend[centity->hw_ip]); + dma_fence_put(other); return seq; } @@ -795,9 +837,14 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr, struct amdgpu_device *adev) { + unsigned int i; + mgr->adev = adev; mutex_init(&mgr->lock); idr_init(&mgr->ctx_handles); + + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) + atomic64_set(&mgr->time_spend[i], 0); } long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) @@ -873,80 +920,38 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) mutex_destroy(&mgr->lock); } -static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, - struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max) -{ - ktime_t now, t1; - uint32_t i; - - *total = *max = 0; - - now = ktime_get(); - for (i = 0; i < amdgpu_sched_jobs; i++) { - struct dma_fence *fence; - struct drm_sched_fence *s_fence; - - spin_lock(&ctx->ring_lock); - fence = dma_fence_get(centity->fences[i]); - spin_unlock(&ctx->ring_lock); - if (!fence) - continue; - s_fence = to_drm_sched_fence(fence); - if (!dma_fence_is_signaled(&s_fence->scheduled)) { - dma_fence_put(fence); - continue; - } - t1 = s_fence->scheduled.timestamp; - if (!ktime_before(t1, now)) { - dma_fence_put(fence); - continue; - } - if (dma_fence_is_signaled(&s_fence->finished) && - s_fence->finished.timestamp < now) - *total += ktime_sub(s_fence->finished.timestamp, t1); - else - *total += ktime_sub(now, t1); - t1 = ktime_sub(now, t1); - dma_fence_put(fence); - *max = max(t1, *max); - } -} - -ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip, - uint32_t idx, uint64_t *elapsed) +void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr, + ktime_t usage[AMDGPU_HW_IP_NUM]) { - struct idr *idp; struct amdgpu_ctx *ctx; + unsigned int hw_ip, i; uint32_t id; - struct amdgpu_ctx_entity *centity; - ktime_t total = 0, max = 0; - if (idx >= AMDGPU_MAX_ENTITY_NUM) - return 0; - idp = &mgr->ctx_handles; + /* + * This is a little bit racy because it can be that a ctx or a fence are + * destroyed just in the moment we try to account them. But that is ok + * since exactly that case is explicitely allowed by the interface. + */ mutex_lock(&mgr->lock); - idr_for_each_entry(idp, ctx, id) { - ktime_t ttotal, tmax; + for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) { + uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]); - if (!ctx->entities[hwip][idx]) - continue; + usage[hw_ip] = ns_to_ktime(ns); + } - centity = ctx->entities[hwip][idx]; - amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax); + idr_for_each_entry(&mgr->ctx_handles, ctx, id) { + for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) { + for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) { + struct amdgpu_ctx_entity *centity; + ktime_t spend; - /* Harmonic mean approximation diverges for very small - * values. If ratio < 0.01% ignore - */ - if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal)) - continue; - - total = ktime_add(total, ttotal); - max = ktime_after(tmax, max) ? tmax : max; + centity = ctx->entities[hw_ip][i]; + if (!centity) + continue; + spend = amdgpu_ctx_entity_time(ctx, centity); + usage[hw_ip] = ktime_add(usage[hw_ip], spend); + } + } } - mutex_unlock(&mgr->lock); - if (elapsed) - *elapsed = max; - - return total; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index 681050bc828c..cc7c8afff414 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -23,16 +23,20 @@ #ifndef __AMDGPU_CTX_H__ #define __AMDGPU_CTX_H__ +#include +#include + #include "amdgpu_ring.h" struct drm_device; struct drm_file; struct amdgpu_fpriv; +struct amdgpu_ctx_mgr; #define AMDGPU_MAX_ENTITY_NUM 4 -#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) ((max) > 16384ULL*(total)) struct amdgpu_ctx_entity { + uint32_t hw_ip; uint64_t sequence; struct drm_sched_entity entity; struct dma_fence *fences[]; @@ -61,6 +65,7 @@ struct amdgpu_ctx_mgr { struct mutex lock; /* protected by lock */ struct idr ctx_handles; + atomic64_t time_spend[AMDGPU_HW_IP_NUM]; }; extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM]; @@ -90,6 +95,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr, void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); -ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip, - uint32_t idx, uint64_t *elapsed); +void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr, + ktime_t usage[AMDGPU_HW_IP_NUM]); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index 5a6857c44bb6..4d453845235c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -32,6 +32,7 @@ #include #include +#include #include "amdgpu.h" #include "amdgpu_vm.h" @@ -54,26 +55,23 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = { void amdgpu_show_fdinfo(struct seq_file *m, struct file *f) { - struct amdgpu_fpriv *fpriv; - uint32_t bus, dev, fn, i, domain; - uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0; struct drm_file *file = f->private_data; struct amdgpu_device *adev = drm_to_adev(file->minor->dev); + struct amdgpu_fpriv *fpriv = file->driver_priv; + + uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0; + ktime_t usage[AMDGPU_HW_IP_NUM]; + uint32_t bus, dev, fn, domain; struct amdgpu_bo *root; + unsigned int hw_ip; int ret; - ret = amdgpu_file_to_fpriv(f, &fpriv); - if (ret) - return; bus = adev->pdev->bus->number; domain = pci_domain_nr(adev->pdev->bus); dev = PCI_SLOT(adev->pdev->devfn); fn = PCI_FUNC(adev->pdev->devfn); - root = amdgpu_bo_ref(fpriv->vm.root.bo); - if (!root) - return; - + root = fpriv->vm.root.bo; ret = amdgpu_bo_reserve(root, false); if (ret) { DRM_ERROR("Fail to reserve bo\n"); @@ -81,31 +79,26 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f) } amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, >t_mem, &cpu_mem); amdgpu_bo_unreserve(root); - amdgpu_bo_unref(&root); - seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n", domain, bus, - dev, fn, fpriv->vm.pasid); - seq_printf(m, "vram mem:\t%llu kB\n", vram_mem/1024UL); - seq_printf(m, "gtt mem:\t%llu kB\n", gtt_mem/1024UL); - seq_printf(m, "cpu mem:\t%llu kB\n", cpu_mem/1024UL); - for (i = 0; i < AMDGPU_HW_IP_NUM; i++) { - uint32_t count = amdgpu_ctx_num_entities[i]; - int idx = 0; - uint64_t total = 0, min = 0; - uint32_t perc, frac; + amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage); - for (idx = 0; idx < count; idx++) { - total = amdgpu_ctx_mgr_fence_usage(&fpriv->ctx_mgr, - i, idx, &min); - if ((total == 0) || (min == 0)) - continue; + /* + * ****************************************************************** + * For text output format description please see drm-usage-stats.rst! + * ****************************************************************** + */ - perc = div64_u64(10000 * total, min); - frac = perc % 100; + seq_printf(m, "pasid:\t%u\n", fpriv->vm.pasid); + seq_printf(m, "drm-driver:\t%s\n", file->minor->dev->driver->name); + seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn); + seq_printf(m, "drm-memory-vram:\t%llu KiB\n", vram_mem/1024UL); + seq_printf(m, "drm-memory-gtt: \t%llu KiB\n", gtt_mem/1024UL); + seq_printf(m, "drm-memory-cpu: \t%llu KiB\n", cpu_mem/1024UL); + for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) { + if (!usage[hw_ip]) + continue; - seq_printf(m, "%s%d:\t%d.%d%%\n", - amdgpu_ip_name[i], - idx, perc/100, frac); - } + seq_printf(m, "drm-engine-%s:\t%Ld ns\n", amdgpu_ip_name[hw_ip], + ktime_to_ns(usage[hw_ip])); } } -- cgit v1.2.3 From 9bdc1992c925a35c6f7200e8abe54e3f00ce7719 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 10 May 2022 18:29:35 +0200 Subject: drm/amdgpu: add drm-client-id to fdinfo v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is enough to get gputop working :) v2: rebase and some addition cleanup Signed-off-by: Christian König Reviewed-by: Shashank Sharma (v1) Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index 4d453845235c..99a7855ab1bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -58,11 +58,11 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f) struct drm_file *file = f->private_data; struct amdgpu_device *adev = drm_to_adev(file->minor->dev); struct amdgpu_fpriv *fpriv = file->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0; ktime_t usage[AMDGPU_HW_IP_NUM]; uint32_t bus, dev, fn, domain; - struct amdgpu_bo *root; unsigned int hw_ip; int ret; @@ -71,14 +71,12 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f) dev = PCI_SLOT(adev->pdev->devfn); fn = PCI_FUNC(adev->pdev->devfn); - root = fpriv->vm.root.bo; - ret = amdgpu_bo_reserve(root, false); - if (ret) { - DRM_ERROR("Fail to reserve bo\n"); + ret = amdgpu_bo_reserve(vm->root.bo, false); + if (ret) return; - } - amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, >t_mem, &cpu_mem); - amdgpu_bo_unreserve(root); + + amdgpu_vm_get_memory(vm, &vram_mem, >t_mem, &cpu_mem); + amdgpu_bo_unreserve(vm->root.bo); amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage); @@ -91,6 +89,7 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f) seq_printf(m, "pasid:\t%u\n", fpriv->vm.pasid); seq_printf(m, "drm-driver:\t%s\n", file->minor->dev->driver->name); seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn); + seq_printf(m, "drm-client-id:\t%Lu\n", vm->immediate.fence_context); seq_printf(m, "drm-memory-vram:\t%llu KiB\n", vram_mem/1024UL); seq_printf(m, "drm-memory-gtt: \t%llu KiB\n", gtt_mem/1024UL); seq_printf(m, "drm-memory-cpu: \t%llu KiB\n", cpu_mem/1024UL); -- cgit v1.2.3 From 597b89d30b42dcc8e6b262e6876b42dde66f97f0 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 16 May 2022 11:52:51 +0300 Subject: gpu: host1x: Add context bus The context bus is a "dummy" bus that contains struct devices that correspond to IOMMU contexts assigned through Host1x to processes. Even when host1x itself is built as a module, the bus is registered in built-in code so that the built-in ARM SMMU driver is able to reference it. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/Makefile | 3 +-- drivers/gpu/host1x/Kconfig | 5 +++++ drivers/gpu/host1x/Makefile | 1 + drivers/gpu/host1x/context_bus.c | 31 +++++++++++++++++++++++++++++++ include/linux/host1x_context_bus.h | 15 +++++++++++++++ 5 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/host1x/context_bus.c create mode 100644 include/linux/host1x_context_bus.h diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile index 835c88318cec..8997f0096545 100644 --- a/drivers/gpu/Makefile +++ b/drivers/gpu/Makefile @@ -2,7 +2,6 @@ # drm/tegra depends on host1x, so if both drivers are built-in care must be # taken to initialize them in the correct order. Link order is the only way # to ensure this currently. -obj-$(CONFIG_TEGRA_HOST1X) += host1x/ -obj-y += drm/ vga/ +obj-y += host1x/ drm/ vga/ obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/ obj-$(CONFIG_TRACE_GPU_MEM) += trace/ diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig index 6815b4db17c1..1861a8180d3f 100644 --- a/drivers/gpu/host1x/Kconfig +++ b/drivers/gpu/host1x/Kconfig @@ -1,8 +1,13 @@ # SPDX-License-Identifier: GPL-2.0-only + +config TEGRA_HOST1X_CONTEXT_BUS + bool + config TEGRA_HOST1X tristate "NVIDIA Tegra host1x driver" depends on ARCH_TEGRA || (ARM && COMPILE_TEST) select DMA_SHARED_BUFFER + select TEGRA_HOST1X_CONTEXT_BUS select IOMMU_IOVA help Driver for the NVIDIA Tegra host1x hardware. diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile index d2b6f7de0498..c891a3e33844 100644 --- a/drivers/gpu/host1x/Makefile +++ b/drivers/gpu/host1x/Makefile @@ -18,3 +18,4 @@ host1x-y = \ hw/host1x07.o obj-$(CONFIG_TEGRA_HOST1X) += host1x.o +obj-$(CONFIG_TEGRA_HOST1X_CONTEXT_BUS) += context_bus.o diff --git a/drivers/gpu/host1x/context_bus.c b/drivers/gpu/host1x/context_bus.c new file mode 100644 index 000000000000..b0d35b2bbe89 --- /dev/null +++ b/drivers/gpu/host1x/context_bus.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, NVIDIA Corporation. + */ + +#include +#include + +struct bus_type host1x_context_device_bus_type = { + .name = "host1x-context", +}; +EXPORT_SYMBOL_GPL(host1x_context_device_bus_type); + +static int __init host1x_context_device_bus_init(void) +{ + int err; + + if (!of_machine_is_compatible("nvidia,tegra186") && + !of_machine_is_compatible("nvidia,tegra194") && + !of_machine_is_compatible("nvidia,tegra234")) + return 0; + + err = bus_register(&host1x_context_device_bus_type); + if (err < 0) { + pr_err("bus type registration failed: %d\n", err); + return err; + } + + return 0; +} +postcore_initcall(host1x_context_device_bus_init); diff --git a/include/linux/host1x_context_bus.h b/include/linux/host1x_context_bus.h new file mode 100644 index 000000000000..72462737a6db --- /dev/null +++ b/include/linux/host1x_context_bus.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021, NVIDIA Corporation. All rights reserved. + */ + +#ifndef __LINUX_HOST1X_CONTEXT_BUS_H +#define __LINUX_HOST1X_CONTEXT_BUS_H + +#include + +#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS +extern struct bus_type host1x_context_device_bus_type; +#endif + +#endif -- cgit v1.2.3 From 993a2adc6e2e94a0a7b5bfc054eda90ac95f62c3 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Tue, 17 May 2022 09:21:34 -0700 Subject: drm/msm/dp: Always clear mask bits to disable interrupts at dp_ctrl_reset_irq_ctrl() dp_catalog_ctrl_reset() will software reset DP controller. But it will not reset programmable registers to default value. DP driver still have to clear mask bits to interrupt status registers to disable interrupts after software reset of controller. At current implementation, dp_ctrl_reset_irq_ctrl() will software reset dp controller but did not call dp_catalog_ctrl_enable_irq(false) to clear hpd related interrupt mask bits to disable hpd related interrupts due to it mistakenly think hpd related interrupt mask bits will be cleared by software reset of dp controller automatically. This mistake may cause system to crash during suspending procedure due to unexpected irq fired and trigger event thread to access dp controller registers with controller clocks are disabled. This patch fixes system crash during suspending problem by removing "enable" flag condition checking at dp_ctrl_reset_irq_ctrl() so that hpd related interrupt mask bits are cleared to prevent unexpected from happening. Changes in v2: -- add more details commit text Changes in v3: -- add synchrons_irq() -- add atomic_t suspended Changes in v4: -- correct Fixes's commit ID -- remove synchrons_irq() Changes in v5: -- revise commit text Changes in v6: -- add event_lock to protect "suspended" Changes in v7: -- delete "suspended" flag Fixes: 989ebe7bc446 ("drm/msm/dp: do not initialize phy until plugin interrupt received") Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Patchwork: https://patchwork.freedesktop.org/patch/486591/ Link: https://lore.kernel.org/r/1652804494-19650-1-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_ctrl.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index af7a80c63610..f3e333eff32d 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1389,8 +1389,13 @@ void dp_ctrl_reset_irq_ctrl(struct dp_ctrl *dp_ctrl, bool enable) dp_catalog_ctrl_reset(ctrl->catalog); - if (enable) - dp_catalog_ctrl_enable_irq(ctrl->catalog, enable); + /* + * all dp controller programmable registers will not + * be reset to default value after DP_SW_RESET + * therefore interrupt mask bits have to be updated + * to enable/disable interrupts + */ + dp_catalog_ctrl_enable_irq(ctrl->catalog, enable); } void dp_ctrl_phy_init(struct dp_ctrl *dp_ctrl) -- cgit v1.2.3 From 6daf7e4aa91e05b3e138805613686fb5c46d8196 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 25 May 2022 13:09:12 +0530 Subject: drm/msm/disp/dpu1: remove superfluous init Commit 58dca9810749 ("drm/msm/disp/dpu1: Add support for DSC in encoder") added dsc_common_mode variable which was set to zero but then again programmed, so drop the superfluous init. Fixes: 58dca9810749 ("drm/msm/disp/dpu1: Add support for DSC in encoder") Reported-by: kernel test robot Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Signed-off-by: Vinod Koul Patchwork: https://patchwork.freedesktop.org/patch/487208/ Link: https://lore.kernel.org/r/20220525073912.2706505-1-vkoul@kernel.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 52516eb20cb8..8da01d862b83 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1814,7 +1814,6 @@ static void dpu_encoder_prep_dsc(struct dpu_encoder_virt *dpu_enc, } } - dsc_common_mode = 0; pic_width = dsc->drm->pic_width; dsc_common_mode = DSC_MODE_MULTIPLEX | DSC_MODE_SPLIT_PANEL; -- cgit v1.2.3 From fb0af2daaa04d69b9e66613b8721d23d41f387f6 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 24 May 2022 16:14:13 +0800 Subject: drm/msm/dpu: Remove unused code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eliminate the follow clang warning: drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c:544:33: warning: variable ‘mode’ set but not used [-Wunused-but-set-variable]. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Fixes: 3177589c6e93("drm/msm/dpu: encoder: drop unused mode_fixup callback") Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/487136/ Link: https://lore.kernel.org/r/20220524081413.37895-1-jiapeng.chong@linux.alibaba.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 8da01d862b83..3a462e327e0e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -541,7 +541,6 @@ static int dpu_encoder_virt_atomic_check( struct dpu_encoder_virt *dpu_enc; struct msm_drm_private *priv; struct dpu_kms *dpu_kms; - const struct drm_display_mode *mode; struct drm_display_mode *adj_mode; struct msm_display_topology topology; struct dpu_global_state *global_state; @@ -559,7 +558,6 @@ static int dpu_encoder_virt_atomic_check( priv = drm_enc->dev->dev_private; dpu_kms = to_dpu_kms(priv->kms); - mode = &crtc_state->mode; adj_mode = &crtc_state->adjusted_mode; global_state = dpu_kms_get_global_state(crtc_state->state); if (IS_ERR(global_state)) -- cgit v1.2.3 From 8caad14e722477807e9324fe0e85dd10151fb134 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Mon, 30 May 2022 10:19:55 +0800 Subject: drm/msm/dpu: Fix pointer dereferenced before checking The phys_enc->wb_idx is dereferencing before null checking, so move it after checking. Signed-off-by: Haowen Bai Reviewed-by: Dmitry Baryshkov Fixes: d7d0e73f7de33 ("drm/msm/dpu: introduce the dpu_encoder_phys_* for Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/487606/ Link: https://lore.kernel.org/r/1653877196-23114-1-git-send-email-baihaowen@meizu.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 4829d1ce0cf8..59da348ff339 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -574,11 +574,11 @@ static void dpu_encoder_phys_wb_disable(struct dpu_encoder_phys *phys_enc) */ static void dpu_encoder_phys_wb_destroy(struct dpu_encoder_phys *phys_enc) { - DPU_DEBUG("[wb:%d]\n", phys_enc->wb_idx - WB_0); - if (!phys_enc) return; + DPU_DEBUG("[wb:%d]\n", phys_enc->wb_idx - WB_0); + kfree(phys_enc); } -- cgit v1.2.3 From b9364eed9232f3d2a846f68c2307eb25c93cc2d0 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 31 May 2022 16:01:26 -0700 Subject: drm/msm/dpu: Move min BW request and full BW disable back to mdss In commit a670ff578f1f ("drm/msm/dpu: always use mdp device to scale bandwidth") we fully moved interconnect stuff to the DPU driver. This had no change for sc7180 but _did_ have an impact for other SoCs. It made them match the sc7180 scheme. Unfortunately, the sc7180 scheme seems like it was a bit broken. Specifically the interconnect needs to be on for more than just the DPU driver's AXI bus. In the very least it also needs to be on for the DSI driver's AXI bus. This can be seen fairly easily by doing this on a ChromeOS sc7180-trogdor class device: set_power_policy --ac_screen_dim_delay=5 --ac_screen_off_delay=10 sleep 10 cd /sys/bus/platform/devices/ae94000.dsi/power echo on > control When you do that, you'll get a warning splat in the logs about "gcc_disp_hf_axi_clk status stuck at 'off'". One could argue that perhaps what I have done above is "illegal" and that it can't happen naturally in the system because in normal system usage the DPU is pretty much always on when DSI is on. That being said: * In official ChromeOS builds (admittedly a 5.4 kernel with backports) we have seen that splat at bootup. * Even though we don't use "autosuspend" for these components, we don't use the "put_sync" variants. Thus plausibly the DSI could stay "runtime enabled" past when the DPU is enabled. Techncially we shouldn't do that if the DPU's suspend ends up yanking our clock. Let's change things such that the "bare minimum" request for the interconnect happens in the mdss driver again. That means that all of the children can assume that the interconnect is on at the minimum bandwidth. We'll then let the DPU request the higher amount that it wants. It should be noted that this isn't as hacky of a solution as it might initially appear. Specifically: * Since MDSS and DPU individually get their own references to the interconnect then the framework will actually handle aggregating them. The two drivers are _not_ clobbering each other. * When the Qualcomm interconnect driver aggregates it takes the max of all the peaks. Thus having MDSS request a peak, as we're doing here, won't actually change the total interconnect bandwidth (it won't be added to the request for the DPU). This perhaps explains why the "average" requested in MDSS was historically 0 since that one _would_ be added in. NOTE also that in the downstream ChromeOS 5.4 and 5.15 kernels, we're also seeing some RPMH hangs that are addressed by this fix. These hangs are showing up in the field and on _some_ devices with enough stress testing of suspend/resume. Specifically right at suspend time with a stack crawl that looks like this (from chromeos-5.15 tree): rpmh_write_batch+0x19c/0x240 qcom_icc_bcm_voter_commit+0x210/0x420 qcom_icc_set+0x28/0x38 apply_constraints+0x70/0xa4 icc_set_bw+0x150/0x24c dpu_runtime_resume+0x50/0x1c4 pm_generic_runtime_resume+0x30/0x44 __genpd_runtime_resume+0x68/0x7c genpd_runtime_resume+0x12c/0x20c __rpm_callback+0x98/0x138 rpm_callback+0x30/0x88 rpm_resume+0x370/0x4a0 __pm_runtime_resume+0x80/0xb0 dpu_kms_enable_commit+0x24/0x30 msm_atomic_commit_tail+0x12c/0x630 commit_tail+0xac/0x150 drm_atomic_helper_commit+0x114/0x11c drm_atomic_commit+0x68/0x78 drm_atomic_helper_disable_all+0x158/0x1c8 drm_atomic_helper_suspend+0xc0/0x1c0 drm_mode_config_helper_suspend+0x2c/0x60 msm_pm_prepare+0x2c/0x40 pm_generic_prepare+0x30/0x44 genpd_prepare+0x80/0xd0 device_prepare+0x78/0x17c dpm_prepare+0xb0/0x384 dpm_suspend_start+0x34/0xc0 We don't completely understand all the mechanisms in play, but the hang seemed to come and go with random factors. It's not terribly surprising that the hang is gone after this patch since the line of code that was failing is no longer present in the kernel. Fixes: a670ff578f1f ("drm/msm/dpu: always use mdp device to scale bandwidth") Fixes: c33b7c0389e1 ("drm/msm/dpu: add support for clk and bw scaling for display") Signed-off-by: Douglas Anderson Reviewed-by: Abhinav Kumar Tested-by: Jessica Zhang # RB3 (sdm845) and Reviewed-by: Stephen Boyd Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/487884/ Link: https://lore.kernel.org/r/20220531160059.v2.1.Ie7f6d4bf8cce28131da31a43354727e417cae98d@changeid Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 8 ----- drivers/gpu/drm/msm/msm_mdss.c | 57 +++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index bce47647d891..e23e2552e802 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -49,8 +49,6 @@ #define DPU_DEBUGFS_DIR "msm_dpu" #define DPU_DEBUGFS_HWMASKNAME "hw_log_mask" -#define MIN_IB_BW 400000000ULL /* Min ib vote 400MB */ - static int dpu_kms_hw_init(struct msm_kms *kms); static void _dpu_kms_mmu_destroy(struct dpu_kms *dpu_kms); @@ -1305,15 +1303,9 @@ static int __maybe_unused dpu_runtime_resume(struct device *dev) struct dpu_kms *dpu_kms = to_dpu_kms(priv->kms); struct drm_encoder *encoder; struct drm_device *ddev; - int i; ddev = dpu_kms->dev; - WARN_ON(!(dpu_kms->num_paths)); - /* Min vote of BW is required before turning on AXI clk */ - for (i = 0; i < dpu_kms->num_paths; i++) - icc_set_bw(dpu_kms->path[i], 0, Bps_to_icc(MIN_IB_BW)); - rc = clk_bulk_prepare_enable(dpu_kms->num_clocks, dpu_kms->clocks); if (rc) { DPU_ERROR("clock enable failed rc:%d\n", rc); diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 0454a571adf7..e13c5c12b775 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -25,6 +26,8 @@ #define UBWC_CTRL_2 0x150 #define UBWC_PREDICTION_MODE 0x154 +#define MIN_IB_BW 400000000UL /* Min ib vote 400MB */ + struct msm_mdss { struct device *dev; @@ -36,8 +39,47 @@ struct msm_mdss { unsigned long enabled_mask; struct irq_domain *domain; } irq_controller; + struct icc_path *path[2]; + u32 num_paths; }; +static int msm_mdss_parse_data_bus_icc_path(struct device *dev, + struct msm_mdss *msm_mdss) +{ + struct icc_path *path0 = of_icc_get(dev, "mdp0-mem"); + struct icc_path *path1 = of_icc_get(dev, "mdp1-mem"); + + if (IS_ERR_OR_NULL(path0)) + return PTR_ERR_OR_ZERO(path0); + + msm_mdss->path[0] = path0; + msm_mdss->num_paths = 1; + + if (!IS_ERR_OR_NULL(path1)) { + msm_mdss->path[1] = path1; + msm_mdss->num_paths++; + } + + return 0; +} + +static void msm_mdss_put_icc_path(void *data) +{ + struct msm_mdss *msm_mdss = data; + int i; + + for (i = 0; i < msm_mdss->num_paths; i++) + icc_put(msm_mdss->path[i]); +} + +static void msm_mdss_icc_request_bw(struct msm_mdss *msm_mdss, unsigned long bw) +{ + int i; + + for (i = 0; i < msm_mdss->num_paths; i++) + icc_set_bw(msm_mdss->path[i], 0, Bps_to_icc(bw)); +} + static void msm_mdss_irq(struct irq_desc *desc) { struct msm_mdss *msm_mdss = irq_desc_get_handler_data(desc); @@ -136,6 +178,13 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss) { int ret; + /* + * Several components have AXI clocks that can only be turned on if + * the interconnect is enabled (non-zero bandwidth). Let's make sure + * that the interconnects are at least at a minimum amount. + */ + msm_mdss_icc_request_bw(msm_mdss, MIN_IB_BW); + ret = clk_bulk_prepare_enable(msm_mdss->num_clocks, msm_mdss->clocks); if (ret) { dev_err(msm_mdss->dev, "clock enable failed, ret:%d\n", ret); @@ -178,6 +227,7 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss) static int msm_mdss_disable(struct msm_mdss *msm_mdss) { clk_bulk_disable_unprepare(msm_mdss->num_clocks, msm_mdss->clocks); + msm_mdss_icc_request_bw(msm_mdss, 0); return 0; } @@ -271,6 +321,13 @@ static struct msm_mdss *msm_mdss_init(struct platform_device *pdev, bool is_mdp5 dev_dbg(&pdev->dev, "mapped mdss address space @%pK\n", msm_mdss->mmio); + ret = msm_mdss_parse_data_bus_icc_path(&pdev->dev, msm_mdss); + if (ret) + return ERR_PTR(ret); + ret = devm_add_action_or_reset(&pdev->dev, msm_mdss_put_icc_path, msm_mdss); + if (ret) + return ERR_PTR(ret); + if (is_mdp5) ret = mdp5_mdss_parse_clock(pdev, &msm_mdss->clocks); else -- cgit v1.2.3