From 4fa790421c10e5c9c62406655c06d97a94555d54 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 8 Aug 2014 19:25:57 +0100 Subject: drm/i915: Fix erroneous conversion to u8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit adj was defined as u8. The issue is last_adj can be negative and adj is initialized with: adj = dev_priv->rps.last_adj; and we were also happily doing things like: if (adj < 0) (thank static analysers!) v2: Make new_delay an int in case we overflow the u8 in the intermediate computations. new_delay will get clamped at the end anyway. (Ville) Cc: Deepak S Cc: Ville Syrjälä Signed-off-by: Damien Lespiau Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 87abe8679495..f0d24db76e72 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1327,10 +1327,10 @@ static u32 vlv_c0_residency(struct drm_i915_private *dev_priv, * @dev_priv: DRM device private * */ -static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv) +static int vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv) { u32 residency_C0_up = 0, residency_C0_down = 0; - u8 new_delay, adj; + int new_delay, adj; dev_priv->rps.ei_interrupt_count++; -- cgit v1.2.3 From f45651bae2ee73ae551699d481f76aa6ad92138f Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 8 Aug 2014 21:51:10 +0300 Subject: drm/i915: Eliminate rmw from .update_primary_plane() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the entire DSPCNTR register setup into the .update_primary_plane() functions. That's where it belongs anyway and it'll also help 830M which has the extra problem that plane registers reads will return the value latched at the last vblank, not the value that was last written. Also move DSPPOS and DSPSIZE setup there. v2: Don't move variable initialization to avoid churn later Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 100 +++++++++++------------------------ 1 file changed, 32 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 24295694e493..041fd76a2ded 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2388,12 +2388,26 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc, int plane = intel_crtc->plane; unsigned long linear_offset; u32 dspcntr; - u32 reg; + u32 reg = DSPCNTR(plane); + + dspcntr = DISPPLANE_GAMMA_ENABLE; + + if (intel_crtc->primary_enabled) + dspcntr |= DISPLAY_PLANE_ENABLE; + + if (INTEL_INFO(dev)->gen < 4) { + if (intel_crtc->pipe == PIPE_B) + dspcntr |= DISPPLANE_SEL_PIPE_B; + + /* pipesrc and dspsize control the size that is scaled from, + * which should always be the user's requested size. 
+ */ + I915_WRITE(DSPSIZE(plane), + ((intel_crtc->config.pipe_src_h - 1) << 16) | + (intel_crtc->config.pipe_src_w - 1)); + I915_WRITE(DSPPOS(plane), 0); + } - reg = DSPCNTR(plane); - dspcntr = I915_READ(reg); - /* Mask out pixel format bits in case we change it */ - dspcntr &= ~DISPPLANE_PIXFORMAT_MASK; switch (fb->pixel_format) { case DRM_FORMAT_C8: dspcntr |= DISPPLANE_8BPP; @@ -2425,12 +2439,9 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc, BUG(); } - if (INTEL_INFO(dev)->gen >= 4) { - if (obj->tiling_mode != I915_TILING_NONE) - dspcntr |= DISPPLANE_TILED; - else - dspcntr &= ~DISPPLANE_TILED; - } + if (INTEL_INFO(dev)->gen >= 4 && + obj->tiling_mode != I915_TILING_NONE) + dspcntr |= DISPPLANE_TILED; if (IS_G4X(dev)) dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; @@ -2474,12 +2485,16 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc, int plane = intel_crtc->plane; unsigned long linear_offset; u32 dspcntr; - u32 reg; + u32 reg = DSPCNTR(plane); + + dspcntr = DISPPLANE_GAMMA_ENABLE; + + if (intel_crtc->primary_enabled) + dspcntr |= DISPLAY_PLANE_ENABLE; + + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; - reg = DSPCNTR(plane); - dspcntr = I915_READ(reg); - /* Mask out pixel format bits in case we change it */ - dspcntr &= ~DISPPLANE_PIXFORMAT_MASK; switch (fb->pixel_format) { case DRM_FORMAT_C8: dspcntr |= DISPPLANE_8BPP; @@ -2509,12 +2524,8 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc, if (obj->tiling_mode != I915_TILING_NONE) dspcntr |= DISPPLANE_TILED; - else - dspcntr &= ~DISPPLANE_TILED; - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - dspcntr &= ~DISPPLANE_TRICKLE_FEED_DISABLE; - else + if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; I915_WRITE(reg, dspcntr); @@ -3936,7 +3947,6 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; - enum plane plane = intel_crtc->plane; WARN_ON(!crtc->enabled); @@ -3958,10 +3968,6 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) ironlake_set_pipeconf(crtc); - /* Set up the display plane register */ - I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE); - POSTING_READ(DSPCNTR(plane)); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, crtc->x, crtc->y); @@ -4049,7 +4055,6 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; - enum plane plane = intel_crtc->plane; WARN_ON(!crtc->enabled); @@ -4073,10 +4078,6 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) intel_set_pipe_csc(crtc); - /* Set up the display plane register */ - I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE | DISPPLANE_PIPE_CSC_ENABLE); - POSTING_READ(DSPCNTR(plane)); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, crtc->x, crtc->y); @@ -4632,9 +4633,7 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; - int plane = intel_crtc->plane; bool is_dsi; - u32 dspcntr; WARN_ON(!crtc->enabled); @@ -4650,27 +4649,13 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) vlv_prepare_pll(intel_crtc); } - /* Set up the display plane register */ - dspcntr = DISPPLANE_GAMMA_ENABLE; - if (intel_crtc->config.has_dp_encoder) intel_dp_set_m_n(intel_crtc); 
intel_set_pipe_timings(intel_crtc); - /* pipesrc and dspsize control the size that is scaled from, - * which should always be the user's requested size. - */ - I915_WRITE(DSPSIZE(plane), - ((intel_crtc->config.pipe_src_h - 1) << 16) | - (intel_crtc->config.pipe_src_w - 1)); - I915_WRITE(DSPPOS(plane), 0); - i9xx_set_pipeconf(intel_crtc); - I915_WRITE(DSPCNTR(plane), dspcntr); - POSTING_READ(DSPCNTR(plane)); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, crtc->x, crtc->y); @@ -4725,8 +4710,6 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; - int plane = intel_crtc->plane; - u32 dspcntr; WARN_ON(!crtc->enabled); @@ -4735,32 +4718,13 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc) i9xx_set_pll_dividers(intel_crtc); - /* Set up the display plane register */ - dspcntr = DISPPLANE_GAMMA_ENABLE; - - if (pipe == 0) - dspcntr &= ~DISPPLANE_SEL_PIPE_MASK; - else - dspcntr |= DISPPLANE_SEL_PIPE_B; - if (intel_crtc->config.has_dp_encoder) intel_dp_set_m_n(intel_crtc); intel_set_pipe_timings(intel_crtc); - /* pipesrc and dspsize control the size that is scaled from, - * which should always be the user's requested size. - */ - I915_WRITE(DSPSIZE(plane), - ((intel_crtc->config.pipe_src_h - 1) << 16) | - (intel_crtc->config.pipe_src_w - 1)); - I915_WRITE(DSPPOS(plane), 0); - i9xx_set_pipeconf(intel_crtc); - I915_WRITE(DSPCNTR(plane), dspcntr); - POSTING_READ(DSPCNTR(plane)); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, crtc->x, crtc->y); -- cgit v1.2.3 From fdd508a6419217cce28213f3c9bd27c02a0d4c71 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 8 Aug 2014 21:51:11 +0300 Subject: drm/i915: Call .update_primary_plane in intel_{enable, disable}_primary_hw_plane() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the intel_{enable,disable}_primary_hw_plane() simply call .update_primary_plane(), thus eliminating the rmw from these functions which should help the poor old 830M. Now we can also remove the .update_primary_plane() from the .crtc_enable() hooks because we end up calling it via intel_crtc_enable_planes()->intel_enable_primary_hw_plane(). This also has the nice benefit of making primary planes a bit closer to the way we handle sprite planes during modesets. v2: Just write 0 to DSPCNTR and DSPSURF/DSPADDR if the plane is (to be) disabled. Quicker, and more importantly avoids an oops when fb==NULL due to BIOS fb takeover failure. Pimp the commit message a bit (Matt) v3: Drop useless primary_enabled checks when setting DISPLAY_PLANE_ENABLE Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 119 +++++++++++++++-------------------- 1 file changed, 49 insertions(+), 70 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 041fd76a2ded..f306c91b74a5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2082,35 +2082,28 @@ void intel_flush_primary_plane(struct drm_i915_private *dev_priv, /** * intel_enable_primary_hw_plane - enable the primary plane on a given pipe - * @dev_priv: i915 private structure - * @plane: plane to enable - * @pipe: pipe being fed + * @plane: plane to be enabled + * @crtc: crtc for the plane * - * Enable @plane on @pipe, making sure that @pipe is running first. 
+ * Enable @plane on @crtc, making sure that the pipe is running first. */ -static void intel_enable_primary_hw_plane(struct drm_i915_private *dev_priv, - enum plane plane, enum pipe pipe) +static void intel_enable_primary_hw_plane(struct drm_plane *plane, + struct drm_crtc *crtc) { - struct drm_device *dev = dev_priv->dev; - struct intel_crtc *intel_crtc = - to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); - int reg; - u32 val; + struct drm_device *dev = plane->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); /* If the pipe isn't enabled, we can't pump pixels and may hang */ - assert_pipe_enabled(dev_priv, pipe); + assert_pipe_enabled(dev_priv, intel_crtc->pipe); if (intel_crtc->primary_enabled) return; intel_crtc->primary_enabled = true; - reg = DSPCNTR(plane); - val = I915_READ(reg); - WARN_ON(val & DISPLAY_PLANE_ENABLE); - - I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE); - intel_flush_primary_plane(dev_priv, plane); + dev_priv->display.update_primary_plane(crtc, plane->fb, + crtc->x, crtc->y); /* * BDW signals flip done immediately if the plane @@ -2123,31 +2116,27 @@ static void intel_enable_primary_hw_plane(struct drm_i915_private *dev_priv, /** * intel_disable_primary_hw_plane - disable the primary hardware plane - * @dev_priv: i915 private structure - * @plane: plane to disable - * @pipe: pipe consuming the data + * @plane: plane to be disabled + * @crtc: crtc for the plane * - * Disable @plane; should be an independent operation. + * Disable @plane on @crtc, making sure that the pipe is running first. */ -static void intel_disable_primary_hw_plane(struct drm_i915_private *dev_priv, - enum plane plane, enum pipe pipe) +static void intel_disable_primary_hw_plane(struct drm_plane *plane, + struct drm_crtc *crtc) { - struct intel_crtc *intel_crtc = - to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); - int reg; - u32 val; + struct drm_device *dev = plane->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + + assert_pipe_enabled(dev_priv, intel_crtc->pipe); if (!intel_crtc->primary_enabled) return; intel_crtc->primary_enabled = false; - reg = DSPCNTR(plane); - val = I915_READ(reg); - WARN_ON((val & DISPLAY_PLANE_ENABLE) == 0); - - I915_WRITE(reg, val & ~DISPLAY_PLANE_ENABLE); - intel_flush_primary_plane(dev_priv, plane); + dev_priv->display.update_primary_plane(crtc, plane->fb, + crtc->x, crtc->y); } static bool need_vtd_wa(struct drm_device *dev) @@ -2390,10 +2379,19 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc, u32 dspcntr; u32 reg = DSPCNTR(plane); + if (!intel_crtc->primary_enabled) { + I915_WRITE(reg, 0); + if (INTEL_INFO(dev)->gen >= 4) + I915_WRITE(DSPSURF(plane), 0); + else + I915_WRITE(DSPADDR(plane), 0); + POSTING_READ(reg); + return; + } + dspcntr = DISPPLANE_GAMMA_ENABLE; - if (intel_crtc->primary_enabled) - dspcntr |= DISPLAY_PLANE_ENABLE; + dspcntr |= DISPLAY_PLANE_ENABLE; if (INTEL_INFO(dev)->gen < 4) { if (intel_crtc->pipe == PIPE_B) @@ -2487,10 +2485,16 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc, u32 dspcntr; u32 reg = DSPCNTR(plane); + if (!intel_crtc->primary_enabled) { + I915_WRITE(reg, 0); + I915_WRITE(DSPSURF(plane), 0); + POSTING_READ(reg); + return; + } + dspcntr = DISPPLANE_GAMMA_ENABLE; - if (intel_crtc->primary_enabled) - dspcntr |= DISPLAY_PLANE_ENABLE; + dspcntr |= DISPLAY_PLANE_ENABLE; if (IS_HASWELL(dev) || IS_BROADWELL(dev)) dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; @@ -3884,14 
+3888,12 @@ static void intel_crtc_dpms_overlay(struct intel_crtc *intel_crtc, bool enable) static void intel_crtc_enable_planes(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; - int plane = intel_crtc->plane; drm_vblank_on(dev, pipe); - intel_enable_primary_hw_plane(dev_priv, plane, pipe); + intel_enable_primary_hw_plane(crtc->primary, crtc); intel_enable_planes(crtc); intel_crtc_update_cursor(crtc, true); intel_crtc_dpms_overlay(intel_crtc, true); @@ -3928,7 +3930,7 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc) intel_crtc_dpms_overlay(intel_crtc, false); intel_crtc_update_cursor(crtc, false); intel_disable_planes(crtc); - intel_disable_primary_hw_plane(dev_priv, plane, pipe); + intel_disable_primary_hw_plane(crtc->primary, crtc); /* * FIXME: Once we grow proper nuclear flip support out of this we need @@ -3968,9 +3970,6 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) ironlake_set_pipeconf(crtc); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, - crtc->x, crtc->y); - intel_crtc->active = true; intel_set_cpu_fifo_underrun_reporting(dev, pipe, true); @@ -4078,9 +4077,6 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) intel_set_pipe_csc(crtc); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, - crtc->x, crtc->y); - intel_crtc->active = true; intel_set_cpu_fifo_underrun_reporting(dev, pipe, true); @@ -4629,7 +4625,6 @@ static void valleyview_modeset_global_resources(struct drm_device *dev) static void valleyview_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; @@ -4656,9 +4651,6 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) i9xx_set_pipeconf(intel_crtc); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, - crtc->x, crtc->y); - intel_crtc->active = true; intel_set_cpu_fifo_underrun_reporting(dev, pipe, true); @@ -4706,7 +4698,6 @@ static void i9xx_set_pll_dividers(struct intel_crtc *crtc) static void i9xx_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; @@ -4725,9 +4716,6 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc) i9xx_set_pipeconf(intel_crtc); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, - crtc->x, crtc->y); - intel_crtc->active = true; if (!IS_GEN2(dev)) @@ -11351,7 +11339,6 @@ static int intel_crtc_set_config(struct drm_mode_set *set) ret = intel_set_mode(set->crtc, set->mode, set->x, set->y, set->fb); } else if (config->fb_changed) { - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(set->crtc); intel_crtc_wait_for_pending_flips(set->crtc); @@ -11365,8 +11352,7 @@ static int intel_crtc_set_config(struct drm_mode_set *set) */ if (!intel_crtc->primary_enabled && ret == 0) { WARN_ON(!intel_crtc->active); - intel_enable_primary_hw_plane(dev_priv, intel_crtc->plane, - intel_crtc->pipe); + intel_enable_primary_hw_plane(set->crtc->primary, set->crtc); } /* @@ -11519,8 +11505,6 @@ static int intel_primary_plane_disable(struct drm_plane *plane) { struct drm_device *dev = plane->dev; - 
struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_plane *intel_plane = to_intel_plane(plane); struct intel_crtc *intel_crtc; if (!plane->fb) return 0; @@ -11543,8 +11527,8 @@ intel_primary_plane_disable(struct drm_plane *plane) goto disable_unpin; intel_crtc_wait_for_pending_flips(plane->crtc); - intel_disable_primary_hw_plane(dev_priv, intel_plane->plane, - intel_plane->pipe); + intel_disable_primary_hw_plane(plane, plane->crtc); + disable_unpin: mutex_lock(&dev->struct_mutex); i915_gem_track_fb(intel_fb_obj(plane->fb), NULL, @@ -11564,9 +11548,7 @@ intel_primary_plane_setplane(struct drm_plane *plane, struct drm_crtc *crtc, uint32_t src_w, uint32_t src_h) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_plane *intel_plane = to_intel_plane(plane); struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->fb); struct drm_rect dest = { @@ -11653,9 +11635,7 @@ intel_primary_plane_setplane(struct drm_plane *plane, struct drm_crtc *crtc, INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe)); if (intel_crtc->primary_enabled) - intel_disable_primary_hw_plane(dev_priv, - intel_plane->plane, - intel_plane->pipe); + intel_disable_primary_hw_plane(plane, crtc); if (plane->fb != fb) @@ -11672,8 +11652,7 @@ intel_primary_plane_setplane(struct drm_plane *plane, struct drm_crtc *crtc, return ret; if (!intel_crtc->primary_enabled) - intel_enable_primary_hw_plane(dev_priv, intel_crtc->plane, - intel_crtc->pipe); + intel_enable_primary_hw_plane(plane, crtc); return 0; } -- cgit v1.2.3 From 22c59960d9fe72f3fbd28de69cc43c5522dd5fe6 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 8 Aug 2014 17:45:32 -0300 Subject: drm/i915: fix i915_interrupt_info on BDW Currently, if the machine is runtime suspended and you read the file, you will get an "Unclaimed register" error message. Testcase: igt/pm_rpm/debugfs-read Signed-off-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 330caa1ab9f9..3b7decbeeed3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -703,6 +703,12 @@ static int i915_interrupt_info(struct seq_file *m, void *data) } for_each_pipe(pipe) { + if (!intel_display_power_enabled(dev_priv, + POWER_DOMAIN_PIPE(pipe))) { + seq_printf(m, "Pipe %c power disabled\n", + pipe_name(pipe)); + continue; + } seq_printf(m, "Pipe %c IMR:\t%08x\n", pipe_name(pipe), I915_READ(GEN8_DE_PIPE_IMR(pipe))); -- cgit v1.2.3 From 3bb11b536c1037143765b4efc8056600438df7f6 Mon Sep 17 00:00:00 2001 From: Sonika Jindal Date: Mon, 11 Aug 2014 09:06:39 +0530 Subject: drm/i915: Continuation of future readiness series Removing the check for HAS_PCH_SPLIT, as it looks redundant here. Anyway, all the platforms are checked separately.
v2: Reordering as per the gen (Ville) Signed-off-by: Sonika Jindal Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 42 +++++++++++++++++------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f306c91b74a5..0746590ed4e3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12354,29 +12354,27 @@ static void intel_init_display(struct drm_device *dev) dev_priv->display.get_display_clock_speed = i830_get_display_clock_speed; - if (HAS_PCH_SPLIT(dev)) { - if (IS_GEN5(dev)) { - dev_priv->display.fdi_link_train = ironlake_fdi_link_train; - dev_priv->display.write_eld = ironlake_write_eld; - } else if (IS_GEN6(dev)) { - dev_priv->display.fdi_link_train = gen6_fdi_link_train; - dev_priv->display.write_eld = ironlake_write_eld; - dev_priv->display.modeset_global_resources = - snb_modeset_global_resources; - } else if (IS_IVYBRIDGE(dev)) { - /* FIXME: detect B0+ stepping and use auto training */ - dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train; - dev_priv->display.write_eld = ironlake_write_eld; - dev_priv->display.modeset_global_resources = - ivb_modeset_global_resources; - } else if (IS_HASWELL(dev) || IS_GEN8(dev)) { - dev_priv->display.fdi_link_train = hsw_fdi_link_train; - dev_priv->display.write_eld = haswell_write_eld; - dev_priv->display.modeset_global_resources = - haswell_modeset_global_resources; - } - } else if (IS_G4X(dev)) { + if (IS_G4X(dev)) { dev_priv->display.write_eld = g4x_write_eld; + } else if (IS_GEN5(dev)) { + dev_priv->display.fdi_link_train = ironlake_fdi_link_train; + dev_priv->display.write_eld = ironlake_write_eld; + } else if (IS_GEN6(dev)) { + dev_priv->display.fdi_link_train = gen6_fdi_link_train; + dev_priv->display.write_eld = ironlake_write_eld; + dev_priv->display.modeset_global_resources = + snb_modeset_global_resources; + } else if (IS_IVYBRIDGE(dev)) { + /* FIXME: detect B0+ stepping and use auto training */ + dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train; + dev_priv->display.write_eld = ironlake_write_eld; + dev_priv->display.modeset_global_resources = + ivb_modeset_global_resources; + } else if (IS_HASWELL(dev) || IS_GEN8(dev)) { + dev_priv->display.fdi_link_train = hsw_fdi_link_train; + dev_priv->display.write_eld = haswell_write_eld; + dev_priv->display.modeset_global_resources = + haswell_modeset_global_resources; } else if (IS_VALLEYVIEW(dev)) { dev_priv->display.modeset_global_resources = valleyview_modeset_global_resources; -- cgit v1.2.3 From d6699dd3a7f696a80a5f8e5bb6ecf6ff6dd7c998 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 9 Aug 2014 16:29:31 +0100 Subject: drm/i915: Fix wrong number of HDMI translation entries I keep telling myself that those tables aren't great because their size is the number of dwords we need to program and not the number of entries (number of dwords = number of entries * 2). And... I got it wrong when I refactored the code. Fortunately, it was only wrong when the VBT table (or the code parsing it) is itself erroneous. Long story short, it shouldn't matter, but still, there's a potential array overflow and random programming of the DDI translation tables. 
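To make the dwords-vs-entries relationship concrete, a minimal self-contained sketch (illustrative only; the table values below are placeholders, not the real BDW settings):

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

/* Each translation entry occupies two dwords (trans1, trans2), so an
 * array with 2*N elements describes N entries. */
static const unsigned int example_ddi_translations_hdmi[] = {
	0x00FFFFFF, 0x0007000E,	/* entry 0 */
	0x00D75FFF, 0x000E000A,	/* entry 1 */
};

/* number of entries = number of dwords / 2 */
static const unsigned int n_hdmi_entries =
	ARRAY_SIZE(example_ddi_translations_hdmi) / 2;	/* 2 entries, 4 dwords */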
Cc: Paulo Zanoni Signed-off-by: Damien Lespiau Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ca1f9a8a7d03..02d55843c78d 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -169,14 +169,14 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) ddi_translations_dp = bdw_ddi_translations_dp; ddi_translations_edp = bdw_ddi_translations_edp; ddi_translations_hdmi = bdw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi) / 2; hdmi_800mV_0dB = 7; } else if (IS_HASWELL(dev)) { ddi_translations_fdi = hsw_ddi_translations_fdi; ddi_translations_dp = hsw_ddi_translations_dp; ddi_translations_edp = hsw_ddi_translations_dp; ddi_translations_hdmi = hsw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi) / 2; hdmi_800mV_0dB = 6; } else { WARN(1, "ddi translation table missing\n"); @@ -184,7 +184,7 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; ddi_translations_hdmi = bdw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi) / 2; hdmi_800mV_0dB = 7; } -- cgit v1.2.3 From dc8cd1e790081a31ba4d86c3c0812c348eeec7fc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Aug 2014 17:37:22 +0100 Subject: drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt If an object is not bound into the global GTT, then it cannot be accessed via the GTT. This restores the original code that was muddled by ppGTT. In the process, we remove a WARN that had long outlived its usefulness and was simply being coded around instead. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 20743bd421f9..1be7e541a7c7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3592,11 +3592,12 @@ int i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); uint32_t old_write_domain, old_read_domains; int ret; /* Not valid to be called on unbound objects. 
*/ - if (!i915_gem_obj_bound_any(obj)) + if (vma == NULL) return -EINVAL; if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) @@ -3638,13 +3639,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) old_write_domain); /* And bump the LRU for this access */ - if (i915_gem_object_is_inactive(obj)) { - struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); - if (vma) - list_move_tail(&vma->mm_list, - &dev_priv->gtt.base.inactive_list); - - } + if (i915_gem_object_is_inactive(obj)) + list_move_tail(&vma->mm_list, + &dev_priv->gtt.base.inactive_list); return 0; } @@ -3808,9 +3805,6 @@ static bool is_pin_display(struct drm_i915_gem_object *obj) { struct i915_vma *vma; - if (list_empty(&obj->vma_list)) - return false; - vma = i915_gem_obj_to_ggtt(obj); if (!vma) return false; @@ -5253,12 +5247,6 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) { struct i915_vma *vma; - /* This WARN has probably outlived its usefulness (callers already - * WARN if they don't find the GGTT vma they expect). When removing, - * remember to remove the pre-check in is_pin_display() as well */ - if (WARN_ON(list_empty(&obj->vma_list))) - return NULL; - vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link); if (vma->vm != obj_to_ggtt(obj)) return NULL; -- cgit v1.2.3 From e6a844687cf929ec053c7578d5ecc794a8a6c5cf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Aug 2014 12:00:12 +0200 Subject: drm/i915: Force CPU relocations if not GTT mapped Move the decision on whether we need to have a mappable object during execbuffer to the fore and then reuse that decision by propagating the flag through to reservation. As a corollary, before doing the actual relocation through the GTT, we can make sure that we do have a GTT mapping through which to operate. Note that the key to make this work is to ditch the obj->map_and_fenceable unbind optimization - with full ppgtt it doesn't make a lot of sense any more anyway. v2: Revamp and resend to ease future patches. v3: Refresh patch rationale References: https://bugs.freedesktop.org/show_bug.cgi?id=81094 Signed-off-by: Chris Wilson Cc: Ben Widawsky Cc: Daniel Vetter [danvet: Explain why obj->map_and_fenceable is key and split out the secure batch fix.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 8 ++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 59 +++++++++++++++--------------- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1be7e541a7c7..1ca2231e3929 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2928,9 +2928,8 @@ int i915_vma_unbind(struct i915_vma *vma) vma->unbind_vma(vma); list_del_init(&vma->mm_list); - /* Avoid an unnecessary call to unbind on rebind. */ if (i915_is_ggtt(vma->vm)) - obj->map_and_fenceable = true; + obj->map_and_fenceable = false; drm_mm_remove_node(&vma->node); i915_gem_vma_destroy(vma); @@ -3282,6 +3281,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) return 0; } } else if (enable) { + if (WARN_ON(!obj->map_and_fenceable)) + return -EINVAL; + reg = i915_find_fence_reg(dev); if (IS_ERR(reg)) return PTR_ERR(reg); @@ -4331,8 +4333,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, obj->fence_reg = I915_FENCE_REG_NONE; obj->madv = I915_MADV_WILLNEED; - /* Avoid an unnecessary call to unbind on the first bind. 
*/ - obj->map_and_fenceable = true; i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 60998fc4e5b2..6320a385841b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -35,6 +35,7 @@ #define __EXEC_OBJECT_HAS_PIN (1<<31) #define __EXEC_OBJECT_HAS_FENCE (1<<30) +#define __EXEC_OBJECT_NEEDS_MAP (1<<29) #define __EXEC_OBJECT_NEEDS_BIAS (1<<28) #define BATCH_OFFSET_BIAS (256*1024) @@ -534,14 +535,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb) return ret; } -static int -need_reloc_mappable(struct i915_vma *vma) -{ - struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - return entry->relocation_count && !use_cpu_reloc(vma->obj) && - i915_is_ggtt(vma->vm); -} - static int i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, struct intel_engine_cs *ring, @@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; - bool need_fence; uint64_t flags; int ret; flags = 0; - - need_fence = - has_fenced_gpu_access && - entry->flags & EXEC_OBJECT_NEEDS_FENCE && - obj->tiling_mode != I915_TILING_NONE; - if (need_fence || need_reloc_mappable(vma)) + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) flags |= PIN_MAPPABLE; - if (entry->flags & EXEC_OBJECT_NEEDS_GTT) flags |= PIN_GLOBAL; if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) @@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, } static bool -eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access) +need_reloc_mappable(struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - struct drm_i915_gem_object *obj = vma->obj; - bool need_fence, need_mappable; - need_fence = - has_fenced_gpu_access && - entry->flags & EXEC_OBJECT_NEEDS_FENCE && - obj->tiling_mode != I915_TILING_NONE; - need_mappable = need_fence || need_reloc_mappable(vma); + if (entry->relocation_count == 0) + return false; + + if (!i915_is_ggtt(vma->vm)) + return false; + + /* See also use_cpu_reloc() */ + if (HAS_LLC(vma->obj->base.dev)) + return false; + + if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return false; - WARN_ON((need_mappable || need_fence) && + return true; +} + +static bool +eb_vma_misplaced(struct i915_vma *vma) +{ + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; + struct drm_i915_gem_object *obj = vma->obj; + + WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !i915_is_ggtt(vma->vm)); if (entry->alignment && vma->node.start & (entry->alignment - 1)) return true; - if (need_mappable && !obj->map_and_fenceable) + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) return true; if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && @@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(vma); - if (need_mappable) + if (need_mappable) { + entry->flags |= __EXEC_OBJECT_NEEDS_MAP; list_move(&vma->exec_list, &ordered_vmas); - else + } else list_move_tail(&vma->exec_list, &ordered_vmas); obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; @@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, if (!drm_mm_node_allocated(&vma->node)) continue; - if 
(eb_vma_misplaced(vma, has_fenced_gpu_access)) + if (eb_vma_misplaced(vma)) ret = i915_vma_unbind(vma); else ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); -- cgit v1.2.3 From 82b6b6d786466e705e7244cc676189ce47a9199a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Aug 2014 17:37:24 +0100 Subject: drm/i915: Remove fenced_gpu_access and pending_fenced_gpu_access This migrates the fence tracking onto the existing seqno infrastructure so that the later conversion to tracking via requests is simplified. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 7 ------ drivers/gpu/drm/i915/i915_gem.c | 17 --------------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 34 +++++++++++++++++------------- drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- 4 files changed, 20 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fab97bc3215f..5c3f033ff928 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1777,13 +1777,6 @@ struct drm_i915_gem_object { * Only honoured if hardware has relevant pte bit */ unsigned long gt_ro:1; - - /* - * Is the GPU currently using a fence to access this buffer, - */ - unsigned int pending_fenced_gpu_access:1; - unsigned int fenced_gpu_access:1; - unsigned int cache_level:3; unsigned int has_aliasing_ppgtt_mapping:1; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1ca2231e3929..3eec344bdac0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2161,8 +2161,6 @@ static void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, struct intel_engine_cs *ring) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; u32 seqno = intel_ring_get_seqno(ring); BUG_ON(ring == NULL); @@ -2181,19 +2179,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, list_move_tail(&obj->ring_list, &ring->active_list); obj->last_read_seqno = seqno; - - if (obj->fenced_gpu_access) { - obj->last_fenced_seqno = seqno; - - /* Bump MRU to take account of the delayed flush */ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - struct drm_i915_fence_reg *reg; - - reg = &dev_priv->fence_regs[obj->fence_reg]; - list_move_tail(®->lru_list, - &dev_priv->mm.fence_list); - } - } } void i915_vma_move_to_active(struct i915_vma *vma, @@ -2229,7 +2214,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) obj->base.write_domain = 0; obj->last_fenced_seqno = 0; - obj->fenced_gpu_access = false; obj->active = 0; drm_gem_object_unreference(&obj->base); @@ -3174,7 +3158,6 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) obj->last_fenced_seqno = 0; } - obj->fenced_gpu_access = false; return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 6320a385841b..70946c551e5d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -542,7 +542,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, { struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; uint64_t flags; int ret; @@ -560,17 +559,13 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, entry->flags |= __EXEC_OBJECT_HAS_PIN; - if (has_fenced_gpu_access) { - if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { - ret = 
i915_gem_object_get_fence(obj); - if (ret) - return ret; - - if (i915_gem_object_pin_fence(obj)) - entry->flags |= __EXEC_OBJECT_HAS_FENCE; + if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { + ret = i915_gem_object_get_fence(obj); + if (ret) + return ret; - obj->pending_fenced_gpu_access = true; - } + if (i915_gem_object_pin_fence(obj)) + entry->flags |= __EXEC_OBJECT_HAS_FENCE; } if (entry->offset != vma->node.start) { @@ -658,8 +653,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, obj = vma->obj; entry = vma->exec_entry; + if (!has_fenced_gpu_access) + entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; need_fence = - has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(vma); @@ -672,7 +668,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; obj->base.pending_write_domain = 0; - obj->pending_fenced_gpu_access = false; } list_splice(&ordered_vmas, vmas); @@ -959,9 +954,11 @@ static void i915_gem_execbuffer_move_to_active(struct list_head *vmas, struct intel_engine_cs *ring) { + u32 seqno = intel_ring_get_seqno(ring); struct i915_vma *vma; list_for_each_entry(vma, vmas, exec_list) { + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; struct drm_i915_gem_object *obj = vma->obj; u32 old_read = obj->base.read_domains; u32 old_write = obj->base.write_domain; @@ -970,18 +967,25 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, if (obj->base.write_domain == 0) obj->base.pending_read_domains |= obj->base.read_domains; obj->base.read_domains = obj->base.pending_read_domains; - obj->fenced_gpu_access = obj->pending_fenced_gpu_access; i915_vma_move_to_active(vma, ring); if (obj->base.write_domain) { obj->dirty = 1; - obj->last_write_seqno = intel_ring_get_seqno(ring); + obj->last_write_seqno = seqno; intel_fb_obj_invalidate(obj, ring); /* update for the implicit flush after a batch */ obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; } + if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { + obj->last_fenced_seqno = seqno; + if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { + struct drm_i915_private *dev_priv = to_i915(ring->dev); + list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, + &dev_priv->mm.fence_list); + } + } trace_i915_gem_object_change_domain(obj, old_read, old_write); } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index cb150e8b4336..7e623bf097a1 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -376,7 +376,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, if (ret == 0) { obj->fence_dirty = - obj->fenced_gpu_access || + obj->last_fenced_seqno || obj->fence_reg != I915_FENCE_REG_NONE; obj->tiling_mode = args->tiling_mode; -- cgit v1.2.3 From 87f1f46514babd40fc3551ca2d6148cdedd9c7e3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Aug 2014 19:18:42 +0100 Subject: drm/i915: Copy PCI device id into the device info block This is so that we can make the drm_i915_private->info always the preferred source for chipset type and feature queries. 
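Reduced to a sketch, the write-once pattern looks like this (simplified types; i915_copy_info() is a made-up name, the real copy lives in i915_driver_load() as the diff below shows):

struct intel_device_info {
	unsigned short device_id;
	/* feature flags elided */
};

struct drm_i915_private {
	const struct intel_device_info info;	/* read-only after load */
};

static void i915_copy_info(struct drm_i915_private *dev_priv,
			   const struct intel_device_info *tmpl,
			   unsigned short pci_id)
{
	/* Cast away const exactly once, during initialisation */
	struct intel_device_info *info =
		(struct intel_device_info *)&dev_priv->info;

	*info = *tmpl;			/* copy the static chipset template */
	info->device_id = pci_id;	/* then stamp in the PCI device id */
}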
Signed-off-by: Chris Wilson Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 5 +++-- drivers/gpu/drm/i915/i915_drv.h | 50 +++++++++++++++++++++-------------------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index c965698a8bac..1867e2619e73 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1603,9 +1603,10 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) dev->dev_private = dev_priv; dev_priv->dev = dev; - /* copy initial configuration to dev_priv->info */ + /* Setup the write-once "constant" device info */ device_info = (struct intel_device_info *)&dev_priv->info; - *device_info = *info; + memcpy(device_info, info, sizeof(dev_priv->info)); + device_info->device_id = dev->pdev->device; spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->gpu_error.lock); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5c3f033ff928..8a55f07d80cb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -558,6 +558,7 @@ struct intel_uncore { struct intel_device_info { u32 display_mmio_offset; + u16 device_id; u8 num_pipes:3; u8 num_sprites[I915_MAX_PIPES]; u8 gen; @@ -1980,51 +1981,52 @@ struct drm_i915_cmd_table { int count; }; -#define INTEL_INFO(dev) (&to_i915(dev)->info) +#define INTEL_INFO(p) (&to_i915(p)->info) +#define INTEL_DEVID(p) (INTEL_INFO(p)->device_id) -#define IS_I830(dev) ((dev)->pdev->device == 0x3577) -#define IS_845G(dev) ((dev)->pdev->device == 0x2562) +#define IS_I830(dev) (INTEL_DEVID(dev) == 0x3577) +#define IS_845G(dev) (INTEL_DEVID(dev) == 0x2562) #define IS_I85X(dev) (INTEL_INFO(dev)->is_i85x) -#define IS_I865G(dev) ((dev)->pdev->device == 0x2572) +#define IS_I865G(dev) (INTEL_DEVID(dev) == 0x2572) #define IS_I915G(dev) (INTEL_INFO(dev)->is_i915g) -#define IS_I915GM(dev) ((dev)->pdev->device == 0x2592) -#define IS_I945G(dev) ((dev)->pdev->device == 0x2772) +#define IS_I915GM(dev) (INTEL_DEVID(dev) == 0x2592) +#define IS_I945G(dev) (INTEL_DEVID(dev) == 0x2772) #define IS_I945GM(dev) (INTEL_INFO(dev)->is_i945gm) #define IS_BROADWATER(dev) (INTEL_INFO(dev)->is_broadwater) #define IS_CRESTLINE(dev) (INTEL_INFO(dev)->is_crestline) -#define IS_GM45(dev) ((dev)->pdev->device == 0x2A42) +#define IS_GM45(dev) (INTEL_DEVID(dev) == 0x2A42) #define IS_G4X(dev) (INTEL_INFO(dev)->is_g4x) -#define IS_PINEVIEW_G(dev) ((dev)->pdev->device == 0xa001) -#define IS_PINEVIEW_M(dev) ((dev)->pdev->device == 0xa011) +#define IS_PINEVIEW_G(dev) (INTEL_DEVID(dev) == 0xa001) +#define IS_PINEVIEW_M(dev) (INTEL_DEVID(dev) == 0xa011) #define IS_PINEVIEW(dev) (INTEL_INFO(dev)->is_pineview) #define IS_G33(dev) (INTEL_INFO(dev)->is_g33) -#define IS_IRONLAKE_M(dev) ((dev)->pdev->device == 0x0046) +#define IS_IRONLAKE_M(dev) (INTEL_DEVID(dev) == 0x0046) #define IS_IVYBRIDGE(dev) (INTEL_INFO(dev)->is_ivybridge) -#define IS_IVB_GT1(dev) ((dev)->pdev->device == 0x0156 || \ - (dev)->pdev->device == 0x0152 || \ - (dev)->pdev->device == 0x015a) -#define IS_SNB_GT1(dev) ((dev)->pdev->device == 0x0102 || \ - (dev)->pdev->device == 0x0106 || \ - (dev)->pdev->device == 0x010A) +#define IS_IVB_GT1(dev) (INTEL_DEVID(dev) == 0x0156 || \ + INTEL_DEVID(dev) == 0x0152 || \ + INTEL_DEVID(dev) == 0x015a) +#define IS_SNB_GT1(dev) (INTEL_DEVID(dev) == 0x0102 || \ + INTEL_DEVID(dev) == 0x0106 || \ + INTEL_DEVID(dev) == 0x010A) #define IS_VALLEYVIEW(dev) 
(INTEL_INFO(dev)->is_valleyview) #define IS_CHERRYVIEW(dev) (INTEL_INFO(dev)->is_valleyview && IS_GEN8(dev)) #define IS_HASWELL(dev) (INTEL_INFO(dev)->is_haswell) #define IS_BROADWELL(dev) (!INTEL_INFO(dev)->is_valleyview && IS_GEN8(dev)) #define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile) #define IS_HSW_EARLY_SDV(dev) (IS_HASWELL(dev) && \ - ((dev)->pdev->device & 0xFF00) == 0x0C00) + (INTEL_DEVID(dev) & 0xFF00) == 0x0C00) #define IS_BDW_ULT(dev) (IS_BROADWELL(dev) && \ - (((dev)->pdev->device & 0xf) == 0x2 || \ - ((dev)->pdev->device & 0xf) == 0x6 || \ - ((dev)->pdev->device & 0xf) == 0xe)) + ((INTEL_DEVID(dev) & 0xf) == 0x2 || \ + (INTEL_DEVID(dev) & 0xf) == 0x6 || \ + (INTEL_DEVID(dev) & 0xf) == 0xe)) #define IS_HSW_ULT(dev) (IS_HASWELL(dev) && \ - ((dev)->pdev->device & 0xFF00) == 0x0A00) + (INTEL_DEVID(dev) & 0xFF00) == 0x0A00) #define IS_ULT(dev) (IS_HSW_ULT(dev) || IS_BDW_ULT(dev)) #define IS_HSW_GT3(dev) (IS_HASWELL(dev) && \ - ((dev)->pdev->device & 0x00F0) == 0x0020) + (INTEL_DEVID(dev) & 0x00F0) == 0x0020) /* ULX machines are also considered ULT. */ -#define IS_HSW_ULX(dev) ((dev)->pdev->device == 0x0A0E || \ - (dev)->pdev->device == 0x0A1E) +#define IS_HSW_ULX(dev) (INTEL_DEVID(dev) == 0x0A0E || \ + INTEL_DEVID(dev) == 0x0A1E) #define IS_PRELIMINARY_HW(intel_info) ((intel_info)->is_preliminary) /* -- cgit v1.2.3 From f6daaec29b2a201eb8db2ce26b4460b779ad8111 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 9 Aug 2014 23:00:56 +0100 Subject: drm/i915: Make intel_disable_shared_dpll() static Found with sparse. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0746590ed4e3..245cf4128314 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1797,7 +1797,7 @@ static void intel_enable_shared_dpll(struct intel_crtc *crtc) pll->on = true; } -void intel_disable_shared_dpll(struct intel_crtc *crtc) +static void intel_disable_shared_dpll(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; -- cgit v1.2.3 From 1bee20175f27b46427f10290fdd4a79334d41a60 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 9 Aug 2014 23:00:58 +0100 Subject: drm/i915: Remove set but unused 'gt_perf_status' Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 41de760bf1d4..12f4e143328c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3719,7 +3719,6 @@ static void gen6_enable_rps(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; u32 rp_state_cap; - u32 gt_perf_status; u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; u32 gtfifodbg; int rc6_mode; @@ -3744,7 +3743,6 @@ static void gen6_enable_rps(struct drm_device *dev) gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); - gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS); parse_rp_state_cap(dev_priv, rp_state_cap); -- cgit v1.2.3 From 9bec9b1334d687c0a9fcf3d3a1987a61b4826a45 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Aug 2014 09:21:35 +0100 Subject: drm/i915: Double check ring is idle before declaring the GPU wedged During ring initialisation, 
sometimes we observe, though not in production hardware, that the idle flag is not set even though the ring is empty. Double check before giving up. Signed-off-by: Chris Wilson Cc: Damien Lespiau Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 117543e58d48..a059b64a0fb2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -478,7 +478,12 @@ static bool stop_ring(struct intel_engine_cs *ring) I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING)); if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { DRM_ERROR("%s : timed out trying to stop ring\n", ring->name); - return false; + /* Sometimes we observe that the idle flag is not + * set even though the ring is empty. So double + * check before giving up. + */ + if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring)) + return false; } } -- cgit v1.2.3 From dbbe91279511d6a18a521b953a3c139e4787e660 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Aug 2014 19:18:43 +0100 Subject: drm/i915: Agnostic INTEL_INFO Adapt the macro so that we can pass either the struct drm_device or the struct drm_i915_private pointers and get the answer we want. Over time, my plan is to convert all users over to using drm_i915_private and so trimming down the pointer dance. Having spent a few hours chasing that goal and achieved over 8k of object code saving, it appears to be a worthwhile target. This interim macro allows us to slowly convert over. Signed-off-by: Chris Wilson [danvet: Drop the (struct drm_device *) cast per the m-l discussion. Also explain the seemingly unnecessary first cast.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 1867e2619e73..1763fbf34e1d 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1596,6 +1596,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!drm_core_check_feature(dev, DRIVER_MODESET) && !dev->agp) return -EINVAL; + /* For the ugly agnostic INTEL_INFO macro */ + BUILD_BUG_ON(sizeof(*dev_priv) == sizeof(*dev)); + dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL); if (dev_priv == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8a55f07d80cb..6959c1b2d648 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1981,7 +1981,10 @@ struct drm_i915_cmd_table { int count; }; -#define INTEL_INFO(p) (&to_i915(p)->info) +/* Note that the (struct drm_i915_private *) cast is just to shut up gcc. */ +#define __I915__(p) ((sizeof(*(p)) == sizeof(struct drm_i915_private)) ? \ + (struct drm_i915_private *)(p) : to_i915(p)) +#define INTEL_INFO(p) (&__I915__(p)->info) #define INTEL_DEVID(p) (INTEL_INFO(p)->device_id) #define IS_I830(dev) (INTEL_DEVID(dev) == 0x3577) -- cgit v1.2.3 From da51a1e7e398129d9fddd4b26b8469145dd4fd08 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 11 Aug 2014 12:08:58 +0200 Subject: drm/i915: Fix secure dispatch with full ppgtt Based upon a hunk from a patch from Chris Wilson, but augmented to: - Process the batch in the full ppgtt vm so that self-relocations match again with userspace's expectations.
- Add a comment why plain pin for the global gtt binding is safe at that point. v2: Drop local bind_vm variable (Chris). v3: Explain why this works despite the lack of proper active tracking for the ggtt batch vma. Cc: Chris Wilson Cc: Ben Widawsky Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 48 ++++++++++++++++-------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 70946c551e5d..e1eac15b2583 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -95,7 +95,6 @@ eb_lookup_vmas(struct eb_vmas *eb, struct i915_address_space *vm, struct drm_file *file) { - struct drm_i915_private *dev_priv = vm->dev->dev_private; struct drm_i915_gem_object *obj; struct list_head objects; int i, ret; @@ -130,7 +129,6 @@ eb_lookup_vmas(struct eb_vmas *eb, i = 0; while (!list_empty(&objects)) { struct i915_vma *vma; - struct i915_address_space *bind_vm = vm; if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT && USES_FULL_PPGTT(vm->dev)) { @@ -138,13 +136,6 @@ eb_lookup_vmas(struct eb_vmas *eb, goto err; } - /* If we have secure dispatch, or the userspace assures us that * they know what they're doing, use the GGTT VM. */ - if (((args->flags & I915_EXEC_SECURE) && - (i == (args->buffer_count - 1)))) - bind_vm = &dev_priv->gtt.base; - obj = list_first_entry(&objects, struct drm_i915_gem_object, obj_exec_link); @@ -157,7 +148,7 @@ eb_lookup_vmas(struct eb_vmas *eb, * from the (obj, vm) we don't run the risk of creating * duplicated vmas for the same vm. */ - vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm); + vma = i915_gem_obj_lookup_or_create_vma(obj, vm); if (IS_ERR(vma)) { DRM_DEBUG("Failed to lookup VMA\n"); ret = PTR_ERR(vma); @@ -1391,25 +1382,36 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ - if (flags & I915_DISPATCH_SECURE && - !batch_obj->has_global_gtt_mapping) { - /* When we have multiple VMs, we'll need to make sure that we - * allocate space first */ - struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj); - BUG_ON(!vma); - vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND); - } + if (flags & I915_DISPATCH_SECURE) { + /* + * So on first glance it looks freaky that we pin the batch here + * outside of the reservation loop. But: + * - The batch is already pinned into the relevant ppgtt, so we + * already have the backing storage fully allocated. + * - No other BO uses the global gtt (well contexts, but meh), + * so we don't really have issues with multiple objects not + * fitting due to fragmentation. + * So this is actually safe. + */ + ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0); + if (ret) + goto err; - if (flags & I915_DISPATCH_SECURE) exec_start += i915_gem_obj_ggtt_offset(batch_obj); - else + } else exec_start += i915_gem_obj_offset(batch_obj, vm); ret = legacy_ringbuffer_submission(dev, file, ring, ctx, - args, &eb->vmas, batch_obj, exec_start, flags); - if (ret) - goto err; + args, &eb->vmas, batch_obj, exec_start, flags); + /* + * FIXME: We crucially rely upon the active tracking for the (ppgtt) + * batch vma for correctness. For less ugliness and fragility this + * needs to be adjusted to also track the ggtt batch vma properly as + * active.
+ */ + if (flags & I915_DISPATCH_SECURE) + i915_gem_object_ggtt_unpin(batch_obj); err: /* the request owns the ref now */ i915_gem_context_unreference(ctx); -- cgit v1.2.3 From ad19f10bc2a5964f1564639e60953de76b7e50f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 10 Aug 2014 06:29:08 +0100 Subject: drm/i915: Pre-validate the NEED_GTTS flag for execbuffer We have an implementation requirement that precludes the user from requesting a ggtt entry when the device is operating in ppgtt mode. Move the current check from inside the execbuffer object collation to the prevalidation phase. v2: Roll both invalid flags checks into one Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e1eac15b2583..446f4b6fbefe 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -130,12 +130,6 @@ eb_lookup_vmas(struct eb_vmas *eb, while (!list_empty(&objects)) { struct i915_vma *vma; - if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT && - USES_FULL_PPGTT(vm->dev)) { - ret = -EINVAL; - goto err; - } - obj = list_first_entry(&objects, struct drm_i915_gem_object, obj_exec_link); @@ -877,18 +871,24 @@ i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) } static int -validate_exec_list(struct drm_i915_gem_exec_object2 *exec, +validate_exec_list(struct drm_device *dev, + struct drm_i915_gem_exec_object2 *exec, int count) { - int i; unsigned relocs_total = 0; unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry); + unsigned invalid_flags; + int i; + + invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; + if (USES_FULL_PPGTT(dev)) + invalid_flags |= EXEC_OBJECT_NEEDS_GTT; for (i = 0; i < count; i++) { char __user *ptr = to_user_ptr(exec[i].relocs_ptr); int length; /* limited by fault_in_pages_readable() */ - if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS) + if (exec[i].flags & invalid_flags) return -EINVAL; /* First check for malicious input causing overflow in @@ -1250,7 +1250,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (!i915_gem_check_execbuffer(args)) return -EINVAL; - ret = validate_exec_list(exec, args->buffer_count); + ret = validate_exec_list(dev, exec, args->buffer_count); if (ret) return ret; -- cgit v1.2.3 From 060e82c6f4ccf678decffb28ba8301ca9220a995 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 10 Aug 2014 06:29:10 +0100 Subject: drm/i915: Remove redundant list_empty(eb->vmas) tests in execbuffer Part of the pre-validation for an execbuffer call is that there is at least one object in the execlist. As we bail if we fail to lookup any object, we can be sure that after the eb_lookup_vma() there is at least one object in the vma list and so we do not need to assert. 
Signed-off-by: Chris Wilson Cc: Ben Widawsky Cc: Daniel Vetter Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 446f4b6fbefe..6c07940b468c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -622,9 +622,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; int retry; - if (list_empty(vmas)) - return 0; - i915_gem_retire_requests_ring(ring); vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; @@ -725,9 +722,6 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, int i, total, ret; unsigned count = args->buffer_count; - if (WARN_ON(list_empty(&eb->vmas))) - return 0; - vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm; /* We may process another execbuffer during the unlock... */ -- cgit v1.2.3 From 906843c3a1acc36407e500a073679c4207d307cd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 10 Aug 2014 06:29:11 +0100 Subject: drm/i915: Simplify relocate_entry_gtt() and make 64-bit safe Even though we should not try to use 4+GiB GTTs on 32-bit systems, by using a local variable we can future proof the code whilst making it easier to read. Signed-off-by: Chris Wilson [danvet: Appease checkpatch a bit.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 6c07940b468c..dec2cc2fbd42 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -293,7 +293,7 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; uint64_t delta = reloc->delta + target_offset; - uint32_t __iomem *reloc_entry; + uint64_t offset; void __iomem *reloc_page; int ret; @@ -306,25 +306,24 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, return ret; /* Map the page containing the relocation we're going to perform. 
*/ - reloc->offset += i915_gem_obj_ggtt_offset(obj); + offset = i915_gem_obj_ggtt_offset(obj); + offset += reloc->offset; reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, - reloc->offset & PAGE_MASK); - reloc_entry = (uint32_t __iomem *) - (reloc_page + offset_in_page(reloc->offset)); - iowrite32(lower_32_bits(delta), reloc_entry); + offset & PAGE_MASK); + iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset)); if (INTEL_INFO(dev)->gen >= 8) { - reloc_entry += 1; + offset += sizeof(uint32_t); - if (offset_in_page(reloc->offset + sizeof(uint32_t)) == 0) { + if (offset_in_page(offset) == 0) { io_mapping_unmap_atomic(reloc_page); - reloc_page = io_mapping_map_atomic_wc( - dev_priv->gtt.mappable, - reloc->offset + sizeof(uint32_t)); - reloc_entry = reloc_page; + reloc_page = + io_mapping_map_atomic_wc(dev_priv->gtt.mappable, + offset); } - iowrite32(upper_32_bits(delta), reloc_entry); + iowrite32(upper_32_bits(delta), + reloc_page + offset_in_page(offset)); } io_mapping_unmap_atomic(reloc_page); -- cgit v1.2.3 From b20385f1f8434ec32d73414ffcadb7dcbd3a2a61 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:10 +0100 Subject: drm/i915/bdw: New source and header file for LRs, LRCs and Execlists Some legacy HW context code assumptions don't make sense for this new submission method, so we will place this stuff in a separate file. Note for reviewers: I've carefully considered the best name for this file and this was my best option (other possibilities were intel_lr_context.c or intel_execlist.c). I am open to a certain bikeshedding on this matter, anyway. And some point in time, it would be a good idea to split intel_lrc.c/.h even further, but for the moment just shove everything together. v2: Change to intel_lrc.c v3: Squash together with the header file addition Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_lrc.c | 42 ++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 27 ++++++++++++++++++++++++++ 4 files changed, 71 insertions(+) create mode 100644 drivers/gpu/drm/i915/intel_lrc.c create mode 100644 drivers/gpu/drm/i915/intel_lrc.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 91bd167e1cb7..c1dd485aeb6c 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -31,6 +31,7 @@ i915-y += i915_cmd_parser.o \ i915_gpu_error.o \ i915_irq.o \ i915_trace_points.o \ + intel_lrc.o \ intel_ringbuffer.o \ intel_uncore.o diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6959c1b2d648..ec2a094f3622 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -35,6 +35,7 @@ #include "i915_reg.h" #include "intel_bios.h" #include "intel_ringbuffer.h" +#include "intel_lrc.h" #include "i915_gem_gtt.h" #include #include diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c new file mode 100644 index 000000000000..49bb6fcdface --- /dev/null +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -0,0 +1,42 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell 
copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Ben Widawsky + * Michel Thierry + * Thomas Daniel + * Oscar Mateo + * + */ + +/* + * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". + * These expanded contexts enable a number of new abilities, especially + * "Execlists" (also implemented in this file). + * + * Execlists are the new method by which, on gen8+ hardware, workloads are + * submitted for execution (as opposed to the legacy, ringbuffer-based, method). + */ + +#include +#include +#include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h new file mode 100644 index 000000000000..f6830a4ec773 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -0,0 +1,27 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _INTEL_LRC_H_ +#define _INTEL_LRC_H_ + +#endif /* _INTEL_LRC_H_ */ -- cgit v1.2.3 From 127f100369a1f302904335950387d566680eb275 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:11 +0100 Subject: drm/i915/bdw: Macro for LRCs and module option for Execlists GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". These expanded contexts enable a number of new abilities, especially "Execlists". The macro is defined to off until we have things in place to hope to work. v2: Rename "advanced contexts" to the more correct "logical ring contexts". v3: Add a module parameter to enable execlists. Execlist are relatively new, and so it'd be wise to be able to switch back to ring submission to debug subtle problems that will inevitably arise. v4: Add an intel_enable_execlists function. 
v5: Sanitize early, as suggested by Daniel. Remove lrc_enabled. Signed-off-by: Ben Widawsky (v1) Signed-off-by: Damien Lespiau (v3) Signed-off-by: Oscar Mateo (v2, v4 & v5) Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 3 +++ drivers/gpu/drm/i915/i915_params.c | 6 ++++++ drivers/gpu/drm/i915/intel_lrc.c | 11 +++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 3 +++ 5 files changed, 25 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ec2a094f3622..fd2aa15ce02b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2062,6 +2062,7 @@ struct drm_i915_cmd_table { #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) +#define HAS_LOGICAL_RING_CONTEXTS(dev) 0 #define HAS_ALIASING_PPGTT(dev) (INTEL_INFO(dev)->gen >= 6) #define HAS_PPGTT(dev) (INTEL_INFO(dev)->gen >= 7 && !IS_GEN8(dev)) #define USES_PPGTT(dev) (i915.enable_ppgtt) @@ -2149,6 +2150,7 @@ struct i915_params { int enable_rc6; int enable_fbc; int enable_ppgtt; + int enable_execlists; int enable_psr; unsigned int preliminary_hw_support; int disable_power_well; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3eec344bdac0..5646e9ba6383 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4723,6 +4723,9 @@ int i915_gem_init(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; int ret; + i915.enable_execlists = intel_sanitize_enable_execlists(dev, + i915.enable_execlists); + mutex_lock(&dev->struct_mutex); if (IS_VALLEYVIEW(dev)) { diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 62ee8308d682..f7f8350c3793 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -35,6 +35,7 @@ struct i915_params i915 __read_mostly = { .vbt_sdvo_panel_type = -1, .enable_rc6 = -1, .enable_fbc = -1, + .enable_execlists = -1, .enable_hangcheck = true, .enable_ppgtt = -1, .enable_psr = 1, @@ -118,6 +119,11 @@ MODULE_PARM_DESC(enable_ppgtt, "Override PPGTT usage. " "(-1=auto [default], 0=disabled, 1=aliasing, 2=full)"); +module_param_named(enable_execlists, i915.enable_execlists, int, 0400); +MODULE_PARM_DESC(enable_execlists, + "Override execlists usage. 
" + "(-1=auto [default], 0=disabled, 1=enabled)"); + module_param_named(enable_psr, i915.enable_psr, int, 0600); MODULE_PARM_DESC(enable_psr, "Enable PSR (default: true)"); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 49bb6fcdface..21f7f1cce86e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -40,3 +40,14 @@ #include #include #include "i915_drv.h" + +int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) +{ + if (enable_execlists == 0) + return 0; + + if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev)) + return 1; + + return 0; +} diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index f6830a4ec773..75ee9c3cb7dc 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -24,4 +24,7 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ +/* Execlists */ +int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists); + #endif /* _INTEL_LRC_H_ */ -- cgit v1.2.3 From bd84b1e995918ad83bdba5d5be1bef901e169f19 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 11 Aug 2014 15:57:57 +0200 Subject: drm/i915: WARN if module opt sanitization goes out of order Depending upon one module option to be sanitized (through USES_PPGTT) for the other is a bit too fragile for my taste. At least WARN about this. Cc: Ben Widawsky Cc: Damien Lespiau Cc: Oscar Mateo Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 21f7f1cce86e..44721292eb77 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -43,6 +43,8 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) { + WARN_ON(i915.enable_ppgtt == -1); + if (enable_execlists == 0) return 0; -- cgit v1.2.3 From ede7d42baeece583c864badb6f9081f4cded6c32 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:12 +0100 Subject: drm/i915/bdw: Initialization for Logical Ring Contexts For the moment this is just a placeholder, but it shows one of the main differences between the good ol' HW contexts and the shiny new Logical Ring Contexts: LR contexts allocate and free their own backing objects. Another difference is that the allocation is deferred (as the create function name suggests), but that does not happen in this patch yet, because for the moment we are only dealing with the default context. Early in the series we had our own gen8_gem_context_init/fini functions, but the truth is they now look almost the same as the legacy hw context init/fini functions. We can always split them later if this ceases to be the case. Also, we do not fall back to legacy ringbuffers when logical ring context initialization fails (not very likely to happen and, even if it does, hw contexts would probably fail as well). v2: Daniel says "explain, do not showcase". Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: s/BUG_ON/WARN_ON/.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 29 +++++++++++++++++++++++------ drivers/gpu/drm/i915/intel_lrc.c | 15 +++++++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 5 +++++ 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3b99390e467a..3552a351ccf7 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -182,7 +182,10 @@ void i915_gem_context_free(struct kref *ctx_ref) typeof(*ctx), ref); struct i915_hw_ppgtt *ppgtt = NULL; - if (ctx->legacy_hw_ctx.rcs_state) { + if (i915.enable_execlists) { + ppgtt = ctx_to_ppgtt(ctx); + intel_lr_context_free(ctx); + } else if (ctx->legacy_hw_ctx.rcs_state) { /* We refcount even the aliasing PPGTT to keep the code symmetric */ if (USES_PPGTT(ctx->legacy_hw_ctx.rcs_state->base.dev)) ppgtt = ctx_to_ppgtt(ctx); @@ -417,7 +420,11 @@ int i915_gem_context_init(struct drm_device *dev) if (WARN_ON(dev_priv->ring[RCS].default_context)) return 0; - if (HAS_HW_CONTEXTS(dev)) { + if (i915.enable_execlists) { + /* NB: intentionally left blank. We will allocate our own + * backing objects as we need them, thank you very much */ + dev_priv->hw_context_size = 0; + } else if (HAS_HW_CONTEXTS(dev)) { dev_priv->hw_context_size = round_up(get_context_size(dev), 4096); if (dev_priv->hw_context_size > (1<<20)) { DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n", @@ -433,11 +440,20 @@ int i915_gem_context_init(struct drm_device *dev) return PTR_ERR(ctx); } - /* NB: RCS will hold a ref for all rings */ - for (i = 0; i < I915_NUM_RINGS; i++) - dev_priv->ring[i].default_context = ctx; + for (i = 0; i < I915_NUM_RINGS; i++) { + struct intel_engine_cs *ring = &dev_priv->ring[i]; + + /* NB: RCS will hold a ref for all rings */ + ring->default_context = ctx; + + /* FIXME: we really only want to do this for initialized rings */ + if (i915.enable_execlists) + intel_lr_context_deferred_create(ctx, ring); + } - DRM_DEBUG_DRIVER("%s context support initialized\n", dev_priv->hw_context_size ? "HW" : "fake"); + DRM_DEBUG_DRIVER("%s context support initialized\n", + i915.enable_execlists ? "LR" : + dev_priv->hw_context_size ? 
"HW" : "fake"); return 0; } @@ -779,6 +795,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct intel_context *ctx; int ret; + /* FIXME: allow user-created LR contexts as well */ if (!hw_context_enabled(dev)) return -ENODEV; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 44721292eb77..2d82d52d18bb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -53,3 +53,18 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } + +void intel_lr_context_free(struct intel_context *ctx) +{ + /* TODO */ +} + +int intel_lr_context_deferred_create(struct intel_context *ctx, + struct intel_engine_cs *ring) +{ + WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL); + + /* TODO */ + + return 0; +} diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 75ee9c3cb7dc..3b93572431e3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -24,6 +24,11 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ +/* Logical Ring Contexts */ +void intel_lr_context_free(struct intel_context *ctx); +int intel_lr_context_deferred_create(struct intel_context *ctx, + struct intel_engine_cs *ring); + /* Execlists */ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists); -- cgit v1.2.3 From c9e003af2d44d9f6eafe855448c41c9ac08ae895 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:13 +0100 Subject: drm/i915/bdw: Introduce one context backing object per engine A context backing object only makes sense for a given engine (because it holds state data specific to that engine). In legacy ringbuffer sumission mode, the only MI_SET_CONTEXT we really perform is for the render engine, so one backing object is all we nee. With Execlists, however, we need backing objects for every engine, as contexts become the only way to submit workloads to the GPU. To tackle this problem, we multiplex the context struct to contain objects. Originally, I colored this code by instantiating one new context for every engine I wanted to use, but this change suggested by Brad Volkin makes it more elegant. v2: Leave the old backing object pointer behind. Daniel Vetter suggested using a union, but it makes more sense to keep rcs_state as a NULL pointer behind, to make sure no one uses it incorrectly when Execlists are enabled, similar to what he suggested for ring->buffer (Rusty's API level 5). v3: Use the name "state" instead of the too-generic "obj", so that it mirrors the name choice for the legacy rcs_state. 
Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fd2aa15ce02b..ad70e8ec18bc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -626,11 +626,17 @@ struct intel_context { struct i915_ctx_hang_stats hang_stats; struct i915_address_space *vm; + /* Legacy ring buffer submission */ struct { struct drm_i915_gem_object *rcs_state; bool initialized; } legacy_hw_ctx; + /* Execlists */ + struct { + struct drm_i915_gem_object *state; + } engine[I915_NUM_RINGS]; + struct list_head link; }; -- cgit v1.2.3 From 8c8579176a144b1dca1d99ebb92510924168d508 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:14 +0100 Subject: drm/i915/bdw: A bit more advanced LR context alloc/free Now that we have the ability to allocate our own context backing objects and we have multiplexed one of them per engine inside the context structs, we can finally allocate and free them correctly. Regarding the context size, reading the register to calculate the sizes could work, I think; however, the docs are very clear about the actual context sizes on GEN8, so just hardcode that and use it. v2: Rebased on top of the Full PPGTT series. It is important to notice that at this point we have one global default context per engine, all of them using the aliasing PPGTT (as opposed to the single global default context we have with legacy HW contexts). v3: - Go back to one single global default context, this time with multiple backing objects inside. - Use different context sizes for non-render engines, as suggested by Damien (still hardcoded, since the information about the context size registers in the BSpec is, well, *lacking*). - Render ctx size is 20 (or 19) pages, but not 21 (caught by Damien). - Move default context backing object creation to intel_init_ring (so that we don't waste memory in rings that might not get initialized). v4: - Reuse the HW legacy context init/fini. - Create a separate free function. - Rename the functions with an intel_ prefix. v5: Several rebases to account for the changes in the previous patches.
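To make the hardcoded sizes concrete, a small standalone calculation; the size constants match the ones introduced in the diff that follows, the ROUND_UP macro here merely mimics the kernel's round_up(), and since both sizes are already page multiples the rounding is a safety net rather than a correction:

#include <stdio.h>

#define PAGE_SIZE 4096
#define ROUND_UP(x, y) ((((x) + (y) - 1) / (y)) * (y))

#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_OTHER_SIZE  (2 * PAGE_SIZE)

int main(void)
{
        printf("render: %d bytes (%d pages)\n",
               ROUND_UP(GEN8_LR_CONTEXT_RENDER_SIZE, 4096),
               ROUND_UP(GEN8_LR_CONTEXT_RENDER_SIZE, 4096) / PAGE_SIZE);
        printf("other:  %d bytes (%d pages)\n",
               ROUND_UP(GEN8_LR_CONTEXT_OTHER_SIZE, 4096),
               ROUND_UP(GEN8_LR_CONTEXT_OTHER_SIZE, 4096) / PAGE_SIZE);
        return 0;
}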
Signed-off-by: Ben Widawsky (v1) Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 59 +++++++++++++++++++++++++++++++-- 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ad70e8ec18bc..cbae19bab4bf 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2528,6 +2528,8 @@ int i915_switch_context(struct intel_engine_cs *ring, struct intel_context * i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); void i915_gem_context_free(struct kref *ctx_ref); +struct drm_i915_gem_object * +i915_gem_alloc_context_obj(struct drm_device *dev, size_t size); static inline void i915_gem_context_reference(struct intel_context *ctx) { kref_get(&ctx->ref); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3552a351ccf7..9f8fbbacf6c0 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -199,7 +199,7 @@ void i915_gem_context_free(struct kref *ctx_ref) kfree(ctx); } -static struct drm_i915_gem_object * +struct drm_i915_gem_object * i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 2d82d52d18bb..9f30ee80e487 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -41,6 +41,11 @@ #include #include "i915_drv.h" +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) +#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) + +#define GEN8_LR_CONTEXT_ALIGN 4096 + int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) { WARN_ON(i915.enable_ppgtt == -1); @@ -56,15 +61,65 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists void intel_lr_context_free(struct intel_context *ctx) { - /* TODO */ + int i; + + for (i = 0; i < I915_NUM_RINGS; i++) { + struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state; + if (ctx_obj) { + i915_gem_object_ggtt_unpin(ctx_obj); + drm_gem_object_unreference(&ctx_obj->base); + } + } +} + +static uint32_t get_lr_context_size(struct intel_engine_cs *ring) +{ + int ret = 0; + + WARN_ON(INTEL_INFO(ring->dev)->gen != 8); + + switch (ring->id) { + case RCS: + ret = GEN8_LR_CONTEXT_RENDER_SIZE; + break; + case VCS: + case BCS: + case VECS: + case VCS2: + ret = GEN8_LR_CONTEXT_OTHER_SIZE; + break; + } + + return ret; } int intel_lr_context_deferred_create(struct intel_context *ctx, struct intel_engine_cs *ring) { + struct drm_device *dev = ring->dev; + struct drm_i915_gem_object *ctx_obj; + uint32_t context_size; + int ret; + WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL); - /* TODO */ + context_size = round_up(get_lr_context_size(ring), 4096); + + ctx_obj = i915_gem_alloc_context_obj(dev, context_size); + if (IS_ERR(ctx_obj)) { + ret = PTR_ERR(ctx_obj); + DRM_DEBUG_DRIVER("Alloc LRC backing obj failed: %d\n", ret); + return ret; + } + + ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0); + if (ret) { + DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret); + drm_gem_object_unreference(&ctx_obj->base); + return ret; + } + + ctx->engine[ring->id].state = ctx_obj; return 0; } -- cgit v1.2.3 From 84c2377fcee7a43cd964b62143e9a3714130bb0c Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 
24 Jul 2014 17:04:15 +0100 Subject: drm/i915/bdw: Allocate ringbuffers for Logical Ring Contexts As we have said a couple of times by now, logical ring contexts have their own ringbuffers: not only the backing pages, but the whole management struct. In a previous version of the series, this was achieved with two separate patches: drm/i915/bdw: Allocate ringbuffer backing objects for default global LRC drm/i915/bdw: Allocate ringbuffer for user-created LRCs Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_lrc.c | 38 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++++ 4 files changed, 46 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cbae19bab4bf..eccb8e406e9c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -635,6 +635,7 @@ struct intel_context { /* Execlists */ struct { struct drm_i915_gem_object *state; + struct intel_ringbuffer *ringbuf; } engine[I915_NUM_RINGS]; struct list_head link; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9f30ee80e487..0c80bb1f5420 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -65,7 +65,11 @@ void intel_lr_context_free(struct intel_context *ctx) for (i = 0; i < I915_NUM_RINGS; i++) { struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state; + struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf; + if (ctx_obj) { + intel_destroy_ringbuffer_obj(ringbuf); + kfree(ringbuf); i915_gem_object_ggtt_unpin(ctx_obj); drm_gem_object_unreference(&ctx_obj->base); } @@ -99,6 +103,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, struct drm_device *dev = ring->dev; struct drm_i915_gem_object *ctx_obj; uint32_t context_size; + struct intel_ringbuffer *ringbuf; int ret; WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL); @@ -119,6 +124,39 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, return ret; } + ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL); + if (!ringbuf) { + DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n", + ring->name); + i915_gem_object_ggtt_unpin(ctx_obj); + drm_gem_object_unreference(&ctx_obj->base); + ret = -ENOMEM; + return ret; + } + + ringbuf->size = 32 * PAGE_SIZE; + ringbuf->effective_size = ringbuf->size; + ringbuf->head = 0; + ringbuf->tail = 0; + ringbuf->space = ringbuf->size; + ringbuf->last_retired_head = -1; + + /* TODO: For now we put this in the mappable region so that we can reuse + * the existing ringbuffer code which ioremaps it. When we start + * creating many contexts, this will no longer work and we must switch + * to a kmapish interface. 
+ */ + ret = intel_alloc_ringbuffer_obj(dev, ringbuf); + if (ret) { + DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n", + ring->name, ret); + kfree(ringbuf); + i915_gem_object_ggtt_unpin(ctx_obj); + drm_gem_object_unreference(&ctx_obj->base); + return ret; + } + + ctx->engine[ring->id].ringbuf = ringbuf; ctx->engine[ring->id].state = ctx_obj; return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a059b64a0fb2..064652034d7e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1519,7 +1519,7 @@ static int init_phys_status_page(struct intel_engine_cs *ring) return 0; } -static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) +void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) { if (!ringbuf->obj) return; @@ -1530,8 +1530,8 @@ static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) ringbuf->obj = NULL; } -static int intel_alloc_ringbuffer_obj(struct drm_device *dev, - struct intel_ringbuffer *ringbuf) +int intel_alloc_ringbuffer_obj(struct drm_device *dev, + struct intel_ringbuffer *ringbuf) { struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 70525d0c2c74..669cc7527f9a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -355,6 +355,10 @@ intel_write_status_page(struct intel_engine_cs *ring, #define I915_GEM_HWS_SCRATCH_INDEX 0x30 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf); +int intel_alloc_ringbuffer_obj(struct drm_device *dev, + struct intel_ringbuffer *ringbuf); + void intel_stop_ring_buffer(struct intel_engine_cs *ring); void intel_cleanup_ring_buffer(struct intel_engine_cs *ring); -- cgit v1.2.3 From 0c7dd53b84def4fbbba907bef3d32a5171b617a5 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 11 Aug 2014 16:17:44 +0200 Subject: drm/i915/bdw: Add context and engine pointers to the ringbuffer Any given ringbuffer is unequivocally tied to one context and one engine. By setting the appropriate pointers to them, the ringbuffer struct holds all the information you might need to submit a workload for processing, Execlists style. v2: Drop ring->ctx since that looks terribly ill-defined for legacy ringbuffer submission.
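Since the last two patches both revolve around the intel_ringbuffer struct's bookkeeping (size, head, tail, space, plus the new engine back-pointer), a standalone model of the circular free-space computation may help. This is a sketch of the general head/tail convention only; RESERVED is a made-up stand-in for I915_RING_FREE_SPACE, whose real value is not shown in these patches:

#include <stdio.h>

#define RING_SIZE (32 * 4096)  /* matches ringbuf->size in the patch */
#define RESERVED  64           /* stand-in for I915_RING_FREE_SPACE  */

/* Free bytes between tail (producer) and head (consumer), keeping a
 * reserved gap so the tail can never catch up to the head exactly. */
static int ring_space(int head, int tail, int size)
{
        int space = head - (tail + RESERVED);
        if (space < 0)
                space += size;
        return space;
}

int main(void)
{
        /* Freshly initialized ring: head == tail == 0, almost all free. */
        printf("%d\n", ring_space(0, 0, RING_SIZE));    /* size - RESERVED */

        /* Producer has advanced; free space shrinks accordingly. */
        printf("%d\n", ring_space(0, 4096, RING_SIZE)); /* size - 4096 - RESERVED */
        return 0;
}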
Signed-off-by: Oscar Mateo (v1) Acked-by: Damien Lespiau (v2) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 3 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0c80bb1f5420..8f2d14da6228 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -134,6 +134,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, return ret; } + ringbuf->ring = ring; ringbuf->size = 32 * PAGE_SIZE; ringbuf->effective_size = ringbuf->size; ringbuf->head = 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 064652034d7e..c35f956ed6a0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1594,6 +1594,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); ringbuf->size = 32 * PAGE_SIZE; + ringbuf->ring = ring; memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno)); init_waitqueue_head(&ring->irq_queue); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 669cc7527f9a..fe9d9d9d3598 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -90,6 +90,8 @@ struct intel_ringbuffer { struct drm_i915_gem_object *obj; void __iomem *virtual_start; + struct intel_engine_cs *ring; + u32 head; u32 tail; int space; -- cgit v1.2.3 From 8670d6f97d8c19595950af1838f8458d7529825f Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:17 +0100 Subject: drm/i915/bdw: Populate LR contexts (somewhat) For the most part, logical ring context objects are similar to hardware contexts in that the backing object is meant to be opaque. There are some exceptions where we need to poke certain offsets of the object for initialization, updating the tail pointer or updating the PDPs. For our basic execlist implementation we'll only need our PPGTT PDs, and ringbuffer addresses in order to set up the context. With previous patches, we have both, so start prepping the context to be load. Before running a context for the first time you must populate some fields in the context object. These fields begin 1 PAGE + LRCA, ie. the first page (in 0 based counting) of the context image. These same fields will be read and written to as contexts are saved and restored once the system is up and running. Many of these fields are completely reused from previous global registers: ringbuffer head/tail/control, context control matches some previous MI_SET_CONTEXT flags, and page directories. There are other fields which we don't touch which we may want in the future. v2: CTX_LRI_HEADER_0 is MI_LOAD_REGISTER_IMM(14) for render and (11) for other engines. v3: Several rebases and general changes to the code. v4: Squash with "Extract LR context object populating" Also, Damien's review comments: - Set the Force Posted bit on the LRI header, as the BSpec suggest we do. - Prevent warning when compiling a 32-bits kernel without HIGHMEM64. - Add a clarifying comment to the context population code. v5: Damien's review comments: - The third MI_LOAD_REGISTER_IMM in the context does not set Force Posted. - Remove dead code. v6: Add a note about the (presumed) differences between BDW and CHV state contexts. 
Also, Brad's review comments: - Use the _MASKED_BIT_ENABLE, upper_32_bits and lower_32_bits macros. - Be less magical about how we set the ring size in the context. Signed-off-by: Ben Widawsky (v1) Signed-off-by: Rafael Barbalho (v2) Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_lrc.c | 159 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7a6cc69cdc2b..c1d24242a02d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -282,6 +282,7 @@ * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! */ #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) +#define MI_LRI_FORCE_POSTED (1<<12) #define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*(x)-1) #define MI_STORE_REGISTER_MEM_GEN8(x) MI_INSTR(0x24, 3*(x)-1) #define MI_SRM_LRM_GLOBAL_GTT (1<<22) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8f2d14da6228..a7a08a85edb3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -46,6 +46,38 @@ #define GEN8_LR_CONTEXT_ALIGN 4096 +#define RING_ELSP(ring) ((ring)->mmio_base+0x230) +#define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) + +#define CTX_LRI_HEADER_0 0x01 +#define CTX_CONTEXT_CONTROL 0x02 +#define CTX_RING_HEAD 0x04 +#define CTX_RING_TAIL 0x06 +#define CTX_RING_BUFFER_START 0x08 +#define CTX_RING_BUFFER_CONTROL 0x0a +#define CTX_BB_HEAD_U 0x0c +#define CTX_BB_HEAD_L 0x0e +#define CTX_BB_STATE 0x10 +#define CTX_SECOND_BB_HEAD_U 0x12 +#define CTX_SECOND_BB_HEAD_L 0x14 +#define CTX_SECOND_BB_STATE 0x16 +#define CTX_BB_PER_CTX_PTR 0x18 +#define CTX_RCS_INDIRECT_CTX 0x1a +#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c +#define CTX_LRI_HEADER_1 0x21 +#define CTX_CTX_TIMESTAMP 0x22 +#define CTX_PDP3_UDW 0x24 +#define CTX_PDP3_LDW 0x26 +#define CTX_PDP2_UDW 0x28 +#define CTX_PDP2_LDW 0x2a +#define CTX_PDP1_UDW 0x2c +#define CTX_PDP1_LDW 0x2e +#define CTX_PDP0_UDW 0x30 +#define CTX_PDP0_LDW 0x32 +#define CTX_LRI_HEADER_2 0x41 +#define CTX_R_PWR_CLK_STATE 0x42 +#define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 + int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) { WARN_ON(i915.enable_ppgtt == -1); @@ -59,6 +91,115 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } +static int +populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj, + struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf) +{ + struct drm_i915_gem_object *ring_obj = ringbuf->obj; + struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx); + struct page *page; + uint32_t *reg_state; + int ret; + + ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); + if (ret) { + DRM_DEBUG_DRIVER("Could not set to CPU domain\n"); + return ret; + } + + ret = i915_gem_object_get_pages(ctx_obj); + if (ret) { + DRM_DEBUG_DRIVER("Could not get object pages\n"); + return ret; + } + + i915_gem_object_pin_pages(ctx_obj); + + /* The second page of the context object contains some fields which must + * be set up prior to the first execution. */ + page = i915_gem_object_get_page(ctx_obj, 1); + reg_state = kmap_atomic(page); + + /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM + * commands followed by (reg, value) pairs. 
The values we are setting here are + * only for the first context restore: on a subsequent save, the GPU will + * recreate this batchbuffer with new values (including all the missing + * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */ + if (ring->id == RCS) + reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14); + else + reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11); + reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED; + reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring); + reg_state[CTX_CONTEXT_CONTROL+1] = + _MASKED_BIT_ENABLE((1<<3) | MI_RESTORE_INHIBIT); + reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base); + reg_state[CTX_RING_HEAD+1] = 0; + reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base); + reg_state[CTX_RING_TAIL+1] = 0; + reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base); + reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj); + reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base); + reg_state[CTX_RING_BUFFER_CONTROL+1] = + ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID; + reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168; + reg_state[CTX_BB_HEAD_U+1] = 0; + reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140; + reg_state[CTX_BB_HEAD_L+1] = 0; + reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110; + reg_state[CTX_BB_STATE+1] = (1<<5); + reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c; + reg_state[CTX_SECOND_BB_HEAD_U+1] = 0; + reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114; + reg_state[CTX_SECOND_BB_HEAD_L+1] = 0; + reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118; + reg_state[CTX_SECOND_BB_STATE+1] = 0; + if (ring->id == RCS) { + /* TODO: according to BSpec, the register state context + * for CHV does not have these. OTOH, these registers do + * exist in CHV. 
I'm waiting for a clarification */ + reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0; + reg_state[CTX_BB_PER_CTX_PTR+1] = 0; + reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4; + reg_state[CTX_RCS_INDIRECT_CTX+1] = 0; + reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8; + reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0; + } + reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9); + reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED; + reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8; + reg_state[CTX_CTX_TIMESTAMP+1] = 0; + reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3); + reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3); + reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2); + reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2); + reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1); + reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1); + reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0); + reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0); + reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[3]); + reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[3]); + reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[2]); + reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[2]); + reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[1]); + reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[1]); + reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[0]); + reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[0]); + if (ring->id == RCS) { + reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); + reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8; + reg_state[CTX_R_PWR_CLK_STATE+1] = 0; + } + + kunmap_atomic(reg_state); + + ctx_obj->dirty = 1; + set_page_dirty(page); + i915_gem_object_unpin_pages(ctx_obj); + + return 0; +} + void intel_lr_context_free(struct intel_context *ctx) { int i; @@ -151,14 +292,24 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, if (ret) { DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n", ring->name, ret); - kfree(ringbuf); - i915_gem_object_ggtt_unpin(ctx_obj); - drm_gem_object_unreference(&ctx_obj->base); - return ret; + goto error; + } + + ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf); + if (ret) { + DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret); + intel_destroy_ringbuffer_obj(ringbuf); + goto error; } ctx->engine[ring->id].ringbuf = ringbuf; ctx->engine[ring->id].state = ctx_obj; return 0; + +error: + kfree(ringbuf); + i915_gem_object_ggtt_unpin(ctx_obj); + drm_gem_object_unreference(&ctx_obj->base); + return ret; } -- cgit v1.2.3 From ec3e9963a681789860e5c0120a745b717d942392 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:18 +0100 Subject: drm/i915/bdw: Deferred creation of user-created LRCs The backing objects and ringbuffers for contexts created via open fd are actually empty until the user starts sending execbuffers to them. At that point, we allocate & populate them. We do this because, at create time, we really don't know which engine is going to be used with the context later on (and we don't want to waste memory on objects that we might never use). v2: As contexts created via ioctl can only be used with the render ring, we have enough information to allocate & populate them right away. v3: Defer the creation always, even with ioctl-created contexts, as requested by Daniel Vetter. 
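The deferred-creation policy described above is a classic get-or-create on first use. A compact sketch of the control flow, where allocate_state() is a made-up stand-in for the real backing object and ringbuffer setup:

#include <stdio.h>
#include <stdlib.h>

enum ring_id { RCS, VCS, BCS, VECS, VCS2, NUM_RINGS };

struct toy_context {
        void *engine_state[NUM_RINGS];
};

static void *allocate_state(enum ring_id id)
{
        printf("allocating state for ring %d\n", id);
        return malloc(64); /* placeholder for object + ringbuffer setup */
}

/* Create the per-engine state the first time this (ctx, ring) pair is
 * actually used for submission; later calls are cheap no-ops. */
static int deferred_create(struct toy_context *ctx, enum ring_id id)
{
        if (ctx->engine_state[id])
                return 0; /* already populated */

        ctx->engine_state[id] = allocate_state(id);
        return ctx->engine_state[id] ? 0 : -1; /* -ENOMEM in the kernel */
}

int main(void)
{
        struct toy_context ctx = {0};

        deferred_create(&ctx, RCS); /* first execbuffer on RCS: allocates */
        deferred_create(&ctx, RCS); /* second one: no-op */
        free(ctx.engine_state[RCS]);
        return 0;
}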
Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 7 +++---- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 ++++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 9f8fbbacf6c0..bcb41002aa13 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -782,9 +782,9 @@ int i915_switch_context(struct intel_engine_cs *ring, return do_switch(ring, to); } -static bool hw_context_enabled(struct drm_device *dev) +static bool contexts_enabled(struct drm_device *dev) { - return to_i915(dev)->hw_context_size; + return i915.enable_execlists || to_i915(dev)->hw_context_size; } int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, @@ -795,8 +795,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct intel_context *ctx; int ret; - /* FIXME: allow user-created LR contexts as well */ - if (!hw_context_enabled(dev)) + if (!contexts_enabled(dev)) return -ENODEV; ret = i915_mutex_lock_interruptible(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index dec2cc2fbd42..29cb2156d32b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -931,6 +931,14 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ERR_PTR(-EIO); } + if (i915.enable_execlists && !ctx->engine[ring->id].state) { + int ret = intel_lr_context_deferred_create(ctx, ring); + if (ret) { + DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret); + return ERR_PTR(ret); + } + } + return ctx; } -- cgit v1.2.3 From a83014d3f8b936778a9bc9b3d4137769bb26d9eb Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:21 +0100 Subject: drm/i915: Abstract the legacy workload submission mechanism away As suggested by Daniel Vetter. The idea, in subsequent patches, is to provide an alternative to these vfuncs for the Execlists submission mechanism. v2: Split in two and reordered to illustrate our intentions instead of showing them off. Also, removed the add_request vfunc and added the stop_ring one. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: - Make checkpatch happy. - Be grumpy about the excessive vtable. - Ditch gt->is_ring_initialized.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 23 +++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem.c | 15 +++++++++++---- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 20 ++++++++++---------- 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eccb8e406e9c..9198f1c96470 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1645,6 +1645,20 @@ struct drm_i915_private { /* Old ums support infrastructure, same warning applies.
*/ struct i915_ums_state ums; + /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ + struct { + int (*do_execbuf)(struct drm_device *dev, struct drm_file *file, + struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas, + struct drm_i915_gem_object *batch_obj, + u64 exec_start, u32 flags); + int (*init_rings)(struct drm_device *dev); + void (*cleanup_ring)(struct intel_engine_cs *ring); + void (*stop_ring)(struct intel_engine_cs *ring); + } gt; + /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. @@ -2252,6 +2266,14 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int i915_gem_ringbuffer_submission(struct drm_device *dev, + struct drm_file *file, + struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas, + struct drm_i915_gem_object *batch_obj, + u64 exec_start, u32 flags); int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_execbuffer2(struct drm_device *dev, void *data, @@ -2404,6 +2426,7 @@ void i915_gem_reset(struct drm_device *dev); bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init(struct drm_device *dev); +int i915_gem_init_rings(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice); void i915_gem_init_swizzling(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5646e9ba6383..33a54cbf9a2e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4495,7 +4495,7 @@ i915_gem_stop_ringbuffers(struct drm_device *dev) int i; for_each_ring(ring, dev_priv, i) - intel_stop_ring_buffer(ring); + dev_priv->gt.stop_ring(ring); } int @@ -4612,7 +4612,7 @@ intel_enable_blt(struct drm_device *dev) return true; } -static int i915_gem_init_rings(struct drm_device *dev) +int i915_gem_init_rings(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; int ret; @@ -4695,7 +4695,7 @@ i915_gem_init_hw(struct drm_device *dev) i915_gem_init_swizzling(dev); - ret = i915_gem_init_rings(dev); + ret = dev_priv->gt.init_rings(dev); if (ret) return ret; @@ -4736,6 +4736,13 @@ int i915_gem_init(struct drm_device *dev) DRM_DEBUG_DRIVER("allow wake ack timed out\n"); } + if (!i915.enable_execlists) { + dev_priv->gt.do_execbuf = i915_gem_ringbuffer_submission; + dev_priv->gt.init_rings = i915_gem_init_rings; + dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; + dev_priv->gt.stop_ring = intel_stop_ring_buffer; + } + i915_gem_init_userptr(dev); i915_gem_init_global_gtt(dev); @@ -4771,7 +4778,7 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev) int i; for_each_ring(ring, dev_priv, i) - intel_cleanup_ring_buffer(ring); + dev_priv->gt.cleanup_ring(ring); } int diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 29cb2156d32b..26b38b3ae4f3 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1023,14 +1023,14 @@ i915_reset_gen7_sol_offsets(struct 
drm_device *dev, return 0; } -static int -legacy_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, - struct intel_engine_cs *ring, - struct intel_context *ctx, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas, - struct drm_i915_gem_object *batch_obj, - u64 exec_start, u32 flags) +int +i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, + struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas, + struct drm_i915_gem_object *batch_obj, + u64 exec_start, u32 flags) { struct drm_clip_rect *cliprects = NULL; struct drm_i915_private *dev_priv = dev->dev_private; @@ -1402,8 +1402,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } else exec_start += i915_gem_obj_offset(batch_obj, vm); - ret = legacy_ringbuffer_submission(dev, file, ring, ctx, - args, &eb->vmas, batch_obj, exec_start, flags); + ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args, + &eb->vmas, batch_obj, exec_start, flags); /* * FIXME: We crucially rely upon the active tracking for the (ppgtt) -- cgit v1.2.3 From 454afebde873874b939465bfc1a294ac3697c96e Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:22 +0100 Subject: drm/i915/bdw: Skeleton for the new logical rings submission path Execlists are indeed a brave new world with respect to workload submission to the GPU. In previous version of these series, I have tried to impact the legacy ringbuffer submission path as little as possible (mostly, passing the context around and using the correct ringbuffer when I needed one) but Daniel is afraid (probably with a reason) that these changes and, especially, future ones, will end up breaking older gens. This commit and some others coming next will try to limit the damage by creating an alternative path for workload submission. The first step is here: laying out a new ring init/fini. 
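Stripped of the driver specifics, the abstraction these two patches build on is a plain function-pointer table selected once at init time, so that callers never branch on the submission mode again. A minimal sketch, where the function names are placeholders for the real entry points:

#include <stdio.h>

struct submission_ops {
        int (*do_execbuf)(const char *what);
        void (*stop_ring)(void);
};

static int ringbuffer_submit(const char *what)
{ printf("legacy ringbuffer: %s\n", what); return 0; }

static int execlists_submit(const char *what)
{ printf("execlists: %s\n", what); return 0; }

static void ringbuffer_stop(void) { printf("stop legacy ring\n"); }
static void execlists_stop(void)  { printf("stop logical ring\n"); }

int main(void)
{
        int enable_execlists = 1; /* normally a sanitized module option */
        struct submission_ops gt;

        /* Selected once at init, mirroring the i915_gem_init() hunks. */
        if (!enable_execlists) {
                gt.do_execbuf = ringbuffer_submit;
                gt.stop_ring  = ringbuffer_stop;
        } else {
                gt.do_execbuf = execlists_submit;
                gt.stop_ring  = execlists_stop;
        }

        gt.do_execbuf("batch");
        gt.stop_ring();
        return 0;
}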
Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 5 ++ drivers/gpu/drm/i915/intel_lrc.c | 151 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 12 ++++ 3 files changed, 168 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 33a54cbf9a2e..9acb2469116a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4741,6 +4741,11 @@ int i915_gem_init(struct drm_device *dev) dev_priv->gt.init_rings = i915_gem_init_rings; dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; dev_priv->gt.stop_ring = intel_stop_ring_buffer; + } else { + dev_priv->gt.do_execbuf = intel_execlists_submission; + dev_priv->gt.init_rings = intel_logical_rings_init; + dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; + dev_priv->gt.stop_ring = intel_logical_ring_stop; } i915_gem_init_userptr(dev); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a7a08a85edb3..9c2ff8f11c90 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -91,6 +91,157 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } +int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, + struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas, + struct drm_i915_gem_object *batch_obj, + u64 exec_start, u32 flags) +{ + /* TODO */ + return 0; +} + +void intel_logical_ring_stop(struct intel_engine_cs *ring) +{ + /* TODO */ +} + +void intel_logical_ring_cleanup(struct intel_engine_cs *ring) +{ + /* TODO */ +} + +static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring) +{ + /* TODO */ + return 0; +} + +static int logical_render_ring_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = &dev_priv->ring[RCS]; + + ring->name = "render ring"; + ring->id = RCS; + ring->mmio_base = RENDER_RING_BASE; + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT; + + return logical_ring_init(dev, ring); +} + +static int logical_bsd_ring_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = &dev_priv->ring[VCS]; + + ring->name = "bsd ring"; + ring->id = VCS; + ring->mmio_base = GEN6_BSD_RING_BASE; + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; + + return logical_ring_init(dev, ring); +} + +static int logical_bsd2_ring_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = &dev_priv->ring[VCS2]; + + ring->name = "bds2 ring"; + ring->id = VCS2; + ring->mmio_base = GEN8_BSD2_RING_BASE; + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; + + return logical_ring_init(dev, ring); +} + +static int logical_blt_ring_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = &dev_priv->ring[BCS]; + + ring->name = "blitter ring"; + ring->id = BCS; + ring->mmio_base = BLT_RING_BASE; + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; + + return logical_ring_init(dev, ring); +} + +static int logical_vebox_ring_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = 
&dev_priv->ring[VECS]; + + ring->name = "video enhancement ring"; + ring->id = VECS; + ring->mmio_base = VEBOX_RING_BASE; + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; + + return logical_ring_init(dev, ring); +} + +int intel_logical_rings_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + ret = logical_render_ring_init(dev); + if (ret) + return ret; + + if (HAS_BSD(dev)) { + ret = logical_bsd_ring_init(dev); + if (ret) + goto cleanup_render_ring; + } + + if (HAS_BLT(dev)) { + ret = logical_blt_ring_init(dev); + if (ret) + goto cleanup_bsd_ring; + } + + if (HAS_VEBOX(dev)) { + ret = logical_vebox_ring_init(dev); + if (ret) + goto cleanup_blt_ring; + } + + if (HAS_BSD2(dev)) { + ret = logical_bsd2_ring_init(dev); + if (ret) + goto cleanup_vebox_ring; + } + + ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); + if (ret) + goto cleanup_bsd2_ring; + + return 0; + +cleanup_bsd2_ring: + intel_logical_ring_cleanup(&dev_priv->ring[VCS2]); +cleanup_vebox_ring: + intel_logical_ring_cleanup(&dev_priv->ring[VECS]); +cleanup_blt_ring: + intel_logical_ring_cleanup(&dev_priv->ring[BCS]); +cleanup_bsd_ring: + intel_logical_ring_cleanup(&dev_priv->ring[VCS]); +cleanup_render_ring: + intel_logical_ring_cleanup(&dev_priv->ring[RCS]); + + return ret; +} + static int populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj, struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 3b93572431e3..bf0eff4e9f08 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -24,6 +24,11 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ +/* Logical Rings */ +void intel_logical_ring_stop(struct intel_engine_cs *ring); +void intel_logical_ring_cleanup(struct intel_engine_cs *ring); +int intel_logical_rings_init(struct drm_device *dev); + /* Logical Ring Contexts */ void intel_lr_context_free(struct intel_context *ctx); int intel_lr_context_deferred_create(struct intel_context *ctx, @@ -31,5 +36,12 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, /* Execlists */ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists); +int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, + struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas, + struct drm_i915_gem_object *batch_obj, + u64 exec_start, u32 flags); #endif /* _INTEL_LRC_H_ */ -- cgit v1.2.3 From 48d823878d64f93163f5a949623346748bbce1b4 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:23 +0100 Subject: drm/i915/bdw: Generic logical ring init and cleanup Allocate and populate the default LRC for every ring, call gen-specific init/cleanup, init/fini the command parser and set the status page (now inside the LRC object). These are things all engines/rings have in common. Stopping the ring before cleanup and initializing the seqnos is left as a TODO task (we need more infrastructure in place before we can achieve this). v2: Check the ringbuffer backing obj for ring_is_initialized, instead of the context backing obj (similar, but not exactly the same). 
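The error handling in intel_logical_rings_init() above is the kernel's standard goto-unwind idiom: each successfully initialized engine gains a cleanup label, and a failure jumps to the label that tears down exactly what was brought up so far, in reverse order. In miniature, with made-up unit names:

#include <stdio.h>

static int init_unit(const char *name, int fail)
{
        if (fail) { printf("init %s: failed\n", name); return -1; }
        printf("init %s\n", name);
        return 0;
}

static void cleanup_unit(const char *name) { printf("cleanup %s\n", name); }

static int init_all(void)
{
        int ret;

        ret = init_unit("render", 0);
        if (ret)
                return ret;

        ret = init_unit("bsd", 0);
        if (ret)
                goto cleanup_render;

        ret = init_unit("blt", 1); /* simulate a failure here */
        if (ret)
                goto cleanup_bsd;

        return 0;

        /* Unwind in reverse order, touching only what was set up. */
cleanup_bsd:
        cleanup_unit("bsd");
cleanup_render:
        cleanup_unit("render");
        return ret;
}

int main(void)
{
        return init_all() ? 1 : 0;
}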
Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 4 --- drivers/gpu/drm/i915/intel_lrc.c | 54 +++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 17 +++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +--- 4 files changed, 70 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index bcb41002aa13..7a08f3e9e1ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -445,10 +445,6 @@ int i915_gem_context_init(struct drm_device *dev) /* NB: RCS will hold a ref for all rings */ ring->default_context = ctx; - - /* FIXME: we really only want to do this for initialized rings */ - if (i915.enable_execlists) - intel_lr_context_deferred_create(ctx, ring); } DRM_DEBUG_DRIVER("%s context support initialized\n", diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9c2ff8f11c90..ed7a4ff3bbd2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -110,12 +110,60 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { - /* TODO */ + if (!intel_ring_initialized(ring)) + return; + + /* TODO: make sure the ring is stopped */ + ring->preallocated_lazy_request = NULL; + ring->outstanding_lazy_seqno = 0; + + if (ring->cleanup) + ring->cleanup(ring); + + i915_cmd_parser_fini_ring(ring); + + if (ring->status_page.obj) { + kunmap(sg_page(ring->status_page.obj->pages->sgl)); + ring->status_page.obj = NULL; + } } static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring) { - /* TODO */ + int ret; + struct intel_context *dctx = ring->default_context; + struct drm_i915_gem_object *dctx_obj; + + /* Intentionally left blank. */ + ring->buffer = NULL; + + ring->dev = dev; + INIT_LIST_HEAD(&ring->active_list); + INIT_LIST_HEAD(&ring->request_list); + init_waitqueue_head(&ring->irq_queue); + + ret = intel_lr_context_deferred_create(dctx, ring); + if (ret) + return ret; + + /* The status page is offset 0 from the context object in LRCs. 
*/ + dctx_obj = dctx->engine[ring->id].state; + ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj); + ring->status_page.page_addr = kmap(sg_page(dctx_obj->pages->sgl)); + if (ring->status_page.page_addr == NULL) + return -ENOMEM; + ring->status_page.obj = dctx_obj; + + ret = i915_cmd_parser_init_ring(ring); + if (ret) + return ret; + + if (ring->init) { + ret = ring->init(ring); + if (ret) + return ret; + } + return 0; } @@ -399,6 +447,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, int ret; WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL); + if (ctx->engine[ring->id].state) + return 0; context_size = round_up(get_lr_context_size(ring), 4096); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index c35f956ed6a0..e4b97f5c5797 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -40,6 +40,23 @@ */ #define CACHELINE_BYTES 64 +bool +intel_ring_initialized(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + + if (!dev) + return false; + + if (i915.enable_execlists) { + struct intel_context *dctx = ring->default_context; + struct intel_ringbuffer *ringbuf = dctx->engine[ring->id].ringbuf; + + return ringbuf->obj; + } else + return ring->buffer && ring->buffer->obj; +} + static inline int __ring_space(int head, int tail, int size) { int space = head - (tail + I915_RING_FREE_SPACE); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index fe9d9d9d3598..fbe54ef6a9a1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -289,11 +289,7 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; -static inline bool -intel_ring_initialized(struct intel_engine_cs *ring) -{ - return ring->buffer && ring->buffer->obj; -} +bool intel_ring_initialized(struct intel_engine_cs *ring); static inline unsigned intel_ring_flag(struct intel_engine_cs *ring) -- cgit v1.2.3 From 9b1136d505b1de5478e11b59ca59cf8ce2a33217 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:24 +0100 Subject: drm/i915/bdw: GEN-specific logical ring init Logical rings do not need most of the initialization their legacy ringbuffer counterparts do: we just need the pipe control object for the render ring, to enable Execlists on the hardware, and a few workarounds. v2: Squash with: "drm/i915: Extract pipe control fini & make init outside accessible". Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Make checkpatch happy.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 54 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 34 +++++++++++++-------- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++ 3 files changed, 78 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ed7a4ff3bbd2..1a1f5f98f05b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -108,6 +108,49 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) /* TODO */ } +static int gen8_init_common_ring(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + I915_WRITE(RING_MODE_GEN7(ring), + _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | + _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); + POSTING_READ(RING_MODE_GEN7(ring)); + DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name); + + memset(&ring->hangcheck, 0, sizeof(ring->hangcheck)); + + return 0; +} + +static int gen8_init_render_ring(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + ret = gen8_init_common_ring(ring); + if (ret) + return ret; + + /* We need to disable the AsyncFlip performance optimisations in order + * to use MI_WAIT_FOR_EVENT within the CS. It should already be + * programmed to '1' on all products. + * + * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv + */ + I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); + + ret = intel_init_pipe_control(ring); + if (ret) + return ret; + + I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); + + return ret; +} + void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { if (!intel_ring_initialized(ring)) @@ -178,6 +221,9 @@ static int logical_render_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT; + ring->init = gen8_init_render_ring; + ring->cleanup = intel_fini_pipe_control; + return logical_ring_init(dev, ring); } @@ -192,6 +238,8 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } @@ -206,6 +254,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } @@ -220,6 +270,8 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } @@ -234,6 +286,8 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e4b97f5c5797..dab5e7c79036 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -597,8 +597,25 @@ out: return ret; } -static int -init_pipe_control(struct intel_engine_cs *ring) +void +intel_fini_pipe_control(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + + if (ring->scratch.obj == NULL) + return; + + if (INTEL_INFO(dev)->gen >= 5) { + 
kunmap(sg_page(ring->scratch.obj->pages->sgl)); + i915_gem_object_ggtt_unpin(ring->scratch.obj); + } + + drm_gem_object_unreference(&ring->scratch.obj->base); + ring->scratch.obj = NULL; +} + +int +intel_init_pipe_control(struct intel_engine_cs *ring) { int ret; @@ -673,7 +690,7 @@ static int init_render_ring(struct intel_engine_cs *ring) _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); if (INTEL_INFO(dev)->gen >= 5) { - ret = init_pipe_control(ring); + ret = intel_init_pipe_control(ring); if (ret) return ret; } @@ -708,16 +725,7 @@ static void render_ring_cleanup(struct intel_engine_cs *ring) dev_priv->semaphore_obj = NULL; } - if (ring->scratch.obj == NULL) - return; - - if (INTEL_INFO(dev)->gen >= 5) { - kunmap(sg_page(ring->scratch.obj->pages->sgl)); - i915_gem_object_ggtt_unpin(ring->scratch.obj); - } - - drm_gem_object_unreference(&ring->scratch.obj->base); - ring->scratch.obj = NULL; + intel_fini_pipe_control(ring); } static int gen8_rcs_signal(struct intel_engine_cs *signaller, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index fbe54ef6a9a1..677df0d7be48 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -381,6 +381,9 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno); int intel_ring_flush_all_caches(struct intel_engine_cs *ring); int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring); +void intel_fini_pipe_control(struct intel_engine_cs *ring); +int intel_init_pipe_control(struct intel_engine_cs *ring); + int intel_init_render_ring_buffer(struct drm_device *dev); int intel_init_bsd_ring_buffer(struct drm_device *dev); int intel_init_bsd2_ring_buffer(struct drm_device *dev); -- cgit v1.2.3 From e94e37ad19c74b4c2569d556cda9da4a03d4e3f8 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:25 +0100 Subject: drm/i915/bdw: GEN-specific logical ring set/get seqno No mystery here: the seqno is still retrieved from the engine's HW status page (the one in the default context; for the moment, I see no reason to worry about other contexts' HWS pages). 
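For reference, the new hooks bottom out in plain indexed accesses to the CPU mapping of the status page; roughly along these lines (a sketch of the existing intel_read/write_status_page helpers, not code added by this patch):

static inline u32
intel_read_status_page(struct intel_engine_cs *ring, int reg)
{
	/* Ensure that the compiler doesn't optimize away the load. */
	barrier();
	return ring->status_page.page_addr[reg];
}

static inline void
intel_write_status_page(struct intel_engine_cs *ring, int reg, u32 value)
{
	ring->status_page.page_addr[reg] = value;
}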
Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1a1f5f98f05b..c9518c6261de 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -151,6 +151,16 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return ret; } +static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency) +{ + return intel_read_status_page(ring, I915_GEM_HWS_INDEX); +} + +static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno) +{ + intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno); +} + void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { if (!intel_ring_initialized(ring)) @@ -223,6 +233,8 @@ static int logical_render_ring_init(struct drm_device *dev) ring->init = gen8_init_render_ring; ring->cleanup = intel_fini_pipe_control; + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; return logical_ring_init(dev, ring); } @@ -239,6 +251,8 @@ static int logical_bsd_ring_init(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; ring->init = gen8_init_common_ring; + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; return logical_ring_init(dev, ring); } @@ -255,6 +269,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; ring->init = gen8_init_common_ring; + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; return logical_ring_init(dev, ring); } @@ -271,6 +287,8 @@ static int logical_blt_ring_init(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; ring->init = gen8_init_common_ring; + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; return logical_ring_init(dev, ring); } @@ -287,6 +305,8 @@ static int logical_vebox_ring_init(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; ring->init = gen8_init_common_ring; + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; return logical_ring_init(dev, ring); } -- cgit v1.2.3 From 26fbb77445bd402417f42936f68c0da26d33855d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 11 Aug 2014 18:37:37 +0300 Subject: drm/i915: Make hpd debug messages less cryptic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't print raw numbers, use port_name() and tell the user whether it's long or short without having to figure out what the other magic number means. Signed-off-by: Ville Syrjälä Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 4 +++- drivers/gpu/drm/i915/intel_dp.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f0d24db76e72..36eb1f234608 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1777,7 +1777,9 @@ static inline void intel_hpd_irq_handler(struct drm_device *dev, long_hpd = (dig_hotplug_reg >> dig_shift) & PORTB_HOTPLUG_LONG_DETECT; } - DRM_DEBUG_DRIVER("digital hpd port %d %d\n", port, long_hpd); + DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", + port_name(port), + long_hpd ? "long" : "short"); /* for long HPD pulses we want to have the digital queue happen, but we still want HPD storm detection to function. 
*/ if (long_hpd) { diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 3b88255d87dc..def55cdfef25 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4041,7 +4041,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) if (intel_dig_port->base.type != INTEL_OUTPUT_EDP) intel_dig_port->base.type = INTEL_OUTPUT_DISPLAYPORT; - DRM_DEBUG_KMS("got hpd irq on port %d - %s\n", intel_dig_port->port, + DRM_DEBUG_KMS("got hpd irq on port %c - %s\n", + port_name(intel_dig_port->port), long_hpd ? "long" : "short"); if (long_hpd) { -- cgit v1.2.3 From 82e104cc266c6da30a30fc5028b2f0236c669cd7 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:26 +0100 Subject: drm/i915/bdw: New logical ring submission mechanism Well, new-ish: if all this code looks familiar, that's because it's a clone of the existing submission mechanism (with some modifications here and there to adapt it to LRCs and Execlists). And why did we do this instead of reusing code, one might wonder? Well, there are some fears that the differences are big enough that they will end up breaking all platforms. Also, Execlists offer several advantages, like control over when the GPU is done with a given workload, that can help simplify the submission mechanism, no doubt. I am interested in getting Execlists to work first and foremost, but in the future this parallel submission mechanism will help us to fine tune the mechanism without affecting old gens. v2: Pass the ringbuffer only (whenever possible). Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Appease checkpatch. Again. And drop the legacy sarea gunk that somehow crept in.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 189 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 13 +++ drivers/gpu/drm/i915/intel_ringbuffer.c | 22 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 + 4 files changed, 217 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c9518c6261de..31025847d680 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -108,6 +108,195 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) /* TODO */ } +void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) +{ + intel_logical_ring_advance(ringbuf); + + if (intel_ring_stopped(ringbuf->ring)) + return; + + /* TODO: how to submit a context to the ELSP is not here yet */ +} + +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring) +{ + if (ring->outstanding_lazy_seqno) + return 0; + + if (ring->preallocated_lazy_request == NULL) { + struct drm_i915_gem_request *request; + + request = kmalloc(sizeof(*request), GFP_KERNEL); + if (request == NULL) + return -ENOMEM; + + ring->preallocated_lazy_request = request; + } + + return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno); +} + +static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf, + int bytes) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_i915_gem_request *request; + u32 seqno = 0; + int ret; + + if (ringbuf->last_retired_head != -1) { + ringbuf->head = ringbuf->last_retired_head; + ringbuf->last_retired_head = -1; + + ringbuf->space = intel_ring_space(ringbuf); + if (ringbuf->space >= bytes) + return 0; + } + + list_for_each_entry(request, &ring->request_list, list) { + if (__intel_ring_space(request->tail, ringbuf->tail, + ringbuf->size) 
>= bytes) { + seqno = request->seqno; + break; + } + } + + if (seqno == 0) + return -ENOSPC; + + ret = i915_wait_seqno(ring, seqno); + if (ret) + return ret; + + /* TODO: make sure we update the right ringbuffer's last_retired_head + * when retiring requests */ + i915_gem_retire_requests_ring(ring); + ringbuf->head = ringbuf->last_retired_head; + ringbuf->last_retired_head = -1; + + ringbuf->space = intel_ring_space(ringbuf); + return 0; +} + +static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf, + int bytes) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long end; + int ret; + + ret = logical_ring_wait_request(ringbuf, bytes); + if (ret != -ENOSPC) + return ret; + + /* Force the context submission in case we have been skipping it */ + intel_logical_ring_advance_and_submit(ringbuf); + + /* With GEM the hangcheck timer should kick us out of the loop, + * leaving it early runs the risk of corrupting GEM state (due + * to running on almost untested codepaths). But on resume + * timers don't work yet, so prevent a complete hang in that + * case by choosing an insanely large timeout. */ + end = jiffies + 60 * HZ; + + do { + ringbuf->head = I915_READ_HEAD(ring); + ringbuf->space = intel_ring_space(ringbuf); + if (ringbuf->space >= bytes) { + ret = 0; + break; + } + + msleep(1); + + if (dev_priv->mm.interruptible && signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + ret = i915_gem_check_wedge(&dev_priv->gpu_error, + dev_priv->mm.interruptible); + if (ret) + break; + + if (time_after(jiffies, end)) { + ret = -EBUSY; + break; + } + } while (1); + + return ret; +} + +static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf) +{ + uint32_t __iomem *virt; + int rem = ringbuf->size - ringbuf->tail; + + if (ringbuf->space < rem) { + int ret = logical_ring_wait_for_space(ringbuf, rem); + + if (ret) + return ret; + } + + virt = ringbuf->virtual_start + ringbuf->tail; + rem /= 4; + while (rem--) + iowrite32(MI_NOOP, virt++); + + ringbuf->tail = 0; + ringbuf->space = intel_ring_space(ringbuf); + + return 0; +} + +static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes) +{ + int ret; + + if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) { + ret = logical_ring_wrap_buffer(ringbuf); + if (unlikely(ret)) + return ret; + } + + if (unlikely(ringbuf->space < bytes)) { + ret = logical_ring_wait_for_space(ringbuf, bytes); + if (unlikely(ret)) + return ret; + } + + return 0; +} + +int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + ret = i915_gem_check_wedge(&dev_priv->gpu_error, + dev_priv->mm.interruptible); + if (ret) + return ret; + + ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t)); + if (ret) + return ret; + + /* Preallocate the olr before touching the ring */ + ret = logical_ring_alloc_seqno(ring); + if (ret) + return ret; + + ringbuf->space -= num_dwords * sizeof(uint32_t); + return 0; +} + static int gen8_init_common_ring(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index bf0eff4e9f08..4e032875c1fd 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -29,6 +29,19 @@ void 
intel_logical_ring_stop(struct intel_engine_cs *ring); void intel_logical_ring_cleanup(struct intel_engine_cs *ring); int intel_logical_rings_init(struct drm_device *dev); +void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf); +static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) +{ + ringbuf->tail &= ringbuf->size - 1; +} +static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, + u32 data) +{ + iowrite32(data, ringbuf->virtual_start + ringbuf->tail); + ringbuf->tail += 4; +} +int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords); + /* Logical Ring Contexts */ void intel_lr_context_free(struct intel_context *ctx); int intel_lr_context_deferred_create(struct intel_context *ctx, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index dab5e7c79036..0bfa018fab20 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -57,7 +57,7 @@ intel_ring_initialized(struct intel_engine_cs *ring) return ring->buffer && ring->buffer->obj; } -static inline int __ring_space(int head, int tail, int size) +int __intel_ring_space(int head, int tail, int size) { int space = head - (tail + I915_RING_FREE_SPACE); if (space < 0) @@ -65,12 +65,13 @@ static inline int __ring_space(int head, int tail, int size) return space; } -static inline int ring_space(struct intel_ringbuffer *ringbuf) +int intel_ring_space(struct intel_ringbuffer *ringbuf) { - return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size); + return __intel_ring_space(ringbuf->head & HEAD_ADDR, + ringbuf->tail, ringbuf->size); } -static bool intel_ring_stopped(struct intel_engine_cs *ring) +bool intel_ring_stopped(struct intel_engine_cs *ring) { struct drm_i915_private *dev_priv = ring->dev->dev_private; return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring); @@ -585,7 +586,7 @@ static int init_ring_common(struct intel_engine_cs *ring) else { ringbuf->head = I915_READ_HEAD(ring); ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); ringbuf->last_retired_head = -1; } @@ -1702,13 +1703,14 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n) ringbuf->head = ringbuf->last_retired_head; ringbuf->last_retired_head = -1; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); if (ringbuf->space >= n) return 0; } list_for_each_entry(request, &ring->request_list, list) { - if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) { + if (__intel_ring_space(request->tail, ringbuf->tail, + ringbuf->size) >= n) { seqno = request->seqno; break; } @@ -1725,7 +1727,7 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n) ringbuf->head = ringbuf->last_retired_head; ringbuf->last_retired_head = -1; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); return 0; } @@ -1754,7 +1756,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n) trace_i915_ring_wait_begin(ring); do { ringbuf->head = I915_READ_HEAD(ring); - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); if (ringbuf->space >= n) { ret = 0; break; @@ -1806,7 +1808,7 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring) iowrite32(MI_NOOP, virt++); ringbuf->tail = 0; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = 
intel_ring_space(ringbuf); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 677df0d7be48..81bad364e36d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -374,6 +374,9 @@ static inline void intel_ring_advance(struct intel_engine_cs *ring) struct intel_ringbuffer *ringbuf = ring->buffer; ringbuf->tail &= ringbuf->size - 1; } +int __intel_ring_space(int head, int tail, int size); +int intel_ring_space(struct intel_ringbuffer *ringbuf); +bool intel_ring_stopped(struct intel_engine_cs *ring); void __intel_ring_advance(struct intel_engine_cs *ring); int __must_check intel_ring_idle(struct intel_engine_cs *ring); -- cgit v1.2.3 From 4da46e1e5bb7e7396fad172cdaffbe496562f3d8 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:27 +0100 Subject: drm/i915/bdw: GEN-specific logical ring emit request Very similar to the legacy add_request, only modified to account for logical ringbuffer. v2: Use MI_GLOBAL_GTT, as suggested by Brad Volkin. v3: Unify render and non-render in the same function, as noticed by Brad Volkin. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_lrc.c | 31 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++ 3 files changed, 35 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c1d24242a02d..3388afb90a93 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -272,6 +272,7 @@ #define MI_SEMAPHORE_POLL (1<<15) #define MI_SEMAPHORE_SAD_GTE_SDD (1<<12) #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) +#define MI_STORE_DWORD_IMM_GEN8 MI_INSTR(0x20, 2) #define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) #define MI_STORE_DWORD_INDEX_SHIFT 2 diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 31025847d680..94f8b4087642 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -350,6 +350,32 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno) intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno); } +static int gen8_emit_request(struct intel_ringbuffer *ringbuf) +{ + struct intel_engine_cs *ring = ringbuf->ring; + u32 cmd; + int ret; + + ret = intel_logical_ring_begin(ringbuf, 6); + if (ret) + return ret; + + cmd = MI_STORE_DWORD_IMM_GEN8; + cmd |= MI_GLOBAL_GTT; + + intel_logical_ring_emit(ringbuf, cmd); + intel_logical_ring_emit(ringbuf, + (ring->status_page.gfx_addr + + (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT))); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, ring->outstanding_lazy_seqno); + intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_advance_and_submit(ringbuf); + + return 0; +} + void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { if (!intel_ring_initialized(ring)) @@ -424,6 +450,7 @@ static int logical_render_ring_init(struct drm_device *dev) ring->cleanup = intel_fini_pipe_control; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; + ring->emit_request = gen8_emit_request; return logical_ring_init(dev, ring); } @@ -442,6 +469,7 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = 
gen8_set_seqno; + ring->emit_request = gen8_emit_request; return logical_ring_init(dev, ring); } @@ -460,6 +488,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; + ring->emit_request = gen8_emit_request; return logical_ring_init(dev, ring); } @@ -478,6 +507,7 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; + ring->emit_request = gen8_emit_request; return logical_ring_init(dev, ring); } @@ -496,6 +526,7 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; + ring->emit_request = gen8_emit_request; return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 81bad364e36d..467885159a80 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -216,6 +216,9 @@ struct intel_engine_cs { unsigned int num_dwords); } semaphore; + /* Execlists */ + int (*emit_request)(struct intel_ringbuffer *ringbuf); + /** * List of objects currently involved in rendering from the * ringbuffer. -- cgit v1.2.3 From 4712274c362b7730a1c6e01c9a51a6d46f5b7f43 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:28 +0100 Subject: drm/i915/bdw: GEN-specific logical ring emit flush Same as the legacy-style ring->flush. v2: The BSD invalidate bit still exists in GEN8! Add it for the VCS rings (but still consolidate the blt and bsd ring flushes into one). This was noticed by Brad Volkin. v3: The command for BSD and for other rings is slightly different: get it exactly the same as in gen6_ring_flush + gen6_bsd_ring_flush Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 85 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 7 --- drivers/gpu/drm/i915/intel_ringbuffer.h | 10 ++++ 3 files changed, 95 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 94f8b4087642..a88fa6e9360b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -340,6 +340,86 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return ret; } +static int gen8_emit_flush(struct intel_ringbuffer *ringbuf, + u32 invalidate_domains, + u32 unused) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t cmd; + int ret; + + ret = intel_logical_ring_begin(ringbuf, 4); + if (ret) + return ret; + + cmd = MI_FLUSH_DW + 1; + + if (ring == &dev_priv->ring[VCS]) { + if (invalidate_domains & I915_GEM_GPU_DOMAINS) + cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD | + MI_FLUSH_DW_STORE_INDEX | + MI_FLUSH_DW_OP_STOREDW; + } else { + if (invalidate_domains & I915_GEM_DOMAIN_RENDER) + cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX | + MI_FLUSH_DW_OP_STOREDW; + } + + intel_logical_ring_emit(ringbuf, cmd); + intel_logical_ring_emit(ringbuf, + I915_GEM_HWS_SCRATCH_ADDR | + MI_FLUSH_DW_USE_GTT); + intel_logical_ring_emit(ringbuf, 0); /* upper addr */ + intel_logical_ring_emit(ringbuf, 0); /* value */ + intel_logical_ring_advance(ringbuf); + + return 0; +} + +static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf, + u32 invalidate_domains, + u32 flush_domains) +{ + struct intel_engine_cs *ring = ringbuf->ring; + u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; + u32 flags = 0; + int ret; + + flags |= PIPE_CONTROL_CS_STALL; + + if (flush_domains) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + } + + if (invalidate_domains) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + } + + ret = intel_logical_ring_begin(ringbuf, 6); + if (ret) + return ret; + + intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); + intel_logical_ring_emit(ringbuf, flags); + intel_logical_ring_emit(ringbuf, scratch_addr); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_advance(ringbuf); + + return 0; +} + static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency) { return intel_read_status_page(ring, I915_GEM_HWS_INDEX); @@ -451,6 +531,7 @@ static int logical_render_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush_render; return logical_ring_init(dev, ring); } @@ -470,6 +551,7 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } @@ -489,6 +571,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->get_seqno = 
gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } @@ -508,6 +591,7 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } @@ -527,6 +611,7 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0bfa018fab20..4236014c1cda 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -33,13 +33,6 @@ #include "i915_trace.h" #include "intel_drv.h" -/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, - * but keeps the logic simple. Indeed, the whole purpose of this macro is just - * to give some inclination as to some of the magic values used in the various - * workarounds! - */ -#define CACHELINE_BYTES 64 - bool intel_ring_initialized(struct intel_engine_cs *ring) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 467885159a80..e497837c7724 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,13 @@ #define I915_CMD_HASH_ORDER 9 +/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, + * but keeps the logic simple. Indeed, the whole purpose of this macro is just + * to give some inclination as to some of the magic values used in the various + * workarounds! + */ +#define CACHELINE_BYTES 64 + /* * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use" * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use" @@ -218,6 +225,9 @@ struct intel_engine_cs { /* Execlists */ int (*emit_request)(struct intel_ringbuffer *ringbuf); + int (*emit_flush)(struct intel_ringbuffer *ringbuf, + u32 invalidate_domains, + u32 flush_domains); /** * List of objects currently involved in rendering from the -- cgit v1.2.3 From 9832b9dae8f9f505c7ed898a043b4f54b54597ed Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:30 +0100 Subject: drm/i915/bdw: Ring idle and stop with logical rings This is a hard one, since there is no direct hardware ring to control when in Execlists. We reuse intel_ring_idle here, but it should be fine as long as i915_add_request does the ring thing. 
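The stop sequence relies on i915's masked registers, where the high 16 bits of a write select which of the low 16 bits actually take effect; a condensed sketch of the sequence added below (assuming the usual _MASKED_BIT_* definitions from i915_reg.h):

/* #define _MASKED_BIT_ENABLE(a)  (((a) << 16) | (a)) */
/* #define _MASKED_BIT_DISABLE(a) ((a) << 16)        */

ret = intel_ring_idle(ring);	/* drain outstanding requests first */
I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000))
	DRM_ERROR("%s: timed out trying to stop ring\n", ring->name);
I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));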
Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a88fa6e9360b..8a524baa8a6b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -105,7 +105,24 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, void intel_logical_ring_stop(struct intel_engine_cs *ring) { - /* TODO */ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + int ret; + + if (!intel_ring_initialized(ring)) + return; + + ret = intel_ring_idle(ring); + if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error)) + DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", + ring->name, ret); + + /* TODO: Is this correct with Execlists enabled? */ + I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING)); + if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { + DRM_ERROR("%s: timed out trying to stop ring\n", ring->name); + return; + } + I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING)); } void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) @@ -458,10 +475,13 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { + struct drm_i915_private *dev_priv = ring->dev->dev_private; + if (!intel_ring_initialized(ring)) return; - /* TODO: make sure the ring is stopped */ + intel_logical_ring_stop(ring); + WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0); ring->preallocated_lazy_request = NULL; ring->outstanding_lazy_seqno = 0; -- cgit v1.2.3 From 73d477f6bb17a1f14c4897a4b4a6597fe9a38ad2 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:31 +0100 Subject: drm/i915/bdw: Interrupts with logical rings We need to attend to context switch interrupts from all rings. Also, fix the writing of IMR/IER and add HWSTAM setup at ring init time. Notice that, if added to irq_enable_mask, the context switch interrupts would be incorrectly masked out whenever the user interrupts are (i.e. when no user is waiting on a sequence number). Therefore, this commit adds a bitmask of interrupts to be kept unmasked at all times. v2: Disable HWSTAM, as suggested by Damien (nobody listens to these interrupts, anyway). v3: Add new get/put_irq functions. Signed-off-by: Thomas Daniel (v1) Signed-off-by: Oscar Mateo (v2 & v3) Reviewed-by: Damien Lespiau [danvet: Drop the GEN8_ prefix from the context switch interrupt define and move it to its brethren.] 
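The invariant that irq_keep_mask establishes can be summarised as follows (a sketch of the IMR programming done by the get/put_irq functions below, not extra patch code):

/* Waiters present: unmask the user interrupts plus the keep-alive set. */
I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));

/* No waiters: mask everything except the keep-alive set, so that
 * context-switch (and, on RCS, L3-parity) events still get through. */
I915_WRITE_IMR(ring, ~ring->irq_keep_mask);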
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 19 +++++++++-- drivers/gpu/drm/i915/i915_reg.h | 2 ++ drivers/gpu/drm/i915/intel_lrc.c | 58 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 4 files changed, 77 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 36eb1f234608..00957fa0b877 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1647,6 +1647,8 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, notify_ring(dev, &dev_priv->ring[RCS]); if (bcs & GT_RENDER_USER_INTERRUPT) notify_ring(dev, &dev_priv->ring[BCS]); + if ((rcs | bcs) & GT_CONTEXT_SWITCH_INTERRUPT) + DRM_DEBUG_DRIVER("TODO: Context switch\n"); } else DRM_ERROR("The master control interrupt lied (GT0)!\n"); } @@ -1659,9 +1661,13 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, vcs = tmp >> GEN8_VCS1_IRQ_SHIFT; if (vcs & GT_RENDER_USER_INTERRUPT) notify_ring(dev, &dev_priv->ring[VCS]); + if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) + DRM_DEBUG_DRIVER("TODO: Context switch\n"); vcs = tmp >> GEN8_VCS2_IRQ_SHIFT; if (vcs & GT_RENDER_USER_INTERRUPT) notify_ring(dev, &dev_priv->ring[VCS2]); + if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) + DRM_DEBUG_DRIVER("TODO: Context switch\n"); } else DRM_ERROR("The master control interrupt lied (GT1)!\n"); } @@ -1685,6 +1691,8 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, vcs = tmp >> GEN8_VECS_IRQ_SHIFT; if (vcs & GT_RENDER_USER_INTERRUPT) notify_ring(dev, &dev_priv->ring[VECS]); + if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) + DRM_DEBUG_DRIVER("TODO: Context switch\n"); } else DRM_ERROR("The master control interrupt lied (GT3)!\n"); } @@ -3788,12 +3796,17 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) /* These are interrupts we'll toggle with the ring mask register */ uint32_t gt_interrupts[] = { GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT | GT_RENDER_L3_PARITY_ERROR_INTERRUPT | - GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT, + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT, GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT, + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT, 0, - GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT + GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT }; for (i = 0; i < ARRAY_SIZE(gt_interrupts); i++) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3388afb90a93..f79c20d49d99 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1087,6 +1087,7 @@ enum punit_power_well { #define RING_ACTHD_UDW(base) ((base)+0x5c) #define RING_NOPID(base) ((base)+0x94) #define RING_IMR(base) ((base)+0xa8) +#define RING_HWSTAM(base) ((base)+0x98) #define RING_TIMESTAMP(base) ((base)+0x358) #define TAIL_ADDR 0x001FFFF8 #define HEAD_WRAP_COUNT 0xFFE00000 @@ -1403,6 +1404,7 @@ enum punit_power_well { #define GT_BSD_CS_ERROR_INTERRUPT (1 << 15) #define GT_BSD_USER_INTERRUPT (1 << 12) #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */ +#define GT_CONTEXT_SWITCH_INTERRUPT (1 << 8) #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */ #define 
GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4) #define GT_RENDER_CS_MASTER_ERROR_INTERRUPT (1 << 3) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8a524baa8a6b..009a8b5c088e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -319,6 +319,9 @@ static int gen8_init_common_ring(struct intel_engine_cs *ring) struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; + I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask)); + I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff); + I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); @@ -357,6 +360,39 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return ret; } +static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long flags; + + if (!dev->irq_enabled) + return false; + + spin_lock_irqsave(&dev_priv->irq_lock, flags); + if (ring->irq_refcount++ == 0) { + I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask)); + POSTING_READ(RING_IMR(ring->mmio_base)); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + + return true; +} + +static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long flags; + + spin_lock_irqsave(&dev_priv->irq_lock, flags); + if (--ring->irq_refcount == 0) { + I915_WRITE_IMR(ring, ~ring->irq_keep_mask); + POSTING_READ(RING_IMR(ring->mmio_base)); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, flags); +} + static int gen8_emit_flush(struct intel_ringbuffer *ringbuf, u32 invalidate_domains, u32 unused) @@ -545,6 +581,10 @@ static int logical_render_ring_init(struct drm_device *dev) ring->mmio_base = RENDER_RING_BASE; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT; + ring->irq_keep_mask = + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT; + if (HAS_L3_DPF(dev)) + ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; ring->init = gen8_init_render_ring; ring->cleanup = intel_fini_pipe_control; @@ -552,6 +592,8 @@ static int logical_render_ring_init(struct drm_device *dev) ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush_render; + ring->irq_get = gen8_logical_ring_get_irq; + ring->irq_put = gen8_logical_ring_put_irq; return logical_ring_init(dev, ring); } @@ -566,12 +608,16 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->mmio_base = GEN6_BSD_RING_BASE; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; + ring->irq_keep_mask = + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->irq_get = gen8_logical_ring_get_irq; + ring->irq_put = gen8_logical_ring_put_irq; return logical_ring_init(dev, ring); } @@ -586,12 +632,16 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->mmio_base = GEN8_BSD2_RING_BASE; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; + ring->irq_keep_mask = + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; 
ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->irq_get = gen8_logical_ring_get_irq; + ring->irq_put = gen8_logical_ring_put_irq; return logical_ring_init(dev, ring); } @@ -606,12 +656,16 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->mmio_base = BLT_RING_BASE; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; + ring->irq_keep_mask = + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT; ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->irq_get = gen8_logical_ring_get_irq; + ring->irq_put = gen8_logical_ring_put_irq; return logical_ring_init(dev, ring); } @@ -626,12 +680,16 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->mmio_base = VEBOX_RING_BASE; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; + ring->irq_keep_mask = + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT; ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->irq_get = gen8_logical_ring_get_irq; + ring->irq_put = gen8_logical_ring_put_irq; return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index e497837c7724..cb529ee10c8f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -224,6 +224,7 @@ struct intel_engine_cs { } semaphore; /* Execlists */ + u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */ int (*emit_request)(struct intel_ringbuffer *ringbuf); int (*emit_flush)(struct intel_ringbuffer *ringbuf, u32 invalidate_domains, -- cgit v1.2.3 From 156485852684b511be28a83c78fece8b27ef7c26 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:32 +0100 Subject: drm/i915/bdw: GEN-specific logical ring emit batchbuffer start Dispatch_execbuffer's evil twin. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Ditch the check for aliasing ppgtt. It'll break soon and execlists requires full ppgtt anyway.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 27 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 2 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 009a8b5c088e..e0d4ef2a5c30 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -360,6 +360,28 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return ret; } +static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, + u64 offset, unsigned flags) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_i915_private *dev_priv = ring->dev->dev_private; + bool ppgtt = !(flags & I915_DISPATCH_SECURE); + int ret; + + ret = intel_logical_ring_begin(ringbuf, 4); + if (ret) + return ret; + + /* FIXME(BDW): Address space and security selectors. 
*/ + intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_logical_ring_emit(ringbuf, lower_32_bits(offset)); + intel_logical_ring_emit(ringbuf, upper_32_bits(offset)); + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_advance(ringbuf); + + return 0; +} + static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; @@ -594,6 +616,7 @@ static int logical_render_ring_init(struct drm_device *dev) ring->emit_flush = gen8_emit_flush_render; ring->irq_get = gen8_logical_ring_get_irq; ring->irq_put = gen8_logical_ring_put_irq; + ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); } @@ -618,6 +641,7 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; ring->irq_put = gen8_logical_ring_put_irq; + ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); } @@ -642,6 +666,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; ring->irq_put = gen8_logical_ring_put_irq; + ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); } @@ -666,6 +691,7 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; ring->irq_put = gen8_logical_ring_put_irq; + ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); } @@ -690,6 +716,7 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; ring->irq_put = gen8_logical_ring_put_irq; + ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index cb529ee10c8f..24437da91f77 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -229,6 +229,8 @@ struct intel_engine_cs { int (*emit_flush)(struct intel_ringbuffer *ringbuf, u32 invalidate_domains, u32 flush_domains); + int (*emit_bb_start)(struct intel_ringbuffer *ringbuf, + u64 offset, unsigned flags); /** * List of objects currently involved in rendering from the -- cgit v1.2.3 From ba8b7ccb196b07c1c553450e8e7b44a7a938e58a Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:33 +0100 Subject: drm/i915/bdw: Workload submission mechanism for Execlists This is what i915_gem_do_execbuffer calls when it wants to execute some workload in an Execlists world. v2: Check arguments before doing stuff in intel_execlists_submission. Also, get rel_constants parsing right. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Drop the chipset flush, that's pre-gen6. And appease checkpatch a bit .... again!] 
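The overall flow mirrors the legacy i915_gem_ringbuffer_submission; a condensed sketch of what the function below does, with error handling elided (names as in the diff):

	/* 1: validate args (constants mode, cliprects, SOL reset) */

	/* 2: sync and flush the objects the GPU is about to use */
	ret = execlists_move_to_gpu(ringbuf, vmas);

	/* 3: on RCS, switch INSTPM if the constants mode changed */

	/* 4: emit the batchbuffer start into the logical ring */
	ret = ring->emit_bb_start(ringbuf, exec_start, flags);

	/* 5: book-keeping, shared with the legacy path */
	i915_gem_execbuffer_move_to_active(vmas, ring);
	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);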
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 6 ++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +- drivers/gpu/drm/i915/intel_lrc.c | 130 ++++++++++++++++++++++++++++- 3 files changed, 135 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9198f1c96470..53cc4c083d0d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2266,6 +2266,12 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +void i915_gem_execbuffer_move_to_active(struct list_head *vmas, + struct intel_engine_cs *ring); +void i915_gem_execbuffer_retire_commands(struct drm_device *dev, + struct drm_file *file, + struct intel_engine_cs *ring, + struct drm_i915_gem_object *obj); int i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *ring, diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 26b38b3ae4f3..7e04a4825ed0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -942,7 +942,7 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ctx; } -static void +void i915_gem_execbuffer_move_to_active(struct list_head *vmas, struct intel_engine_cs *ring) { @@ -983,7 +983,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, } } -static void +void i915_gem_execbuffer_retire_commands(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *ring, diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e0d4ef2a5c30..e1a298f23f50 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -91,6 +91,55 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } +static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) +{ + struct intel_engine_cs *ring = ringbuf->ring; + uint32_t flush_domains; + int ret; + + flush_domains = 0; + if (ring->gpu_caches_dirty) + flush_domains = I915_GEM_GPU_DOMAINS; + + ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains); + if (ret) + return ret; + + ring->gpu_caches_dirty = false; + return 0; +} + +static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, + struct list_head *vmas) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct i915_vma *vma; + uint32_t flush_domains = 0; + bool flush_chipset = false; + int ret; + + list_for_each_entry(vma, vmas, exec_list) { + struct drm_i915_gem_object *obj = vma->obj; + + ret = i915_gem_object_sync(obj, ring); + if (ret) + return ret; + + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) + flush_chipset |= i915_gem_clflush_object(obj, false); + + flush_domains |= obj->base.write_domain; + } + + if (flush_domains & I915_GEM_DOMAIN_GTT) + wmb(); + + /* Unconditionally invalidate gpu caches and ensure that we do flush + * any residual writes from the previous batch. 
+ */ + return logical_ring_invalidate_all_caches(ringbuf); +} + int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *ring, struct intel_context *ctx, @@ -99,7 +148,84 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_object *batch_obj, u64 exec_start, u32 flags) { - /* TODO */ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; + int instp_mode; + u32 instp_mask; + int ret; + + instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; + instp_mask = I915_EXEC_CONSTANTS_MASK; + switch (instp_mode) { + case I915_EXEC_CONSTANTS_REL_GENERAL: + case I915_EXEC_CONSTANTS_ABSOLUTE: + case I915_EXEC_CONSTANTS_REL_SURFACE: + if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) { + DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); + return -EINVAL; + } + + if (instp_mode != dev_priv->relative_constants_mode) { + if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { + DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); + return -EINVAL; + } + + /* The HW changed the meaning on this bit on gen6 */ + instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; + } + break; + default: + DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); + return -EINVAL; + } + + if (args->num_cliprects != 0) { + DRM_DEBUG("clip rectangles are only valid on pre-gen5\n"); + return -EINVAL; + } else { + if (args->DR4 == 0xffffffff) { + DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); + args->DR4 = 0; + } + + if (args->DR1 || args->DR4 || args->cliprects_ptr) { + DRM_DEBUG("0 cliprects but dirt in cliprects fields\n"); + return -EINVAL; + } + } + + if (args->flags & I915_EXEC_GEN7_SOL_RESET) { + DRM_DEBUG("sol reset is gen7 only\n"); + return -EINVAL; + } + + ret = execlists_move_to_gpu(ringbuf, vmas); + if (ret) + return ret; + + if (ring == &dev_priv->ring[RCS] && + instp_mode != dev_priv->relative_constants_mode) { + ret = intel_logical_ring_begin(ringbuf, 4); + if (ret) + return ret; + + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); + intel_logical_ring_emit(ringbuf, INSTPM); + intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode); + intel_logical_ring_advance(ringbuf); + + dev_priv->relative_constants_mode = instp_mode; + } + + ret = ring->emit_bb_start(ringbuf, exec_start, flags); + if (ret) + return ret; + + i915_gem_execbuffer_move_to_active(vmas, ring); + i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); + return 0; } @@ -363,8 +489,6 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, u64 offset, unsigned flags) { - struct intel_engine_cs *ring = ringbuf->ring; - struct drm_i915_private *dev_priv = ring->dev->dev_private; bool ppgtt = !(flags & I915_DISPATCH_SECURE); int ret; -- cgit v1.2.3 From 14bf993e83e1d6924f4bf4506120a15c4b255e58 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:34 +0100 Subject: drm/i915/bdw: Always use MMIO flips with Execlists The normal flip function places things in the ring in the legacy way, so we either fix that or force MMIO flips always as we do in this patch. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch. Fucking again.] 
Signed-off-by: Daniel Vetter
---
 drivers/gpu/drm/i915/intel_display.c | 2 ++
 drivers/gpu/drm/i915/intel_lrc.c     | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 245cf4128314..1bd1aa21a8e9 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9536,6 +9536,8 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
 		return false;
 	else if (i915.use_mmio_flip > 0)
 		return true;
+	else if (i915.enable_execlists)
+		return true;
 	else
 		return ring != obj->ring;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e1a298f23f50..6f1b64e01a05 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -85,7 +85,8 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists
 	if (enable_execlists == 0)
 		return 0;
 
-	if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev))
+	if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev) &&
+	    i915.use_mmio_flip >= 0)
 		return 1;
 
 	return 0;
--
cgit v1.2.3

From b9d06dd9d1dd3672b391e6387d62aa8dc4e377bd Mon Sep 17 00:00:00 2001
From: Michel Thierry
Date: Wed, 6 Aug 2014 15:04:44 +0200
Subject: drm/i915: vma/ppgtt lifetime rules

VMAs should take a reference of the address space they use. Now, when
the fd is closed, it will release the ref that the context was holding,
but it will still be referenced by any vmas that are still active.

ppgtt_release() should then only be called when the last thing
referencing it releases the ref, and it can just call the base cleanup
and free the ppgtt.

Note that with this we will extend the lifetime of ppgtts which contain
shared objects. But all the non-shared objects will get removed as soon
as they drop off the active list and, for the shared ones, the shrinker
can eventually reap them. Since we currently can't evict ppgtt
pagetables either, I don't think that temporary leak is important.

Signed-off-by: Michel Thierry
[danvet: Add note about potential ppgtt leak with this approach.]
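(The lifetime rule this commit describes — every vma holds a reference on its address space, and the ppgtt is torn down only when the final reference drops — is the classic kref pattern. Below is a self-contained sketch of that rule using stand-ins for the kernel's kref and container_of; all names are illustrative, not the driver's.)

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Minimal stand-in for the kernel's kref. */
struct kref { int refcount; };

static void kref_init(struct kref *k) { k->refcount = 1; }
static void kref_get(struct kref *k) { k->refcount++; }
static void kref_put(struct kref *k, void (*release)(struct kref *k))
{
	if (--k->refcount == 0)
		release(k);
}

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct toy_ppgtt {
	struct kref ref;	/* embedded refcount, as in i915_hw_ppgtt */
};

static void toy_ppgtt_release(struct kref *kref)
{
	struct toy_ppgtt *ppgtt = container_of(kref, struct toy_ppgtt, ref);

	printf("last reference dropped, cleaning up page tables\n");
	free(ppgtt);
}

int main(void)
{
	struct toy_ppgtt *ppgtt = malloc(sizeof(*ppgtt));

	if (!ppgtt)
		return 1;

	kref_init(&ppgtt->ref);		/* reference held by the context */
	kref_get(&ppgtt->ref);		/* an active vma takes its own ref */

	kref_put(&ppgtt->ref, toy_ppgtt_release);	/* fd closed: context ref gone */
	kref_put(&ppgtt->ref, toy_ppgtt_release);	/* vma destroyed: released now */
	return 0;
}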
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 8 ++++++++ drivers/gpu/drm/i915/i915_gem_context.c | 23 +++-------------------- drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +++++ 4 files changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 53cc4c083d0d..38b1849a450a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2547,7 +2547,9 @@ void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj); /* i915_gem_context.c */ #define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base) +#define vm_to_ppgtt(vm) container_of(vm, struct i915_hw_ppgtt, base) int __must_check i915_gem_context_init(struct drm_device *dev); +void ppgtt_release(struct kref *kref); void i915_gem_context_fini(struct drm_device *dev); void i915_gem_context_reset(struct drm_device *dev); int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9acb2469116a..63aee412b258 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4476,12 +4476,20 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, void i915_gem_vma_destroy(struct i915_vma *vma) { + struct i915_address_space *vm = NULL; + struct i915_hw_ppgtt *ppgtt = NULL; WARN_ON(vma->node.allocated); /* Keep the vma as a placeholder in the execbuffer reservation lists */ if (!list_empty(&vma->exec_list)) return; + vm = vma->vm; + ppgtt = vm_to_ppgtt(vm); + + if (ppgtt) + kref_put(&ppgtt->ref, ppgtt_release); + list_del(&vma->vma_link); kfree(vma); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 7a08f3e9e1ae..a509a4bca991 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -108,30 +108,13 @@ static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt) return; } - /* - * Make sure vmas are unbound before we take down the drm_mm - * - * FIXME: Proper refcounting should take care of this, this shouldn't be - * needed at all. 
- */
-	if (!list_empty(&vm->active_list)) {
-		struct i915_vma *vma;
-
-		list_for_each_entry(vma, &vm->active_list, mm_list)
-			if (WARN_ON(list_empty(&vma->vma_link) ||
-				    list_is_singular(&vma->vma_link)))
-				break;
-
-		i915_gem_evict_vm(&ppgtt->base, true);
-	} else {
-		i915_gem_retire_requests(dev);
-		i915_gem_evict_vm(&ppgtt->base, false);
-	}
+	/* vmas should already be unbound */
+	WARN_ON(!list_empty(&vm->active_list));
 
 	ppgtt->base.cleanup(&ppgtt->base);
 }
 
-static void ppgtt_release(struct kref *kref)
+void ppgtt_release(struct kref *kref)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(kref, struct i915_hw_ppgtt, ref);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b4b7cfd226b7..76dd3b428483 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2148,10 +2148,15 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
 				  struct i915_address_space *vm)
 {
 	struct i915_vma *vma;
+	struct i915_hw_ppgtt *ppgtt = NULL;
 
 	vma = i915_gem_obj_to_vma(obj, vm);
 	if (!vma)
 		vma = __i915_gem_vma_create(obj, vm);
 
+	ppgtt = vm_to_ppgtt(vm);
+	if (ppgtt)
+		kref_get(&ppgtt->ref);
+
 	return vma;
 }
--
cgit v1.2.3

From ee960be7bb09b201926cb37eaa82fb7da605ea7c Mon Sep 17 00:00:00 2001
From: Daniel Vetter
Date: Wed, 6 Aug 2014 15:04:45 +0200
Subject: drm/i915: Some cleanups for the ppgtt lifetime handling

So when reviewing Michel's patch I've noticed a few things and cleaned
them up:

- The early checks in ppgtt_release are now redundant: The inactive
  list should always be empty now, so we can ditch these checks. Even
  for the aliasing ppgtt (though that's a different confusion) since
  we tear that down after all the objects are gone.

- The ppgtt handling functions are splattered all over. Consolidate
  them in i915_gem_gtt.c, give them OCD prefixes and add wrappers for
  get/put.

- There was a bit of confusion in ppgtt_release about whether it cares
  about the active or inactive list. It should care about them both,
  so augment the WARNINGs to check for both.

There's still create_vm_for_ctx left to do, but that is blocked on the
removal of ppgtt->ctx. Once that's done we can rename it to
i915_ppgtt_create and move it to its siblings for handling ppgtts.

v2: Move the ppgtt checks into the inline get/put functions as
suggested by Chris.

v3: Inline the now redundant ppgtt local variable.
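(The v2 note — pushing the NULL check into the inline get/put helpers — buys callers the right to pass a possibly-NULL ppgtt without guarding every call site. A minimal runnable sketch of that shape, with toy types rather than the driver's:)

#include <stdio.h>

struct toy_ppgtt { int refcount; };

/* NULL-tolerant wrappers: the NULL check lives in exactly one place,
 * so callers (vma create/destroy paths) never need to guard the call. */
static inline void toy_ppgtt_get(struct toy_ppgtt *ppgtt)
{
	if (ppgtt)
		ppgtt->refcount++;
}

static inline void toy_ppgtt_put(struct toy_ppgtt *ppgtt)
{
	if (ppgtt)
		ppgtt->refcount--;
}

int main(void)
{
	struct toy_ppgtt ppgtt = { .refcount = 1 };

	toy_ppgtt_get(&ppgtt);	/* a vma takes a reference */
	toy_ppgtt_get(NULL);	/* global-GTT case: harmless no-op */
	toy_ppgtt_put(&ppgtt);

	printf("refcount = %d\n", ppgtt.refcount);
	return 0;
}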
Cc: Michel Thierry Cc: Chris Wilson Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 5 +---- drivers/gpu/drm/i915/i915_gem_context.c | 36 ++++----------------------------- drivers/gpu/drm/i915/i915_gem_gtt.c | 20 +++++++++++++----- drivers/gpu/drm/i915/i915_gem_gtt.h | 14 ++++++++++++- 5 files changed, 33 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 38b1849a450a..101fc637eb46 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2549,7 +2549,6 @@ void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj); #define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base) #define vm_to_ppgtt(vm) container_of(vm, struct i915_hw_ppgtt, base) int __must_check i915_gem_context_init(struct drm_device *dev); -void ppgtt_release(struct kref *kref); void i915_gem_context_fini(struct drm_device *dev); void i915_gem_context_reset(struct drm_device *dev); int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 63aee412b258..8061d45eaa80 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4477,7 +4477,6 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, void i915_gem_vma_destroy(struct i915_vma *vma) { struct i915_address_space *vm = NULL; - struct i915_hw_ppgtt *ppgtt = NULL; WARN_ON(vma->node.allocated); /* Keep the vma as a placeholder in the execbuffer reservation lists */ @@ -4485,10 +4484,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma) return; vm = vma->vm; - ppgtt = vm_to_ppgtt(vm); - if (ppgtt) - kref_put(&ppgtt->ref, ppgtt_release); + i915_ppgtt_put(vm_to_ppgtt(vm)); list_del(&vma->vma_link); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a509a4bca991..dc43d0263a01 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -96,33 +96,6 @@ #define GEN6_CONTEXT_ALIGN (64<<10) #define GEN7_CONTEXT_ALIGN 4096 -static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt) -{ - struct drm_device *dev = ppgtt->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct i915_address_space *vm = &ppgtt->base; - - if (ppgtt == dev_priv->mm.aliasing_ppgtt || - (list_empty(&vm->active_list) && list_empty(&vm->inactive_list))) { - ppgtt->base.cleanup(&ppgtt->base); - return; - } - - /* vmas should already be unbound */ - WARN_ON(!list_empty(&vm->active_list)); - - ppgtt->base.cleanup(&ppgtt->base); -} - -void ppgtt_release(struct kref *kref) -{ - struct i915_hw_ppgtt *ppgtt = - container_of(kref, struct i915_hw_ppgtt, ref); - - do_ppgtt_cleanup(ppgtt); - kfree(ppgtt); -} - static size_t get_context_alignment(struct drm_device *dev) { if (IS_GEN6(dev)) @@ -174,8 +147,7 @@ void i915_gem_context_free(struct kref *ctx_ref) ppgtt = ctx_to_ppgtt(ctx); } - if (ppgtt) - kref_put(&ppgtt->ref, ppgtt_release); + i915_ppgtt_put(ppgtt); if (ctx->legacy_hw_ctx.rcs_state) drm_gem_object_unreference(&ctx->legacy_hw_ctx.rcs_state->base); list_del(&ctx->link); @@ -222,7 +194,7 @@ create_vm_for_ctx(struct drm_device *dev, struct intel_context *ctx) if (!ppgtt) return ERR_PTR(-ENOMEM); - ret = i915_gem_init_ppgtt(dev, ppgtt); + ret = i915_ppgtt_init(dev, ppgtt); if (ret) { kfree(ppgtt); return ERR_PTR(ret); @@ -234,7 +206,7 @@ create_vm_for_ctx(struct 
drm_device *dev, struct intel_context *ctx) static struct intel_context * __create_hw_context(struct drm_device *dev, - struct drm_i915_file_private *file_priv) + struct drm_i915_file_private *file_priv) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_context *ctx; @@ -342,7 +314,7 @@ i915_gem_create_context(struct drm_device *dev, /* For platforms which only have aliasing PPGTT, we fake the * address space and refcounting. */ ctx->vm = &dev_priv->mm.aliasing_ppgtt->base; - kref_get(&dev_priv->mm.aliasing_ppgtt->ref); + i915_ppgtt_get(dev_priv->mm.aliasing_ppgtt); } else ctx->vm = &dev_priv->gtt.base; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 76dd3b428483..6ffa12b72538 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1180,7 +1180,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return 0; } -int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) { struct drm_i915_private *dev_priv = dev->dev_private; int ret = 0; @@ -1211,6 +1211,19 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) return ret; } +void i915_ppgtt_release(struct kref *kref) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(kref, struct i915_hw_ppgtt, ref); + + /* vmas should already be unbound */ + WARN_ON(!list_empty(&ppgtt->base.active_list)); + WARN_ON(!list_empty(&ppgtt->base.inactive_list)); + + ppgtt->base.cleanup(&ppgtt->base); + kfree(ppgtt); +} + static void ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, @@ -2148,15 +2161,12 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm) { struct i915_vma *vma; - struct i915_hw_ppgtt *ppgtt = NULL; vma = i915_gem_obj_to_vma(obj, vm); if (!vma) vma = __i915_gem_vma_create(obj, vm); - ppgtt = vm_to_ppgtt(vm); - if (ppgtt) - kref_get(&ppgtt->ref); + i915_ppgtt_get(vm_to_ppgtt(vm)); return vma; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 666c938a51e3..c6beb528f955 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -272,7 +272,19 @@ void i915_gem_init_global_gtt(struct drm_device *dev); void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, unsigned long mappable_end, unsigned long end); -int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); + +int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); +void i915_ppgtt_release(struct kref *kref); +static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt) +{ + if (ppgtt) + kref_get(&ppgtt->ref); +} +static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt) +{ + if (ppgtt) + kref_put(&ppgtt->ref, i915_ppgtt_release); +} void i915_check_and_clear_faults(struct drm_device *dev); void i915_gem_suspend_gtt_mappings(struct drm_device *dev); -- cgit v1.2.3 From 4d884705dababd7d0f3f12796bc7b45e84962596 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:47 +0200 Subject: drm/i915: Track file_priv, not ctx in the ppgtt structure Hardware contexts reference a ppgtt, not the other way round. And the only user of this (in debugfs) actually only cares about which file the ppgtt is associated with. So give it what it wants. While at it give the ppgtt create function a proper name&place. 
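(The vm_to_ppgtt()/ctx_to_ppgtt() helpers these patches keep reshuffling are all built on container_of(), which recovers a pointer to the outer structure from a pointer to an embedded member — the same trick the debugfs per-file statistics use to get from vma->vm back to the owning ppgtt. A self-contained illustration with toy types, not the driver's:)

#include <assert.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct base_vm { int dummy; };

struct toy_ppgtt {
	int file_id;		/* stand-in for ppgtt->file_priv */
	struct base_vm base;	/* embedded, like i915_hw_ppgtt.base */
};

int main(void)
{
	struct toy_ppgtt ppgtt = { .file_id = 42 };
	struct base_vm *vm = &ppgtt.base;	/* what a vma would store */

	/* Recover the outer structure from the embedded member pointer. */
	struct toy_ppgtt *owner = container_of(vm, struct toy_ppgtt, base);

	assert(owner == &ppgtt && owner->file_id == 42);
	return 0;
}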
Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 22 +--------------------- drivers/gpu/drm/i915/i915_gem_gtt.c | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 6 +++++- 4 files changed, 28 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3b7decbeeed3..1c3a9943a742 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -333,7 +333,7 @@ static int per_file_stats(int id, void *ptr, void *data) } ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, base); - if (ppgtt->ctx && ppgtt->ctx->file_priv != stats->file_priv) + if (ppgtt->file_priv != stats->file_priv) continue; if (obj->ring) /* XXX per-vma statistic */ diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index dc43d0263a01..90665872734d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -184,26 +184,6 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) return obj; } -static struct i915_hw_ppgtt * -create_vm_for_ctx(struct drm_device *dev, struct intel_context *ctx) -{ - struct i915_hw_ppgtt *ppgtt; - int ret; - - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) - return ERR_PTR(-ENOMEM); - - ret = i915_ppgtt_init(dev, ppgtt); - if (ret) { - kfree(ppgtt); - return ERR_PTR(ret); - } - - ppgtt->ctx = ctx; - return ppgtt; -} - static struct intel_context * __create_hw_context(struct drm_device *dev, struct drm_i915_file_private *file_priv) @@ -290,7 +270,7 @@ i915_gem_create_context(struct drm_device *dev, } if (create_vm) { - struct i915_hw_ppgtt *ppgtt = create_vm_for_ctx(dev, ctx); + struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv); if (IS_ERR_OR_NULL(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 6ffa12b72538..a5715faba65f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1211,6 +1211,27 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) return ret; } +struct i915_hw_ppgtt * +i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) +{ + struct i915_hw_ppgtt *ppgtt; + int ret; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return ERR_PTR(-ENOMEM); + + ret = i915_ppgtt_init(dev, ppgtt); + if (ret) { + kfree(ppgtt); + return ERR_PTR(ret); + } + + ppgtt->file_priv = fpriv; + + return ppgtt; +} + void i915_ppgtt_release(struct kref *kref) { struct i915_hw_ppgtt *ppgtt = diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index c6beb528f955..90ff45246b62 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -34,6 +34,8 @@ #ifndef __I915_GEM_GTT_H__ #define __I915_GEM_GTT_H__ +struct drm_i915_file_private; + typedef uint32_t gen6_gtt_pte_t; typedef uint64_t gen8_gtt_pte_t; typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; @@ -258,7 +260,7 @@ struct i915_hw_ppgtt { dma_addr_t *gen8_pt_dma_addr[4]; }; - struct intel_context *ctx; + struct drm_i915_file_private *file_priv; int (*enable)(struct i915_hw_ppgtt *ppgtt); int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, @@ -275,6 +277,8 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, int i915_ppgtt_init(struct drm_device *dev, struct 
i915_hw_ppgtt *ppgtt);
 void i915_ppgtt_release(struct kref *kref);
+struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev,
+					struct drm_i915_file_private *fpriv);
 static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
 {
 	if (ppgtt)
--
cgit v1.2.3

From 841cd7737557785c0f215b0984c06aaaaa882302 Mon Sep 17 00:00:00 2001
From: Daniel Vetter
Date: Wed, 6 Aug 2014 15:04:48 +0200
Subject: drm/i915: Only refcount ppgtt if it actually is one

This essentially unbreaks non-ppgtt operation where we'd scribble over
random memory.

While at it give the vm_to_ppgtt function a proper prefix and make it
a bit more paranoid.

Reviewed-by: Michel Thierry
Signed-off-by: Daniel Vetter
---
 drivers/gpu/drm/i915/i915_drv.h     | 10 +++++++++-
 drivers/gpu/drm/i915/i915_gem.c     |  3 ++-
 drivers/gpu/drm/i915/i915_gem_gtt.c |  3 ++-
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 101fc637eb46..454badf31dfd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2512,6 +2512,15 @@ static inline bool i915_is_ggtt(struct i915_address_space *vm)
 	return vm == ggtt;
 }
 
+static inline struct i915_hw_ppgtt *
+i915_vm_to_ppgtt(struct i915_address_space *vm)
+{
+	WARN_ON(i915_is_ggtt(vm));
+
+	return container_of(vm, struct i915_hw_ppgtt, base);
+}
+
+
 static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
 {
 	return i915_gem_obj_bound(obj, obj_to_ggtt(obj));
@@ -2547,7 +2556,6 @@ void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
 
 /* i915_gem_context.c */
 #define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base)
-#define vm_to_ppgtt(vm) container_of(vm, struct i915_hw_ppgtt, base)
 int __must_check i915_gem_context_init(struct drm_device *dev);
 void i915_gem_context_fini(struct drm_device *dev);
 void i915_gem_context_reset(struct drm_device *dev);
 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8061d45eaa80..e3e30cd474be 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4485,7 +4485,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma)
 
 	vm = vma->vm;
 
-	i915_ppgtt_put(vm_to_ppgtt(vm));
+	if (!i915_is_ggtt(vm))
+		i915_ppgtt_put(i915_vm_to_ppgtt(vm));
 
 	list_del(&vma->vma_link);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a5715faba65f..48d8f4a21c3f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2187,7 +2187,8 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
 	if (!vma)
 		vma = __i915_gem_vma_create(obj, vm);
 
-	i915_ppgtt_get(vm_to_ppgtt(vm));
+	if (!i915_is_ggtt(vm))
+		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
 
 	return vma;
 }
--
cgit v1.2.3

From 5dc383b05a05d05e964172d882603cd171040c5f Mon Sep 17 00:00:00 2001
From: Daniel Vetter
Date: Wed, 6 Aug 2014 15:04:49 +0200
Subject: drm/i915: Add proper prefix to obj_to_ggtt

Stuff in headers really ought to have this.
Reviewed-by: Michel Thierry
Signed-off-by: Daniel Vetter
---
 drivers/gpu/drm/i915/i915_drv.h | 11 ++++++-----
 drivers/gpu/drm/i915/i915_gem.c |  2 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 454badf31dfd..2db22732c7ae 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2503,7 +2503,7 @@ static inline bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) {
 }
 
 /* Some GGTT VM helpers */
-#define obj_to_ggtt(obj) \
+#define i915_obj_to_ggtt(obj) \
 	(&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base)
 static inline bool i915_is_ggtt(struct i915_address_space *vm)
 {
@@ -2523,19 +2523,19 @@ i915_vm_to_ppgtt(struct i915_address_space *vm)
 
 static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
 {
-	return i915_gem_obj_bound(obj, obj_to_ggtt(obj));
+	return i915_gem_obj_bound(obj, i915_obj_to_ggtt(obj));
 }
 
 static inline unsigned long
 i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *obj)
 {
-	return i915_gem_obj_offset(obj, obj_to_ggtt(obj));
+	return i915_gem_obj_offset(obj, i915_obj_to_ggtt(obj));
 }
 
 static inline unsigned long
 i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj)
 {
-	return i915_gem_obj_size(obj, obj_to_ggtt(obj));
+	return i915_gem_obj_size(obj, i915_obj_to_ggtt(obj));
 }
 
 static inline int __must_check
@@ -2543,7 +2543,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 		      uint32_t alignment,
 		      unsigned flags)
 {
-	return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment, flags | PIN_GLOBAL);
+	return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj),
+				   alignment, flags | PIN_GLOBAL);
 }
 
 static inline int
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e3e30cd474be..c9d1396781e2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5252,7 +5252,7 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
 	struct i915_vma *vma;
 
 	vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
-	if (vma->vm != obj_to_ggtt(obj))
+	if (vma->vm != i915_obj_to_ggtt(obj))
 		return NULL;
 
 	return vma;
--
cgit v1.2.3

From 6c5566a82c6fb1da9e13a294f23d4cd85a08cb30 Mon Sep 17 00:00:00 2001
From: Daniel Vetter
Date: Wed, 6 Aug 2014 15:04:50 +0200
Subject: drm/i915: Allow i915_gem_setup_global_gtt to fail

We already need this just as a safety check in case the preallocation
reservation dance fails. But we definitely need this to be able to
move the aliasing ppgtt setup back out of the context code to this
place, where it belongs.
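(The diff that follows turns a void setup function into one that returns the first reservation failure instead of only logging it. A compact, self-contained sketch of that error-propagation shape, with hypothetical names and a made-up failing step:)

#include <errno.h>
#include <stdio.h>

/* Hypothetical reservation step that can fail. */
static int reserve_node(int i)
{
	return (i == 2) ? -ENOSPC : 0;	/* pretend slot 2 is already taken */
}

/* A setup loop that used to just log failures now returns the first
 * error so its caller can unwind instead of carrying on blindly. */
static int setup_gtt(void)
{
	int i, ret;

	for (i = 0; i < 4; i++) {
		ret = reserve_node(i);
		if (ret) {
			fprintf(stderr, "Reservation failed: %i\n", ret);
			return ret;	/* propagate, don't soldier on */
		}
	}
	return 0;
}

int main(void)
{
	return setup_gtt() ? 1 : 0;
}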
Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 7 ++++++- drivers/gpu/drm/i915/i915_gem_gtt.c | 16 ++++++++++------ drivers/gpu/drm/i915/i915_gem_gtt.h | 4 ++-- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c9d1396781e2..c8404a439502 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4754,7 +4754,12 @@ int i915_gem_init(struct drm_device *dev) dev_priv->gt.stop_ring = intel_logical_ring_stop; } - i915_gem_init_userptr(dev); + ret = i915_gem_init_userptr(dev); + if (ret) { + mutex_unlock(&dev->struct_mutex); + return ret; + } + i915_gem_init_global_gtt(dev); ret = i915_gem_context_init(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 48d8f4a21c3f..d228f839ca4f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1698,10 +1698,10 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node, } } -void i915_gem_setup_global_gtt(struct drm_device *dev, - unsigned long start, - unsigned long mappable_end, - unsigned long end) +int i915_gem_setup_global_gtt(struct drm_device *dev, + unsigned long start, + unsigned long mappable_end, + unsigned long end) { /* Let GEM Manage all of the aperture. * @@ -1734,8 +1734,10 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, WARN_ON(i915_gem_obj_ggtt_bound(obj)); ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node); - if (ret) - DRM_DEBUG_KMS("Reservation failed\n"); + if (ret) { + DRM_DEBUG_KMS("Reservation failed: %i\n", ret); + return ret; + } obj->has_global_gtt_mapping = 1; } @@ -1752,6 +1754,8 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, /* And finally clear the reserved guard page */ ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true); + + return 0; } void i915_gem_init_global_gtt(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 90ff45246b62..0eb0dddff76b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -271,8 +271,8 @@ struct i915_hw_ppgtt { int i915_gem_gtt_init(struct drm_device *dev); void i915_gem_init_global_gtt(struct drm_device *dev); -void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, - unsigned long mappable_end, unsigned long end); +int i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, + unsigned long mappable_end, unsigned long end); int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); -- cgit v1.2.3 From 896ab1a5d54269b463a24194c2e4a369103b46d8 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:51 +0200 Subject: drm/i915: Fix up checks for aliasing ppgtt A subsequent patch will no longer initialize the aliasing ppgtt if we have full ppgtt enabled, since we simply don't need that any more. Unfortunately a few places check for the aliasing ppgtt instead of checking for ppgtt in general. Fix them up. One special case are the gtt offset and size macros, which have some code to remap the aliasing ppgtt to the global gtt. The aliasing ppgtt is _not_ a logical address space, so passing that in as the vm is plain and simple a bug. So just WARN about it and carry on - we have a gracefully fall-through anyway if we can't find the vma. 
Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +--- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 8 ++------ drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +--- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index dea99d92fb4a..c45856bcc8b9 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -842,8 +842,6 @@ finish: */ bool i915_needs_cmd_parser(struct intel_engine_cs *ring) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; - if (!ring->needs_cmd_parser) return false; @@ -852,7 +850,7 @@ bool i915_needs_cmd_parser(struct intel_engine_cs *ring) * disabled. That will cause all of the parser's PPGTT checks to * fail. For now, disable parsing when PPGTT is off. */ - if (!dev_priv->mm.aliasing_ppgtt) + if (USES_PPGTT(ring->dev)) return false; return (i915.enable_cmd_parser == 1); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 1763fbf34e1d..895f9f2f35ea 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -999,7 +999,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = HAS_WT(dev); break; case I915_PARAM_HAS_ALIASING_PPGTT: - value = dev_priv->mm.aliasing_ppgtt || USES_FULL_PPGTT(dev); + value = USES_PPGTT(dev); break; case I915_PARAM_HAS_WAIT_TIMEOUT: value = 1; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c8404a439502..6a7795097017 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5106,9 +5106,7 @@ unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o, struct drm_i915_private *dev_priv = o->base.dev->dev_private; struct i915_vma *vma; - if (!dev_priv->mm.aliasing_ppgtt || - vm == &dev_priv->mm.aliasing_ppgtt->base) - vm = &dev_priv->gtt.base; + WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); list_for_each_entry(vma, &o->vma_list, vma_link) { if (vma->vm == vm) @@ -5149,9 +5147,7 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, struct drm_i915_private *dev_priv = o->base.dev->dev_private; struct i915_vma *vma; - if (!dev_priv->mm.aliasing_ppgtt || - vm == &dev_priv->mm.aliasing_ppgtt->base) - vm = &dev_priv->gtt.base; + WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); BUG_ON(list_empty(&o->vma_list)); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4236014c1cda..13543f8528c2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2006,9 +2006,7 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, u64 offset, u32 len, unsigned flags) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; - bool ppgtt = dev_priv->mm.aliasing_ppgtt != NULL && - !(flags & I915_DISPATCH_SECURE); + bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE); int ret; ret = intel_ring_begin(ring, 4); -- cgit v1.2.3 From 82460d97246a993aa49e88bf9b4154cce60f8da8 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 20:19:53 +0200 Subject: drm/i915: Rework ppgtt init to no require an aliasing ppgtt Currently we abuse the aliasing ppgtt to set up the ppgtt support in general. Which is a bit backwards since with full ppgtt we don't ever need the aliasing ppgtt. So untangle this and separate the ppgtt init from the aliasing ppgtt. 
While at it drag it out of the context enabling (which just does a switch to the default context). Note that we still have the differentiation between synchronous and asynchronous ppgtt setup, but that will soon vanish. So also correctly wire up the return value handling to be prepared for when ->switch_mm drops the synchronous parameter and could start to fail. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 8 ++++ drivers/gpu/drm/i915/i915_gem_context.c | 7 --- drivers/gpu/drm/i915/i915_gem_gtt.c | 84 +++++++++++++-------------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + 4 files changed, 42 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6a7795097017..6c2f0b886eb0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4719,6 +4719,14 @@ i915_gem_init_hw(struct drm_device *dev) if (ret && ret != -EIO) { DRM_ERROR("Context enable failed %d\n", ret); i915_gem_cleanup_ringbuffer(dev); + + return ret; + } + + ret = i915_ppgtt_init_hw(dev); + if (ret && ret != -EIO) { + DRM_ERROR("PPGTT enable failed %d\n", ret); + i915_gem_cleanup_ringbuffer(dev); } return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 90665872734d..eece059a8447 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -436,13 +436,6 @@ int i915_gem_context_enable(struct drm_i915_private *dev_priv) struct intel_engine_cs *ring; int ret, i; - /* This is the only place the aliasing PPGTT gets enabled, which means - * it has to happen before we bail on reset */ - if (dev_priv->mm.aliasing_ppgtt) { - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; - ppgtt->enable(ppgtt); - } - /* FIXME: We should make this work, even in reset */ if (i915_reset_in_progress(&dev_priv->gpu_error)) return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index d228f839ca4f..b7dcf72126f9 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -67,7 +67,6 @@ static void ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); static void ppgtt_unbind_vma(struct i915_vma *vma); -static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt); static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr, enum i915_cache_level level, @@ -604,7 +603,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) kunmap_atomic(pd_vaddr); } - ppgtt->enable = gen8_ppgtt_enable; ppgtt->switch_mm = gen8_mm_switch; ppgtt->base.clear_range = gen8_ppgtt_clear_range; ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; @@ -825,39 +823,20 @@ static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, return 0; } -static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) +static void gen8_ppgtt_enable(struct drm_device *dev) { - struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; - int j, ret; + int j; for_each_ring(ring, dev_priv, j) { I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - - /* We promise to do a switch later with FULL PPGTT. 
If this is - * aliasing, this is the one and only switch we'll do */ - if (USES_FULL_PPGTT(dev)) - continue; - - ret = ppgtt->switch_mm(ppgtt, ring, true); - if (ret) - goto err_out; } - - return 0; - -err_out: - for_each_ring(ring, dev_priv, j) - I915_WRITE(RING_MODE_GEN7(ring), - _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE)); - return ret; } -static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) +static void gen7_ppgtt_enable(struct drm_device *dev) { - struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; uint32_t ecochk, ecobits; @@ -876,31 +855,16 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) I915_WRITE(GAM_ECOCHK, ecochk); for_each_ring(ring, dev_priv, i) { - int ret; /* GFX_MODE is per-ring on gen7+ */ I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - - /* We promise to do a switch later with FULL PPGTT. If this is - * aliasing, this is the one and only switch we'll do */ - if (USES_FULL_PPGTT(dev)) - continue; - - ret = ppgtt->switch_mm(ppgtt, ring, true); - if (ret) - return ret; } - - return 0; } -static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) +static void gen6_ppgtt_enable(struct drm_device *dev) { - struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring; uint32_t ecochk, gab_ctl, ecobits; - int i; ecobits = I915_READ(GAC_ECO_BITS); I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | @@ -913,14 +877,6 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - - for_each_ring(ring, dev_priv, i) { - int ret = ppgtt->switch_mm(ppgtt, ring, true); - if (ret) - return ret; - } - - return 0; } /* PPGTT support for Sandybdrige/Gen6 and later */ @@ -1140,13 +1096,10 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode; if (IS_GEN6(dev)) { - ppgtt->enable = gen6_ppgtt_enable; ppgtt->switch_mm = gen6_mm_switch; } else if (IS_HASWELL(dev)) { - ppgtt->enable = gen7_ppgtt_enable; ppgtt->switch_mm = hsw_mm_switch; } else if (IS_GEN7(dev)) { - ppgtt->enable = gen7_ppgtt_enable; ppgtt->switch_mm = gen7_mm_switch; } else BUG(); @@ -1211,6 +1164,35 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) return ret; } +int i915_ppgtt_init_hw(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; + int i, ret = 0; + + if (!USES_PPGTT(dev)) + return 0; + + if (IS_GEN6(dev)) + gen6_ppgtt_enable(dev); + else if (IS_GEN7(dev)) + gen7_ppgtt_enable(dev); + else if (INTEL_INFO(dev)->gen >= 8) + gen8_ppgtt_enable(dev); + else + WARN_ON(1); + + if (ppgtt) { + for_each_ring(ring, dev_priv, i) { + ret = ppgtt->switch_mm(ppgtt, ring, true); + if (ret != 0) + return ret; + } + } + + return ret; +} struct i915_hw_ppgtt * i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 0eb0dddff76b..98fbb7309ea5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -276,6 +276,7 @@ int i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); +int 
i915_ppgtt_init_hw(struct drm_device *dev); void i915_ppgtt_release(struct kref *kref); struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv); -- cgit v1.2.3 From fa76da3499f1789f0e37d3bbcdc320bdf47c89ca Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 20:19:54 +0200 Subject: drm/i915: Initialize the aliasing ppgtt as part of global gtt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stuffing this into the context setup code doesn't make a lot of sense. Also reusing the real ppgtt setup code makes even less sense since the aliasing ppgtt isn't a real address space. Leaving all that stuff unitialized will make sure that we catch any abusers promptly. This is also a prep work to clean up the context->ppgtt link. v2: Fix up the logic fail, I've fumbled it so badly to completely disable ppgtt on gen6. Spotted by Ville and Michel. Also move around the pde write into the gen6 init function, since otherwise it won't work at all. v3: Only initialize the aliasing ppgtt when we actually enable it. Cc: "Thierry, Michel" Cc: Ville Syrjälä Reviewed-by: Michel Thierry [danvet: Squash in fixup from Fengguang Wu.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 13 +--------- drivers/gpu/drm/i915/i915_gem_gtt.c | 42 +++++++++++++++++++++++---------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index eece059a8447..7093f5df9ee7 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -279,17 +279,6 @@ i915_gem_create_context(struct drm_device *dev, goto err_unpin; } else ctx->vm = &ppgtt->base; - - /* This case is reserved for the global default context and - * should only happen once. */ - if (is_global_default_ctx) { - if (WARN_ON(dev_priv->mm.aliasing_ppgtt)) { - ret = -EEXIST; - goto err_unpin; - } - - dev_priv->mm.aliasing_ppgtt = ppgtt; - } } else if (USES_PPGTT(dev)) { /* For platforms which only have aliasing PPGTT, we fake the * address space and refcounting. 
*/ @@ -368,7 +357,7 @@ int i915_gem_context_init(struct drm_device *dev) } } - ctx = i915_gem_create_context(dev, NULL, USES_PPGTT(dev)); + ctx = i915_gem_create_context(dev, NULL, USES_FULL_PPGTT(dev)); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context (error %ld)\n", PTR_ERR(ctx)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b7dcf72126f9..b033ff770ac4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1130,35 +1130,38 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->node.size >> 20, ppgtt->node.start / PAGE_SIZE); + gen6_write_pdes(ppgtt); + DRM_DEBUG("Adding PPGTT at offset %x\n", + ppgtt->pd_offset << 10); + return 0; } -int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) { struct drm_i915_private *dev_priv = dev->dev_private; - int ret = 0; ppgtt->base.dev = dev; ppgtt->base.scratch = dev_priv->gtt.base.scratch; if (INTEL_INFO(dev)->gen < 8) - ret = gen6_ppgtt_init(ppgtt); + return gen6_ppgtt_init(ppgtt); else if (IS_GEN8(dev)) - ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total); + return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total); else BUG(); +} +int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int ret = 0; - if (!ret) { - struct drm_i915_private *dev_priv = dev->dev_private; + ret = __hw_ppgtt_init(dev, ppgtt); + if (ret == 0) { kref_init(&ppgtt->ref); drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, ppgtt->base.total); i915_init_vm(dev_priv, &ppgtt->base); - if (INTEL_INFO(dev)->gen < 8) { - gen6_write_pdes(ppgtt); - DRM_DEBUG("Adding PPGTT at offset %x\n", - ppgtt->pd_offset << 10); - } } return ret; @@ -1699,6 +1702,7 @@ int i915_gem_setup_global_gtt(struct drm_device *dev, struct drm_mm_node *entry; struct drm_i915_gem_object *obj; unsigned long hole_start, hole_end; + int ret; BUG_ON(mappable_end > end); @@ -1710,7 +1714,7 @@ int i915_gem_setup_global_gtt(struct drm_device *dev, /* Mark any preallocated objects as occupied */ list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm); - int ret; + DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n", i915_gem_obj_ggtt_offset(obj), obj->base.size); @@ -1737,6 +1741,20 @@ int i915_gem_setup_global_gtt(struct drm_device *dev, /* And finally clear the reserved guard page */ ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true); + if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) { + struct i915_hw_ppgtt *ppgtt; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return -ENOMEM; + + ret = __hw_ppgtt_init(dev, ppgtt); + if (ret != 0) + return ret; + + dev_priv->mm.aliasing_ppgtt = ppgtt; + } + return 0; } -- cgit v1.2.3 From ae6c4806927b8b0781ecc187aa16b10c820fc430 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:53 +0200 Subject: drm/i915: Only track real ppgtt for a context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's a bit a confusion since we track the global gtt, the aliasing and real ppgtt in the ctx->vm pointer. And not all callers really bother to check for the different cases and just presume that it points to a real ppgtt. 
Now looking closely we don't actually need ->vm to always point at an address space - the only place that cares actually has fixup code already to decide whether to look at the per-proces or the global address space. So switch to just tracking the ppgtt directly and ditch all the extraneous code. v2: Fixup the ppgtt debugfs file to not oops on a NULL ctx->ppgtt. Also drop the early exit - without aliasing ppgtt we want to dump all the ppgtts of the contexts if we have full ppgtt. v3: Actually git add the compile fix. Reviewed-by: Michel Thierry Cc: "Thierry, Michel" Cc: Ville Syrjälä OTC-Jira: VIZ-3724 [danvet: Resolve conflicts with execlist patches while applying.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 11 +++++++--- drivers/gpu/drm/i915/i915_drv.h | 3 +-- drivers/gpu/drm/i915/i915_gem_context.c | 33 +++++++++--------------------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 +++-- drivers/gpu/drm/i915/i915_gpu_error.c | 10 ++++++--- drivers/gpu/drm/i915/intel_lrc.c | 2 +- 6 files changed, 30 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1c3a9943a742..d42db6bc34e0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1821,7 +1821,13 @@ static int per_file_ctx(int id, void *ptr, void *data) { struct intel_context *ctx = ptr; struct seq_file *m = data; - struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx); + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; + + if (!ppgtt) { + seq_printf(m, " no ppgtt for context %d\n", + ctx->user_handle); + return 0; + } if (i915_gem_context_is_default(ctx)) seq_puts(m, " default context:\n"); @@ -1881,8 +1887,7 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev) seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd_offset); ppgtt->debug_dump(ppgtt, m); - } else - return; + } list_for_each_entry_reverse(file, &dev->filelist, lhead) { struct drm_i915_file_private *file_priv = file->driver_priv; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2db22732c7ae..eb99a109c0bc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -624,7 +624,7 @@ struct intel_context { uint8_t remap_slice; struct drm_i915_file_private *file_priv; struct i915_ctx_hang_stats hang_stats; - struct i915_address_space *vm; + struct i915_hw_ppgtt *ppgtt; /* Legacy ring buffer submission */ struct { @@ -2556,7 +2556,6 @@ i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj) void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj); /* i915_gem_context.c */ -#define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base) int __must_check i915_gem_context_init(struct drm_device *dev); void i915_gem_context_fini(struct drm_device *dev); void i915_gem_context_reset(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 7093f5df9ee7..206bf2d6c554 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -135,19 +135,13 @@ static int get_context_size(struct drm_device *dev) void i915_gem_context_free(struct kref *ctx_ref) { struct intel_context *ctx = container_of(ctx_ref, - typeof(*ctx), ref); - struct i915_hw_ppgtt *ppgtt = NULL; + typeof(*ctx), ref); - if (i915.enable_execlists) { - ppgtt = ctx_to_ppgtt(ctx); + if (i915.enable_execlists) intel_lr_context_free(ctx); - } else if (ctx->legacy_hw_ctx.rcs_state) { - /* We refcount 
even the aliasing PPGTT to keep the code symmetric */ - if (USES_PPGTT(ctx->legacy_hw_ctx.rcs_state->base.dev)) - ppgtt = ctx_to_ppgtt(ctx); - } - i915_ppgtt_put(ppgtt); + i915_ppgtt_put(ctx->ppgtt); + if (ctx->legacy_hw_ctx.rcs_state) drm_gem_object_unreference(&ctx->legacy_hw_ctx.rcs_state->base); list_del(&ctx->link); @@ -243,7 +237,6 @@ i915_gem_create_context(struct drm_device *dev, bool create_vm) { const bool is_global_default_ctx = file_priv == NULL; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_context *ctx; int ret = 0; @@ -277,15 +270,10 @@ i915_gem_create_context(struct drm_device *dev, PTR_ERR(ppgtt)); ret = PTR_ERR(ppgtt); goto err_unpin; - } else - ctx->vm = &ppgtt->base; - } else if (USES_PPGTT(dev)) { - /* For platforms which only have aliasing PPGTT, we fake the - * address space and refcounting. */ - ctx->vm = &dev_priv->mm.aliasing_ppgtt->base; - i915_ppgtt_get(dev_priv->mm.aliasing_ppgtt); - } else - ctx->vm = &dev_priv->gtt.base; + } + + ctx->ppgtt = ppgtt; + } return ctx; @@ -543,7 +531,6 @@ static int do_switch(struct intel_engine_cs *ring, { struct drm_i915_private *dev_priv = ring->dev->dev_private; struct intel_context *from = ring->last_context; - struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(to); u32 hw_flags = 0; bool uninitialized = false; int ret, i; @@ -571,8 +558,8 @@ static int do_switch(struct intel_engine_cs *ring, */ from = ring->last_context; - if (USES_FULL_PPGTT(ring->dev)) { - ret = ppgtt->switch_mm(ppgtt, ring, false); + if (to->ppgtt) { + ret = to->ppgtt->switch_mm(to->ppgtt, ring, false); if (ret) goto unpin_out; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 7e04a4825ed0..1a0611bb576b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1315,8 +1315,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, i915_gem_context_reference(ctx); - vm = ctx->vm; - if (!USES_FULL_PPGTT(dev)) + if (ctx->ppgtt) + vm = &ctx->ppgtt->base; + else vm = &dev_priv->gtt.base; eb = eb_create(args); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index eab41f9390f8..fc11ac6b0373 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -967,6 +967,12 @@ static void i915_gem_record_rings(struct drm_device *dev, request = i915_gem_find_active_request(ring); if (request) { + struct i915_address_space *vm; + + vm = request->ctx && request->ctx->ppgtt ? + &request->ctx->ppgtt->base : + &dev_priv->gtt.base; + /* We need to copy these to an anonymous buffer * as the simplest method to avoid being overwritten * by userspace. @@ -974,9 +980,7 @@ static void i915_gem_record_rings(struct drm_device *dev, error->ring[i].batchbuffer = i915_error_object_create(dev_priv, request->batch_obj, - request->ctx ? 
- request->ctx->vm : - &dev_priv->gtt.base); + vm); if (HAS_BROKEN_CS_TLB(dev_priv->dev) && ring->scratch.obj) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6f1b64e01a05..6b5f416b5c0d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -904,7 +904,7 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf) { struct drm_i915_gem_object *ring_obj = ringbuf->obj; - struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx); + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; struct page *page; uint32_t *reg_state; int ret; -- cgit v1.2.3 From d624d86e1e3b69cadb2dad42588e71e9a3b6d70a Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:54 +0200 Subject: drm/i915: Drop create_vm argument to i915_gem_create_context Now that all the flow is streamlined the rule is simple: We create a new ppgtt for a new context when we have full ppgtt enabled. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 206bf2d6c554..9683e62ec61a 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -233,8 +233,7 @@ err_out: */ static struct intel_context * i915_gem_create_context(struct drm_device *dev, - struct drm_i915_file_private *file_priv, - bool create_vm) + struct drm_i915_file_private *file_priv) { const bool is_global_default_ctx = file_priv == NULL; struct intel_context *ctx; @@ -262,7 +261,7 @@ i915_gem_create_context(struct drm_device *dev, } } - if (create_vm) { + if (USES_FULL_PPGTT(dev)) { struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv); if (IS_ERR_OR_NULL(ppgtt)) { @@ -345,7 +344,7 @@ int i915_gem_context_init(struct drm_device *dev) } } - ctx = i915_gem_create_context(dev, NULL, USES_FULL_PPGTT(dev)); + ctx = i915_gem_create_context(dev, NULL); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context (error %ld)\n", PTR_ERR(ctx)); @@ -444,7 +443,7 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file) idr_init(&file_priv->context_idr); mutex_lock(&dev->struct_mutex); - ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev)); + ctx = i915_gem_create_context(dev, file_priv); mutex_unlock(&dev->struct_mutex); if (IS_ERR(ctx)) { @@ -702,7 +701,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev)); + ctx = i915_gem_create_context(dev, file_priv); mutex_unlock(&dev->struct_mutex); if (IS_ERR(ctx)) return PTR_ERR(ctx); -- cgit v1.2.3 From 19dd120ceee085dbac70b1b01bd09d599cf87bd0 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:55 +0200 Subject: drm/i915: Extract common cleanup into i915_ppgtt_release Address space cleanup isn't really a job for the low-level cleanup callbacks. Without this change we can't reuse the low-level cleanup callback for the aliasing ppgtt cleanup. 
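(The layering this commit sets up — generation-specific teardown behind a callback, with the address-space bookkeeping that every ppgtt needs hoisted into one common release path — looks roughly like this in miniature. Toy names throughout, not driver code:)

#include <stdio.h>

/* Per-generation cleanup stays behind a function pointer... */
struct toy_vm {
	void (*cleanup)(struct toy_vm *vm);
};

static void gen6_cleanup(struct toy_vm *vm)
{
	(void)vm;
	printf("gen6: unmap and free page tables\n");
}

/* ...while the shared bookkeeping lives in one common release
 * function, mirroring i915_ppgtt_release() above. */
static void toy_ppgtt_release(struct toy_vm *vm)
{
	printf("common: drop from global vm list, tear down drm_mm\n");
	vm->cleanup(vm);	/* then defer to the backend */
}

int main(void)
{
	struct toy_vm vm = { .cleanup = gen6_cleanup };

	toy_ppgtt_release(&vm);
	return 0;
}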
Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b033ff770ac4..c1e2d9a9f9e2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -391,9 +391,6 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); - list_del(&vm->global_link); - drm_mm_takedown(&vm->mm); - gen8_ppgtt_unmap_pages(ppgtt); gen8_ppgtt_free(ppgtt); } @@ -974,8 +971,6 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); - list_del(&vm->global_link); - drm_mm_takedown(&ppgtt->base.mm); drm_mm_remove_node(&ppgtt->node); gen6_ppgtt_unmap_pages(ppgtt); @@ -1226,6 +1221,9 @@ void i915_ppgtt_release(struct kref *kref) WARN_ON(!list_empty(&ppgtt->base.active_list)); WARN_ON(!list_empty(&ppgtt->base.inactive_list)); + list_del(&ppgtt->base.global_link); + drm_mm_takedown(&ppgtt->base.mm); + ppgtt->base.cleanup(&ppgtt->base); kfree(ppgtt); } -- cgit v1.2.3 From 90d0a0e8d0e64c92c4a6147f3c7cdc7c544d6b1a Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:56 +0200 Subject: drm/i915: Extract commmon global gtt cleanup code We want to move the aliasing ppgtt cleanup back into the global gtt cleanup code for symmetry, but first we need to create such a place. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 20 ++++++++++++-------- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 895f9f2f35ea..c9af48503f76 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1821,7 +1821,7 @@ out_mtrrfree: arch_phys_wc_del(dev_priv->gtt.mtrr); io_mapping_free(dev_priv->gtt.mappable); out_gtt: - dev_priv->gtt.base.cleanup(&dev_priv->gtt.base); + i915_global_gtt_cleanup(dev); out_regs: intel_uncore_fini(dev); pci_iounmap(dev->pdev, dev_priv->regs); @@ -1920,7 +1920,7 @@ int i915_driver_unload(struct drm_device *dev) destroy_workqueue(dev_priv->wq); pm_qos_remove_request(&dev_priv->pm_qos); - dev_priv->gtt.base.cleanup(&dev_priv->gtt.base); + i915_global_gtt_cleanup(dev); intel_uncore_fini(dev); if (dev_priv->regs != NULL) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c1e2d9a9f9e2..8d94e3582045 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1767,6 +1767,18 @@ void i915_gem_init_global_gtt(struct drm_device *dev) i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); } +void i915_global_gtt_cleanup(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_address_space *vm = &dev_priv->gtt.base; + + if (drm_mm_initialized(&vm->mm)) { + drm_mm_takedown(&vm->mm); + list_del(&vm->global_link); + } + + vm->cleanup(vm); +} static int setup_scratch_page(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -2035,10 +2047,6 @@ static void gen6_gmch_remove(struct i915_address_space *vm) struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base); - if (drm_mm_initialized(&vm->mm)) { - drm_mm_takedown(&vm->mm); - 
list_del(&vm->global_link); - } iounmap(gtt->gsm); teardown_scratch_page(vm->dev); } @@ -2071,10 +2079,6 @@ static int i915_gmch_probe(struct drm_device *dev, static void i915_gmch_remove(struct i915_address_space *vm) { - if (drm_mm_initialized(&vm->mm)) { - drm_mm_takedown(&vm->mm); - list_del(&vm->global_link); - } intel_gmch_remove(); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 98fbb7309ea5..6280648d4805 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -273,6 +273,7 @@ int i915_gem_gtt_init(struct drm_device *dev); void i915_gem_init_global_gtt(struct drm_device *dev); int i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, unsigned long mappable_end, unsigned long end); +void i915_global_gtt_cleanup(struct drm_device *dev); int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); -- cgit v1.2.3 From 70e32544aa4027b4c27226da32eb3866e7bbbcdc Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:57 +0200 Subject: drm/i915: Cleanup aliasging ppgtt alongside the global gtt Also remove related WARN_ONs which seem to have been hit since a rather long time. But apperently no one noticed since our module reload is already WARNING-infested :( Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 4 ---- drivers/gpu/drm/i915/i915_gem_gtt.c | 7 +++++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index c9af48503f76..04dd6112865e 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1388,7 +1388,6 @@ cleanup_gem: i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); mutex_unlock(&dev->struct_mutex); - WARN_ON(dev_priv->mm.aliasing_ppgtt); cleanup_irq: drm_irq_uninstall(dev); cleanup_gem_stolen: @@ -1901,7 +1900,6 @@ int i915_driver_unload(struct drm_device *dev) mutex_lock(&dev->struct_mutex); i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); - WARN_ON(dev_priv->mm.aliasing_ppgtt); mutex_unlock(&dev->struct_mutex); i915_gem_cleanup_stolen(dev); @@ -1909,8 +1907,6 @@ int i915_driver_unload(struct drm_device *dev) i915_free_hws(dev); } - WARN_ON(!list_empty(&dev_priv->vm_list)); - drm_vblank_cleanup(dev); intel_teardown_gmbus(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8d94e3582045..d97b280861ee 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1772,6 +1772,12 @@ void i915_global_gtt_cleanup(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct i915_address_space *vm = &dev_priv->gtt.base; + if (dev_priv->mm.aliasing_ppgtt) { + struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; + + ppgtt->base.cleanup(&ppgtt->base); + } + if (drm_mm_initialized(&vm->mm)) { drm_mm_takedown(&vm->mm); list_del(&vm->global_link); @@ -1779,6 +1785,7 @@ void i915_global_gtt_cleanup(struct drm_device *dev) vm->cleanup(vm); } + static int setup_scratch_page(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; -- cgit v1.2.3 From a08a42ad441e113f87308e0844049cb881f1ac1d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 12 Aug 2014 19:39:52 +0300 Subject: drm/i915: Don't try to enable cursor from setplane when crtc is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure the cursor 
gets fully clipped when enabling it on a disabled crtc via setplane. This will prevent the lower level code from attempting to enable the cursor in hardware. Cc: Paulo Zanoni Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1bd1aa21a8e9..a93d5ffd3863 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11743,8 +11743,8 @@ intel_cursor_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, }; const struct drm_rect clip = { /* integer pixels */ - .x2 = intel_crtc->config.pipe_src_w, - .y2 = intel_crtc->config.pipe_src_h, + .x2 = intel_crtc->active ? intel_crtc->config.pipe_src_w : 0, + .y2 = intel_crtc->active ? intel_crtc->config.pipe_src_h : 0, }; bool visible; int ret; -- cgit v1.2.3 From d7ce484eeec43079ad842f1d351f53998ed6bb30 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 12 Aug 2014 19:39:53 +0300 Subject: drm/i915: Move CURSIZE setup to i845_update_cursor() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CURSIZE register exists on 845/865 only, so move it to i845_update_cursor(). Changes to cursor size must be done only when the cursor is disabled, so do the write just before enabling the cursor. Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a93d5ffd3863..cd3dc3b72798 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8070,6 +8070,7 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base) CURSOR_GAMMA_ENABLE | CURSOR_FORMAT_ARGB); if (intel_crtc->cursor_cntl != cntl) { + I915_WRITE(CURSIZE, (64 << 12) | 64); I915_WRITE(_CURACNTR, cntl); POSTING_READ(_CURACNTR); intel_crtc->cursor_cntl = cntl; @@ -8219,7 +8220,6 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc, uint32_t width, uint32_t height) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; unsigned old_width; @@ -8292,9 +8292,6 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc, addr = obj->phys_handle->busaddr; } - if (IS_GEN2(dev)) - I915_WRITE(CURSIZE, (height << 12) | width); - finish: if (intel_crtc->cursor_bo) { if (!INTEL_INFO(dev)->cursor_needs_physical) -- cgit v1.2.3 From 8ac5466926daef2406f7b25e9a272567cb81adb0 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 12 Aug 2014 19:39:54 +0300 Subject: drm/i915: Unify ivb_update_cursor() and i9xx_update_cursor() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ever since commit 5efb3e2838536832c9b6872512e6b6daf592cee9 Author: Ville Syrjälä Date: Wed Apr 9 13:28:53 2014 +0300 drm/i915/chv: Add cursor pipe offsets the only difference between i9xx_update_cursor() and ivb_update_cursor() was the hsw+ pipe csc handling. Let's unify them and we can rid ourselves of some duplicated code. 
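The shape of the unification is easy to see in isolation: the hsw/bdw-only pipe CSC bit becomes a conditional inside the one surviving function instead of justifying a second near-identical copy. A minimal, self-contained C sketch of that pattern (register values mirror i915_reg.h, but treat the whole thing as illustrative, not driver code):

#include <stdint.h>
#include <stdio.h>

#define MCURSOR_GAMMA_ENABLE   (1u << 26)
#define CURSOR_MODE_64_ARGB_AX 0x27u
#define CURSOR_PIPE_CSC_ENABLE (1u << 24)

/* One update path for all gens; the HSW/BDW-only CSC bit is the sole
 * remaining difference, so it is a conditional rather than a separate
 * function. */
static uint32_t cursor_cntl(int is_hsw_or_bdw, int enabled)
{
	uint32_t cntl = 0;

	if (enabled)
		cntl = MCURSOR_GAMMA_ENABLE | CURSOR_MODE_64_ARGB_AX;
	if (is_hsw_or_bdw)
		cntl |= CURSOR_PIPE_CSC_ENABLE;
	return cntl;
}

int main(void)
{
	printf("ivb: %08x\n", (unsigned)cursor_cntl(0, 1));
	printf("hsw: %08x\n", (unsigned)cursor_cntl(1, 1));
	return 0;
}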
Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 41 +----------------------------------- 1 file changed, 1 insertion(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index cd3dc3b72798..b3fb127131f0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8104,43 +8104,6 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base) } cntl |= pipe << 28; /* Connect to correct pipe */ } - if (intel_crtc->cursor_cntl != cntl) { - I915_WRITE(CURCNTR(pipe), cntl); - POSTING_READ(CURCNTR(pipe)); - intel_crtc->cursor_cntl = cntl; - } - - /* and commit changes on next vblank */ - I915_WRITE(CURBASE(pipe), base); - POSTING_READ(CURBASE(pipe)); -} - -static void ivb_update_cursor(struct drm_crtc *crtc, u32 base) -{ - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; - uint32_t cntl; - - cntl = 0; - if (base) { - cntl = MCURSOR_GAMMA_ENABLE; - switch (intel_crtc->cursor_width) { - case 64: - cntl |= CURSOR_MODE_64_ARGB_AX; - break; - case 128: - cntl |= CURSOR_MODE_128_ARGB_AX; - break; - case 256: - cntl |= CURSOR_MODE_256_ARGB_AX; - break; - default: - WARN_ON(1); - return; - } - } if (IS_HASWELL(dev) || IS_BROADWELL(dev)) cntl |= CURSOR_PIPE_CSC_ENABLE; @@ -8199,9 +8162,7 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, I915_WRITE(CURPOS(pipe), pos); - if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev) || IS_BROADWELL(dev)) - ivb_update_cursor(crtc, base); - else if (IS_845G(dev) || IS_I865G(dev)) + if (IS_845G(dev) || IS_I865G(dev)) i845_update_cursor(crtc, base); else i9xx_update_cursor(crtc, base); -- cgit v1.2.3 From dc41c154ffc30afb7ee7e891140dead26fce5c39 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 13 Aug 2014 11:57:05 +0300 Subject: drm/i915: Add support for variable cursor size on 845/865 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 845/865 support different cursor sizes as well, albeit a bit differently than later platforms. Add the necessary code to make them work. Untested due to lack of hardware. 
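A worked example of the stride encoding this patch introduces may help: the cursor stride is roundup_pow_of_two(width) * 4 bytes, and CURSOR_STRIDE() packs the legal 256/512/1024/2048-byte strides into a two-bit field at bit 28 via ffs(). A small standalone sketch (userspace C, values copied from the hunk below; illustrative only):

#include <stdio.h>
#include <strings.h>	/* ffs() */

#define CURSOR_STRIDE_SHIFT 28
#define CURSOR_STRIDE(x) ((unsigned)(ffs(x) - 9) << CURSOR_STRIDE_SHIFT)

int main(void)
{
	/* ffs(256) == 9, ffs(512) == 10, ... so the field encodes the
	 * 256/512/1k/2k byte strides as 0..3 in bits 29:28. */
	int strides[] = { 256, 512, 1024, 2048 };

	for (int i = 0; i < 4; i++)
		printf("stride %4d -> cntl bits %08x\n",
		       strides[i], CURSOR_STRIDE(strides[i]));
	return 0;
}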
v2: Warn but accept invalid stride (Chris) Rewrite the cursor size checks for other platforms (Chris) v3: More polish and magic to the cursor size checks (Chris) v4: Moar polish and a comment (Chris) Reviewed-by: Chris Wilson Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 3 +- drivers/gpu/drm/i915/intel_display.c | 111 +++++++++++++++++++++++++++-------- drivers/gpu/drm/i915/intel_drv.h | 1 + 3 files changed, 91 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f79c20d49d99..203062e93452 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4128,7 +4128,8 @@ enum punit_power_well { /* Old style CUR*CNTR flags (desktop 8xx) */ #define CURSOR_ENABLE 0x80000000 #define CURSOR_GAMMA_ENABLE 0x40000000 -#define CURSOR_STRIDE_MASK 0x30000000 +#define CURSOR_STRIDE_SHIFT 28 +#define CURSOR_STRIDE(x) ((ffs(x)-9) << CURSOR_STRIDE_SHIFT) /* 256,512,1k,2k */ #define CURSOR_PIPE_CSC_ENABLE (1<<24) #define CURSOR_FORMAT_SHIFT 24 #define CURSOR_FORMAT_MASK (0x07 << CURSOR_FORMAT_SHIFT) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b3fb127131f0..fd15601f6360 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8047,30 +8047,55 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base) struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t cntl; + uint32_t cntl = 0, size = 0; - if (base != intel_crtc->cursor_base) { - /* On these chipsets we can only modify the base whilst - * the cursor is disabled. - */ - if (intel_crtc->cursor_cntl) { - I915_WRITE(_CURACNTR, 0); - POSTING_READ(_CURACNTR); - intel_crtc->cursor_cntl = 0; + if (base) { + unsigned int width = intel_crtc->cursor_width; + unsigned int height = intel_crtc->cursor_height; + unsigned int stride = roundup_pow_of_two(width) * 4; + + switch (stride) { + default: + WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n", + width, stride); + stride = 256; + /* fallthrough */ + case 256: + case 512: + case 1024: + case 2048: + break; } + cntl |= CURSOR_ENABLE | + CURSOR_GAMMA_ENABLE | + CURSOR_FORMAT_ARGB | + CURSOR_STRIDE(stride); + + size = (height << 12) | width; + } + + if (intel_crtc->cursor_cntl != 0 && + (intel_crtc->cursor_base != base || + intel_crtc->cursor_size != size || + intel_crtc->cursor_cntl != cntl)) { + /* On these chipsets we can only modify the base/size/stride + * whilst the cursor is disabled. 
+ */ + I915_WRITE(_CURACNTR, 0); + POSTING_READ(_CURACNTR); + intel_crtc->cursor_cntl = 0; + } + + if (intel_crtc->cursor_base != base) I915_WRITE(_CURABASE, base); - POSTING_READ(_CURABASE); + + if (intel_crtc->cursor_size != size) { + I915_WRITE(CURSIZE, size); + intel_crtc->cursor_size = size; } - /* XXX width must be 64, stride 256 => 0x00 << 28 */ - cntl = 0; - if (base) - cntl = (CURSOR_ENABLE | - CURSOR_GAMMA_ENABLE | - CURSOR_FORMAT_ARGB); if (intel_crtc->cursor_cntl != cntl) { - I915_WRITE(CURSIZE, (64 << 12) | 64); I915_WRITE(_CURACNTR, cntl); POSTING_READ(_CURACNTR); intel_crtc->cursor_cntl = cntl; @@ -8169,6 +8194,43 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, intel_crtc->cursor_base = base; } +static bool cursor_size_ok(struct drm_device *dev, + uint32_t width, uint32_t height) +{ + if (width == 0 || height == 0) + return false; + + /* + * 845g/865g are special in that they are only limited by + * the width of their cursors, the height is arbitrary up to + * the precision of the register. Everything else requires + * square cursors, limited to a few power-of-two sizes. + */ + if (IS_845G(dev) || IS_I865G(dev)) { + if ((width & 63) != 0) + return false; + + if (width > (IS_845G(dev) ? 64 : 512)) + return false; + + if (height > 1023) + return false; + } else { + switch (width | height) { + case 256: + case 128: + if (IS_GEN2(dev)) + return false; + case 64: + break; + default: + return false; + } + } + + return true; +} + /* * intel_crtc_cursor_set_obj - Set cursor to specified GEM object * @@ -8183,7 +8245,7 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; - unsigned old_width; + unsigned old_width, stride; uint32_t addr; int ret; @@ -8197,14 +8259,13 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc, } /* Check for which cursor types we support */ - if (!((width == 64 && height == 64) || - (width == 128 && height == 128 && !IS_GEN2(dev)) || - (width == 256 && height == 256 && !IS_GEN2(dev)))) { + if (!cursor_size_ok(dev, width, height)) { DRM_DEBUG("Cursor dimension not supported\n"); return -EINVAL; } - if (obj->base.size < width * height * 4) { + stride = roundup_pow_of_two(width) * 4; + if (obj->base.size < stride * height) { DRM_DEBUG_KMS("buffer is too small\n"); ret = -ENOMEM; goto fail; @@ -11797,6 +11858,7 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) intel_crtc->cursor_base = ~0; intel_crtc->cursor_cntl = ~0; + intel_crtc->cursor_size = ~0; BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL); @@ -12585,7 +12647,10 @@ void intel_modeset_init(struct drm_device *dev) dev->mode_config.max_height = 8192; } - if (IS_GEN2(dev)) { + if (IS_845G(dev) || IS_I865G(dev)) { + dev->mode_config.cursor_width = IS_845G(dev) ? 
64 : 512; + dev->mode_config.cursor_height = 1023; + } else if (IS_GEN2(dev)) { dev->mode_config.cursor_width = GEN2_CURSOR_WIDTH; dev->mode_config.cursor_height = GEN2_CURSOR_HEIGHT; } else { diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 666ca8a044ea..579b1d40f934 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -405,6 +405,7 @@ struct intel_crtc { uint32_t cursor_addr; int16_t cursor_width, cursor_height; uint32_t cursor_cntl; + uint32_t cursor_size; uint32_t cursor_base; struct intel_plane_config plane_config; -- cgit v1.2.3 From 7312e2ddec1ffe4511a85a2814df44e79ded3c1d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Aug 2014 12:14:12 +0100 Subject: drm/i915: Replace __I915__ with typesafe variant Ville pointed out the GCCism __builtin_types_compatible_p() that we could use to replace our heavily casted presumption __I915__ macro that was based on comparing struct sizes. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 --- drivers/gpu/drm/i915/i915_drv.h | 12 ++++++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 04dd6112865e..db56b26a08c9 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1595,9 +1595,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!drm_core_check_feature(dev, DRIVER_MODESET) && !dev->agp) return -EINVAL; - /* For the ugly agnostic INTEL_INFO macro */ - BUILD_BUG_ON(sizeof(*dev_priv) == sizeof(*dev)); - dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL); if (dev_priv == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eb99a109c0bc..2af0071efb38 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2004,8 +2004,16 @@ struct drm_i915_cmd_table { }; /* Note that the (struct drm_i915_private *) cast is just to shut up gcc. */ -#define __I915__(p) ((sizeof(*(p)) == sizeof(struct drm_i915_private)) ? \ (struct drm_i915_private *)(p) : to_i915(p)) +#define __I915__(p) ({ \ struct drm_i915_private *__p; \ if (__builtin_types_compatible_p(typeof(*p), struct drm_i915_private)) \ __p = (struct drm_i915_private *)p; \ else if (__builtin_types_compatible_p(typeof(*p), struct drm_device)) \ __p = to_i915((struct drm_device *)p); \ else \ BUILD_BUG(); \ __p; \ +}) #define INTEL_INFO(p) (&__I915__(p)->info) #define INTEL_DEVID(p) (INTEL_INFO(p)->device_id) -- cgit v1.2.3 From 82e3b8c130f046b7dd1e7898c10e40edb52fee6d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Aug 2014 13:09:46 +0100 Subject: drm/i915: Localise the fbdev console lock frobbing Rather than take and release the console_lock() around a non-existent DRM_I915_FBDEV, move the lock acquisition into the callee where it will be compiled out by the config option entirely. This includes moving the deferred fb_set_suspend() dance and encapsulating it entirely within intel_fbdev.c. v2: Use an integral work item so that we can explicitly flush the work upon suspend/unload. Signed-off-by: Chris Wilson Cc: Daniel Vetter [danvet: Add the flush_work in fbdev_fini per the mailing list discussion. And s/BUG_ON/WARN_ON/ because.] 
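The trylock-or-defer dance this commit encapsulates is a generally useful pattern: take the lock only if it is uncontended, otherwise punt the work to a worker so the hot path never blocks. A minimal userspace sketch with pthreads (all names hypothetical; the kernel version uses console_trylock() and schedule_work()):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t console_lock = PTHREAD_MUTEX_INITIALIZER;

static void set_suspend_locked(int state)
{
	printf("fb_set_suspend(%d)\n", state);
}

/* Slow path: a worker may block on the lock without stalling resume. */
static void *resume_worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&console_lock);
	set_suspend_locked(0);
	pthread_mutex_unlock(&console_lock);
	return NULL;
}

int main(void)
{
	pthread_t worker;

	/* Fast path: only touch the console if the lock is free. */
	if (pthread_mutex_trylock(&console_lock) == 0) {
		set_suspend_locked(0);
		pthread_mutex_unlock(&console_lock);
	} else {
		/* Contended: defer to the worker instead of waiting here. */
		pthread_create(&worker, NULL, resume_worker, NULL);
		pthread_join(worker, NULL);
	}
	return 0;
}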
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 --- drivers/gpu/drm/i915/i915_drv.c | 28 ++---------------------- drivers/gpu/drm/i915/i915_drv.h | 9 +------- drivers/gpu/drm/i915/intel_drv.h | 4 ++-- drivers/gpu/drm/i915/intel_fbdev.c | 44 +++++++++++++++++++++++++++++++++++++- 5 files changed, 48 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index db56b26a08c9..3f676f904f7e 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1350,8 +1350,6 @@ static int i915_load_modeset_init(struct drm_device *dev) if (ret) goto cleanup_irq; - INIT_WORK(&dev_priv->console_resume_work, intel_console_resume); - intel_modeset_gem_init(dev); /* Always safe in the mode setting case. */ @@ -1864,7 +1862,6 @@ int i915_driver_unload(struct drm_device *dev) if (drm_core_check_feature(dev, DRIVER_MODESET)) { intel_fbdev_fini(dev); intel_modeset_cleanup(dev); - cancel_work_sync(&dev_priv->console_resume_work); /* * free the memory space allocated for the child device diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ec96f9a9724c..01de97776d81 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -558,9 +558,7 @@ static int i915_drm_freeze(struct drm_device *dev) intel_uncore_forcewake_reset(dev, false); intel_opregion_fini(dev); - console_lock(); - intel_fbdev_set_suspend(dev, FBINFO_STATE_SUSPENDED); - console_unlock(); + intel_fbdev_set_suspend(dev, FBINFO_STATE_SUSPENDED, true); dev_priv->suspend_count++; @@ -599,18 +597,6 @@ int i915_suspend(struct drm_device *dev, pm_message_t state) return 0; } -void intel_console_resume(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, - console_resume_work); - struct drm_device *dev = dev_priv->dev; - - console_lock(); - intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING); - console_unlock(); -} - static int i915_drm_thaw_early(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -681,17 +667,7 @@ static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings) intel_opregion_init(dev); - /* - * The console lock can be pretty contented on resume due - * to all the printk activity. Try to keep it out of the hot - * path of resume if possible. - */ - if (console_trylock()) { - intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING); - console_unlock(); - } else { - schedule_work(&dev_priv->console_resume_work); - } + intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING, false); mutex_lock(&dev_priv->modeset_restore_lock); dev_priv->modeset_restore = MODESET_DONE; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2af0071efb38..541fb6f295bb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1586,14 +1586,9 @@ struct drm_i915_private { #ifdef CONFIG_DRM_I915_FBDEV /* list of fbdev register on this device */ struct intel_fbdev *fbdev; + struct work_struct fbdev_suspend_work; #endif - /* - * The console may be contended at resume, but we don't - * want it to block on it. 
- */ - struct work_struct console_resume_work; - struct drm_property *broadcast_rgb_property; struct drm_property *force_audio_property; @@ -2225,8 +2220,6 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); -extern void intel_console_resume(struct work_struct *work); - /* i915_irq.c */ void i915_queue_hangcheck(struct drm_device *dev); __printf(3, 4) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 579b1d40f934..4ab0d928b819 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -948,7 +948,7 @@ void intel_dvo_init(struct drm_device *dev); extern int intel_fbdev_init(struct drm_device *dev); extern void intel_fbdev_initial_config(struct drm_device *dev); extern void intel_fbdev_fini(struct drm_device *dev); -extern void intel_fbdev_set_suspend(struct drm_device *dev, int state); +extern void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous); extern void intel_fbdev_output_poll_changed(struct drm_device *dev); extern void intel_fbdev_restore_mode(struct drm_device *dev); #else @@ -965,7 +965,7 @@ static inline void intel_fbdev_fini(struct drm_device *dev) { } -static inline void intel_fbdev_set_suspend(struct drm_device *dev, int state) +static inline void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous) { } diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index f475414671d8..cf052a39558d 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -26,6 +26,7 @@ #include #include +#include <linux/console.h> #include #include #include @@ -636,6 +637,15 @@ out: return false; } +static void intel_fbdev_suspend_worker(struct work_struct *work) +{ + intel_fbdev_set_suspend(container_of(work, + struct drm_i915_private, + fbdev_suspend_work)->dev, + FBINFO_STATE_RUNNING, + true); +} + int intel_fbdev_init(struct drm_device *dev) { struct intel_fbdev *ifbdev; @@ -662,6 +672,8 @@ int intel_fbdev_init(struct drm_device *dev) } dev_priv->fbdev = ifbdev; + INIT_WORK(&dev_priv->fbdev_suspend_work, intel_fbdev_suspend_worker); + drm_fb_helper_single_add_all_connectors(&ifbdev->helper); return 0; @@ -682,12 +694,14 @@ void intel_fbdev_fini(struct drm_device *dev) if (!dev_priv->fbdev) return; + flush_work(&dev_priv->fbdev_suspend_work); + intel_fbdev_destroy(dev, dev_priv->fbdev); kfree(dev_priv->fbdev); dev_priv->fbdev = NULL; } -void intel_fbdev_set_suspend(struct drm_device *dev, int state) +void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_fbdev *ifbdev = dev_priv->fbdev; @@ -698,6 +712,33 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state) info = ifbdev->helper.fbdev; + if (synchronous) { + /* Flush any pending work to turn the console on, and then + * wait to turn it off. It must be synchronous as we are + * about to suspend or unload the driver. + * + * Note that from within the work-handler, we cannot flush + * ourselves, so only flush outstanding work upon suspend! + */ + if (state != FBINFO_STATE_RUNNING) + flush_work(&dev_priv->fbdev_suspend_work); + console_lock(); + } else { + /* + * The console lock can be pretty contented on resume due + * to all the printk activity. Try to keep it out of the hot + * path of resume if possible. 
+ */ + WARN_ON(state != FBINFO_STATE_RUNNING); + if (!console_trylock()) { + /* Don't block our own workqueue as this can + * be run in parallel with other i915.ko tasks. + */ + schedule_work(&dev_priv->fbdev_suspend_work); + return; + } + } + /* On resume from hibernation: If the object is shmemfs backed, it has * been restored from swap. If the object is stolen however, it will be * full of whatever garbage was left in there. @@ -706,6 +747,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state) memset_io(info->screen_base, 0, info->screen_size); fb_set_suspend(info, state); + console_unlock(); } void intel_fbdev_output_poll_changed(struct drm_device *dev) -- cgit v1.2.3 From ebc3282409ae4d1e90c2f9608665cc4d8fbf7e73 Mon Sep 17 00:00:00 2001 From: Sagar Kamble Date: Wed, 13 Aug 2014 23:07:05 +0530 Subject: drm/i915: Created common handler for platform specific suspend/resume With this change, intel_runtime_suspend and intel_runtime_resume functions become completely platform agnostic. Platform specific suspend/resume changes are moved to intel_suspend_complete and intel_resume_prepare. Cc: Imre Deak Cc: Paulo Zanoni Cc: Daniel Vetter Cc: Jani Nikula Cc: Goel, Akash Signed-off-by: Sagar Kamble Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 76 ++++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 01de97776d81..b06e975dba39 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -494,6 +494,10 @@ bool i915_semaphore_is_enabled(struct drm_device *dev) return true; } + +static int intel_suspend_complete(struct drm_i915_private *dev_priv); +static int intel_resume_prepare(struct drm_i915_private *dev_priv); + static int i915_drm_freeze(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -959,14 +963,14 @@ static int i915_pm_poweroff(struct device *dev) return i915_drm_freeze(drm_dev); } -static int hsw_runtime_suspend(struct drm_i915_private *dev_priv) +static int hsw_suspend_complete(struct drm_i915_private *dev_priv) { hsw_enable_pc8(dev_priv); return 0; } -static int snb_runtime_resume(struct drm_i915_private *dev_priv) +static int snb_resume_prepare(struct drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; @@ -975,7 +979,7 @@ static int snb_runtime_resume(struct drm_i915_private *dev_priv) return 0; } -static int hsw_runtime_resume(struct drm_i915_private *dev_priv) +static int hsw_resume_prepare(struct drm_i915_private *dev_priv) { hsw_disable_pc8(dev_priv); @@ -1271,7 +1275,7 @@ static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv) I915_WRITE(VLV_GTLC_PW_STATUS, VLV_GTLC_ALLOWWAKEERR); } -static int vlv_runtime_suspend(struct drm_i915_private *dev_priv) +static int vlv_suspend_complete(struct drm_i915_private *dev_priv) { u32 mask; int err; @@ -1311,7 +1315,7 @@ err1: return err; } -static int vlv_runtime_resume(struct drm_i915_private *dev_priv) +static int vlv_resume_prepare(struct drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; int err; @@ -1389,17 +1393,7 @@ static int intel_runtime_suspend(struct device *device) cancel_work_sync(&dev_priv->rps.work); intel_runtime_pm_disable_interrupts(dev); - if (IS_GEN6(dev)) { - ret = 0; - } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { - ret = hsw_runtime_suspend(dev_priv); - } else if (IS_VALLEYVIEW(dev)) { - ret = vlv_runtime_suspend(dev_priv); - } 
else { - ret = -ENODEV; - WARN_ON(1); - } - + ret = intel_suspend_complete(dev_priv); if (ret) { DRM_ERROR("Runtime suspend failed, disabling it (%d)\n", ret); intel_runtime_pm_restore_interrupts(dev); @@ -1437,17 +1431,7 @@ static int intel_runtime_resume(struct device *device) intel_opregion_notify_adapter(dev, PCI_D0); dev_priv->pm.suspended = false; - if (IS_GEN6(dev)) { - ret = snb_runtime_resume(dev_priv); - } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { - ret = hsw_runtime_resume(dev_priv); - } else if (IS_VALLEYVIEW(dev)) { - ret = vlv_runtime_resume(dev_priv); - } else { - WARN_ON(1); - ret = -ENODEV; - } - + ret = intel_resume_prepare(dev_priv); /* * No point of rolling back things in case of an error, as the best * we can do is to hope that things will still work (and disable RPM). @@ -1466,6 +1450,44 @@ static int intel_runtime_resume(struct device *device) return ret; } +static int intel_suspend_complete(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + int ret; + + if (IS_GEN6(dev)) { + ret = 0; + } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { + ret = hsw_suspend_complete(dev_priv); + } else if (IS_VALLEYVIEW(dev)) { + ret = vlv_suspend_complete(dev_priv); + } else { + ret = -ENODEV; + WARN_ON(1); + } + + return ret; +} + +static int intel_resume_prepare(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + int ret; + + if (IS_GEN6(dev)) { + ret = snb_resume_prepare(dev_priv); + } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { + ret = hsw_resume_prepare(dev_priv); + } else if (IS_VALLEYVIEW(dev)) { + ret = vlv_resume_prepare(dev_priv); + } else { + WARN_ON(1); + ret = -ENODEV; + } + + return ret; +} + static const struct dev_pm_ops i915_pm_ops = { .suspend = i915_pm_suspend, .suspend_late = i915_pm_suspend_late, -- cgit v1.2.3 From 016970beb05da6285c2f3ed2bee1c676cb75972e Mon Sep 17 00:00:00 2001 From: Sagar Kamble Date: Wed, 13 Aug 2014 23:07:06 +0530 Subject: drm/i915: Sharing platform specific sequence between runtime and system suspend/resume paths On VLV, post S0i3, the following issue is observed during ring initialization in i915_drm_thaw. [ 335.604039] [drm:stop_ring] ERROR render ring :timed out trying to stop ring [ 336.607340] [drm:stop_ring] ERROR render ring :timed out trying to stop ring [ 336.607345] [drm:init_ring_common] ERROR failed to set render ring head to zero ctl 00000000 head 00000000 tail 00000000 start 00000000 [ 337.610645] [drm:stop_ring] ERROR bsd ring :timed out trying to stop ring [ 338.613952] [drm:stop_ring] ERROR bsd ring :timed out trying to stop ring [ 338.613956] [drm:init_ring_common] ERROR failed to set bsd ring head to zero ctl 00000000 head 00000000 tail 00000000 start 00000000 [ 339.617256] [drm:stop_ring] ERROR render ring :timed out trying to stop ring [ 339.617258] -----------[ cut here ]----------- [ 339.617267] WARNING: CPU: 0 PID: 6 at drivers/gpu/drm/i915/intel_ringbuffer.c:1666 intel_cleanup_ring+0xe6/0xf0() [ 339.617396] --[ end trace 5ef5ed1a3c92e2a6 ]-- [ 339.617428] [drm:__i915_drm_thaw] ERROR failed to re-initialize GPU, declaring wedged! This is happening since wake is not enabled and Gunit registers are not restored. For this, the system suspend/resume paths need to follow save/restore with additional platform specific setup in suspend_complete and resume_prepare. suspend_complete is shared unconditionally for VLV, HSW, BDW. resume_prepare for HSW and BDW has pc8 disabling, which is needed during thaw_early, so it is shared unconditionally. 
For VLV and SNB runtime resume specific sequence exists. Cc: Imre Deak Cc: Paulo Zanoni Cc: Daniel Vetter Cc: Jani Nikula Cc: Goel, Akash Signed-off-by: Sagar Kamble Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 63 ++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index b06e975dba39..2f112853c36f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -496,7 +496,8 @@ bool i915_semaphore_is_enabled(struct drm_device *dev) static int intel_suspend_complete(struct drm_i915_private *dev_priv); -static int intel_resume_prepare(struct drm_i915_private *dev_priv); +static int intel_resume_prepare(struct drm_i915_private *dev_priv, + bool rpm_resume); static int i915_drm_freeze(struct drm_device *dev) { @@ -604,15 +605,17 @@ int i915_suspend(struct drm_device *dev, pm_message_t state) static int i915_drm_thaw_early(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + int ret; - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - hsw_disable_pc8(dev_priv); + ret = intel_resume_prepare(dev_priv, false); + if (ret) + DRM_ERROR("Resume prepare failed: %d,Continuing resume\n", ret); intel_uncore_early_sanitize(dev, true); intel_uncore_sanitize(dev); intel_power_domains_init_hw(dev_priv); - return 0; + return ret; } static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings) @@ -888,6 +891,7 @@ static int i915_pm_suspend_late(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct drm_device *drm_dev = pci_get_drvdata(pdev); struct drm_i915_private *dev_priv = drm_dev->dev_private; + int ret; /* * We have a suspedn ordering issue with the snd-hda driver also @@ -901,13 +905,16 @@ static int i915_pm_suspend_late(struct device *dev) if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - if (IS_HASWELL(drm_dev) || IS_BROADWELL(drm_dev)) - hsw_enable_pc8(dev_priv); + ret = intel_suspend_complete(dev_priv); - pci_disable_device(pdev); - pci_set_power_state(pdev, PCI_D3hot); + if (ret) + DRM_ERROR("Suspend complete failed: %d\n", ret); + else { + pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D3hot); + } - return 0; + return ret; } static int i915_pm_resume_early(struct device *dev) @@ -970,16 +977,19 @@ static int hsw_suspend_complete(struct drm_i915_private *dev_priv) return 0; } -static int snb_resume_prepare(struct drm_i915_private *dev_priv) +static int snb_resume_prepare(struct drm_i915_private *dev_priv, + bool rpm_resume) { struct drm_device *dev = dev_priv->dev; - intel_init_pch_refclk(dev); + if (rpm_resume) + intel_init_pch_refclk(dev); return 0; } -static int hsw_resume_prepare(struct drm_i915_private *dev_priv) +static int hsw_resume_prepare(struct drm_i915_private *dev_priv, + bool rpm_resume) { hsw_disable_pc8(dev_priv); @@ -1315,7 +1325,8 @@ err1: return err; } -static int vlv_resume_prepare(struct drm_i915_private *dev_priv) +static int vlv_resume_prepare(struct drm_i915_private *dev_priv, + bool rpm_resume) { struct drm_device *dev = dev_priv->dev; int err; @@ -1340,8 +1351,10 @@ static int vlv_resume_prepare(struct drm_i915_private *dev_priv) vlv_check_no_gt_access(dev_priv); - intel_init_clock_gating(dev); - i915_gem_restore_fences(dev); + if (rpm_resume) { + intel_init_clock_gating(dev); + i915_gem_restore_fences(dev); + } return ret; } @@ -1431,7 +1444,7 @@ static int intel_runtime_resume(struct device *device) 
intel_opregion_notify_adapter(dev, PCI_D0); dev_priv->pm.suspended = false; - ret = intel_resume_prepare(dev_priv); + ret = intel_resume_prepare(dev_priv, true); /* * No point of rolling back things in case of an error, as the best * we can do is to hope that things will still work (and disable RPM). @@ -1450,6 +1463,10 @@ static int intel_runtime_resume(struct device *device) return ret; } +/* + * This function implements common functionality of runtime and system + * suspend sequence. + */ static int intel_suspend_complete(struct drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; @@ -1469,17 +1486,23 @@ static int intel_suspend_complete(struct drm_i915_private *dev_priv) return ret; } -static int intel_resume_prepare(struct drm_i915_private *dev_priv) +/* + * This function implements common functionality of runtime and system + * resume sequence. Variable rpm_resume used for implementing different + * code paths. + */ +static int intel_resume_prepare(struct drm_i915_private *dev_priv, + bool rpm_resume) { struct drm_device *dev = dev_priv->dev; int ret; if (IS_GEN6(dev)) { - ret = snb_resume_prepare(dev_priv); + ret = snb_resume_prepare(dev_priv, rpm_resume); } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { - ret = hsw_resume_prepare(dev_priv); + ret = hsw_resume_prepare(dev_priv, rpm_resume); } else if (IS_VALLEYVIEW(dev)) { - ret = vlv_resume_prepare(dev_priv); + ret = vlv_resume_prepare(dev_priv, rpm_resume); } else { WARN_ON(1); ret = -ENODEV; -- cgit v1.2.3 From 3a448734902359113b0c7c3454ce4cd56dc1e61f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Aug 2014 20:05:47 +0100 Subject: drm/i915: Print captured bo for all VM in error state The current error state harks back to the era of just a single VM. For full-ppgtt, we capture every bo on every VM. It behoves us to then print every bo for every VM, which we currently fail to do and so miss vital information in the error state. v2: Use the vma address rather than -1! 
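A toy model of the resulting dump loop may make the change concrete: with full ppgtt there is one capture per address space, so the printer iterates vm_count entries instead of only entry 0 (illustrative C, hypothetical names; the real code walks dev_priv->vm_list):

#include <stdio.h>

struct vm_capture {
	const char *name;
	int active_count;
	int pinned_count;
};

/* One capture per address space; dump them all, not just vm[0]. */
static void print_error_buffers(const struct vm_capture *vms, int vm_count)
{
	for (int i = 0; i < vm_count; i++) {
		printf("vm[%d] (%s)\n", i, vms[i].name);
		printf("  Active [%d]\n", vms[i].active_count);
		printf("  Pinned [%d]\n", vms[i].pinned_count);
	}
}

int main(void)
{
	struct vm_capture vms[] = {
		{ "ggtt",  12, 3 },
		{ "ppgtt",  7, 0 },
	};

	print_error_buffers(vms, 2);
	return 0;
}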
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gpu_error.c | 80 ++++++++++++++++++++++++----------- 2 files changed, 58 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 541fb6f295bb..ed52ac744105 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -396,6 +396,7 @@ struct drm_i915_error_state { pid_t pid; char comm[TASK_COMM_LEN]; } ring[I915_NUM_RINGS]; + struct drm_i915_error_buffer { u32 size; u32 name; @@ -414,6 +415,7 @@ struct drm_i915_error_state { } **active_bo, **pinned_bo; u32 *active_bo_count, *pinned_bo_count; + u32 vm_count; }; struct intel_connector; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index fc11ac6b0373..35e70d5d6282 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -192,10 +192,10 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, struct drm_i915_error_buffer *err, int count) { - err_printf(m, "%s [%d]:\n", name, count); + err_printf(m, " %s [%d]:\n", name, count); while (count--) { - err_printf(m, " %08x %8u %02x %02x %x %x", + err_printf(m, " %08x %8u %02x %02x %x %x", err->gtt_offset, err->size, err->read_domains, @@ -393,15 +393,17 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, i915_ring_error_state(m, dev, &error->ring[i]); } - if (error->active_bo) + for (i = 0; i < error->vm_count; i++) { + err_printf(m, "vm[%d]\n", i); + print_error_buffers(m, "Active", - error->active_bo[0], - error->active_bo_count[0]); + error->active_bo[i], + error->active_bo_count[i]); - if (error->pinned_bo) print_error_buffers(m, "Pinned", - error->pinned_bo[0], - error->pinned_bo_count[0]); + error->pinned_bo[i], + error->pinned_bo_count[i]); + } for (i = 0; i < ARRAY_SIZE(error->ring); i++) { obj = error->ring[i].batchbuffer; @@ -644,13 +646,15 @@ unwind: (src)->base.size>>PAGE_SHIFT) static void capture_bo(struct drm_i915_error_buffer *err, - struct drm_i915_gem_object *obj) + struct i915_vma *vma) { + struct drm_i915_gem_object *obj = vma->obj; + err->size = obj->base.size; err->name = obj->base.name; err->rseqno = obj->last_read_seqno; err->wseqno = obj->last_write_seqno; - err->gtt_offset = i915_gem_obj_ggtt_offset(obj); + err->gtt_offset = vma->node.start; err->read_domains = obj->base.read_domains; err->write_domain = obj->base.write_domain; err->fence_reg = obj->fence_reg; @@ -674,7 +678,7 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err, int i = 0; list_for_each_entry(vma, head, mm_list) { - capture_bo(err++, vma->obj); + capture_bo(err++, vma); if (++i == count) break; } @@ -683,21 +687,27 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err, } static u32 capture_pinned_bo(struct drm_i915_error_buffer *err, - int count, struct list_head *head) + int count, struct list_head *head, + struct i915_address_space *vm) { struct drm_i915_gem_object *obj; - int i = 0; + struct drm_i915_error_buffer * const first = err; + struct drm_i915_error_buffer * const last = err + count; list_for_each_entry(obj, head, global_list) { - if (!i915_gem_obj_is_pinned(obj)) - continue; + struct i915_vma *vma; - capture_bo(err++, obj); - if (++i == count) + if (err == last) break; + + list_for_each_entry(vma, &obj->vma_list, vma_link) + if (vma->vm == vm && vma->pin_count > 0) { + capture_bo(err++, vma); + break; + } } - return i; + 
return err - first; } /* Generate a semi-unique error code. The code is not meant to have meaning, The @@ -1053,9 +1063,14 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, list_for_each_entry(vma, &vm->active_list, mm_list) i++; error->active_bo_count[ndx] = i; + + list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { + list_for_each_entry(vma, &obj->vma_list, vma_link) + if (vma->vm == vm && vma->pin_count > 0) { + i++; + break; + } + } error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx]; if (i) { @@ -1074,7 +1089,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, error->pinned_bo_count[ndx] = capture_pinned_bo(pinned_bo, error->pinned_bo_count[ndx], - &dev_priv->mm.bound_list); + &dev_priv->mm.bound_list, vm); error->active_bo[ndx] = active_bo; error->pinned_bo[ndx] = pinned_bo; } @@ -1095,8 +1110,25 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv, error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count), GFP_ATOMIC); - list_for_each_entry(vm, &dev_priv->vm_list, global_link) - i915_gem_capture_vm(dev_priv, error, vm, i++); + if (error->active_bo == NULL || + error->pinned_bo == NULL || + error->active_bo_count == NULL || + error->pinned_bo_count == NULL) { + kfree(error->active_bo); + kfree(error->active_bo_count); + kfree(error->pinned_bo); + kfree(error->pinned_bo_count); + + error->active_bo = NULL; + error->active_bo_count = NULL; + error->pinned_bo = NULL; + error->pinned_bo_count = NULL; + } else { + list_for_each_entry(vm, &dev_priv->vm_list, global_link) + i915_gem_capture_vm(dev_priv, error, vm, i++); + + error->vm_count = cnt; + } } /* Capture all registers which don't fit into another category. */ -- cgit v1.2.3 From 582d67f0b19afc2299bc8977aba835d8d25bb591 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:16 +0100 Subject: drm/i915: Add temporary ring->ctx backpointer The execlist patches have a bit of a convoluted and long history and due to that have the actual submission still misplaced deeply buried in the low-level ringbuffer handling code. This design goes back to the legacy ringbuffer code with its tricky lazy request and simple work submission using ring tail writes. For that reason they need a ring->ctx backpointer. The goal is to unbury that code and move it up into a level where the full execlist context is available so that we can ditch this backpointer. Until that's done, make it really obvious that there's work still to be done. 
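The backpointer itself is a simple crutch; a minimal sketch of why the low-level submit path needs it (hypothetical names, not driver code):

#include <stdio.h>

struct intel_context { int id; };

struct ringbuf {
	unsigned int tail;
	/* Temporary backpointer: the low-level tail-write path has no
	 * other way to reach the context it is submitting for. */
	struct intel_context *ctx;
};

static void submit(struct ringbuf *rb)
{
	printf("submitting ctx %d at tail %u\n", rb->ctx->id, rb->tail);
}

int main(void)
{
	struct intel_context ctx = { .id = 42 };
	struct ringbuf rb = { .tail = 128, .ctx = &ctx };

	submit(&rb);
	return 0;
}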
Cc: Oscar Mateo Cc: Thomas Daniel Acked-by: Thomas Daniel Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6b5f416b5c0d..c2352d1b23fa 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1086,6 +1086,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, } ringbuf->ring = ring; + ringbuf->FIXME_lrc_ctx = ctx; + ringbuf->size = 32 * PAGE_SIZE; ringbuf->effective_size = ringbuf->size; ringbuf->head = 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 24437da91f77..26785ca72530 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -99,6 +99,13 @@ struct intel_ringbuffer { struct intel_engine_cs *ring; + /* + * FIXME: This backpointer is an artifact of the history of how the + * execlist patches came into being. It will get removed once the basic + * code has landed. + */ + struct intel_context *FIXME_lrc_ctx; + u32 head; u32 tail; int space; -- cgit v1.2.3 From 48e29f5535b9eb506c44bd8f41bd9348fd219435 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:29 +0100 Subject: drm/i915/bdw: Emission of requests with logical rings On a previous iteration of this patch, I created an Execlists version of __i915_add_request and abstracted it away as a vfunc. Daniel Vetter wondered then why that was needed: "with the clean split in command submission I expect every function to know whether it'll submit to an lrc (everything in intel_lrc.c) or whether it'll submit to a legacy ring (existing code), so I don't see a need for an add_request vfunc." The honest, hairy truth is that this patch is the glue keeping the whole logical ring puzzle together: - i915_add_request is used by intel_ring_idle, which in turn is used by i915_gpu_idle, which in turn is used in several places inside the eviction and gtt codes. - Also, it is used by i915_gem_check_olr, which is littered all over i915_gem.c - ... If I were to duplicate all the code that directly or indirectly uses __i915_add_request, I'd end up creating a separate driver. To show the differences between the existing legacy version and the new Execlists one, this time I have special-cased __i915_add_request instead of adding an add_request vfunc. I hope this helps to untangle this Gordian knot. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Adjust to ringbuf->FIXME_lrc_ctx per the discussion with Thomas Daniel.] 
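A stripped-down sketch of the special-casing described above, i.e. resolving the right ringbuffer from the submission mode before doing anything else (illustrative C, hypothetical names):

#include <stdio.h>

struct ringbuf { const char *name; };

struct engine {
	struct ringbuf *legacy_buf;
	struct ringbuf *lrc_buf;	/* per-context buffer in execlists mode */
};

/* add_request must serve both submission models, so the first step is
 * always: figure out which ringbuffer the request actually lives in. */
static struct ringbuf *request_ringbuf(struct engine *e, int enable_execlists)
{
	return enable_execlists ? e->lrc_buf : e->legacy_buf;
}

int main(void)
{
	struct ringbuf legacy = { "legacy" }, lrc = { "lrc" };
	struct engine e = { &legacy, &lrc };

	printf("%s\n", request_ringbuf(&e, 0)->name);
	printf("%s\n", request_ringbuf(&e, 1)->name);
	return 0;
}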
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 72 +++++++++++++++++++++++++++++----------- drivers/gpu/drm/i915/intel_lrc.c | 30 ++++++++++++++--- drivers/gpu/drm/i915/intel_lrc.h | 1 + 3 files changed, 80 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6c2f0b886eb0..32fa1e9eb844 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2311,10 +2311,21 @@ int __i915_add_request(struct intel_engine_cs *ring, { struct drm_i915_private *dev_priv = ring->dev->dev_private; struct drm_i915_gem_request *request; + struct intel_ringbuffer *ringbuf; u32 request_ring_position, request_start; int ret; - request_start = intel_ring_get_tail(ring->buffer); + request = ring->preallocated_lazy_request; + if (WARN_ON(request == NULL)) + return -ENOMEM; + + if (i915.enable_execlists) { + struct intel_context *ctx = request->ctx; + ringbuf = ctx->engine[ring->id].ringbuf; + } else + ringbuf = ring->buffer; + + request_start = intel_ring_get_tail(ringbuf); /* * Emit any outstanding flushes - execbuf can fail to emit the flush * after having emitted the batchbuffer command. Hence we need to fix @@ -2322,24 +2333,32 @@ int __i915_add_request(struct intel_engine_cs *ring, * is that the flush _must_ happen before the next request, no matter * what. */ - ret = intel_ring_flush_all_caches(ring); - if (ret) - return ret; - - request = ring->preallocated_lazy_request; - if (WARN_ON(request == NULL)) - return -ENOMEM; + if (i915.enable_execlists) { + ret = logical_ring_flush_all_caches(ringbuf); + if (ret) + return ret; + } else { + ret = intel_ring_flush_all_caches(ring); + if (ret) + return ret; + } /* Record the position of the start of the request so that * should we detect the updated seqno part-way through the * GPU processing the request, we never over-estimate the * position of the head. */ - request_ring_position = intel_ring_get_tail(ring->buffer); + request_ring_position = intel_ring_get_tail(ringbuf); - ret = ring->add_request(ring); - if (ret) - return ret; + if (i915.enable_execlists) { + ret = ring->emit_request(ringbuf); + if (ret) + return ret; + } else { + ret = ring->add_request(ring); + if (ret) + return ret; + } request->seqno = intel_ring_get_seqno(ring); request->ring = ring; @@ -2354,12 +2373,14 @@ int __i915_add_request(struct intel_engine_cs *ring, */ request->batch_obj = obj; - /* Hold a reference to the current context so that we can inspect - * it later in case a hangcheck error event fires. - */ - request->ctx = ring->last_context; - if (request->ctx) - i915_gem_context_reference(request->ctx); + if (!i915.enable_execlists) { + /* Hold a reference to the current context so that we can inspect + * it later in case a hangcheck error event fires. 
+ */ + request->ctx = ring->last_context; + if (request->ctx) + i915_gem_context_reference(request->ctx); + } request->emitted_jiffies = jiffies; list_add_tail(&request->list, &ring->request_list); @@ -2614,6 +2635,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) while (!list_empty(&ring->request_list)) { struct drm_i915_gem_request *request; + struct intel_ringbuffer *ringbuf; request = list_first_entry(&ring->request_list, struct drm_i915_gem_request, @@ -2623,12 +2645,24 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) break; trace_i915_gem_request_retire(ring, request->seqno); + + /* This is one of the few common intersection points + * between legacy ringbuffer submission and execlists: + * we need to tell them apart in order to find the correct + * ringbuffer to which the request belongs to. + */ + if (i915.enable_execlists) { + struct intel_context *ctx = request->ctx; + ringbuf = ctx->engine[ring->id].ringbuf; + } else + ringbuf = ring->buffer; + /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position * of tail of the request to update the last known position * of the GPU head. */ - ring->buffer->last_retired_head = request->tail; + ringbuf->last_retired_head = request->tail; i915_gem_free_request(request); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c2352d1b23fa..cd6ddd80e54c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -252,6 +252,22 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING)); } +int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf) +{ + struct intel_engine_cs *ring = ringbuf->ring; + int ret; + + if (!ring->gpu_caches_dirty) + return 0; + + ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS); + if (ret) + return ret; + + ring->gpu_caches_dirty = false; + return 0; +} + void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) { intel_logical_ring_advance(ringbuf); @@ -262,7 +278,8 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) /* TODO: how to submit a context to the ELSP is not here yet */ } -static int logical_ring_alloc_seqno(struct intel_engine_cs *ring) +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, + struct intel_context *ctx) { if (ring->outstanding_lazy_seqno) return 0; @@ -274,6 +291,13 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring) if (request == NULL) return -ENOMEM; + /* Hold a reference to the context this request belongs to + * (we will need it when the time comes to emit/retire the + * request). 
+ */ + request->ctx = ctx; + i915_gem_context_reference(request->ctx); + ring->preallocated_lazy_request = request; } @@ -312,8 +336,6 @@ static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf, if (ret) return ret; - /* TODO: make sure we update the right ringbuffer's last_retired_head * when retiring requests */ i915_gem_retire_requests_ring(ring); ringbuf->head = ringbuf->last_retired_head; ringbuf->last_retired_head = -1; @@ -433,7 +455,7 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) return ret; /* Preallocate the olr before touching the ring */ - ret = logical_ring_alloc_seqno(ring); + ret = logical_ring_alloc_seqno(ring, ringbuf->FIXME_lrc_ctx); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 4e032875c1fd..460e1af15600 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -29,6 +29,7 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring); void intel_logical_ring_cleanup(struct intel_engine_cs *ring); int intel_logical_rings_init(struct drm_device *dev); +int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf); void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf); static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) { -- cgit v1.2.3 From 84b790f80e5153d8d54074aa4eae49ff3070f2f1 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 24 Jul 2014 17:04:36 +0100 Subject: drm/i915/bdw: Implement context switching (somewhat) A context switch occurs by submitting a context descriptor to the ExecList Submission Port. Given that we can now initialize a context, it's possible to begin implementing the context switch by creating the descriptor and submitting it to ELSP (actually two, since the ELSP has two ports). The context object must be mapped in the GGTT, which means it must exist in the 0-4GB graphics VA range. Signed-off-by: Ben Widawsky v2: This code has changed quite a lot in various rebases. Of particular importance is that now we use the globally unique Submission ID to send to the hardware. Also, context pages are now pinned unconditionally to GGTT, so there is no need to bind them. v3: Use LRCA[31:12] as hwCtxId[19:0]. This guarantees that the HW context ID we submit to the ELSP is globally unique and != 0 (Bspec requirements of the software use-only bits of the Context ID in the Context Descriptor Format) without the hassle of the previous submission Id construction. Also, re-add the ELSP posting read (it was dropped somewhere during the rebases). v4: - Squash with "drm/i915/bdw: Add forcewake lock around ELSP writes" (BSPEC says: "SW must set Force Wakeup bit to prevent GT from entering C6 while ELSP writes are in progress") as noted by Thomas Daniel (thomas.daniel@intel.com). - Rename functions and use an execlists/intel_execlists_ namespace. - The BUG_ON only checked that the LRCA was <32 bits, but it didn't make sure that it was properly aligned. Spotted by Alistair Mcaulay. v5: - Improved source code comments as suggested by Chris Wilson. - No need to abstract submit_ctx away, as pointed out by Brad Volkin. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch. Sigh.] 
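The descriptor layout described in v3 can be reproduced standalone: bits 31:12 of the 4K-aligned LRCA double as the 20-bit hardware context ID, and the finished 64-bit descriptor is split into two dword writes. A self-contained C sketch using the constants from the patch (illustrative; the real definitions live in intel_lrc.c):

#include <stdint.h>
#include <stdio.h>

#define GEN8_CTX_VALID          (1u << 0)
#define GEN8_CTX_L3LLC_COHERENT (1u << 5)
#define GEN8_CTX_PRIVILEGE      (1u << 8)
#define LEGACY_CONTEXT          1u
#define GEN8_CTX_MODE_SHIFT     3
#define GEN8_CTX_ID_SHIFT       32

/* LRCA is 4K aligned, so bits 31:12 double as a unique 20-bit id. */
static uint32_t ctx_id(uint64_t lrca) { return (uint32_t)(lrca >> 12); }

static uint64_t ctx_descriptor(uint64_t lrca)
{
	uint64_t desc = GEN8_CTX_VALID;

	desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
	desc |= GEN8_CTX_L3LLC_COHERENT;
	desc |= GEN8_CTX_PRIVILEGE;
	desc |= lrca;					/* bits 31:12 */
	desc |= (uint64_t)ctx_id(lrca) << GEN8_CTX_ID_SHIFT;
	return desc;
}

int main(void)
{
	uint64_t desc = ctx_descriptor(0x12345000);

	/* ELSP takes two dword writes per descriptor, high dword first. */
	printf("high=%08x low=%08x\n",
	       (unsigned)(desc >> 32), (unsigned)desc);
	return 0;
}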
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 116 ++++++++++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_lrc.h | 1 + 2 files changed, 115 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index cd6ddd80e54c..aa81fd41b9c1 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -47,6 +47,7 @@ #define GEN8_LR_CONTEXT_ALIGN 4096 #define RING_ELSP(ring) ((ring)->mmio_base+0x230) +#define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234) #define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) #define CTX_LRI_HEADER_0 0x01 @@ -78,6 +79,26 @@ #define CTX_R_PWR_CLK_STATE 0x42 #define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 +#define GEN8_CTX_VALID (1<<0) +#define GEN8_CTX_FORCE_PD_RESTORE (1<<1) +#define GEN8_CTX_FORCE_RESTORE (1<<2) +#define GEN8_CTX_L3LLC_COHERENT (1<<5) +#define GEN8_CTX_PRIVILEGE (1<<8) +enum { + ADVANCED_CONTEXT = 0, + LEGACY_CONTEXT, + ADVANCED_AD_CONTEXT, + LEGACY_64B_CONTEXT +}; +#define GEN8_CTX_MODE_SHIFT 3 +enum { + FAULT_AND_HANG = 0, + FAULT_AND_HALT, /* Debug only */ + FAULT_AND_STREAM, + FAULT_AND_CONTINUE /* Unsupported */ +}; +#define GEN8_CTX_ID_SHIFT 32 + int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) { WARN_ON(i915.enable_ppgtt == -1); @@ -92,6 +113,93 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } +u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) +{ + u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj); + + /* LRCA is required to be 4K aligned so the more significant 20 bits + * are globally unique */ + return lrca >> 12; +} + +static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj) +{ + uint64_t desc; + uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj); + BUG_ON(lrca & 0xFFFFFFFF00000FFFULL); + + desc = GEN8_CTX_VALID; + desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT; + desc |= GEN8_CTX_L3LLC_COHERENT; + desc |= GEN8_CTX_PRIVILEGE; + desc |= lrca; + desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT; + + /* TODO: WaDisableLiteRestore when we start using semaphore + * signalling between Command Streamers */ + /* desc |= GEN8_CTX_FORCE_RESTORE; */ + + return desc; +} + +static void execlists_elsp_write(struct intel_engine_cs *ring, + struct drm_i915_gem_object *ctx_obj0, + struct drm_i915_gem_object *ctx_obj1) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + uint64_t temp = 0; + uint32_t desc[4]; + + /* XXX: You must always write both descriptors in the order below. 
*/ + if (ctx_obj1) + temp = execlists_ctx_descriptor(ctx_obj1); + else + temp = 0; + desc[1] = (u32)(temp >> 32); + desc[0] = (u32)temp; + + temp = execlists_ctx_descriptor(ctx_obj0); + desc[3] = (u32)(temp >> 32); + desc[2] = (u32)temp; + + /* Set Force Wakeup bit to prevent GT from entering C6 while + * ELSP writes are in progress */ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + + I915_WRITE(RING_ELSP(ring), desc[1]); + I915_WRITE(RING_ELSP(ring), desc[0]); + I915_WRITE(RING_ELSP(ring), desc[3]); + /* The context is automatically loaded after the following */ + I915_WRITE(RING_ELSP(ring), desc[2]); + + /* ELSP is a wo register, so use another nearby reg for posting instead */ + POSTING_READ(RING_EXECLIST_STATUS(ring)); + + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); +} + +static int execlists_submit_context(struct intel_engine_cs *ring, + struct intel_context *to0, u32 tail0, + struct intel_context *to1, u32 tail1) +{ + struct drm_i915_gem_object *ctx_obj0; + struct drm_i915_gem_object *ctx_obj1 = NULL; + + ctx_obj0 = to0->engine[ring->id].state; + BUG_ON(!ctx_obj0); + BUG_ON(!i915_gem_obj_is_pinned(ctx_obj0)); + + if (to1) { + ctx_obj1 = to1->engine[ring->id].state; + BUG_ON(!ctx_obj1); + BUG_ON(!i915_gem_obj_is_pinned(ctx_obj1)); + } + + execlists_elsp_write(ring, ctx_obj0, ctx_obj1); + + return 0; +} + static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) { struct intel_engine_cs *ring = ringbuf->ring; @@ -270,12 +378,16 @@ int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf) void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) { + struct intel_engine_cs *ring = ringbuf->ring; + struct intel_context *ctx = ringbuf->FIXME_lrc_ctx; + intel_logical_ring_advance(ringbuf); - if (intel_ring_stopped(ringbuf->ring)) + if (intel_ring_stopped(ring)) return; - /* TODO: how to submit a context to the ELSP is not here yet */ + /* FIXME: too cheeky, we don't even check if the ELSP is ready */ + execlists_submit_context(ring, ctx, ringbuf->tail, NULL, 0); } static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 460e1af15600..69605b158235 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -57,5 +57,6 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, struct list_head *vmas, struct drm_i915_gem_object *batch_obj, u64 exec_start, u32 flags); +u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj); #endif /* _INTEL_LRC_H_ */ -- cgit v1.2.3 From ae1250b9da308acd16554365d125b4afb795b825 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:37 +0100 Subject: drm/i915/bdw: Write the tail pointer, LRC style Each logical ring context has the tail pointer in the context object, so update it before submission. v2: New namespace. 
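A minimal model of that tail write (illustrative C; the CTX_RING_TAIL value here is an assumption standing in for the offset defined earlier in the series, and the context image is modeled as a plain array of (reg, value) dword pairs):

#include <stdint.h>
#include <stdio.h>

#define CTX_RING_TAIL 0x06	/* assumed offset into the saved state */

/* The context image stores (register, value) pairs, so the value slot
 * for the register at index i is reg_state[i + 1]. */
static void ctx_write_tail(uint32_t *reg_state, uint32_t tail)
{
	reg_state[CTX_RING_TAIL + 1] = tail;
}

int main(void)
{
	uint32_t reg_state[64] = { 0 };

	ctx_write_tail(reg_state, 0x80);
	printf("tail in image: %#x\n", (unsigned)reg_state[CTX_RING_TAIL + 1]);
	return 0;
}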
Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index aa81fd41b9c1..26bc063f137b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -178,6 +178,21 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); } +static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail) +{ + struct page *page; + uint32_t *reg_state; + + page = i915_gem_object_get_page(ctx_obj, 1); + reg_state = kmap_atomic(page); + + reg_state[CTX_RING_TAIL+1] = tail; + + kunmap_atomic(reg_state); + + return 0; +} + static int execlists_submit_context(struct intel_engine_cs *ring, struct intel_context *to0, u32 tail0, struct intel_context *to1, u32 tail1) @@ -189,10 +204,14 @@ static int execlists_submit_context(struct intel_engine_cs *ring, BUG_ON(!ctx_obj0); BUG_ON(!i915_gem_obj_is_pinned(ctx_obj0)); + execlists_ctx_write_tail(ctx_obj0, tail0); + if (to1) { ctx_obj1 = to1->engine[ring->id].state; BUG_ON(!ctx_obj1); BUG_ON(!i915_gem_obj_is_pinned(ctx_obj1)); + + execlists_ctx_write_tail(ctx_obj1, tail1); } execlists_elsp_write(ring, ctx_obj0, ctx_obj1); -- cgit v1.2.3 From acdd884a2e1b873995c120d5eabd8cab77f48f20 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Thu, 24 Jul 2014 17:04:38 +0100 Subject: drm/i915/bdw: Two-stage execlist submit process Context switch (and execlist submission) should happen only when other contexts are not active, otherwise pre-emption occurs. To ensure this, we place context switch requests in a queue and those requests are later consumed when the right context switch interrupt is received (still TODO). v2: Use a spinlock, do not remove the requests on unqueue (wait for context switch completion). Signed-off-by: Thomas Daniel v3: Several rebases and code changes. Use unique ID. v4: - Move the queue/lock init to the late ring initialization. - Damien's kmalloc review comments: check return, use sizeof(*req), do not cast. v5: - Do not reuse drm_i915_gem_request. Instead, create our own. - New namespace. Signed-off-by: Michel Thierry (v1) Signed-off-by: Oscar Mateo (v2-v5) Reviewed-by: Damien Lespiau [danvet: Checkpatch + wash-up s/BUG_ON/WARN_ON/.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 72 ++++++++++++++++++++++++++++++--- drivers/gpu/drm/i915/intel_lrc.h | 8 ++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + 3 files changed, 77 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 26bc063f137b..e81f5f6c49b9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -126,7 +126,8 @@ static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj) { uint64_t desc; uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj); - BUG_ON(lrca & 0xFFFFFFFF00000FFFULL); + + WARN_ON(lrca & 0xFFFFFFFF00000FFFULL); desc = GEN8_CTX_VALID; desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT; @@ -202,14 +203,14 @@ static int execlists_submit_context(struct intel_engine_cs *ring, ctx_obj0 = to0->engine[ring->id].state; BUG_ON(!ctx_obj0); - BUG_ON(!i915_gem_obj_is_pinned(ctx_obj0)); + WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0)); execlists_ctx_write_tail(ctx_obj0, tail0); if (to1) { ctx_obj1 = to1->engine[ring->id].state; BUG_ON(!ctx_obj1); - BUG_ON(!i915_gem_obj_is_pinned(ctx_obj1)); + WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1)); execlists_ctx_write_tail(ctx_obj1, tail1); } @@ -219,6 +220,65 @@ static int execlists_submit_context(struct intel_engine_cs *ring, return 0; } +static void execlists_context_unqueue(struct intel_engine_cs *ring) +{ + struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL; + struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL; + + if (list_empty(&ring->execlist_queue)) + return; + + /* Try to read in pairs */ + list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue, + execlist_link) { + if (!req0) { + req0 = cursor; + } else if (req0->ctx == cursor->ctx) { + /* Same ctx: ignore first request, as second request + * will update tail past first request's workload */ + list_del(&req0->execlist_link); + i915_gem_context_unreference(req0->ctx); + kfree(req0); + req0 = cursor; + } else { + req1 = cursor; + break; + } + } + + WARN_ON(execlists_submit_context(ring, req0->ctx, req0->tail, + req1 ? req1->ctx : NULL, + req1 ? 
req1->tail : 0)); +} + +static int execlists_context_queue(struct intel_engine_cs *ring, + struct intel_context *to, + u32 tail) +{ + struct intel_ctx_submit_request *req = NULL; + unsigned long flags; + bool was_empty; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (req == NULL) + return -ENOMEM; + req->ctx = to; + i915_gem_context_reference(req->ctx); + req->ring = ring; + req->tail = tail; + + spin_lock_irqsave(&ring->execlist_lock, flags); + + was_empty = list_empty(&ring->execlist_queue); + list_add_tail(&req->execlist_link, &ring->execlist_queue); + if (was_empty) + execlists_context_unqueue(ring); + + spin_unlock_irqrestore(&ring->execlist_lock, flags); + + return 0; +} + static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) { struct intel_engine_cs *ring = ringbuf->ring; @@ -405,8 +465,7 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) if (intel_ring_stopped(ring)) return; - /* FIXME: too cheeky, we don't even check if the ELSP is ready */ - execlists_submit_context(ring, ctx, ringbuf->tail, NULL, 0); + execlists_context_queue(ring, ctx, ringbuf->tail); } static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, @@ -846,6 +905,9 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin INIT_LIST_HEAD(&ring->request_list); init_waitqueue_head(&ring->irq_queue); + INIT_LIST_HEAD(&ring->execlist_queue); + spin_lock_init(&ring->execlist_lock); + ret = intel_lr_context_deferred_create(dctx, ring); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 69605b158235..3c389b3a2b75 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -59,4 +59,12 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, u64 exec_start, u32 flags); u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj); +struct intel_ctx_submit_request { + struct intel_context *ctx; + struct intel_engine_cs *ring; + u32 tail; + + struct list_head execlist_link; +}; + #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 26785ca72530..670262dabb6c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -231,6 +231,8 @@ struct intel_engine_cs { } semaphore; /* Execlists */ + spinlock_t execlist_lock; + struct list_head execlist_queue; u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */ int (*emit_request)(struct intel_ringbuffer *ringbuf); int (*emit_flush)(struct intel_ringbuffer *ringbuf, -- cgit v1.2.3 From e981e7b17f2b41970e7e2367d4225e0bb3310667 Mon Sep 17 00:00:00 2001 From: Thomas Daniel Date: Thu, 24 Jul 2014 17:04:39 +0100 Subject: drm/i915/bdw: Handle context switch events Handle all context status events in the context status buffer on every context switch interrupt. We only remove work from the execlist queue after a context status buffer reports that it has completed and we only attempt to schedule new contexts on interrupt when a previously submitted context completes (unless no contexts are queued, which means the GPU is free). We cannot call intel_runtime_pm_get() in an interrupt (or with a spinlock grabbed, FWIW), because it might sleep, which is not a nice thing to do. Instead, do the runtime_pm get/put together with the create/destroy request, and handle the forcewake get/put directly. 
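The context status buffer (CSB) itself is a six-entry circular buffer of (status, context ID) pairs, with a hardware-maintained write pointer kept in a separate register. A rough, self-contained sketch of the pointer arithmetic the handler performs; the array and the modulo walk are stand-ins for the per-slot MMIO reads, for illustration only:

#include <stdint.h>

#define CSB_ENTRIES 6 /* GEN8 context status buffer depth */
#define CTX_STATUS_COMPLETE (1 << 4) /* matches GEN8_CTX_STATUS_COMPLETE */

struct csb_entry { uint32_t status; uint32_t ctx_id; };

/* Walk every entry the hardware wrote since the last interrupt and count
 * completions; the caller submits queued contexts if any completed. */
static int sketch_drain_csb(const struct csb_entry *csb,
			    uint8_t *next_read, uint32_t status_ptr)
{
	uint8_t read = *next_read;
	uint8_t write = status_ptr & 0x07; /* low bits of the PTR register */
	int completed = 0;

	if (read > write)
		write += CSB_ENTRIES; /* unwrap so the walk below terminates */

	while (read < write) {
		read++;
		if (csb[read % CSB_ENTRIES].status & CTX_STATUS_COMPLETE)
			completed++;
	}

	*next_read = write % CSB_ENTRIES;
	return completed;
}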
Signed-off-by: Thomas Daniel v2: Unreferencing the context when we are freeing the request might free the backing bo, which requires the struct_mutex to be grabbed, so defer unreferencing and freeing to a bottom half. v3: - Ack the interrupt immediately, before trying to handle it (fix for missing interrupts by Bob Beckett ). - Update the Context Status Buffer Read Pointer, just in case (spotted by Damien Lespiau). v4: New namespace and multiple rebase changes. v5: Squash with "drm/i915/bdw: Do not call intel_runtime_pm_get() in an interrupt", as suggested by Daniel. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch ...] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 35 ++++++--- drivers/gpu/drm/i915/intel_lrc.c | 133 ++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_lrc.h | 3 + drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 4 files changed, 155 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 00957fa0b877..f5d6795887d2 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1632,6 +1632,7 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, struct drm_i915_private *dev_priv, u32 master_ctl) { + struct intel_engine_cs *ring; u32 rcs, bcs, vcs; uint32_t tmp = 0; irqreturn_t ret = IRQ_NONE; @@ -1641,14 +1642,20 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) { I915_WRITE(GEN8_GT_IIR(0), tmp); ret = IRQ_HANDLED; + rcs = tmp >> GEN8_RCS_IRQ_SHIFT; - bcs = tmp >> GEN8_BCS_IRQ_SHIFT; + ring = &dev_priv->ring[RCS]; if (rcs & GT_RENDER_USER_INTERRUPT) - notify_ring(dev, &dev_priv->ring[RCS]); + notify_ring(dev, ring); + if (rcs & GT_CONTEXT_SWITCH_INTERRUPT) + intel_execlists_handle_ctx_events(ring); + + bcs = tmp >> GEN8_BCS_IRQ_SHIFT; + ring = &dev_priv->ring[BCS]; if (bcs & GT_RENDER_USER_INTERRUPT) - notify_ring(dev, &dev_priv->ring[BCS]); - if ((rcs | bcs) & GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER("TODO: Context switch\n"); + notify_ring(dev, ring); + if (bcs & GT_CONTEXT_SWITCH_INTERRUPT) + intel_execlists_handle_ctx_events(ring); } else DRM_ERROR("The master control interrupt lied (GT0)!\n"); } @@ -1658,16 +1665,20 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) { I915_WRITE(GEN8_GT_IIR(1), tmp); ret = IRQ_HANDLED; + vcs = tmp >> GEN8_VCS1_IRQ_SHIFT; + ring = &dev_priv->ring[VCS]; if (vcs & GT_RENDER_USER_INTERRUPT) - notify_ring(dev, &dev_priv->ring[VCS]); + notify_ring(dev, ring); if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER("TODO: Context switch\n"); + intel_execlists_handle_ctx_events(ring); + vcs = tmp >> GEN8_VCS2_IRQ_SHIFT; + ring = &dev_priv->ring[VCS2]; if (vcs & GT_RENDER_USER_INTERRUPT) - notify_ring(dev, &dev_priv->ring[VCS2]); + notify_ring(dev, ring); if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER("TODO: Context switch\n"); + intel_execlists_handle_ctx_events(ring); } else DRM_ERROR("The master control interrupt lied (GT1)!\n"); } @@ -1688,11 +1699,13 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) { I915_WRITE(GEN8_GT_IIR(3), tmp); ret = IRQ_HANDLED; + vcs = tmp >> GEN8_VECS_IRQ_SHIFT; + ring = &dev_priv->ring[VECS]; if (vcs & GT_RENDER_USER_INTERRUPT) - notify_ring(dev, &dev_priv->ring[VECS]); + notify_ring(dev, ring); if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER("TODO: Context switch\n"); + intel_execlists_handle_ctx_events(ring); } else DRM_ERROR("The master control interrupt lied 
(GT3)!\n"); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e81f5f6c49b9..22f6a7c0cb18 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -49,6 +49,22 @@ #define RING_ELSP(ring) ((ring)->mmio_base+0x230) #define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234) #define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) +#define RING_CONTEXT_STATUS_BUF(ring) ((ring)->mmio_base+0x370) +#define RING_CONTEXT_STATUS_PTR(ring) ((ring)->mmio_base+0x3a0) + +#define RING_EXECLIST_QFULL (1 << 0x2) +#define RING_EXECLIST1_VALID (1 << 0x3) +#define RING_EXECLIST0_VALID (1 << 0x4) +#define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE) +#define RING_EXECLIST1_ACTIVE (1 << 0x11) +#define RING_EXECLIST0_ACTIVE (1 << 0x12) + +#define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0) +#define GEN8_CTX_STATUS_PREEMPTED (1 << 1) +#define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2) +#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3) +#define GEN8_CTX_STATUS_COMPLETE (1 << 4) +#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15) #define CTX_LRI_HEADER_0 0x01 #define CTX_CONTEXT_CONTROL 0x02 @@ -150,6 +166,7 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, struct drm_i915_private *dev_priv = ring->dev->dev_private; uint64_t temp = 0; uint32_t desc[4]; + unsigned long flags; /* XXX: You must always write both descriptors in the order below. */ if (ctx_obj1) @@ -163,9 +180,17 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, desc[3] = (u32)(temp >> 32); desc[2] = (u32)temp; - /* Set Force Wakeup bit to prevent GT from entering C6 while - * ELSP writes are in progress */ - gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + /* Set Force Wakeup bit to prevent GT from entering C6 while ELSP writes + * are in progress. + * + * The other problem is that we can't just call gen6_gt_force_wake_get() + * because that function calls intel_runtime_pm_get(), which might sleep. + * Instead, we do the runtime_pm_get/put when creating/destroying requests. + */ + spin_lock_irqsave(&dev_priv->uncore.lock, flags); + if (dev_priv->uncore.forcewake_count++ == 0) + dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL); + spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); I915_WRITE(RING_ELSP(ring), desc[1]); I915_WRITE(RING_ELSP(ring), desc[0]); @@ -176,7 +201,11 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, /* ELSP is a wo register, so use another nearby reg for posting instead */ POSTING_READ(RING_EXECLIST_STATUS(ring)); - gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); + /* Release Force Wakeup (see the big comment above). 
*/ + spin_lock_irqsave(&dev_priv->uncore.lock, flags); + if (--dev_priv->uncore.forcewake_count == 0) + dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_ALL); + spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); } static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail) @@ -224,6 +253,9 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) { struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL; struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL; + struct drm_i915_private *dev_priv = ring->dev->dev_private; + + assert_spin_locked(&ring->execlist_lock); if (list_empty(&ring->execlist_queue)) return; @@ -237,8 +269,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) /* Same ctx: ignore first request, as second request * will update tail past first request's workload */ list_del(&req0->execlist_link); - i915_gem_context_unreference(req0->ctx); - kfree(req0); + queue_work(dev_priv->wq, &req0->work); req0 = cursor; } else { req1 = cursor; @@ -251,11 +282,97 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) req1 ? req1->tail : 0)); } +static bool execlists_check_remove_request(struct intel_engine_cs *ring, + u32 request_id) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct intel_ctx_submit_request *head_req; + + assert_spin_locked(&ring->execlist_lock); + + head_req = list_first_entry_or_null(&ring->execlist_queue, + struct intel_ctx_submit_request, + execlist_link); + + if (head_req != NULL) { + struct drm_i915_gem_object *ctx_obj = + head_req->ctx->engine[ring->id].state; + if (intel_execlists_ctx_id(ctx_obj) == request_id) { + list_del(&head_req->execlist_link); + queue_work(dev_priv->wq, &head_req->work); + return true; + } + } + + return false; +} + +void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + u32 status_pointer; + u8 read_pointer; + u8 write_pointer; + u32 status; + u32 status_id; + u32 submit_contexts = 0; + + status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring)); + + read_pointer = ring->next_context_status_buffer; + write_pointer = status_pointer & 0x07; + if (read_pointer > write_pointer) + write_pointer += 6; + + spin_lock(&ring->execlist_lock); + + while (read_pointer < write_pointer) { + read_pointer++; + status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + + (read_pointer % 6) * 8); + status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + + (read_pointer % 6) * 8 + 4); + + if (status & GEN8_CTX_STATUS_COMPLETE) { + if (execlists_check_remove_request(ring, status_id)) + submit_contexts++; + } + } + + if (submit_contexts != 0) + execlists_context_unqueue(ring); + + spin_unlock(&ring->execlist_lock); + + WARN(submit_contexts > 2, "More than two context complete events?\n"); + ring->next_context_status_buffer = write_pointer % 6; + + I915_WRITE(RING_CONTEXT_STATUS_PTR(ring), + ((u32)ring->next_context_status_buffer & 0x07) << 8); +} + +static void execlists_free_request_task(struct work_struct *work) +{ + struct intel_ctx_submit_request *req = + container_of(work, struct intel_ctx_submit_request, work); + struct drm_device *dev = req->ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + intel_runtime_pm_put(dev_priv); + + mutex_lock(&dev->struct_mutex); + i915_gem_context_unreference(req->ctx); + mutex_unlock(&dev->struct_mutex); + + kfree(req); +} + static int execlists_context_queue(struct intel_engine_cs *ring, struct intel_context *to, u32 
tail) { struct intel_ctx_submit_request *req = NULL; + struct drm_i915_private *dev_priv = ring->dev->dev_private; unsigned long flags; bool was_empty; @@ -266,6 +383,9 @@ static int execlists_context_queue(struct intel_engine_cs *ring, i915_gem_context_reference(req->ctx); req->ring = ring; req->tail = tail; + INIT_WORK(&req->work, execlists_free_request_task); + + intel_runtime_pm_get(dev_priv); spin_lock_irqsave(&ring->execlist_lock, flags); @@ -907,6 +1027,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin INIT_LIST_HEAD(&ring->execlist_queue); spin_lock_init(&ring->execlist_lock); + ring->next_context_status_buffer = 0; ret = intel_lr_context_deferred_create(dctx, ring); if (ret) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 3c389b3a2b75..a3f135cf439e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -65,6 +65,9 @@ struct intel_ctx_submit_request { u32 tail; struct list_head execlist_link; + struct work_struct work; }; +void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring); + #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 670262dabb6c..9cbf7b0ebc99 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -233,6 +233,7 @@ struct intel_engine_cs { /* Execlists */ spinlock_t execlist_lock; struct list_head execlist_queue; + u8 next_context_status_buffer; u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */ int (*emit_request)(struct intel_ringbuffer *ringbuf); int (*emit_flush)(struct intel_ringbuffer *ringbuf, -- cgit v1.2.3 From e1fee72c2ea2e9c0c6e6743d32a6832f21337d6c Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:40 +0100 Subject: drm/i915/bdw: Avoid non-lite-restore preemptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the current Execlists feeding mechanism, full preemption is not supported yet: only lite-restores are allowed (that is: the GPU simply samples a new tail pointer for the context currently in execution). But we have identified a scenario in which a full preemption occurs: 1) We submit two contexts for execution (A & B). 2) The GPU finishes with the first one (A), switches to the second one (B) and informs us. 3) We submit B again (hoping to cause a lite restore) together with C, but in the time we spend writing to the ELSP, the GPU finishes B. 4) The GPU starts executing B again (since we told it so). 5) We receive a B finished interrupt and, mistakenly, we submit C (again) and D, causing a full preemption of B. The race is avoided by keeping track of how many times a context has been submitted to the hardware and by better discriminating the received context switch interrupts: in the example, when we have submitted B twice, we won't submit C and D as soon as we receive the notification that B is completed because we were expecting to get a LITE_RESTORE and we didn't, so we know a second completion will be received shortly. Without this explicit checking, somehow, the batch buffer execution order gets messed with. This can be verified with the IGT test I sent together with the series. I don't know the exact mechanism by which the pre-emption messes with the execution order but, since other people are working on the Scheduler + Preemption on Execlists, I didn't try to fix it.
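Put differently, each queued request carries a count of how many times it has been written to the ELSP; a completion event merely decrements that count, and the request is only retired when it reaches zero, so the spurious completion produced by the resubmission race is absorbed. A tiny sketch of that rule (the names are illustrative, not the patch's):

struct sketch_req {
	int elsp_submitted; /* how many times this request hit the ELSP */
};

/* Called for each completion event naming this request's context;
 * returns 1 only when the request may really be retired. */
static int sketch_on_context_complete(struct sketch_req *req)
{
	if (--req->elsp_submitted <= 0)
		return 1; /* last outstanding submission: retire it */
	return 0; /* another completion is still expected: keep it queued */
}

With the example above, submitting B twice leaves elsp_submitted at 2, so the first completion event only drops it to 1 and B stays at the head of the queue; C and D are not submitted until the second event arrives.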
In this series, only Lite Restores are supported (other kinds of preemption WARN). v2: elsp_submitted belongs in the new intel_ctx_submit_request. Several rebase changes. v3: Clarify how the race is avoided, as requested by Daniel. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Align function parameters ...] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 29 +++++++++++++++++++++++++---- drivers/gpu/drm/i915/intel_lrc.h | 2 ++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 22f6a7c0cb18..0f1b6b2b0f0e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -268,6 +268,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) } else if (req0->ctx == cursor->ctx) { /* Same ctx: ignore first request, as second request * will update tail past first request's workload */ + cursor->elsp_submitted = req0->elsp_submitted; list_del(&req0->execlist_link); queue_work(dev_priv->wq, &req0->work); req0 = cursor; @@ -277,9 +278,15 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) } } + WARN_ON(req1 && req1->elsp_submitted); + WARN_ON(execlists_submit_context(ring, req0->ctx, req0->tail, req1 ? req1->ctx : NULL, req1 ? req1->tail : 0)); + + req0->elsp_submitted++; + if (req1) + req1->elsp_submitted++; } static bool execlists_check_remove_request(struct intel_engine_cs *ring, @@ -298,9 +305,14 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring, struct drm_i915_gem_object *ctx_obj = head_req->ctx->engine[ring->id].state; if (intel_execlists_ctx_id(ctx_obj) == request_id) { - list_del(&head_req->execlist_link); - queue_work(dev_priv->wq, &head_req->work); - return true; + WARN(head_req->elsp_submitted == 0, + "Never submitted head request\n"); + + if (--head_req->elsp_submitted <= 0) { + list_del(&head_req->execlist_link); + queue_work(dev_priv->wq, &head_req->work); + return true; + } } } @@ -333,7 +345,16 @@ void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring) status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + (read_pointer % 6) * 8 + 4); - if (status & GEN8_CTX_STATUS_COMPLETE) { + if (status & GEN8_CTX_STATUS_PREEMPTED) { + if (status & GEN8_CTX_STATUS_LITE_RESTORE) { + if (execlists_check_remove_request(ring, status_id)) + WARN(1, "Lite Restored request removed from queue\n"); + } else + WARN(1, "Preemption without Lite Restore\n"); + } + + if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) || + (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) { if (execlists_check_remove_request(ring, status_id)) submit_contexts++; } diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index a3f135cf439e..331c6c2ba376 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -66,6 +66,8 @@ struct intel_ctx_submit_request { struct list_head execlist_link; struct work_struct work; + + int elsp_submitted; }; void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring); -- cgit v1.2.3 From f1ad5a1fd4127b3a5e21b8f5ef7f1921a5d3063e Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:41 +0100 Subject: drm/i915/bdw: Help out the ctx switch interrupt handler If we receive a storm of requests for the same context (see gem_storedw_loop_*) we might end up iterating over too many elements in interrupt time, looking for contexts to squash together. Instead, share the burden by giving more intelligence to the queue function. 
At most, the interrupt will iterate over three elements. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0f1b6b2b0f0e..6f6c5a931faf 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -392,10 +392,10 @@ static int execlists_context_queue(struct intel_engine_cs *ring, struct intel_context *to, u32 tail) { - struct intel_ctx_submit_request *req = NULL; + struct intel_ctx_submit_request *req = NULL, *cursor; struct drm_i915_private *dev_priv = ring->dev->dev_private; unsigned long flags; - bool was_empty; + int num_elements = 0; req = kzalloc(sizeof(*req), GFP_KERNEL); if (req == NULL) @@ -410,9 +410,27 @@ static int execlists_context_queue(struct intel_engine_cs *ring, spin_lock_irqsave(&ring->execlist_lock, flags); - was_empty = list_empty(&ring->execlist_queue); + list_for_each_entry(cursor, &ring->execlist_queue, execlist_link) + if (++num_elements > 2) + break; + + if (num_elements > 2) { + struct intel_ctx_submit_request *tail_req; + + tail_req = list_last_entry(&ring->execlist_queue, + struct intel_ctx_submit_request, + execlist_link); + + if (to == tail_req->ctx) { + WARN(tail_req->elsp_submitted != 0, + "More than 2 already-submitted reqs queued\n"); + list_del(&tail_req->execlist_link); + queue_work(dev_priv->wq, &tail_req->work); + } + } + list_add_tail(&req->execlist_link, &ring->execlist_queue); - if (was_empty) + if (num_elements == 0) execlists_context_unqueue(ring); spin_unlock_irqrestore(&ring->execlist_lock, flags); -- cgit v1.2.3 From 4ed91096881449012b14b1e879f40b4a37533e0e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 8 Aug 2014 20:27:01 +0200 Subject: drm/i915: Track cursor changes as frontbuffer tracking flushes We treat other plane updates in the same fashion. Spotted because Rodrigo kept reporting a bug in the PSR code where the frontbuffer was eternally stuck with a dirty cursor bit set. The psr testcase should have caught this, but that i-g-t is kaputt. Rodrigo is signed up to fix that. Cc: Rodrigo Vivi Tested-by-and-Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fd15601f6360..b2e4eac7b70b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11782,6 +11782,10 @@ intel_cursor_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, return intel_crtc_cursor_set_obj(crtc, obj, crtc_w, crtc_h); } else { intel_crtc_update_cursor(crtc, visible); + + intel_frontbuffer_flip(crtc->dev, + INTEL_FRONTBUFFER_CURSOR(intel_crtc->pipe)); + + return 0; } } -- cgit v1.2.3 From b7c71823f11158340b9d61325d3c44124650dc4e Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Fri, 15 Aug 2014 12:01:31 +0100 Subject: drm/i915/bdw: Don't write PDP in the legacy way when using LRCs This is mostly for correctness so that we know we are running the LR context correctly (that is, the PDPs are contained inside the context object). v2: Move the check to inside the enable PPGTT function. The switch happens in two places: the legacy context switch (that we won't hit when Execlists are enabled) and the PPGTT enable, which unfortunately we need. 
This would look much nicer if the ppgtt->enable was part of the ring init, where it logically belongs. v3: Move the check to the start of the enable PPGTT function. None of the legacy PPGTT enabling is required when using LRCs as the PPGTT is enabled in the context descriptor and the PDPs are written in the LRC. v4: Clarify comment based on review feedback. Signed-off-by: Oscar Mateo Signed-off-by: Thomas Daniel Reviewed-by: Damien Lespiau [danvet: Resolve conflicts with ppgtt_enable rework.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index d97b280861ee..4db237065610 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -826,6 +826,12 @@ static void gen8_ppgtt_enable(struct drm_device *dev) struct intel_engine_cs *ring; int j; + /* In the case of execlists, PPGTT is enabled by the context descriptor + * and the PDPs are contained within the context itself. We don't + * need to do anything here. */ + if (i915.enable_execlists) + return; + for_each_ring(ring, dev_priv, j) { I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); -- cgit v1.2.3 From cc9130be805d955f0e06642e57741dd9df1fbc86 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:42 +0100 Subject: drm/i915/bdw: Make sure gpu reset still works with Execlists If we reset a ring after a hang, we have to make sure that we clear out all queued Execlists requests. v2: The ring is, at this point, already being correctly re-programmed for Execlists, and the hangcheck counters cleared. v3: Daniel suggests dropping the "if (execlists)" because the Execlists queue should be empty in legacy mode (which is true, if we do the INIT_LIST_HEAD). 
v4: Do the pending intel_runtime_pm_put Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 12 ++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 32fa1e9eb844..aa4103bdd352 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2551,6 +2551,18 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, i915_gem_free_request(request); } + while (!list_empty(&ring->execlist_queue)) { + struct intel_ctx_submit_request *submit_req; + + submit_req = list_first_entry(&ring->execlist_queue, + struct intel_ctx_submit_request, + execlist_link); + list_del(&submit_req->execlist_link); + intel_runtime_pm_put(dev_priv); + i915_gem_context_unreference(submit_req->ctx); + kfree(submit_req); + } + /* These may not have been flush before the reset, do so now */ kfree(ring->preallocated_lazy_request); ring->preallocated_lazy_request = NULL; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 13543f8528c2..4fb1ec95ec08 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1612,6 +1612,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, ring->dev = dev; INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); + INIT_LIST_HEAD(&ring->execlist_queue); ringbuf->size = 32 * PAGE_SIZE; ringbuf->ring = ring; memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno)); -- cgit v1.2.3 From 71386ef9008817feebd863e46d8711ebe9e7cbbb Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:44 +0100 Subject: drm/i915/bdw: Disable semaphores for Execlists Up until recently, semaphores weren't enabled in BDW so we didn't care about them. But then Rodrigo came and enabled them: commit 521e62e49a42661a4ee0102644517dbe2f100a23 Author: Rodrigo Vivi drm/i915: Enable semaphores on BDW So now we have to explicitly disable them for Execlists until both features play nicely. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2f112853c36f..117f5c16df74 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -481,6 +481,10 @@ bool i915_semaphore_is_enabled(struct drm_device *dev) if (i915.semaphores >= 0) return i915.semaphores; + /* TODO: make semaphores and Execlists play nicely together */ + if (i915.enable_execlists) + return false; + /* Until we get further testing... */ if (IS_GEN8(dev)) return false; -- cgit v1.2.3 From 4ba70e448be91f52032595678c306e4aee2fae5c Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 7 Aug 2014 13:23:20 +0100 Subject: drm/i915/bdw: Display execlists info in debugfs v2: Warn and return if LRCs are not enabled. v3: Grab the Execlists spinlock (noticed by Daniel Vetter). Signed-off-by: Oscar Mateo v4: Lock the struct mutex for atomic state capture Signed-off-by: Thomas Daniel Reviewed-by: Damien Lespiau [danvet: Checkpatch.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 81 +++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 6 --- drivers/gpu/drm/i915/intel_lrc.h | 7 ++++ 3 files changed, 88 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d42db6bc34e0..68335813ef4c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1721,6 +1721,86 @@ static int i915_context_status(struct seq_file *m, void *unused) return 0; } +static int i915_execlists(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + u32 status_pointer; + u8 read_pointer; + u8 write_pointer; + u32 status; + u32 ctx_id; + struct list_head *cursor; + int ring_id, i; + int ret; + + if (!i915.enable_execlists) { + seq_puts(m, "Logical Ring Contexts are disabled\n"); + return 0; + } + + ret = mutex_lock_interruptible(&dev->struct_mutex); + if (ret) + return ret; + + for_each_ring(ring, dev_priv, ring_id) { + struct intel_ctx_submit_request *head_req = NULL; + int count = 0; + unsigned long flags; + + seq_printf(m, "%s\n", ring->name); + + status = I915_READ(RING_EXECLIST_STATUS(ring)); + ctx_id = I915_READ(RING_EXECLIST_STATUS(ring) + 4); + seq_printf(m, "\tExeclist status: 0x%08X, context: %u\n", + status, ctx_id); + + status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring)); + seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer); + + read_pointer = ring->next_context_status_buffer; + write_pointer = status_pointer & 0x07; + if (read_pointer > write_pointer) + write_pointer += 6; + seq_printf(m, "\tRead pointer: 0x%08X, write pointer 0x%08X\n", + read_pointer, write_pointer); + + for (i = 0; i < 6; i++) { + status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + 8*i); + ctx_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + 8*i + 4); + + seq_printf(m, "\tStatus buffer %d: 0x%08X, context: %u\n", + i, status, ctx_id); + } + + spin_lock_irqsave(&ring->execlist_lock, flags); + list_for_each(cursor, &ring->execlist_queue) + count++; + head_req = list_first_entry_or_null(&ring->execlist_queue, + struct intel_ctx_submit_request, execlist_link); + spin_unlock_irqrestore(&ring->execlist_lock, flags); + + seq_printf(m, "\t%d requests in queue\n", count); + if (head_req) { + struct drm_i915_gem_object *ctx_obj; + + ctx_obj = head_req->ctx->engine[ring_id].state; + seq_printf(m, "\tHead request id: %u\n", + intel_execlists_ctx_id(ctx_obj)); + seq_printf(m, "\tHead request tail: %u\n", + head_req->tail); + } + + seq_putc(m, '\n'); + } + + mutex_unlock(&dev->struct_mutex); + + return 0; +} + static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; @@ -3974,6 +4054,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_opregion", i915_opregion, 0}, {"i915_gem_framebuffer", i915_gem_framebuffer_info, 0}, {"i915_context_status", i915_context_status, 0}, + {"i915_execlists", i915_execlists, 0}, {"i915_gen6_forcewake_count", i915_gen6_forcewake_count_info, 0}, {"i915_swizzle_info", i915_swizzle_info, 0}, {"i915_ppgtt_info", i915_ppgtt_info, 0}, diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6f6c5a931faf..cc923a96fa4c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -46,12 +46,6 @@ 
#define GEN8_LR_CONTEXT_ALIGN 4096 -#define RING_ELSP(ring) ((ring)->mmio_base+0x230) -#define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234) -#define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) -#define RING_CONTEXT_STATUS_BUF(ring) ((ring)->mmio_base+0x370) -#define RING_CONTEXT_STATUS_PTR(ring) ((ring)->mmio_base+0x3a0) - #define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST1_VALID (1 << 0x3) #define RING_EXECLIST0_VALID (1 << 0x4) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 331c6c2ba376..117d1a4eb3b9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -24,6 +24,13 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ +/* Execlists regs */ +#define RING_ELSP(ring) ((ring)->mmio_base+0x230) +#define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234) +#define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) +#define RING_CONTEXT_STATUS_BUF(ring) ((ring)->mmio_base+0x370) +#define RING_CONTEXT_STATUS_PTR(ring) ((ring)->mmio_base+0x3a0) + /* Logical Rings */ void intel_logical_ring_stop(struct intel_engine_cs *ring); void intel_logical_ring_cleanup(struct intel_engine_cs *ring); -- cgit v1.2.3 From c9fe99bd4c4f8730207fed5e863d8f25224fd20b Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:46 +0100 Subject: drm/i915/bdw: Display context backing obj & ringbuffer info in debugfs Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 68335813ef4c..4f279cfe67b8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1677,6 +1677,14 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) return 0; } +static void describe_ctx_ringbuf(struct seq_file *m, + struct intel_ringbuffer *ringbuf) +{ + seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)", + ringbuf->space, ringbuf->head, ringbuf->tail, + ringbuf->last_retired_head); +} + static int i915_context_status(struct seq_file *m, void *unused) { struct drm_info_node *node = m->private; @@ -1703,16 +1711,37 @@ static int i915_context_status(struct seq_file *m, void *unused) } list_for_each_entry(ctx, &dev_priv->context_list, link) { - if (ctx->legacy_hw_ctx.rcs_state == NULL) + if (!i915.enable_execlists && + ctx->legacy_hw_ctx.rcs_state == NULL) continue; seq_puts(m, "HW context "); describe_ctx(m, ctx); - for_each_ring(ring, dev_priv, i) + for_each_ring(ring, dev_priv, i) { if (ring->default_context == ctx) - seq_printf(m, "(default context %s) ", ring->name); + seq_printf(m, "(default context %s) ", + ring->name); + } + + if (i915.enable_execlists) { + seq_putc(m, '\n'); + for_each_ring(ring, dev_priv, i) { + struct drm_i915_gem_object *ctx_obj = + ctx->engine[i].state; + struct intel_ringbuffer *ringbuf = + ctx->engine[i].ringbuf; + + seq_printf(m, "%s: ", ring->name); + if (ctx_obj) + describe_obj(m, ctx_obj); + if (ringbuf) + describe_ctx_ringbuf(m, ringbuf); + seq_putc(m, '\n'); + } + } else { + describe_obj(m, ctx->legacy_hw_ctx.rcs_state); + } - describe_obj(m, ctx->legacy_hw_ctx.rcs_state); seq_putc(m, '\n'); } -- cgit v1.2.3 From c0ab1ae9028f14bcb7bfb655bd2120c60681c479 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 7 Aug 2014 13:24:26 +0100 Subject: drm/i915/bdw: 
Print context state in debugfs This has turned out to be really handy in debug so far. Update: Since writing this patch, I've gotten similar code upstream for error state. I've used it quite a bit in debugfs however, and I'd like to keep it here at least until preemption is working. Signed-off-by: Ben Widawsky This patch was accidentally dropped in the first Execlists version, and it has been very useful indeed. Put it back again, but as a standalone debugfs file. Signed-off-by: Oscar Mateo v2: Take the device struct_mutex rather than mode_config mutex for atomic state capture. Signed-off-by: Thomas Daniel Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 52 +++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4f279cfe67b8..6c82bdaa0822 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1750,6 +1750,57 @@ static int i915_context_status(struct seq_file *m, void *unused) return 0; } +static int i915_dump_lrc(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + struct intel_context *ctx; + int ret, i; + + if (!i915.enable_execlists) { + seq_printf(m, "Logical Ring Contexts are disabled\n"); + return 0; + } + + ret = mutex_lock_interruptible(&dev->struct_mutex); + if (ret) + return ret; + + list_for_each_entry(ctx, &dev_priv->context_list, link) { + for_each_ring(ring, dev_priv, i) { + struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state; + + if (ring->default_context == ctx) + continue; + + if (ctx_obj) { + struct page *page = i915_gem_object_get_page(ctx_obj, 1); + uint32_t *reg_state = kmap_atomic(page); + int j; + + seq_printf(m, "CONTEXT: %s %u\n", ring->name, + intel_execlists_ctx_id(ctx_obj)); + + for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) { + seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n", + i915_gem_obj_ggtt_offset(ctx_obj) + 4096 + (j * 4), + reg_state[j], reg_state[j + 1], + reg_state[j + 2], reg_state[j + 3]); + } + kunmap_atomic(reg_state); + + seq_putc(m, '\n'); + } + } + } + + mutex_unlock(&dev->struct_mutex); + + return 0; +} + static int i915_execlists(struct seq_file *m, void *data) { struct drm_info_node *node = (struct drm_info_node *)m->private; @@ -4083,6 +4134,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_opregion", i915_opregion, 0}, {"i915_gem_framebuffer", i915_gem_framebuffer_info, 0}, {"i915_context_status", i915_context_status, 0}, + {"i915_dump_lrc", i915_dump_lrc, 0}, {"i915_execlists", i915_execlists, 0}, {"i915_gen6_forcewake_count", i915_gen6_forcewake_count_info, 0}, {"i915_swizzle_info", i915_swizzle_info, 0}, -- cgit v1.2.3 From 73e4d07f8ae9cff8c869d73df4e299a3a6f5ad98 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:48 +0100 Subject: drm/i915/bdw: Document Logical Rings, LR contexts and Execlists Add theory of operation notes to intel_lrc.c and comments to externally visible functions. v2: Add notes on logical ring context creation. v3: Use kerneldoc. v4: Integrate it in the DocBook template. Signed-off-by: Thomas Daniel (v1) Signed-off-by: Oscar Mateo (v2, v3) Reviewed-by: Damien Lespiau [danvet: Drop hunk about render ring init function since that's not yet merged.] 
Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 5 + drivers/gpu/drm/i915/intel_lrc.c | 203 ++++++++++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_lrc.h | 30 ++++++ 3 files changed, 237 insertions(+), 1 deletion(-) diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 972759489376..689e3e38b9c3 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -3919,6 +3919,11 @@ int num_ioctls; !Pdrivers/gpu/drm/i915/i915_cmd_parser.c batch buffer command parser !Idrivers/gpu/drm/i915/i915_cmd_parser.c + + Logical Rings, Logical Ring Contexts and Execlists +!Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and Execlists +!Idrivers/gpu/drm/i915/intel_lrc.c + diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index cc923a96fa4c..c096b9b7f22a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -28,13 +28,108 @@ * */ -/* +/** + * DOC: Logical Rings, Logical Ring Contexts and Execlists + * + * Motivation: * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". * These expanded contexts enable a number of new abilities, especially * "Execlists" (also implemented in this file). * + * One of the main differences with the legacy HW contexts is that logical + * ring contexts incorporate many more things into the context's state, like + * PDPs or ringbuffer control registers: + * + * The reason why PDPs are included in the context is straightforward: as + * PPGTTs (per-process GTTs) are actually per-context, having the PDPs + * contained there means you don't need to do a ppgtt->switch_mm yourself, + * instead, the GPU will do it for you on the context switch. + * + * But, what about the ringbuffer control registers (head, tail, etc.)? + * shouldn't we just need a set of those per engine command streamer? This is + * where the name "Logical Rings" starts to make sense: by virtualizing the + * rings, the engine cs shifts to a new "ring buffer" with every context + * switch. When you want to submit a workload to the GPU you: A) choose your + * context, B) find its appropriate virtualized ring, C) write commands to it + * and then, finally, D) tell the GPU to switch to that context. + * + * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch + * to a context is via a context execution list, ergo "Execlists". + * + * LRC implementation: + * Regarding the creation of contexts, we have: + * + * - One global default context. + * - One local default context for each opened fd. + * - One local extra context for each context create ioctl call. + * + * Now that ringbuffers belong per-context (and not per-engine, like before) + * and that contexts are uniquely tied to a given engine (and not reusable, + * like before) we need: + * + * - One ringbuffer per-engine inside each context. + * - One backing object per-engine inside each context. + * + * The global default context starts its life with these new objects fully + * allocated and populated. The local default context for each opened fd is + * more complex, because we don't know at creation time which engine is going + * to use them. To handle this, we have implemented a deferred creation of LR + * contexts: + * + * The local context starts its life as a hollow or blank holder, that only + * gets populated for a given engine once we receive an execbuffer. 
If later + * on we receive another execbuffer ioctl for the same context but a different + * engine, we allocate/populate a new ringbuffer and context backing object and + * so on. + * + * Finally, regarding local contexts created using the ioctl call: as they are + * only allowed with the render ring, we can allocate & populate them right + * away (no need to defer anything, at least for now). + * + * Execlists implementation: * Execlists are the new method by which, on gen8+ hardware, workloads are * submitted for execution (as opposed to the legacy, ringbuffer-based, method). + * This method works as follows: + * + * When a request is committed, its commands (the BB start and any leading or + * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer + * for the appropriate context. The tail pointer in the hardware context is not + * updated at this time, but instead, kept by the driver in the ringbuffer + * structure. A structure representing this request is added to a request queue + * for the appropriate engine: this structure contains a copy of the context's + * tail after the request was written to the ring buffer and a pointer to the + * context itself. + * + * If the engine's request queue was empty before the request was added, the + * queue is processed immediately. Otherwise the queue will be processed during + * a context switch interrupt. In any case, elements on the queue will get sent + * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a + * globally unique 20-bit submission ID. + * + * When execution of a request completes, the GPU updates the context status + * buffer with a context complete event and generates a context switch interrupt. + * During the interrupt handling, the driver examines the events in the buffer: + * for each context complete event, if the announced ID matches that on the head + * of the request queue, then that request is retired and removed from the queue. + * + * After processing, if any requests were retired and the queue is not empty + * then a new execution list can be submitted. The two requests at the front of + * the queue are next to be submitted but since a context may not occur twice in + * an execution list, if subsequent requests have the same ID as the first then + * the two requests must be combined. This is done simply by discarding requests + * at the head of the queue until either only one request is left (in which case + * we use a NULL second context) or the first two requests have unique IDs. + * + * By always executing the first two requests in the queue the driver ensures + * that the GPU is kept as busy as possible. In the case where a single context + * completes but a second context is still executing, the request for this second + * context will be at the head of the queue when we remove the first one. This + * request will then be resubmitted along with a new request for a different context, + * which will cause the hardware to continue executing the second request and queue + * the new request (the GPU detects the condition of a context getting preempted + * with the same context and optimizes the context switch flow by not doing + * preemption, but just sampling the new tail pointer). + * */ #include @@ -109,6 +204,17 @@ enum { }; #define GEN8_CTX_ID_SHIFT 32 +/** + * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists + * @dev: DRM device. + * @enable_execlists: value of i915.enable_execlists module parameter. 
+ * + * Only certain platforms support Execlists (the prerequisites being + * support for Logical Ring Contexts and Aliasing PPGTT or better), + * and only when enabled via module parameter. + * + * Return: 1 if Execlists is supported and has to be enabled. + */ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) { WARN_ON(i915.enable_ppgtt == -1); @@ -123,6 +229,18 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } +/** + * intel_execlists_ctx_id() - get the Execlists Context ID + * @ctx_obj: Logical Ring Context backing object. + * + * Do not confuse with ctx->id! Unfortunately we have a name overload + * here: the old context ID we pass to userspace as a handle so that + * they can refer to a context, and the new context ID we pass to the + * ELSP so that the GPU can inform us of the context status via + * interrupts. + * + * Return: 20-bit globally unique context ID. + */ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) { u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj); @@ -313,6 +431,13 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring, return false; } +/** + * intel_execlists_handle_ctx_events() - handle Context Switch interrupts + * @ring: Engine Command Streamer to handle. + * + * Check the unread Context Status Buffers and manage the submission of new + * contexts to the ELSP accordingly. + */ void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring) { struct drm_i915_private *dev_priv = ring->dev->dev_private; @@ -481,6 +606,23 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, return logical_ring_invalidate_all_caches(ringbuf); } +/** + * execlists_submission() - submit a batchbuffer for execution, Execlists style + * @dev: DRM device. + * @file: DRM file. + * @ring: Engine Command Streamer to submit to. + * @ctx: Context to employ for this submission. + * @args: execbuffer call arguments. + * @vmas: list of vmas. + * @batch_obj: the batchbuffer to submit. + * @exec_start: batchbuffer start virtual address pointer. + * @flags: translated execbuffer call flags. + * + * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts + * away the submission details of the execbuffer ioctl call. + * + * Return: non-zero if the submission fails. + */ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *ring, struct intel_context *ctx, @@ -608,6 +750,15 @@ int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf) return 0; } +/** + * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload + * @ringbuf: Logical Ringbuffer to advance. + * + * The tail is updated in our logical ringbuffer struct, not in the actual context. What + * really happens during submission is that the context and current tail will be placed + * on a queue waiting for the ELSP to be ready to accept a new context submission. At that + * point, the tail *inside* the context is updated and the ELSP written to. + */ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) { struct intel_engine_cs *ring = ringbuf->ring; @@ -781,6 +932,19 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes) return 0; } +/** + * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands + * + * @ringbuf: Logical ringbuffer. + * @num_dwords: number of DWORDs that we plan to write to the ringbuffer. 
+ * + * The ringbuffer might not be ready to accept the commands right away (maybe it needs to + * be wrapped, or wait a bit for the tail to be updated). This function takes care of that + * and also preallocates a request (every workload submission is still mediated through + * requests, same as it did with legacy ringbuffer submission). + * + * Return: non-zero if the ringbuffer is not ready to be written to. + */ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) { struct intel_engine_cs *ring = ringbuf->ring; @@ -1021,6 +1185,12 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) return 0; } +/** + * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer + * + * @ring: Engine Command Streamer. + * + */ void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { struct drm_i915_private *dev_priv = ring->dev->dev_private; @@ -1215,6 +1385,16 @@ static int logical_vebox_ring_init(struct drm_device *dev) return logical_ring_init(dev, ring); } +/** + * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers + * @dev: DRM device. + * + * This function inits the engines for an Execlists submission style (the equivalent in the + * legacy ringbuffer submission world would be i915_gem_init_rings). It does it only for + * those engines that are present in the hardware. + * + * Return: non-zero if the initialization failed. + */ int intel_logical_rings_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -1377,6 +1557,14 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o return 0; } +/** + * intel_lr_context_free() - free the LRC specific bits of a context + * @ctx: the LR context to free. + * + * The real context freeing is done in i915_gem_context_free: this only + * takes care of the bits that are LRC related: the per-engine backing + * objects and the logical ringbuffer. + */ void intel_lr_context_free(struct intel_context *ctx) { int i; @@ -1415,6 +1603,19 @@ static uint32_t get_lr_context_size(struct intel_engine_cs *ring) return ret; } +/** + * intel_lr_context_deferred_create() - create the LRC specific bits of a context + * @ctx: LR context to create. + * @ring: engine to be used with the context. + * + * This function can be called more than once, with different engines, if we plan + * to use the context with them. The context backing objects and the ringbuffers + * (especially the ringbuffer backing objects) suck a lot of memory up, and that's why + * the creation is a deferred call: it's better to make sure first that we need to use + * a given ring with the context. + * + * Return: non-zero on error. + */ int intel_lr_context_deferred_create(struct intel_context *ctx, struct intel_engine_cs *ring) { diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 117d1a4eb3b9..991d4499fb03 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -38,10 +38,21 @@ int intel_logical_rings_init(struct drm_device *dev); int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf); void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf); +/** + * intel_logical_ring_advance() - advance the ringbuffer tail + * @ringbuf: Ringbuffer to advance. + * + * The tail is only updated in our logical ringbuffer struct. 
+ */
 static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
 {
 	ringbuf->tail &= ringbuf->size - 1;
 }
+/**
+ * intel_logical_ring_emit() - write a DWORD to the ringbuffer.
+ * @ringbuf: Ringbuffer to write to.
+ * @data: DWORD to write.
+ */
 static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
 					   u32 data)
 {
@@ -66,6 +77,25 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
 			       u64 exec_start, u32 flags);
 u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
 
+/**
+ * struct intel_ctx_submit_request - queued context submission request
+ * @ctx: Context to submit to the ELSP.
+ * @ring: Engine to submit it to.
+ * @tail: how far into the context's ringbuffer this request goes.
+ * @execlist_link: link in the submission queue.
+ * @work: workqueue for processing this request in a bottom half.
+ * @elsp_submitted: no. of times this request has been sent to the ELSP.
+ *
+ * The ELSP only accepts two elements at a time, so we queue context/tail
+ * pairs on a given queue (ring->execlist_queue) until the hardware is
+ * available. The queue serves a double purpose: we also use it to keep track
+ * of the up to 2 contexts currently in the hardware (usually one in execution
+ * and the other queued up by the GPU): we only remove elements from the head
+ * of the queue when the hardware informs us that an element has been
+ * completed.
+ *
+ * All accesses to the queue are mediated by a spinlock (ring->execlist_lock).
+ */
 struct intel_ctx_submit_request {
 	struct intel_context *ctx;
 	struct intel_engine_cs *ring;
-- 
cgit v1.2.3

From d7f621e50704306c348ccb192f17047f1499f9bc Mon Sep 17 00:00:00 2001
From: Oscar Mateo
Date: Thu, 24 Jul 2014 17:04:49 +0100
Subject: drm/i915/bdw: Enable Logical Ring Contexts (hence, Execlists)

The time has come, the Walrus said, to talk of many things.

Signed-off-by: Oscar Mateo
Reviewed-by: Damien Lespiau
Signed-off-by: Daniel Vetter
---
 drivers/gpu/drm/i915/i915_drv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ed52ac744105..d4d2abbb8e6c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2088,7 +2088,7 @@ struct drm_i915_cmd_table {
 #define I915_NEED_GFX_HWS(dev)	(INTEL_INFO(dev)->need_gfx_hws)
 
 #define HAS_HW_CONTEXTS(dev)		(INTEL_INFO(dev)->gen >= 6)
-#define HAS_LOGICAL_RING_CONTEXTS(dev)	0
+#define HAS_LOGICAL_RING_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 8)
 #define HAS_ALIASING_PPGTT(dev)	(INTEL_INFO(dev)->gen >= 6)
 #define HAS_PPGTT(dev)		(INTEL_INFO(dev)->gen >= 7 && !IS_GEN8(dev))
 #define USES_PPGTT(dev)		(i915.enable_ppgtt)
-- 
cgit v1.2.3

From fd639ac6dcbcbae4f2131bf1390a032df659ffb7 Mon Sep 17 00:00:00 2001
From: Damien Lespiau
Date: Fri, 15 Aug 2014 16:48:36 +0100
Subject: drm/i915/bdw: Disable execlists by default

We still have a few missing bits and pieces before execlists can be
enabled by default, e.g. error capture and render state initialization,
so it wouldn't be wise to enable them by default on BDW just yet.
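
To make the parameter semantics concrete, here is a small standalone model
(hypothetical names and a main() harness, invented for illustration; this is
not the driver's actual intel_sanitize_enable_execlists) of how a -1/0/1
tri-state is resolved once 0 becomes the default:

#include <stdbool.h>
#include <stdio.h>

/* Model of the tri-state: -1 = auto, 0 = disabled (new default), 1 = enabled. */
static int sanitize_enable_execlists(int enable_execlists,
				     bool has_logical_ring_contexts,
				     bool has_ppgtt)
{
	if (enable_execlists == 0)
		return 0;	/* off by request or by default */

	/* Prerequisites: Logical Ring Contexts and Aliasing PPGTT or better. */
	if (!has_logical_ring_contexts || !has_ppgtt)
		return 0;

	if (enable_execlists == 1)
		return 1;	/* explicit opt-in */

	return 0;		/* -1 (auto): keep off while bits are missing */
}

int main(void)
{
	printf("%d\n", sanitize_enable_execlists(0, true, true));	/* 0: the new default */
	printf("%d\n", sanitize_enable_execlists(1, true, true));	/* 1: opt-in on capable hw */
	printf("%d\n", sanitize_enable_execlists(-1, true, true));	/* 0: auto stays off */
	return 0;
}

With the default flipped to 0, only an explicit enable_execlists=1 turns the
feature on; auto (-1) now behaves like off until the missing pieces land.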
Cc: Daniel Vetter
Cc: Thomas Daniel
Signed-off-by: Damien Lespiau
Reviewed-by: Paulo Zanoni
Tested-by: Paulo Zanoni
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82740
Signed-off-by: Daniel Vetter
---
 drivers/gpu/drm/i915/i915_params.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index f7f8350c3793..1dcb1bed5ef8 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -35,7 +35,7 @@ struct i915_params i915 __read_mostly = {
 	.vbt_sdvo_panel_type = -1,
 	.enable_rc6 = -1,
 	.enable_fbc = -1,
-	.enable_execlists = -1,
+	.enable_execlists = 0,
 	.enable_hangcheck = true,
 	.enable_ppgtt = -1,
 	.enable_psr = 1,
@@ -122,7 +122,7 @@ MODULE_PARM_DESC(enable_ppgtt,
 module_param_named(enable_execlists, i915.enable_execlists, int, 0400);
 MODULE_PARM_DESC(enable_execlists,
 	"Override execlists usage. "
-	"(-1=auto [default], 0=disabled, 1=enabled)");
+	"(-1=auto, 0=disabled [default], 1=enabled)");
 
 module_param_named(enable_psr, i915.enable_psr, int, 0600);
 MODULE_PARM_DESC(enable_psr, "Enable PSR (default: true)");
-- 
cgit v1.2.3

From 3a5f87c286515c54ff5c52c3e64d0c522b7570c0 Mon Sep 17 00:00:00 2001
From: Thomas Wood
Date: Wed, 20 Aug 2014 14:45:00 +0100
Subject: drm: fix plane rotation when restoring fbdev configuration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make sure plane rotation is reset correctly when restoring the fbdev
configuration by using drm_mode_plane_set_obj_prop which calls the
driver's set_property callback.

The rotation reset feature was introduced in commit 9783de2
(drm: Resetting rotation property) and the callback issue was
originally addressed in a previous version of the patch, but the fix
was not present in the final version.

v2: Fix documentation warning
    Add some more details to the commit message (Daniel Vetter)

Testcase: igt/kms_rotation_crc
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82236
Cc: Sonika Jindal
Cc: Ville Syrjälä
Cc: Dave Airlie
Cc: Daniel Vetter
Signed-off-by: Thomas Wood
Signed-off-by: Daniel Vetter
---
 drivers/gpu/drm/drm_crtc.c      | 25 ++++++++++++++++++++-----
 drivers/gpu/drm/drm_fb_helper.c |  6 +++---
 include/drm/drm_crtc.h          |  3 +++
 3 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 3c4a62169f28..d7e4c0e2e796 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -4156,12 +4156,25 @@ static int drm_mode_crtc_set_obj_prop(struct drm_mode_object *obj,
 	return ret;
 }
 
-static int drm_mode_plane_set_obj_prop(struct drm_mode_object *obj,
-				       struct drm_property *property,
-				       uint64_t value)
+/**
+ * drm_mode_plane_set_obj_prop - set the value of a property
+ * @plane: drm plane object to set property value for
+ * @property: property to set
+ * @value: value the property should be set to
+ *
+ * This function sets a given property on a given plane object. This function
+ * calls the driver's ->set_property callback and changes the software state of
+ * the property if the callback succeeds.
+ *
+ * Returns:
+ * Zero on success, error code on failure.
+ */
+int drm_mode_plane_set_obj_prop(struct drm_plane *plane,
+				struct drm_property *property,
+				uint64_t value)
 {
 	int ret = -EINVAL;
-	struct drm_plane *plane = obj_to_plane(obj);
+	struct drm_mode_object *obj = &plane->base;
 
 	if (plane->funcs->set_property)
 		ret = plane->funcs->set_property(plane, property, value);
@@ -4170,6 +4183,7 @@ static int drm_mode_plane_set_obj_prop(struct drm_mode_object *obj,
 
 	return ret;
 }
+EXPORT_SYMBOL(drm_mode_plane_set_obj_prop);
 
 /**
  * drm_mode_getproperty_ioctl - get the current value of a object's property
@@ -4308,7 +4322,8 @@ int drm_mode_obj_set_property_ioctl(struct drm_device *dev, void *data,
 		ret = drm_mode_crtc_set_obj_prop(arg_obj, property, arg->value);
 		break;
 	case DRM_MODE_OBJECT_PLANE:
-		ret = drm_mode_plane_set_obj_prop(arg_obj, property, arg->value);
+		ret = drm_mode_plane_set_obj_prop(obj_to_plane(arg_obj),
+						  property, arg->value);
 		break;
 	}
 
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index d139eddb3d61..99569ee5adee 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -350,9 +350,9 @@ static bool restore_fbdev_mode(struct drm_fb_helper *fb_helper)
 		drm_plane_force_disable(plane);
 
 		if (dev->mode_config.rotation_property) {
-			drm_object_property_set_value(&plane->base,
-					dev->mode_config.rotation_property,
-					BIT(DRM_ROTATE_0));
+			drm_mode_plane_set_obj_prop(plane,
+					dev->mode_config.rotation_property,
+					BIT(DRM_ROTATE_0));
 		}
 	}
 
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 62f73bdbcc47..38fae5d9ad73 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -1121,6 +1121,9 @@ extern int drm_mode_obj_get_properties_ioctl(struct drm_device *dev,
 					     void *data, struct drm_file *file_priv);
 extern int drm_mode_obj_set_property_ioctl(struct drm_device *dev, void *data,
 					   struct drm_file *file_priv);
+extern int drm_mode_plane_set_obj_prop(struct drm_plane *plane,
+				       struct drm_property *property,
+				       uint64_t value);
 extern void drm_fb_get_bpp_depth(uint32_t format, unsigned int *depth,
 				 int *bpp);
-- 
cgit v1.2.3

From c28135481428d0674fcc1da0740ed3f4343df5b2 Mon Sep 17 00:00:00 2001
From: Daniel Vetter
Date: Fri, 22 Aug 2014 22:39:37 +0200
Subject: drm/i915: Update DRIVER_DATE to 20140822

Signed-off-by: Daniel Vetter
---
 drivers/gpu/drm/i915/i915_drv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d4d2abbb8e6c..d309725a7d7b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -54,7 +54,7 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20140808"
+#define DRIVER_DATE		"20140822"
 
 enum pipe {
 	INVALID_PIPE = -1,
-- 
cgit v1.2.3

From 604effb782a8a4d9a20c8af16bcbf86d742db119 Mon Sep 17 00:00:00 2001
From: Imre Deak
Date: Tue, 26 Aug 2014 13:26:56 +0300
Subject: drm/i915: fix suspend/resume for GENs w/o runtime PM support

Before sharing common parts between the system and runtime s/r handlers,
we WARNed if the runtime s/r handlers were called on GENs that didn't
support RPM. But this WARN is not correct if the same handler is called
from the system s/r path, since that can happen on any platform. This
also broke system s/r on old platforms.
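
The shape of the bug can be seen in a small standalone model (stub names
invented for illustration; this is not the actual driver code): a helper
shared by both paths must not treat missing runtime PM support as an error;
that check belongs in the runtime handlers alone.

#include <stdbool.h>
#include <stdio.h>

static bool has_runtime_pm;	/* stands in for HAS_RUNTIME_PM(dev) */

/* Shared helper: before the fix it carried a WARN on !HAS_RUNTIME_PM,
 * which fired on every system resume on GENs without RPM support. */
static int resume_prepare(bool rpm_resume)
{
	(void)rpm_resume;
	return 0;	/* platform-agnostic, valid on any GEN */
}

static int runtime_resume(void)
{
	if (!has_runtime_pm)
		return -1;	/* -ENODEV in the real driver: the guard lives here */
	return resume_prepare(true);
}

static int system_resume(void)
{
	return resume_prepare(false);	/* must work on every platform */
}

int main(void)
{
	has_runtime_pm = false;	/* an old GEN without runtime PM */
	printf("system resume:  %d\n", system_resume());	/* 0: fine */
	printf("runtime resume: %d\n", runtime_resume());	/* -1: refused */
	return 0;
}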
The issue was introduced in

commit 016970beb05da6285c2f3ed2bee1c676cb75972e
Author: Sagar Kamble
Date:   Wed Aug 13 23:07:06 2014 +0530

v2:
- remove the WARN and depend on the HAS_RUNTIME_PM check in
  runtime_suspend/resume instead (Daniel)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82751
Signed-off-by: Imre Deak
Signed-off-by: Daniel Vetter
---
 drivers/gpu/drm/i915/i915_drv.c | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 117f5c16df74..0f7a522682a5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1373,7 +1373,9 @@ static int intel_runtime_suspend(struct device *device)
 	if (WARN_ON_ONCE(!(dev_priv->rps.enabled && intel_enable_rc6(dev))))
 		return -ENODEV;
 
-	WARN_ON(!HAS_RUNTIME_PM(dev));
+	if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev)))
+		return -ENODEV;
+
 	assert_force_wake_inactive(dev_priv);
 
 	DRM_DEBUG_KMS("Suspending device\n");
@@ -1441,7 +1443,8 @@ static int intel_runtime_resume(struct device *device)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
-	WARN_ON(!HAS_RUNTIME_PM(dev));
+	if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev)))
+		return -ENODEV;
 
 	DRM_DEBUG_KMS("Resuming device\n");
 
@@ -1476,16 +1479,12 @@ static int intel_suspend_complete(struct drm_i915_private *dev_priv)
 	struct drm_device *dev = dev_priv->dev;
 	int ret;
 
-	if (IS_GEN6(dev)) {
-		ret = 0;
-	} else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
+	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 		ret = hsw_suspend_complete(dev_priv);
-	} else if (IS_VALLEYVIEW(dev)) {
+	else if (IS_VALLEYVIEW(dev))
 		ret = vlv_suspend_complete(dev_priv);
-	} else {
-		ret = -ENODEV;
-		WARN_ON(1);
-	}
+	else
+		ret = 0;
 
 	return ret;
 }
@@ -1501,16 +1500,14 @@ static int intel_resume_prepare(struct drm_i915_private *dev_priv,
 	struct drm_device *dev = dev_priv->dev;
 	int ret;
 
-	if (IS_GEN6(dev)) {
+	if (IS_GEN6(dev))
 		ret = snb_resume_prepare(dev_priv, rpm_resume);
-	} else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
+	else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 		ret = hsw_resume_prepare(dev_priv, rpm_resume);
-	} else if (IS_VALLEYVIEW(dev)) {
+	else if (IS_VALLEYVIEW(dev))
 		ret = vlv_resume_prepare(dev_priv, rpm_resume);
-	} else {
-		WARN_ON(1);
-		ret = -ENODEV;
-	}
+	else
+		ret = 0;
 
 	return ret;
 }
-- 
cgit v1.2.3
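
A note on the design choice in the last two hunks: platforms without a
special suspend/resume sequence now fall through to "nothing to do" instead
of WARN + -ENODEV. A condensed standalone model (enum and stubs invented
for illustration; not the driver's actual dispatch):

#include <stdio.h>

enum platform { PLAT_HSW, PLAT_BDW, PLAT_VLV, PLAT_OTHER };

static int hsw_suspend_complete(void) { return 0; }	/* stub */
static int vlv_suspend_complete(void) { return 0; }	/* stub */

/* "No platform hook" is a successful no-op, not an error, so a shared
 * system suspend path works on every GEN. */
static int suspend_complete(enum platform p)
{
	switch (p) {
	case PLAT_HSW:
	case PLAT_BDW:
		return hsw_suspend_complete();
	case PLAT_VLV:
		return vlv_suspend_complete();
	default:
		return 0;
	}
}

int main(void)
{
	printf("%d\n", suspend_complete(PLAT_OTHER));	/* 0: not an error */
	return 0;
}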