diff options
author | Linus Torvalds | 2019-08-03 03:53:51 +0200 |
---|---|---|
committer | Linus Torvalds | 2019-08-03 03:53:51 +0200 |
commit | 0e31225f99e077d0b8c7f8577aab39e766e2477b (patch) | |
tree | e756d3b365fb9fdb58ef51e30e80a88c7bdd93a6 /drivers/gpu | |
parent | Merge tag 'selinux-pr-20190801' of git://git.kernel.org/pub/scm/linux/kernel/... (diff) | |
parent | Merge tag 'exynos-drm-fixes-for-v5.3-rc3' of git://git.kernel.org/pub/scm/lin... (diff) | |
download | kernel-qcow2-linux-0e31225f99e077d0b8c7f8577aab39e766e2477b.tar.gz kernel-qcow2-linux-0e31225f99e077d0b8c7f8577aab39e766e2477b.tar.xz kernel-qcow2-linux-0e31225f99e077d0b8c7f8577aab39e766e2477b.zip |
Merge tag 'drm-fixes-2019-08-02-1' of git://anongit.freedesktop.org/drm/drm
Pull more drm fixes from Daniel Vetter:
"Dave sends his pull, everyone realizes they've been asleep at the
wheel and hits send on their own pulls :-/
Normally I'd just ignore these all because w/e for me and Dave. But
this time around the latecomers also included drm-intel-fixes, which
failed to send out a -fixes pull thus far for this release (screwed up
vacation coverage, despite that 2/3 maintainers were around ... they
all look appropriately guilty), and that really is overdue to get
landed.
And since I had to do a pull request anyway I pulled the other two
late ones too.
intel fixes (didn't have any ever since the main merge window pull):
- gvt fixes (2 cc: stable)
- fix gpu reset vs mm-shrinker vs wakeup fun (needed a few patches)
- two gem locking fixes (one cc: stable)
- pile of misc fixes all over with minor impact, 6 cc: stable, others
from this window
exynos:
- misc minor fixes
misc:
- some build/Kconfig fixes
- regression fix for vm scalability perf test which seems to mostly
exercise dmesg/console logging ...
- the vgem cache flush fix for arm64 broke the world on x86, so
that's reverted again
* tag 'drm-fixes-2019-08-02-1' of git://anongit.freedesktop.org/drm/drm: (42 commits)
Revert "drm/vgem: fix cache synchronization on arm/arm64"
drm/exynos: fix missing decrement of retry counter
drm/exynos: add CONFIG_MMU dependency
drm/exynos: remove redundant assignment to pointer 'node'
drm/exynos: using dev_get_drvdata directly
drm/bochs: Use shadow buffer for bochs framebuffer console
drm/fb-helper: Instanciate shadow FB if configured in device's mode_config
drm/fb-helper: Map DRM client buffer only when required
drm/client: Support unmapping of DRM client buffers
drm/i915: Only recover active engines
drm/i915: Add a wakeref getter for iff the wakeref is already active
drm/i915: Lift intel_engines_resume() to callers
drm/vgem: fix cache synchronization on arm/arm64
drm/i810: Use CONFIG_PREEMPTION
drm/bridge: tc358764: Fix build error
drm/bridge: lvds-encoder: Fix build error while CONFIG_DRM_KMS_HELPER=m
drm/i915/gvt: Adding ppgtt to GVT GEM context after shadow pdps settled.
drm/i915/gvt: grab runtime pm first for forcewake use
drm/i915/gvt: fix incorrect cache entry for guest page mapping
drm/i915/gvt: Checking workload's gma earlier
...
Diffstat (limited to 'drivers/gpu')
46 files changed, 517 insertions, 256 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 1d80222587ad..3c88420e3497 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -394,7 +394,7 @@ config DRM_R128 config DRM_I810 tristate "Intel I810" # !PREEMPT because of missing ioctl locking - depends on DRM && AGP && AGP_INTEL && (!PREEMPT || BROKEN) + depends on DRM && AGP && AGP_INTEL && (!PREEMPTION || BROKEN) help Choose this option if you have an Intel I810 graphics card. If M is selected, the module will be called i810. AGP support is required diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c index bc19dbd531ef..359030d5d818 100644 --- a/drivers/gpu/drm/bochs/bochs_kms.c +++ b/drivers/gpu/drm/bochs/bochs_kms.c @@ -191,6 +191,7 @@ int bochs_kms_init(struct bochs_device *bochs) bochs->dev->mode_config.fb_base = bochs->fb_base; bochs->dev->mode_config.preferred_depth = 24; bochs->dev->mode_config.prefer_shadow = 0; + bochs->dev->mode_config.prefer_shadow_fbdev = 1; bochs->dev->mode_config.quirk_addfb_prefer_host_byte_order = true; bochs->dev->mode_config.funcs = &bochs_mode_funcs; diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index ee777469293a..e4e22bbae2a7 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -48,6 +48,7 @@ config DRM_DUMB_VGA_DAC config DRM_LVDS_ENCODER tristate "Transparent parallel to LVDS encoder support" depends on OF + select DRM_KMS_HELPER select DRM_PANEL_BRIDGE help Support for transparent parallel to LVDS encoders that don't require @@ -116,9 +117,10 @@ config DRM_THINE_THC63LVD1024 config DRM_TOSHIBA_TC358764 tristate "TC358764 DSI/LVDS bridge" - depends on DRM && DRM_PANEL depends on OF select DRM_MIPI_DSI + select DRM_KMS_HELPER + select DRM_PANEL help Toshiba TC358764 DSI/LVDS bridge driver. diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index 410572f14257..e1dafb0cc5e2 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -254,7 +254,6 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u struct drm_device *dev = client->dev; struct drm_client_buffer *buffer; struct drm_gem_object *obj; - void *vaddr; int ret; buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); @@ -281,6 +280,36 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u buffer->gem = obj; + return buffer; + +err_delete: + drm_client_buffer_delete(buffer); + + return ERR_PTR(ret); +} + +/** + * drm_client_buffer_vmap - Map DRM client buffer into address space + * @buffer: DRM client buffer + * + * This function maps a client buffer into kernel address space. If the + * buffer is already mapped, it returns the mapping's address. + * + * Client buffer mappings are not ref'counted. Each call to + * drm_client_buffer_vmap() should be followed by a call to + * drm_client_buffer_vunmap(); or the client buffer should be mapped + * throughout its lifetime. + * + * Returns: + * The mapped memory's address + */ +void *drm_client_buffer_vmap(struct drm_client_buffer *buffer) +{ + void *vaddr; + + if (buffer->vaddr) + return buffer->vaddr; + /* * FIXME: The dependency on GEM here isn't required, we could * convert the driver handle to a dma-buf instead and use the @@ -289,21 +318,30 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u * fd_install step out of the driver backend hooks, to make that * final step optional for internal users. */ - vaddr = drm_gem_vmap(obj); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto err_delete; - } + vaddr = drm_gem_vmap(buffer->gem); + if (IS_ERR(vaddr)) + return vaddr; buffer->vaddr = vaddr; - return buffer; - -err_delete: - drm_client_buffer_delete(buffer); + return vaddr; +} +EXPORT_SYMBOL(drm_client_buffer_vmap); - return ERR_PTR(ret); +/** + * drm_client_buffer_vunmap - Unmap DRM client buffer + * @buffer: DRM client buffer + * + * This function removes a client buffer's memory mapping. Calling this + * function is only required by clients that manage their buffer mappings + * by themselves. + */ +void drm_client_buffer_vunmap(struct drm_client_buffer *buffer) +{ + drm_gem_vunmap(buffer->gem, buffer->vaddr); + buffer->vaddr = NULL; } +EXPORT_SYMBOL(drm_client_buffer_vunmap); static void drm_client_buffer_rmfb(struct drm_client_buffer *buffer) { diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 1984e5c54d58..a7ba5b4902d6 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -403,6 +403,7 @@ static void drm_fb_helper_dirty_work(struct work_struct *work) struct drm_clip_rect *clip = &helper->dirty_clip; struct drm_clip_rect clip_copy; unsigned long flags; + void *vaddr; spin_lock_irqsave(&helper->dirty_lock, flags); clip_copy = *clip; @@ -412,10 +413,20 @@ static void drm_fb_helper_dirty_work(struct work_struct *work) /* call dirty callback only when it has been really touched */ if (clip_copy.x1 < clip_copy.x2 && clip_copy.y1 < clip_copy.y2) { + /* Generic fbdev uses a shadow buffer */ - if (helper->buffer) + if (helper->buffer) { + vaddr = drm_client_buffer_vmap(helper->buffer); + if (IS_ERR(vaddr)) + return; drm_fb_helper_dirty_blit_real(helper, &clip_copy); - helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, &clip_copy, 1); + } + if (helper->fb->funcs->dirty) + helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, + &clip_copy, 1); + + if (helper->buffer) + drm_client_buffer_vunmap(helper->buffer); } } @@ -604,6 +615,16 @@ void drm_fb_helper_unlink_fbi(struct drm_fb_helper *fb_helper) } EXPORT_SYMBOL(drm_fb_helper_unlink_fbi); +static bool drm_fbdev_use_shadow_fb(struct drm_fb_helper *fb_helper) +{ + struct drm_device *dev = fb_helper->dev; + struct drm_framebuffer *fb = fb_helper->fb; + + return dev->mode_config.prefer_shadow_fbdev || + dev->mode_config.prefer_shadow || + fb->funcs->dirty; +} + static void drm_fb_helper_dirty(struct fb_info *info, u32 x, u32 y, u32 width, u32 height) { @@ -611,7 +632,7 @@ static void drm_fb_helper_dirty(struct fb_info *info, u32 x, u32 y, struct drm_clip_rect *clip = &helper->dirty_clip; unsigned long flags; - if (!helper->fb->funcs->dirty) + if (!drm_fbdev_use_shadow_fb(helper)) return; spin_lock_irqsave(&helper->dirty_lock, flags); @@ -2178,6 +2199,7 @@ int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper, struct drm_framebuffer *fb; struct fb_info *fbi; u32 format; + void *vaddr; DRM_DEBUG_KMS("surface width(%d), height(%d) and bpp(%d)\n", sizes->surface_width, sizes->surface_height, @@ -2200,16 +2222,10 @@ int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper, fbi->fbops = &drm_fbdev_fb_ops; fbi->screen_size = fb->height * fb->pitches[0]; fbi->fix.smem_len = fbi->screen_size; - fbi->screen_buffer = buffer->vaddr; - /* Shamelessly leak the physical address to user-space */ -#if IS_ENABLED(CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM) - if (drm_leak_fbdev_smem && fbi->fix.smem_start == 0) - fbi->fix.smem_start = - page_to_phys(virt_to_page(fbi->screen_buffer)); -#endif + drm_fb_helper_fill_info(fbi, fb_helper, sizes); - if (fb->funcs->dirty) { + if (drm_fbdev_use_shadow_fb(fb_helper)) { struct fb_ops *fbops; void *shadow; @@ -2231,6 +2247,19 @@ int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper, fbi->fbdefio = &drm_fbdev_defio; fb_deferred_io_init(fbi); + } else { + /* buffer is mapped for HW framebuffer */ + vaddr = drm_client_buffer_vmap(fb_helper->buffer); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + fbi->screen_buffer = vaddr; + /* Shamelessly leak the physical address to user-space */ +#if IS_ENABLED(CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM) + if (drm_leak_fbdev_smem && fbi->fix.smem_start == 0) + fbi->fix.smem_start = + page_to_phys(virt_to_page(fbi->screen_buffer)); +#endif } return 0; diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 60ce4a8ad9e1..6f7d3b3b3628 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -2,6 +2,7 @@ config DRM_EXYNOS tristate "DRM Support for Samsung SoC EXYNOS Series" depends on OF && DRM && (ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM || COMPILE_TEST) + depends on MMU select DRM_KMS_HELPER select VIDEOMODE_HELPERS select SND_SOC_HDMI_CODEC if SND_SOC diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index a594ab7be2c0..164d914cbe9a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -44,7 +44,7 @@ static unsigned int fimc_mask = 0xc; module_param_named(fimc_devs, fimc_mask, uint, 0644); MODULE_PARM_DESC(fimc_devs, "Alias mask for assigning FIMC devices to Exynos DRM"); -#define get_fimc_context(dev) platform_get_drvdata(to_platform_device(dev)) +#define get_fimc_context(dev) dev_get_drvdata(dev) enum { FIMC_CLK_LCLK, diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 50904eee96f7..2a3382d43bc9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -267,7 +267,7 @@ static inline void g2d_hw_reset(struct g2d_data *g2d) static int g2d_init_cmdlist(struct g2d_data *g2d) { struct device *dev = g2d->dev; - struct g2d_cmdlist_node *node = g2d->cmdlist_node; + struct g2d_cmdlist_node *node; int nr; int ret; struct g2d_buf_info *buf_info; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 1e4b21c49a06..1c524db9570f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -58,7 +58,7 @@ #define GSC_COEF_DEPTH 3 #define GSC_AUTOSUSPEND_DELAY 2000 -#define get_gsc_context(dev) platform_get_drvdata(to_platform_device(dev)) +#define get_gsc_context(dev) dev_get_drvdata(dev) #define gsc_read(offset) readl(ctx->regs + (offset)) #define gsc_write(cfg, offset) writel(cfg, ctx->regs + (offset)) diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c index 9af096479e1c..b24ba948b725 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c @@ -94,12 +94,12 @@ static inline int scaler_reset(struct scaler_context *scaler) scaler_write(SCALER_CFG_SOFT_RESET, SCALER_CFG); do { cpu_relax(); - } while (retry > 1 && + } while (--retry > 1 && scaler_read(SCALER_CFG) & SCALER_CFG_SOFT_RESET); do { cpu_relax(); scaler_write(1, SCALER_INT_EN); - } while (retry > 0 && scaler_read(SCALER_INT_EN) != 1); + } while (--retry > 0 && scaler_read(SCALER_INT_EN) != 1); return retry ? 0 : -EIO; } diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index c4710889cb32..3ef4e9f573cf 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -765,7 +765,7 @@ parse_psr(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) } if (bdb->version >= 226) { - u32 wakeup_time = psr_table->psr2_tp2_tp3_wakeup_time; + u32 wakeup_time = psr->psr2_tp2_tp3_wakeup_time; wakeup_time = (wakeup_time >> (2 * panel_type)) & 0x3; switch (wakeup_time) { diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 753ac3165061..7b908e10d32e 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -178,6 +178,8 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv) clpchgroup = (sa->deburst * deinterleave / num_channels) << i; bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; + bi->num_qgv_points = qi.num_points; + for (j = 0; j < qi.num_points; j++) { const struct intel_qgv_point *sp = &qi.points[j]; int ct, bw; @@ -195,7 +197,7 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv) bi->deratedbw[j] = min(maxdebw, bw * 9 / 10); /* 90% */ - DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n", + DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%u\n", i, j, bi->num_planes, bi->deratedbw[j]); } @@ -211,14 +213,17 @@ static unsigned int icl_max_bw(struct drm_i915_private *dev_priv, { int i; - /* Did we initialize the bw limits successfully? */ - if (dev_priv->max_bw[0].num_planes == 0) - return UINT_MAX; - for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) { const struct intel_bw_info *bi = &dev_priv->max_bw[i]; + /* + * Pcode will not expose all QGV points when + * SAGV is forced to off/min/med/max. + */ + if (qgv_point >= bi->num_qgv_points) + return UINT_MAX; + if (num_planes >= bi->num_planes) return bi->deratedbw[qgv_point]; } diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 8993ab283562..0d19bbd08122 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2240,6 +2240,17 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) min_cdclk = max(2 * 96000, min_cdclk); /* + * "For DP audio configuration, cdclk frequency shall be set to + * meet the following requirements: + * DP Link Frequency(MHz) | Cdclk frequency(MHz) + * 270 | 320 or higher + * 162 | 200 or higher" + */ + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + intel_crtc_has_dp_encoder(crtc_state) && crtc_state->has_audio) + min_cdclk = max(crtc_state->port_clock, min_cdclk); + + /* * On Valleyview some DSI panels lose (v|h)sync when the clock is lower * than 320000KHz. */ diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 30b97ded6fdd..592b92782fab 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1839,7 +1839,7 @@ static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) /* FIXME: assert CPU port conditions for SNB+ */ } - trace_intel_pipe_enable(dev_priv, pipe); + trace_intel_pipe_enable(crtc); reg = PIPECONF(cpu_transcoder); val = I915_READ(reg); @@ -1880,7 +1880,7 @@ static void intel_disable_pipe(const struct intel_crtc_state *old_crtc_state) */ assert_planes_disabled(crtc); - trace_intel_pipe_disable(dev_priv, pipe); + trace_intel_pipe_disable(crtc); reg = PIPECONF(cpu_transcoder); val = I915_READ(reg); diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index c93ad512014c..2d1939db108f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -438,16 +438,23 @@ icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv, #define ICL_AUX_PW_TO_CH(pw_idx) \ ((pw_idx) - ICL_PW_CTL_IDX_AUX_A + AUX_CH_A) +#define ICL_TBT_AUX_PW_TO_CH(pw_idx) \ + ((pw_idx) - ICL_PW_CTL_IDX_AUX_TBT1 + AUX_CH_C) + static void icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - enum aux_ch aux_ch = ICL_AUX_PW_TO_CH(power_well->desc->hsw.idx); + int pw_idx = power_well->desc->hsw.idx; + bool is_tbt = power_well->desc->hsw.is_tc_tbt; + enum aux_ch aux_ch; u32 val; + aux_ch = is_tbt ? ICL_TBT_AUX_PW_TO_CH(pw_idx) : + ICL_AUX_PW_TO_CH(pw_idx); val = I915_READ(DP_AUX_CH_CTL(aux_ch)); val &= ~DP_AUX_CH_CTL_TBT_IO; - if (power_well->desc->hsw.is_tc_tbt) + if (is_tbt) val |= DP_AUX_CH_CTL_TBT_IO; I915_WRITE(DP_AUX_CH_CTL(aux_ch), val); diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index 2f4894e9a03d..5ddbe71ab423 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -478,13 +478,13 @@ struct psr_table { /* TP wake up time in multiple of 100 */ u16 tp1_wakeup_time; u16 tp2_tp3_wakeup_time; - - /* PSR2 TP2/TP3 wakeup time for 16 panels */ - u32 psr2_tp2_tp3_wakeup_time; } __packed; struct bdb_psr { struct psr_table psr_table[16]; + + /* PSR2 TP2/TP3 wakeup time for 16 panels */ + u32 psr2_tp2_tp3_wakeup_time; } __packed; /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 05011d4a3b88..914b5d4112bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -253,14 +253,15 @@ void i915_gem_resume(struct drm_i915_private *i915) i915_gem_restore_gtt_mappings(i915); i915_gem_restore_fences(i915); + if (i915_gem_init_hw(i915)) + goto err_wedged; + /* * As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset * it and start again. */ - intel_gt_resume(i915); - - if (i915_gem_init_hw(i915)) + if (intel_gt_resume(i915)) goto err_wedged; intel_uc_resume(i915); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 528b61678334..2caa594322bc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -664,7 +664,15 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, for_each_sgt_page(page, sgt_iter, pages) { if (obj->mm.dirty) - set_page_dirty(page); + /* + * As this may not be anonymous memory (e.g. shmem) + * but exist on a real mapping, we have to lock + * the page in order to dirty it -- holding + * the page reference is not sufficient to + * prevent the inode from being truncated. + * Play safe and take the lock. + */ + set_page_dirty_lock(page); mark_page_accessed(page); put_page(page); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 2c454f227c2e..23120901c55f 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -126,6 +126,7 @@ static void intel_context_retire(struct i915_active *active) if (ce->state) __context_unpin_state(ce->state); + intel_ring_unpin(ce->ring); intel_context_put(ce); } @@ -160,27 +161,35 @@ int intel_context_active_acquire(struct intel_context *ce, unsigned long flags) intel_context_get(ce); + err = intel_ring_pin(ce->ring); + if (err) + goto err_put; + if (!ce->state) return 0; err = __context_pin_state(ce->state, flags); - if (err) { - i915_active_cancel(&ce->active); - intel_context_put(ce); - return err; - } + if (err) + goto err_ring; /* Preallocate tracking nodes */ if (!i915_gem_context_is_kernel(ce->gem_context)) { err = i915_active_acquire_preallocate_barrier(&ce->active, ce->engine); - if (err) { - i915_active_release(&ce->active); - return err; - } + if (err) + goto err_state; } return 0; + +err_state: + __context_unpin_state(ce->state); +err_ring: + intel_ring_unpin(ce->ring); +err_put: + intel_context_put(ce); + i915_active_cancel(&ce->active); + return err; } void intel_context_active_release(struct intel_context *ce) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 7fd33e81c2d9..f25632c9b292 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -969,9 +969,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type) u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv) { const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; + unsigned int slice = fls(sseu->slice_mask) - 1; + unsigned int subslice; u32 mcr_s_ss_select; - u32 slice = fls(sseu->slice_mask); - u32 subslice = fls(sseu->subslice_mask[slice]); + + GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask)); + subslice = fls(sseu->subslice_mask[slice]); + GEM_BUG_ON(!subslice); + subslice--; if (IS_GEN(dev_priv, 10)) mcr_s_ss_select = GEN8_MCR_SLICE(slice) | @@ -1471,6 +1476,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct i915_gpu_error * const error = &engine->i915->gpu_error; struct i915_request *rq; intel_wakeref_t wakeref; + unsigned long flags; if (header) { va_list ap; @@ -1490,10 +1496,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, i915_reset_engine_count(error, engine), i915_reset_count(error)); - rcu_read_lock(); - drm_printf(m, "\tRequests:\n"); + spin_lock_irqsave(&engine->active.lock, flags); rq = intel_engine_find_active_request(engine); if (rq) { print_request(m, rq, "\t\tactive "); @@ -1513,8 +1518,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, print_request_ring(m, rq); } - - rcu_read_unlock(); + spin_unlock_irqrestore(&engine->active.lock, flags); wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm); if (wakeref) { @@ -1672,7 +1676,6 @@ struct i915_request * intel_engine_find_active_request(struct intel_engine_cs *engine) { struct i915_request *request, *active = NULL; - unsigned long flags; /* * We are called by the error capture, reset and to dump engine @@ -1685,7 +1688,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) * At all other times, we must assume the GPU is still running, but * we only care about the snapshot of this moment. */ - spin_lock_irqsave(&engine->active.lock, flags); + lockdep_assert_held(&engine->active.lock); list_for_each_entry(request, &engine->active.requests, sched.link) { if (i915_request_completed(request)) continue; @@ -1700,7 +1703,6 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) active = request; break; } - spin_unlock_irqrestore(&engine->active.lock, flags); return active; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 2ce00d3dc42a..ae5b6baf6dff 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -142,27 +142,3 @@ void intel_engine_init__pm(struct intel_engine_cs *engine) { intel_wakeref_init(&engine->wakeref); } - -int intel_engines_resume(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - intel_gt_pm_get(i915); - for_each_engine(engine, i915, id) { - intel_engine_pm_get(engine); - engine->serial++; /* kernel context lost */ - err = engine->resume(engine); - intel_engine_pm_put(engine); - if (err) { - dev_err(i915->drm.dev, - "Failed to restart %s (%d)\n", - engine->name, err); - break; - } - } - intel_gt_pm_put(i915); - - return err; -} diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index b326cd993d60..a11c893f64c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -7,16 +7,22 @@ #ifndef INTEL_ENGINE_PM_H #define INTEL_ENGINE_PM_H +#include "intel_engine_types.h" +#include "intel_wakeref.h" + struct drm_i915_private; -struct intel_engine_cs; void intel_engine_pm_get(struct intel_engine_cs *engine); void intel_engine_pm_put(struct intel_engine_cs *engine); +static inline bool +intel_engine_pm_get_if_awake(struct intel_engine_cs *engine) +{ + return intel_wakeref_get_if_active(&engine->wakeref); +} + void intel_engine_park(struct intel_engine_cs *engine); void intel_engine_init__pm(struct intel_engine_cs *engine); -int intel_engines_resume(struct drm_i915_private *i915); - #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 868b220214f8..43e975a26016 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -70,6 +70,18 @@ struct intel_ring { struct list_head request_list; struct list_head active_link; + /* + * As we have two types of rings, one global to the engine used + * by ringbuffer submission and those that are exclusive to a + * context used by execlists, we have to play safe and allow + * atomic updates to the pin_count. However, the actual pinning + * of the context is either done during initialisation for + * ringbuffer submission or serialised as part of the context + * pinning for execlists, and so we do not need a mutex ourselves + * to serialise intel_ring_pin/intel_ring_unpin. + */ + atomic_t pin_count; + u32 head; u32 tail; u32 emit; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 7b5967751762..9f8f7f54191f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -5,6 +5,7 @@ */ #include "i915_drv.h" +#include "intel_engine_pm.h" #include "intel_gt_pm.h" #include "intel_pm.h" #include "intel_wakeref.h" @@ -118,10 +119,11 @@ void intel_gt_sanitize(struct drm_i915_private *i915, bool force) intel_engine_reset(engine, false); } -void intel_gt_resume(struct drm_i915_private *i915) +int intel_gt_resume(struct drm_i915_private *i915) { struct intel_engine_cs *engine; enum intel_engine_id id; + int err = 0; /* * After resume, we may need to poke into the pinned kernel @@ -129,9 +131,12 @@ void intel_gt_resume(struct drm_i915_private *i915) * Only the kernel contexts should remain pinned over suspend, * allowing us to fixup the user contexts on their first pin. */ + intel_gt_pm_get(i915); for_each_engine(engine, i915, id) { struct intel_context *ce; + intel_engine_pm_get(engine); + ce = engine->kernel_context; if (ce) ce->ops->reset(ce); @@ -139,5 +144,19 @@ void intel_gt_resume(struct drm_i915_private *i915) ce = engine->preempt_context; if (ce) ce->ops->reset(ce); + + engine->serial++; /* kernel context lost */ + err = engine->resume(engine); + + intel_engine_pm_put(engine); + if (err) { + dev_err(i915->drm.dev, + "Failed to restart %s (%d)\n", + engine->name, err); + break; + } } + intel_gt_pm_put(i915); + + return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 7dd1130a19a4..53f342b20181 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -22,6 +22,6 @@ void intel_gt_pm_put(struct drm_i915_private *i915); void intel_gt_pm_init(struct drm_i915_private *i915); void intel_gt_sanitize(struct drm_i915_private *i915, bool force); -void intel_gt_resume(struct drm_i915_private *i915); +int intel_gt_resume(struct drm_i915_private *i915); #endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index b42b5f158295..82b7ace62d97 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1414,6 +1414,7 @@ static void execlists_context_destroy(struct kref *kref) { struct intel_context *ce = container_of(kref, typeof(*ce), ref); + GEM_BUG_ON(!i915_active_is_idle(&ce->active)); GEM_BUG_ON(intel_context_is_pinned(ce)); if (ce->state) @@ -1426,7 +1427,6 @@ static void execlists_context_unpin(struct intel_context *ce) { i915_gem_context_unpin_hw_id(ce->gem_context); i915_gem_object_unpin_map(ce->state->obj); - intel_ring_unpin(ce->ring); } static void @@ -1478,13 +1478,9 @@ __execlists_context_pin(struct intel_context *ce, goto unpin_active; } - ret = intel_ring_pin(ce->ring); - if (ret) - goto unpin_map; - ret = i915_gem_context_pin_hw_id(ce->gem_context); if (ret) - goto unpin_ring; + goto unpin_map; ce->lrc_desc = lrc_descriptor(ce, engine); ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; @@ -1492,8 +1488,6 @@ __execlists_context_pin(struct intel_context *ce, return 0; -unpin_ring: - intel_ring_unpin(ce->ring); unpin_map: i915_gem_object_unpin_map(ce->state->obj); unpin_active: diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 4c478b38e420..3f907701ef4d 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -687,7 +687,6 @@ static void reset_prepare_engine(struct intel_engine_cs *engine) * written to the powercontext is undefined and so we may lose * GPU state upon resume, i.e. fail to restart after a reset. */ - intel_engine_pm_get(engine); intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); engine->reset.prepare(engine); } @@ -718,16 +717,21 @@ static void revoke_mmaps(struct drm_i915_private *i915) } } -static void reset_prepare(struct drm_i915_private *i915) +static intel_engine_mask_t reset_prepare(struct drm_i915_private *i915) { struct intel_engine_cs *engine; + intel_engine_mask_t awake = 0; enum intel_engine_id id; - intel_gt_pm_get(i915); - for_each_engine(engine, i915, id) + for_each_engine(engine, i915, id) { + if (intel_engine_pm_get_if_awake(engine)) + awake |= engine->mask; reset_prepare_engine(engine); + } intel_uc_reset_prepare(i915); + + return awake; } static void gt_revoke(struct drm_i915_private *i915) @@ -761,20 +765,22 @@ static int gt_reset(struct drm_i915_private *i915, static void reset_finish_engine(struct intel_engine_cs *engine) { engine->reset.finish(engine); - intel_engine_pm_put(engine); intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); + + intel_engine_signal_breadcrumbs(engine); } -static void reset_finish(struct drm_i915_private *i915) +static void reset_finish(struct drm_i915_private *i915, + intel_engine_mask_t awake) { struct intel_engine_cs *engine; enum intel_engine_id id; for_each_engine(engine, i915, id) { reset_finish_engine(engine); - intel_engine_signal_breadcrumbs(engine); + if (awake & engine->mask) + intel_engine_pm_put(engine); } - intel_gt_pm_put(i915); } static void nop_submit_request(struct i915_request *request) @@ -798,6 +804,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) { struct i915_gpu_error *error = &i915->gpu_error; struct intel_engine_cs *engine; + intel_engine_mask_t awake; enum intel_engine_id id; if (test_bit(I915_WEDGED, &error->flags)) @@ -817,7 +824,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) * rolling the global seqno forward (since this would complete requests * for which we haven't set the fence error to EIO yet). */ - reset_prepare(i915); + awake = reset_prepare(i915); /* Even if the GPU reset fails, it should still stop the engines */ if (!INTEL_INFO(i915)->gpu_reset_clobbers_display) @@ -841,7 +848,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) for_each_engine(engine, i915, id) engine->cancel_requests(engine); - reset_finish(i915); + reset_finish(i915, awake); GEM_TRACE("end\n"); } @@ -951,6 +958,21 @@ static int do_reset(struct drm_i915_private *i915, return gt_reset(i915, stalled_mask); } +static int resume(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int ret; + + for_each_engine(engine, i915, id) { + ret = engine->resume(engine); + if (ret) + return ret; + } + + return 0; +} + /** * i915_reset - reset chip after a hang * @i915: #drm_i915_private to reset @@ -973,6 +995,7 @@ void i915_reset(struct drm_i915_private *i915, const char *reason) { struct i915_gpu_error *error = &i915->gpu_error; + intel_engine_mask_t awake; int ret; GEM_TRACE("flags=%lx\n", error->flags); @@ -989,7 +1012,7 @@ void i915_reset(struct drm_i915_private *i915, dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); error->reset_count++; - reset_prepare(i915); + awake = reset_prepare(i915); if (!intel_has_gpu_reset(i915)) { if (i915_modparams.reset) @@ -1024,13 +1047,17 @@ void i915_reset(struct drm_i915_private *i915, if (ret) { DRM_ERROR("Failed to initialise HW following reset (%d)\n", ret); - goto error; + goto taint; } + ret = resume(i915); + if (ret) + goto taint; + i915_queue_hangcheck(i915); finish: - reset_finish(i915); + reset_finish(i915, awake); unlock: mutex_unlock(&error->wedge_mutex); return; @@ -1081,7 +1108,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); - if (!intel_wakeref_active(&engine->wakeref)) + if (!intel_engine_pm_get_if_awake(engine)) return 0; reset_prepare_engine(engine); @@ -1116,12 +1143,11 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) * process to program RING_MODE, HWSP and re-enable submission. */ ret = engine->resume(engine); - if (ret) - goto out; out: intel_engine_cancel_stop_cs(engine); reset_finish_engine(engine); + intel_engine_pm_put(engine); return ret; } diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index c6023bc9452d..12010e798868 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1149,16 +1149,16 @@ i915_emit_bb_start(struct i915_request *rq, int intel_ring_pin(struct intel_ring *ring) { struct i915_vma *vma = ring->vma; - enum i915_map_type map = i915_coherent_map_type(vma->vm->i915); unsigned int flags; void *addr; int ret; - GEM_BUG_ON(ring->vaddr); + if (atomic_fetch_inc(&ring->pin_count)) + return 0; ret = i915_timeline_pin(ring->timeline); if (ret) - return ret; + goto err_unpin; flags = PIN_GLOBAL; @@ -1172,26 +1172,31 @@ int intel_ring_pin(struct intel_ring *ring) ret = i915_vma_pin(vma, 0, 0, flags); if (unlikely(ret)) - goto unpin_timeline; + goto err_timeline; if (i915_vma_is_map_and_fenceable(vma)) addr = (void __force *)i915_vma_pin_iomap(vma); else - addr = i915_gem_object_pin_map(vma->obj, map); + addr = i915_gem_object_pin_map(vma->obj, + i915_coherent_map_type(vma->vm->i915)); if (IS_ERR(addr)) { ret = PTR_ERR(addr); - goto unpin_ring; + goto err_ring; } vma->obj->pin_global++; + GEM_BUG_ON(ring->vaddr); ring->vaddr = addr; + return 0; -unpin_ring: +err_ring: i915_vma_unpin(vma); -unpin_timeline: +err_timeline: i915_timeline_unpin(ring->timeline); +err_unpin: + atomic_dec(&ring->pin_count); return ret; } @@ -1207,16 +1212,19 @@ void intel_ring_reset(struct intel_ring *ring, u32 tail) void intel_ring_unpin(struct intel_ring *ring) { - GEM_BUG_ON(!ring->vma); - GEM_BUG_ON(!ring->vaddr); + if (!atomic_dec_and_test(&ring->pin_count)) + return; /* Discard any unused bytes beyond that submitted to hw. */ intel_ring_reset(ring, ring->tail); + GEM_BUG_ON(!ring->vma); if (i915_vma_is_map_and_fenceable(ring->vma)) i915_vma_unpin_iomap(ring->vma); else i915_gem_object_unpin_map(ring->vma->obj); + + GEM_BUG_ON(!ring->vaddr); ring->vaddr = NULL; ring->vma->obj->pin_global--; @@ -2081,10 +2089,11 @@ static void ring_destroy(struct intel_engine_cs *engine) WARN_ON(INTEL_GEN(dev_priv) > 2 && (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); + intel_engine_cleanup_common(engine); + intel_ring_unpin(engine->buffer); intel_ring_put(engine->buffer); - intel_engine_cleanup_common(engine); kfree(engine); } diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 15e90fd2cfdc..98dfb086320f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1098,10 +1098,25 @@ static void glk_whitelist_build(struct intel_engine_cs *engine) static void cfl_whitelist_build(struct intel_engine_cs *engine) { + struct i915_wa_list *w = &engine->whitelist; + if (engine->class != RENDER_CLASS) return; - gen9_whitelist_build(&engine->whitelist); + gen9_whitelist_build(w); + + /* + * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml + * + * This covers 4 register which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); } static void cnl_whitelist_build(struct intel_engine_cs *engine) @@ -1129,6 +1144,19 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) /* WaEnableStateCacheRedirectToCS:icl */ whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); + + /* + * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl + * + * This covers 4 register which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); break; case VIDEO_DECODE_CLASS: @@ -1258,8 +1286,12 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) wa_write_or(wal, GEN7_SARCHKMD, - GEN7_DISABLE_DEMAND_PREFETCH | - GEN7_DISABLE_SAMPLER_PREFETCH); + GEN7_DISABLE_DEMAND_PREFETCH); + + /* Wa_1606682166:icl */ + wa_write_or(wal, + GEN7_SARCHKMD, + GEN7_DISABLE_SAMPLER_PREFETCH); } if (IS_GEN_RANGE(i915, 9, 11)) { diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 086801b51441..486c6953dcb1 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -66,6 +66,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) ring->base.effective_size = sz; ring->base.vaddr = (void *)(ring + 1); ring->base.timeline = &ring->timeline; + atomic_set(&ring->base.pin_count, 1); INIT_LIST_HEAD(&ring->base.request_list); intel_ring_update_space(&ring->base); diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 89da9e7cc1ba..b5c590c9ccba 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -71,13 +71,16 @@ static int igt_atomic_reset(void *arg) goto unlock; for (p = igt_atomic_phases; p->name; p++) { + intel_engine_mask_t awake; + GEM_TRACE("intel_gpu_reset under %s\n", p->name); + awake = reset_prepare(i915); p->critical_section_begin(); reset_prepare(i915); err = intel_gpu_reset(i915, ALL_ENGINES); - reset_finish(i915); p->critical_section_end(); + reset_finish(i915, awake); if (err) { pr_err("intel_gpu_reset failed under %s\n", p->name); diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 9eaf030affd0..44becd9538be 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -925,7 +925,12 @@ check_whitelisted_registers(struct intel_engine_cs *engine, err = 0; for (i = 0; i < engine->whitelist.count; i++) { - if (!fn(engine, a[i], b[i], engine->whitelist.list[i].reg)) + const struct i915_wa *wa = &engine->whitelist.list[i]; + + if (i915_mmio_reg_offset(wa->reg) & RING_FORCE_TO_NONPRIV_RD) + continue; + + if (!fn(engine, a[i], b[i], wa->reg)) err = -EINVAL; } diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 6ea88270c818..b09dc315e2da 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2674,11 +2674,6 @@ static int scan_workload(struct intel_vgpu_workload *workload) gma_head == gma_tail) return 0; - if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) { - ret = -EINVAL; - goto out; - } - ret = ip_gma_set(&s, gma_head); if (ret) goto out; @@ -2724,11 +2719,6 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) s.workload = workload; s.is_ctx_wa = true; - if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) { - ret = -EINVAL; - goto out; - } - ret = ip_gma_set(&s, gma_head); if (ret) goto out; diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index 65e847392aea..8bb292b01271 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -245,7 +245,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, plane->hw_format = fmt; plane->base = vgpu_vreg_t(vgpu, DSPSURF(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) + if (!vgpu_gmadr_is_valid(vgpu, plane->base)) return -EINVAL; plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); @@ -368,7 +368,7 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, alpha_plane, alpha_force); plane->base = vgpu_vreg_t(vgpu, CURBASE(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) + if (!vgpu_gmadr_is_valid(vgpu, plane->base)) return -EINVAL; plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); @@ -472,7 +472,7 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, plane->drm_format = drm_format; plane->base = vgpu_vreg_t(vgpu, SPRSURF(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) + if (!vgpu_gmadr_is_valid(vgpu, plane->base)) return -EINVAL; plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 53115bdae12b..4b04af569c05 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2141,11 +2141,20 @@ static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; unsigned long index = off >> info->gtt_entry_size_shift; + unsigned long gma; struct intel_gvt_gtt_entry e; if (bytes != 4 && bytes != 8) return -EINVAL; + gma = index << I915_GTT_PAGE_SHIFT; + if (!intel_gvt_ggtt_validate_range(vgpu, + gma, 1 << I915_GTT_PAGE_SHIFT)) { + gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma); + memset(p_data, 0, bytes); + return 0; + } + ggtt_get_guest_entry(ggtt_mm, &e, index); memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)), bytes); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 144301b778df..23aa3e50cbf8 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1911,6 +1911,18 @@ static int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size); if (ret) goto err_unmap; + } else if (entry->size != size) { + /* the same gfn with different size: unmap and re-map */ + gvt_dma_unmap_page(vgpu, gfn, entry->dma_addr, entry->size); + __gvt_cache_remove_entry(vgpu, entry); + + ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size); + if (ret) + goto err_unlock; + + ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size); + if (ret) + goto err_unmap; } else { kref_get(&entry->ref); *dma_addr = entry->dma_addr; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 2144fb46d0e1..9f3fd7d96a69 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -364,16 +364,13 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) wa_ctx->indirect_ctx.shadow_va = NULL; } -static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, - struct i915_gem_context *ctx) +static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, + struct i915_gem_context *ctx) { struct intel_vgpu_mm *mm = workload->shadow_mm; struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ctx->vm); int i = 0; - if (mm->type != INTEL_GVT_MM_PPGTT || !mm->ppgtt_mm.shadowed) - return -EINVAL; - if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { px_dma(ppgtt->pd) = mm->ppgtt_mm.shadow_pdps[0]; } else { @@ -384,8 +381,6 @@ static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i]; } } - - return 0; } static int @@ -614,6 +609,8 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) static int prepare_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + int ring = workload->ring_id; int ret = 0; ret = intel_vgpu_pin_mm(workload->shadow_mm); @@ -622,8 +619,16 @@ static int prepare_workload(struct intel_vgpu_workload *workload) return ret; } + if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT || + !workload->shadow_mm->ppgtt_mm.shadowed) { + gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); + return -EINVAL; + } + update_shadow_pdps(workload); + set_context_ppgtt_from_shadow(workload, s->shadow[ring]->gem_context); + ret = intel_vgpu_sync_oos_pages(workload->vgpu); if (ret) { gvt_vgpu_err("fail to vgpu sync oos pages\n"); @@ -674,7 +679,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct intel_vgpu_submission *s = &vgpu->submission; struct i915_request *rq; int ring_id = workload->ring_id; int ret; @@ -685,13 +689,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) mutex_lock(&vgpu->vgpu_lock); mutex_lock(&dev_priv->drm.struct_mutex); - ret = set_context_ppgtt_from_shadow(workload, - s->shadow[ring_id]->gem_context); - if (ret < 0) { - gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); - goto err_req; - } - ret = intel_gvt_workload_req_alloc(workload); if (ret) goto err_req; @@ -990,6 +987,7 @@ static int workload_thread(void *priv) int ret; bool need_force_wake = (INTEL_GEN(gvt->dev_priv) >= 9); DEFINE_WAIT_FUNC(wait, woken_wake_function); + struct intel_runtime_pm *rpm = &gvt->dev_priv->runtime_pm; kfree(p); @@ -1013,6 +1011,8 @@ static int workload_thread(void *priv) workload->ring_id, workload, workload->vgpu->id); + intel_runtime_pm_get(rpm); + gvt_dbg_sched("ring id %d will dispatch workload %p\n", workload->ring_id, workload); @@ -1042,6 +1042,7 @@ complete: intel_uncore_forcewake_put(&gvt->dev_priv->uncore, FORCEWAKE_ALL); + intel_runtime_pm_put_unchecked(rpm); if (ret && (vgpu_is_vm_unhealthy(ret))) enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); } @@ -1492,6 +1493,12 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4); + if (!intel_gvt_ggtt_validate_range(vgpu, start, + _RING_CTL_BUF_SIZE(ctl))) { + gvt_vgpu_err("context contain invalid rb at: 0x%x\n", start); + return ERR_PTR(-EINVAL); + } + workload = alloc_workload(vgpu); if (IS_ERR(workload)) return workload; @@ -1516,9 +1523,31 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, workload->wa_ctx.indirect_ctx.size = (indirect_ctx & INDIRECT_CTX_SIZE_MASK) * CACHELINE_BYTES; + + if (workload->wa_ctx.indirect_ctx.size != 0) { + if (!intel_gvt_ggtt_validate_range(vgpu, + workload->wa_ctx.indirect_ctx.guest_gma, + workload->wa_ctx.indirect_ctx.size)) { + kmem_cache_free(s->workloads, workload); + gvt_vgpu_err("invalid wa_ctx at: 0x%lx\n", + workload->wa_ctx.indirect_ctx.guest_gma); + return ERR_PTR(-EINVAL); + } + } + workload->wa_ctx.per_ctx.guest_gma = per_ctx & PER_CTX_ADDR_MASK; workload->wa_ctx.per_ctx.valid = per_ctx & 1; + if (workload->wa_ctx.per_ctx.valid) { + if (!intel_gvt_ggtt_validate_range(vgpu, + workload->wa_ctx.per_ctx.guest_gma, + CACHELINE_BYTES)) { + kmem_cache_free(s->workloads, workload); + gvt_vgpu_err("invalid per_ctx at: 0x%lx\n", + workload->wa_ctx.per_ctx.guest_gma); + return ERR_PTR(-EINVAL); + } + } } gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n", diff --git a/drivers/gpu/drm/i915/gvt/trace_points.c b/drivers/gpu/drm/i915/gvt/trace_points.c index a3deed692b9c..fe552e877e09 100644 --- a/drivers/gpu/drm/i915/gvt/trace_points.c +++ b/drivers/gpu/drm/i915/gvt/trace_points.c @@ -28,8 +28,6 @@ * */ -#include "trace.h" - #ifndef __CHECKER__ #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bc909ec5d9c3..fe7a6ec2c199 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1674,8 +1674,9 @@ struct drm_i915_private { } dram_info; struct intel_bw_info { - int num_planes; - int deratedbw[3]; + unsigned int deratedbw[3]; /* for each QGV point */ + u8 num_qgv_points; + u8 num_planes; } max_bw[6]; struct drm_private_obj bw_obj; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 190ad54fb072..8a659d3d7435 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,7 +46,6 @@ #include "gem/i915_gem_ioctls.h" #include "gem/i915_gem_pm.h" #include "gem/i915_gemfs.h" -#include "gt/intel_engine_pm.h" #include "gt/intel_gt_pm.h" #include "gt/intel_mocs.h" #include "gt/intel_reset.h" @@ -1307,21 +1306,13 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) intel_mocs_init_l3cc_table(dev_priv); - /* Only when the HW is re-initialised, can we replay the requests */ - ret = intel_engines_resume(dev_priv); - if (ret) - goto cleanup_uc; - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); intel_engines_set_scheduler_caps(dev_priv); return 0; -cleanup_uc: - intel_uc_fini_hw(dev_priv); out: intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - return ret; } @@ -1580,6 +1571,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) goto err_uc_init; + /* Only when the HW is re-initialised, can we replay the requests */ + ret = intel_gt_resume(dev_priv); + if (ret) + goto err_init_hw; + /* * Despite its name intel_init_clock_gating applies both display * clock gating workarounds; GT mmio workarounds and the occasional @@ -1593,20 +1589,20 @@ int i915_gem_init(struct drm_i915_private *dev_priv) ret = intel_engines_verify_workarounds(dev_priv); if (ret) - goto err_init_hw; + goto err_gt; ret = __intel_engines_record_defaults(dev_priv); if (ret) - goto err_init_hw; + goto err_gt; if (i915_inject_load_failure()) { ret = -ENODEV; - goto err_init_hw; + goto err_gt; } if (i915_inject_load_failure()) { ret = -EIO; - goto err_init_hw; + goto err_gt; } intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); @@ -1620,7 +1616,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * HW as irrevisibly wedged, but keep enough state around that the * driver doesn't explode during runtime. */ -err_init_hw: +err_gt: mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_set_wedged(dev_priv); @@ -1630,6 +1626,7 @@ err_init_hw: i915_gem_drain_workqueue(dev_priv); mutex_lock(&dev_priv->drm.struct_mutex); +err_init_hw: intel_uc_fini_hw(dev_priv); err_uc_init: intel_uc_fini(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8ab820145ea6..7015a97b1097 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1444,9 +1444,11 @@ unwind_pd: spin_lock(&pdp->lock); if (atomic_dec_and_test(&pd->used)) { gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe); + pdp->entry[pdpe] = vm->scratch_pd; GEM_BUG_ON(!atomic_read(&pdp->used)); atomic_dec(&pdp->used); - free_pd(vm, pd); + GEM_BUG_ON(alloc); + alloc = pd; /* defer the free to after the lock */ } spin_unlock(&pdp->lock); unwind: @@ -1515,7 +1517,9 @@ unwind_pdp: spin_lock(&pml4->lock); if (atomic_dec_and_test(&pdp->used)) { gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); - free_pd(vm, pdp); + pml4->entry[pml4e] = vm->scratch_pdp; + GEM_BUG_ON(alloc); + alloc = pdp; /* defer the free until after the lock */ } spin_unlock(&pml4->lock); unwind: diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 41a511d5267f..8bc76fcff70d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1418,6 +1418,7 @@ static void gem_record_rings(struct i915_gpu_state *error) struct intel_engine_cs *engine = i915->engine[i]; struct drm_i915_error_engine *ee = &error->engine[i]; struct i915_request *request; + unsigned long flags; ee->engine_id = -1; @@ -1429,10 +1430,11 @@ static void gem_record_rings(struct i915_gpu_state *error) error_record_engine_registers(error, engine, ee); error_record_engine_execlists(engine, ee); + spin_lock_irqsave(&engine->active.lock, flags); request = intel_engine_find_active_request(engine); if (request) { struct i915_gem_context *ctx = request->gem_context; - struct intel_ring *ring; + struct intel_ring *ring = request->ring; ee->vm = ctx->vm ?: &ggtt->vm; @@ -1462,7 +1464,6 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->rq_post = request->postfix; ee->rq_tail = request->tail; - ring = request->ring; ee->cpu_ring_head = ring->head; ee->cpu_ring_tail = ring->tail; ee->ringbuffer = @@ -1470,6 +1471,7 @@ static void gem_record_rings(struct i915_gpu_state *error) engine_record_requests(engine, request, ee); } + spin_unlock_irqrestore(&engine->active.lock, flags); ee->hws_page = i915_error_object_create(i915, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a700c5c3d167..5140017f9a39 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1567,28 +1567,10 @@ static void config_oa_regs(struct drm_i915_private *dev_priv, } } -static int hsw_enable_metric_set(struct i915_perf_stream *stream) +static void delay_after_mux(void) { - struct drm_i915_private *dev_priv = stream->dev_priv; - const struct i915_oa_config *oa_config = stream->oa_config; - - /* PRM: - * - * OA unit is using “crclk” for its functionality. When trunk - * level clock gating takes place, OA clock would be gated, - * unable to count the events from non-render clock domain. - * Render clock gating must be disabled when OA is enabled to - * count the events from non-render domain. Unit level clock - * gating for RCS should also be disabled. - */ - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & - ~GEN7_DOP_CLOCK_GATE_ENABLE)); - I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | - GEN6_CSUNIT_CLOCK_GATE_DISABLE)); - - config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); - - /* It apparently takes a fairly long time for a new MUX + /* + * It apparently takes a fairly long time for a new MUX * configuration to be be applied after these register writes. * This delay duration was derived empirically based on the * render_basic config but hopefully it covers the maximum @@ -1610,6 +1592,30 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream) * a delay at this location would mitigate any invalid reports. */ usleep_range(15000, 20000); +} + +static int hsw_enable_metric_set(struct i915_perf_stream *stream) +{ + struct drm_i915_private *dev_priv = stream->dev_priv; + const struct i915_oa_config *oa_config = stream->oa_config; + + /* + * PRM: + * + * OA unit is using “crclk” for its functionality. When trunk + * level clock gating takes place, OA clock would be gated, + * unable to count the events from non-render clock domain. + * Render clock gating must be disabled when OA is enabled to + * count the events from non-render domain. Unit level clock + * gating for RCS should also be disabled. + */ + I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & + ~GEN7_DOP_CLOCK_GATE_ENABLE)); + I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | + GEN6_CSUNIT_CLOCK_GATE_DISABLE)); + + config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); + delay_after_mux(); config_oa_regs(dev_priv, oa_config->b_counter_regs, oa_config->b_counter_regs_len); @@ -1835,6 +1841,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) return ret; config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); + delay_after_mux(); config_oa_regs(dev_priv, oa_config->b_counter_regs, oa_config->b_counter_regs_len); @@ -2515,6 +2522,9 @@ static int i915_perf_release(struct inode *inode, struct file *file) i915_perf_destroy_locked(stream); mutex_unlock(&dev_priv->perf.lock); + /* Release the reference the perf stream kept on the driver. */ + drm_dev_put(&dev_priv->drm); + return 0; } @@ -2650,6 +2660,11 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, if (!(param->flags & I915_PERF_FLAG_DISABLED)) i915_perf_enable_locked(stream); + /* Take a reference on the driver that will be kept with stream_fd + * until its release. + */ + drm_dev_get(&dev_priv->drm); + return stream_fd; err_open: @@ -3477,9 +3492,13 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set; - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; - dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; - + if (IS_GEN(dev_priv, 10)) { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; + } else { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x124; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x78e; + } dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); } } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index f4ce643b3bc3..cce426b23a24 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -21,24 +21,22 @@ /* watermark/fifo updates */ TRACE_EVENT(intel_pipe_enable, - TP_PROTO(struct drm_i915_private *dev_priv, enum pipe pipe), - TP_ARGS(dev_priv, pipe), + TP_PROTO(struct intel_crtc *crtc), + TP_ARGS(crtc), TP_STRUCT__entry( __array(u32, frame, 3) __array(u32, scanline, 3) __field(enum pipe, pipe) ), - TP_fast_assign( - enum pipe _pipe; - for_each_pipe(dev_priv, _pipe) { - __entry->frame[_pipe] = - dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, _pipe); - __entry->scanline[_pipe] = - intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, _pipe)); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc *it__; + for_each_intel_crtc(&dev_priv->drm, it__) { + __entry->frame[it__->pipe] = intel_crtc_get_vblank_counter(it__); + __entry->scanline[it__->pipe] = intel_get_crtc_scanline(it__); } - __entry->pipe = pipe; + __entry->pipe = crtc->pipe; ), TP_printk("pipe %c enable, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", @@ -49,8 +47,8 @@ TRACE_EVENT(intel_pipe_enable, ); TRACE_EVENT(intel_pipe_disable, - TP_PROTO(struct drm_i915_private *dev_priv, enum pipe pipe), - TP_ARGS(dev_priv, pipe), + TP_PROTO(struct intel_crtc *crtc), + TP_ARGS(crtc), TP_STRUCT__entry( __array(u32, frame, 3) @@ -59,14 +57,13 @@ TRACE_EVENT(intel_pipe_disable, ), TP_fast_assign( - enum pipe _pipe; - for_each_pipe(dev_priv, _pipe) { - __entry->frame[_pipe] = - dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, _pipe); - __entry->scanline[_pipe] = - intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, _pipe)); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc *it__; + for_each_intel_crtc(&dev_priv->drm, it__) { + __entry->frame[it__->pipe] = intel_crtc_get_vblank_counter(it__); + __entry->scanline[it__->pipe] = intel_get_crtc_scanline(it__); } - __entry->pipe = pipe; + __entry->pipe = crtc->pipe; ), TP_printk("pipe %c disable, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", @@ -89,8 +86,7 @@ TRACE_EVENT(intel_pipe_crc, TP_fast_assign( __entry->pipe = crtc->pipe; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); memcpy(__entry->crcs, crcs, sizeof(__entry->crcs)); ), @@ -112,9 +108,10 @@ TRACE_EVENT(intel_cpu_fifo_underrun, ), TP_fast_assign( + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); __entry->pipe = pipe; - __entry->frame = dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); - __entry->scanline = intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); ), TP_printk("pipe %c, frame=%u, scanline=%u", @@ -134,9 +131,10 @@ TRACE_EVENT(intel_pch_fifo_underrun, TP_fast_assign( enum pipe pipe = pch_transcoder; + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); __entry->pipe = pipe; - __entry->frame = dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); - __entry->scanline = intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); ), TP_printk("pch transcoder %c, frame=%u, scanline=%u", @@ -156,12 +154,10 @@ TRACE_EVENT(intel_memory_cxsr, ), TP_fast_assign( - enum pipe pipe; - for_each_pipe(dev_priv, pipe) { - __entry->frame[pipe] = - dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); - __entry->scanline[pipe] = - intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + struct intel_crtc *crtc; + for_each_intel_crtc(&dev_priv->drm, crtc) { + __entry->frame[crtc->pipe] = intel_crtc_get_vblank_counter(crtc); + __entry->scanline[crtc->pipe] = intel_get_crtc_scanline(crtc); } __entry->old = old; __entry->new = new; @@ -198,8 +194,7 @@ TRACE_EVENT(g4x_wm, TP_fast_assign( __entry->pipe = crtc->pipe; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY]; __entry->sprite = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0]; @@ -243,8 +238,7 @@ TRACE_EVENT(vlv_wm, TP_fast_assign( __entry->pipe = crtc->pipe; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); __entry->level = wm->level; __entry->cxsr = wm->cxsr; @@ -278,8 +272,7 @@ TRACE_EVENT(vlv_fifo_size, TP_fast_assign( __entry->pipe = crtc->pipe; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); __entry->sprite0_start = sprite0_start; __entry->sprite1_start = sprite1_start; @@ -310,8 +303,7 @@ TRACE_EVENT(intel_update_plane, TP_fast_assign( __entry->pipe = crtc->pipe; __entry->name = plane->name; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); memcpy(__entry->src, &plane->state->src, sizeof(__entry->src)); memcpy(__entry->dst, &plane->state->dst, sizeof(__entry->dst)); @@ -338,8 +330,7 @@ TRACE_EVENT(intel_disable_plane, TP_fast_assign( __entry->pipe = crtc->pipe; __entry->name = plane->name; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); ), @@ -364,8 +355,7 @@ TRACE_EVENT(i915_pipe_update_start, TP_fast_assign( __entry->pipe = crtc->pipe; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); __entry->min = crtc->debug.min_vbl; __entry->max = crtc->debug.max_vbl; diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 502c54428570..8d1aebc3e857 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -221,13 +221,11 @@ __untrack_all_wakerefs(struct intel_runtime_pm_debug *debug, static void dump_and_free_wakeref_tracking(struct intel_runtime_pm_debug *debug) { - struct drm_printer p; + if (debug->count) { + struct drm_printer p = drm_debug_printer("i915"); - if (!debug->count) - return; - - p = drm_debug_printer("i915"); - __print_intel_runtime_pm_wakeref(&p, debug); + __print_intel_runtime_pm_wakeref(&p, debug); + } kfree(debug->owners); } diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index 9cbb2ebf575b..38275310b196 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -66,6 +66,21 @@ intel_wakeref_get(struct intel_runtime_pm *rpm, } /** + * intel_wakeref_get_if_in_use: Acquire the wakeref + * @wf: the wakeref + * + * Acquire a hold on the wakeref, but only if the wakeref is already + * active. + * + * Returns: true if the wakeref was acquired, false otherwise. + */ +static inline bool +intel_wakeref_get_if_active(struct intel_wakeref *wf) +{ + return atomic_inc_not_zero(&wf->count); +} + +/** * intel_wakeref_put: Release the wakeref * @i915: the drm_i915_private device * @wf: the wakeref |