summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/intel_hangcheck.c
diff options
context:
space:
mode:
authorChris Wilson2018-06-02 12:48:53 +0200
committerChris Wilson2018-06-14 20:20:33 +0200
commit1fd00c0faeec0a89dfe97de842facbecc8a04efb (patch)
tree8487effa542a9d531262026bdeffea4d346ec190 /drivers/gpu/drm/i915/intel_hangcheck.c
parentdrm/i915/execlists: Push the tasklet kick after reset to reset_finish (diff)
downloadkernel-qcow2-linux-1fd00c0faeec0a89dfe97de842facbecc8a04efb.tar.gz
kernel-qcow2-linux-1fd00c0faeec0a89dfe97de842facbecc8a04efb.tar.xz
kernel-qcow2-linux-1fd00c0faeec0a89dfe97de842facbecc8a04efb.zip
drm/i915: Declare the driver wedged if hangcheck makes no progress
Hangcheck is our back up in case the GPU or the driver gets stuck. It detects when the GPU is not making any progress and issues a GPU reset. However, if the driver is failing to make any progress, we can get ourselves into a situation where we continually try resetting the GPU to no avail. Employ a second timeout such that if we continue to see the same seqno (the stalled engine has made no progress at all) over the course of several hangchecks, declare the driver wedged and attempt to start afresh. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180602104853.17140-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/intel_hangcheck.c')
-rw-r--r--drivers/gpu/drm/i915/intel_hangcheck.c17
1 files changed, 16 insertions, 1 deletions
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index d47e346bd49e..2fc7a0dd0df9 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -294,6 +294,7 @@ static void hangcheck_store_sample(struct intel_engine_cs *engine,
engine->hangcheck.seqno = hc->seqno;
engine->hangcheck.action = hc->action;
engine->hangcheck.stalled = hc->stalled;
+ engine->hangcheck.wedged = hc->wedged;
}
static enum intel_engine_hangcheck_action
@@ -368,6 +369,9 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
hc->stalled = time_after(jiffies,
engine->hangcheck.action_timestamp + timeout);
+ hc->wedged = time_after(jiffies,
+ engine->hangcheck.action_timestamp +
+ I915_ENGINE_WEDGED_TIMEOUT);
}
static void hangcheck_declare_hang(struct drm_i915_private *i915,
@@ -409,7 +413,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
gpu_error.hangcheck_work.work);
struct intel_engine_cs *engine;
enum intel_engine_id id;
- unsigned int hung = 0, stuck = 0;
+ unsigned int hung = 0, stuck = 0, wedged = 0;
if (!i915_modparams.enable_hangcheck)
return;
@@ -440,6 +444,17 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
if (hc.action != ENGINE_DEAD)
stuck |= intel_engine_flag(engine);
}
+
+ if (engine->hangcheck.wedged)
+ wedged |= intel_engine_flag(engine);
+ }
+
+ if (wedged) {
+ dev_err(dev_priv->drm.dev,
+ "GPU recovery timed out,"
+ " cancelling all in-flight rendering.\n");
+ GEM_TRACE_DUMP();
+ i915_gem_set_wedged(dev_priv);
}
if (hung)