summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_context.c4
-rw-r--r--drivers/gpu/drm/i915/i915_gem_context.h9
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c31
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.h3
-rw-r--r--drivers/gpu/drm/i915/i915_request.c2
-rw-r--r--drivers/gpu/drm/i915/i915_reset.c29
6 files changed, 34 insertions, 44 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index da21c843fed8..7541c6f961b3 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -355,6 +355,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
struct i915_gem_context *ctx;
unsigned int n;
int ret;
+ int i;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx == NULL)
@@ -407,6 +408,9 @@ __create_hw_context(struct drm_i915_private *dev_priv,
ctx->desc_template =
default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt);
+ for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
+ ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
+
return ctx;
err_pid:
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 071108d34ae0..dc6c58f38cfa 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -209,10 +209,11 @@ struct i915_gem_context {
*/
atomic_t active_count;
-#define CONTEXT_SCORE_GUILTY 10
-#define CONTEXT_SCORE_BAN_THRESHOLD 40
- /** ban_score: Accumulated score of all hangs caused by this context. */
- atomic_t ban_score;
+ /**
+ * @hang_timestamp: The last time(s) this context caused a GPU hang
+ */
+ unsigned long hang_timestamp[2];
+#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */
/** remap_slice: Bitmask of cache lines that need remapping */
u8 remap_slice;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9a65341fec09..3f6eddb6f6de 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -434,11 +434,6 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m,
ee->instdone.row[slice][subslice]);
}
-static const char *bannable(const struct drm_i915_error_context *ctx)
-{
- return ctx->bannable ? "" : " (unbannable)";
-}
-
static void error_print_request(struct drm_i915_error_state_buf *m,
const char *prefix,
const struct drm_i915_error_request *erq,
@@ -447,9 +442,8 @@ static void error_print_request(struct drm_i915_error_state_buf *m,
if (!erq->seqno)
return;
- err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",
- prefix, erq->pid, erq->ban_score,
- erq->context, erq->seqno,
+ err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",
+ prefix, erq->pid, erq->context, erq->seqno,
test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&erq->flags) ? "!" : "",
test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
@@ -463,10 +457,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
const char *header,
const struct drm_i915_error_context *ctx)
{
- err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n",
+ err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, guilty %d active %d\n",
header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
- ctx->sched_attr.priority, ctx->ban_score, bannable(ctx),
- ctx->guilty, ctx->active);
+ ctx->sched_attr.priority, ctx->guilty, ctx->active);
}
static void error_print_engine(struct drm_i915_error_state_buf *m,
@@ -688,12 +681,10 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
if (!error->engine[i].context.pid)
continue;
- err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n",
+ err_printf(m, "Active process (on ring %s): %s [%d]\n",
engine_name(m->i915, i),
error->engine[i].context.comm,
- error->engine[i].context.pid,
- error->engine[i].context.ban_score,
- bannable(&error->engine[i].context));
+ error->engine[i].context.pid);
}
err_printf(m, "Reset count: %u\n", error->reset_count);
err_printf(m, "Suspend count: %u\n", error->suspend_count);
@@ -779,13 +770,11 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
if (obj) {
err_puts(m, m->i915->engine[i]->name);
if (ee->context.pid)
- err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d%s)",
+ err_printf(m, " (submitted by %s [%d], ctx %d [%d])",
ee->context.comm,
ee->context.pid,
ee->context.handle,
- ee->context.hw_id,
- ee->context.ban_score,
- bannable(&ee->context));
+ ee->context.hw_id);
err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
upper_32_bits(obj->gtt_offset),
lower_32_bits(obj->gtt_offset));
@@ -1301,8 +1290,6 @@ static void record_request(struct i915_request *request,
erq->flags = request->fence.flags;
erq->context = ctx->hw_id;
erq->sched_attr = request->sched.attr;
- erq->ban_score = atomic_read(&ctx->ban_score);
- erq->seqno = request->global_seqno;
erq->jiffies = request->emitted_jiffies;
erq->start = i915_ggtt_offset(request->ring->vma);
erq->head = request->head;
@@ -1396,8 +1383,6 @@ static void record_context(struct drm_i915_error_context *e,
e->handle = ctx->user_handle;
e->hw_id = ctx->hw_id;
e->sched_attr = ctx->sched;
- e->ban_score = atomic_read(&ctx->ban_score);
- e->bannable = i915_gem_context_is_bannable(ctx);
e->guilty = atomic_read(&ctx->guilty_count);
e->active = atomic_read(&ctx->active_count);
}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index afa3adb28f02..94eaf8ab9051 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -122,10 +122,8 @@ struct i915_gpu_state {
pid_t pid;
u32 handle;
u32 hw_id;
- int ban_score;
int active;
int guilty;
- bool bannable;
struct i915_sched_attr sched_attr;
} context;
@@ -149,7 +147,6 @@ struct i915_gpu_state {
long jiffies;
pid_t pid;
u32 context;
- int ban_score;
u32 seqno;
u32 start;
u32 head;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 5ab4e1c01618..a61e3a4fc9dc 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -290,8 +290,6 @@ static void i915_request_retire(struct i915_request *request)
i915_request_remove_from_client(request);
- /* Retirement decays the ban score as it is a sign of ctx progress */
- atomic_dec_if_positive(&request->gem_context->ban_score);
intel_context_unpin(request->hw_context);
__retire_engine_upto(request->engine, request);
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 034b654be74c..c234feb5fdf5 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -59,24 +59,29 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv,
static bool context_mark_guilty(struct i915_gem_context *ctx)
{
- unsigned int score;
- bool banned, bannable;
+ unsigned long prev_hang;
+ bool banned;
+ int i;
atomic_inc(&ctx->guilty_count);
- bannable = i915_gem_context_is_bannable(ctx);
- score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
- banned = (!i915_gem_context_is_recoverable(ctx) ||
- score >= CONTEXT_SCORE_BAN_THRESHOLD);
-
- /* Cool contexts don't accumulate client ban score */
- if (!bannable)
+ /* Cool contexts are too cool to be banned! (Used for reset testing.) */
+ if (!i915_gem_context_is_bannable(ctx))
return false;
+ /* Record the timestamp for the last N hangs */
+ prev_hang = ctx->hang_timestamp[0];
+ for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp) - 1; i++)
+ ctx->hang_timestamp[i] = ctx->hang_timestamp[i + 1];
+ ctx->hang_timestamp[i] = jiffies;
+
+ /* If we have hung N+1 times in rapid succession, we ban the context! */
+ banned = !i915_gem_context_is_recoverable(ctx);
+ if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES))
+ banned = true;
if (banned) {
- DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
- ctx->name, atomic_read(&ctx->guilty_count),
- score);
+ DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
+ ctx->name, atomic_read(&ctx->guilty_count));
i915_gem_context_set_banned(ctx);
}