summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gem_execbuffer.c
diff options
context:
space:
mode:
authorChris Wilson2014-05-23 08:48:08 +0200
committerJani Nikula2014-05-27 10:18:40 +0200
commitd23db88c3ab233daed18709e3a24d6c95344117f (patch)
tree9e1ff705b415f76a882aa5eb3e546aaf543ae0cb /drivers/gpu/drm/i915/i915_gem_execbuffer.c
parentdrm/i915: Only copy back the modified fields to userspace from execbuffer (diff)
downloadkernel-qcow2-linux-d23db88c3ab233daed18709e3a24d6c95344117f.tar.gz
kernel-qcow2-linux-d23db88c3ab233daed18709e3a24d6c95344117f.tar.xz
kernel-qcow2-linux-d23db88c3ab233daed18709e3a24d6c95344117f.zip
drm/i915: Prevent negative relocation deltas from wrapping
This is pure evil. Userspace, I'm looking at you SNA, repacks batch buffers on the fly after generation as they are being passed to the kernel for execution. These batches also contain self-referenced relocations as a single buffer encompasses the state commands, kernels, vertices and sampler. During generation the buffers are placed at known offsets within the full batch, and then the relocation deltas (as passed to the kernel) are tweaked as the batch is repacked into a smaller buffer. This means that userspace is passing negative relocations deltas, which subsequently wrap to large values if the batch is at a low address. The GPU hangs when it then tries to use the large value as a base for its address offsets, rather than wrapping back to the real value (as one would hope). As the GPU uses positive offsets from the base, we can treat the relocation address as the minimum address read by the GPU. For the upper bound, we trust that userspace will not read beyond the end of the buffer. So, how do we fix negative relocations from wrapping? We can either check that every relocation looks valid when we write it, and then position each object such that we prevent the offset wraparound, or we just special-case the self-referential behaviour of SNA and force all batches to be above 256k. Daniel prefers the latter approach. This fixes a GPU hang when it tries to use an address (relocation + offset) greater than the GTT size. The issue would occur quite easily with full-ppgtt as each fd gets its own VM space, so low offsets would often be handed out. However, with the rearrangement of the low GTT due to capturing the BIOS framebuffer, it is already affecting kernels 3.15 onwards. I think only IVB+ is susceptible to this bug, but the workaround should only kick in rarely, so it seems sensible to always apply it. v3: Use a bias for batch buffers to prevent small negative delta relocations from wrapping. v4 from Daniel: - s/BIAS/BATCH_OFFSET_BIAS/ - Extract eb_vma_misplaced/i915_vma_misplaced since the conditions were growing rather cumbersome. - Add a comment to eb_get_batch explaining why we do this. - Apply the batch offset bias everywhere but mention that we've only observed it on gen7 gpus. - Drop PIN_OFFSET_FIX for now, that slipped in from a feature patch. v5: Add static to eb_get_batch, spotted by 0-day tester. Testcase: igt/gem_bad_reloc Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=78533 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v3) Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c76
1 files changed, 57 insertions, 19 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7aaf6390cfaf..20fef6c50267 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,9 @@
#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define __EXEC_OBJECT_NEEDS_BIAS (1<<28)
+
+#define BATCH_OFFSET_BIAS (256*1024)
struct eb_vmas {
struct list_head vmas;
@@ -545,7 +548,7 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
bool need_fence;
- unsigned flags;
+ uint64_t flags;
int ret;
flags = 0;
@@ -559,6 +562,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
flags |= PIN_GLOBAL;
+ if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
+ flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
if (ret)
@@ -592,6 +597,36 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
return 0;
}
+static bool
+eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
+{
+ struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+ struct drm_i915_gem_object *obj = vma->obj;
+ bool need_fence, need_mappable;
+
+ need_fence =
+ has_fenced_gpu_access &&
+ entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+ obj->tiling_mode != I915_TILING_NONE;
+ need_mappable = need_fence || need_reloc_mappable(vma);
+
+ WARN_ON((need_mappable || need_fence) &&
+ !i915_is_ggtt(vma->vm));
+
+ if (entry->alignment &&
+ vma->node.start & (entry->alignment - 1))
+ return true;
+
+ if (need_mappable && !obj->map_and_fenceable)
+ return true;
+
+ if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
+ vma->node.start < BATCH_OFFSET_BIAS)
+ return true;
+
+ return false;
+}
+
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
struct list_head *vmas,
@@ -653,26 +688,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
/* Unbind any ill-fitting objects or pin. */
list_for_each_entry(vma, vmas, exec_list) {
- struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- bool need_fence, need_mappable;
-
- obj = vma->obj;
-
if (!drm_mm_node_allocated(&vma->node))
continue;
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
- need_mappable = need_fence || need_reloc_mappable(vma);
-
- WARN_ON((need_mappable || need_fence) &&
- !i915_is_ggtt(vma->vm));
-
- if ((entry->alignment &&
- vma->node.start & (entry->alignment - 1)) ||
- (need_mappable && !obj->map_and_fenceable))
+ if (eb_vma_misplaced(vma, has_fenced_gpu_access))
ret = i915_vma_unbind(vma);
else
ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -999,6 +1018,25 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
return 0;
}
+static struct drm_i915_gem_object *
+eb_get_batch(struct eb_vmas *eb)
+{
+ struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
+
+ /*
+ * SNA is doing fancy tricks with compressing batch buffers, which leads
+ * to negative relocation deltas. Usually that works out ok since the
+ * relocate address is still positive, except when the batch is placed
+ * very low in the GTT. Ensure this doesn't happen.
+ *
+ * Note that actual hangs have only been observed on gen7, but for
+ * paranoia do it everywhere.
+ */
+ vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+
+ return vma->obj;
+}
+
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct drm_file *file,
@@ -1153,7 +1191,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err;
/* take note of the batch buffer before we might reorder the lists */
- batch_obj = list_entry(eb->vmas.prev, struct i915_vma, exec_list)->obj;
+ batch_obj = eb_get_batch(eb);
/* Move the objects en-masse into the GTT, evicting if necessary. */
need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;