drm/i915: Move the modulus for ring emission to the register write

Space reservation is already safe with respect to the ring->size modulus, but hardware only expects to see values in the range 0...ring->size-1 (inclusive) and so requires the modulus to prevent us writing the value ring->size instead of 0. As this is only required for the register itself, we can defer the modulus to the register update and not perform it after every command packet. We keep the intel_ring_advance() around in the code to provide demarcation for the end-of-packet (which then can be compared against intel_ring_begin() as the number of dwords emitted must match the reserved space). v2: Assert that the ring size is a power-of-two to match assumptions in the code. Simplify the comment before writing the tail value to explain why the modulus is necessary. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Dave Gordon <david.s.gordon@intel.com> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-13-git-send-email-chris@chris-wilson.co.uk
author: Chris Wilson 2016-08-02 23:50:30 +0200
committer: Chris Wilson 2016-08-02 23:58:25 +0200
commit: 8f9420184a3d62f9ca1ddb2b2f716989c461489b (patch)
tree: 08d02ff129de9c2aac9253cfce6b4568a17987da
parent: drm/i915: Convert engine->write_tail to operate on a request (diff)
download: kernel-qcow2-linux-8f9420184a3d62f9ca1ddb2b2f716989c461489b.tar.gz
kernel-qcow2-linux-8f9420184a3d62f9ca1ddb2b2f716989c461489b.tar.xz
kernel-qcow2-linux-8f9420184a3d62f9ca1ddb2b2f716989c461489b.zip
3 files changed, 20 insertions, 10 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a74006b804dd..0c3ca467a72c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -373,7 +373,7 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
 	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
 	uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state;
 
-	reg_state[CTX_RING_TAIL+1] = rq->tail;
+	reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
 
 	/* True 32b PPGTT with dynamic page allocation: update PDP
 	 * registers and point the unallocated PDPs to scratch page.
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 3142085b5cc0..a03935338019 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1718,7 +1718,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
 {
 	struct drm_i915_private *dev_priv = request->i915;
 
-	I915_WRITE_TAIL(request->engine, request->tail);
+	I915_WRITE_TAIL(request->engine,
+			intel_ring_offset(request->ring, request->tail));
 }
 
 static void
@@ -2081,6 +2082,8 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 	struct intel_ring *ring;
 	int ret;
 
+	GEM_BUG_ON(!is_power_of_2(size));
+
 	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
 	if (ring == NULL) {
 		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
@@ -2505,7 +2508,8 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
 		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
 
 	/* Now that the ring is fully powered up, update the tail */
-	I915_WRITE_FW(RING_TAIL(request->engine->mmio_base), request->tail);
+	I915_WRITE_FW(RING_TAIL(request->engine->mmio_base),
+		      intel_ring_offset(request->ring, request->tail));
 	POSTING_READ_FW(RING_TAIL(request->engine->mmio_base));
 
 	/* Let the ring send IDLE messages to the GT again,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 14d2ea36fb88..7d4a2814b331 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -460,14 +460,20 @@ static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg)
 
 static inline void intel_ring_advance(struct intel_ring *ring)
 {
-	/* The modulus is required so that we avoid writing
-	 * request->tail == ring->size, rather than the expected 0,
-	 * into the RING_TAIL register as that can cause a GPU hang.
-	 * As this is only strictly required for the request->tail,
-	 * and only then as we write the value into hardware, we can
-	 * one day remove the modulus after every command packet.
+	/* Dummy function.
+	 *
+	 * This serves as a placeholder in the code so that the reader
+	 * can compare against the preceding intel_ring_begin() and
+	 * check that the number of dwords emitted matches the space
+	 * reserved for the command packet (i.e. the value passed to
+	 * intel_ring_begin()).
 	 */
-	ring->tail &= ring->size - 1;
+}
+
+static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value)
+{
+	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
+	return value & (ring->size - 1);
 }
 
 int __intel_ring_space(int head, int tail, int size);
author	Chris Wilson	2016-08-02 23:50:30 +0200
committer	Chris Wilson	2016-08-02 23:58:25 +0200
commit	8f9420184a3d62f9ca1ddb2b2f716989c461489b (patch)
tree	08d02ff129de9c2aac9253cfce6b4568a17987da
parent	drm/i915: Convert engine->write_tail to operate on a request (diff)
download	kernel-qcow2-linux-8f9420184a3d62f9ca1ddb2b2f716989c461489b.tar.gz kernel-qcow2-linux-8f9420184a3d62f9ca1ddb2b2f716989c461489b.tar.xz kernel-qcow2-linux-8f9420184a3d62f9ca1ddb2b2f716989c461489b.zip