drm/i915/execlists: Pack the count into the low bits of the port.request

add/remove: 1/1 grow/shrink: 5/4 up/down: 391/-578 (-187) function old new delta execlists_submit_ports 262 471 +209 port_assign.isra - 136 +136 capture 6344 6359 +15 reset_common_ring 438 452 +14 execlists_submit_request 228 238 +10 gen8_init_common_ring 334 341 +7 intel_engine_is_idle 106 105 -1 i915_engine_info 2314 2290 -24 __i915_gem_set_wedged_BKL 485 411 -74 intel_lrc_irq_handler 1789 1604 -185 execlists_update_context 294 - -294 The most important change there is the improve to the intel_lrc_irq_handler and excclist_submit_ports (net improvement since execlists_update_context is now inlined). v2: Use the port_api() for guc as well (even though currently we do not pack any counters in there, yet) and hide all port->request_count inside the helpers. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-5-chris@chris-wilson.co.uk
author: Chris Wilson 2017-05-17 14:10:00 +0200
committer: Chris Wilson 2017-05-17 14:38:06 +0200
commit: 77f0d0e925e8a0f17a927a1f4e266d1f0e95cb72 (patch)
tree: 28700f9afc77e5efd73163b00c18f3a24f9fa4e1 /drivers/gpu/drm/i915/intel_lrc.c
parent: drm/i915: Redefine ptr_pack_bits() and friends (diff)
download: kernel-qcow2-linux-77f0d0e925e8a0f17a927a1f4e266d1f0e95cb72.tar.gz
kernel-qcow2-linux-77f0d0e925e8a0f17a927a1f4e266d1f0e95cb72.tar.xz
kernel-qcow2-linux-77f0d0e925e8a0f17a927a1f4e266d1f0e95cb72.zip
1 files changed, 65 insertions, 56 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9a1192d61538..53ec0d5713ad 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -337,39 +337,32 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
 	struct execlist_port *port = engine->execlist_port;
 	u32 __iomem *elsp =
-		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
-	u64 desc[2];
-
-	GEM_BUG_ON(port[0].count > 1);
-	if (!port[0].count)
-		execlists_context_status_change(port[0].request,
-						INTEL_CONTEXT_SCHEDULE_IN);
-	desc[0] = execlists_update_context(port[0].request);
-	GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0]));
-	port[0].count++;
-
-	if (port[1].request) {
-		GEM_BUG_ON(port[1].count);
-		execlists_context_status_change(port[1].request,
-						INTEL_CONTEXT_SCHEDULE_IN);
-		desc[1] = execlists_update_context(port[1].request);
-		GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1]));
-		port[1].count = 1;
-	} else {
-		desc[1] = 0;
-	}
-	GEM_BUG_ON(desc[0] == desc[1]);
+		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	unsigned int n;
 
-	/* You must always write both descriptors in the order below. */
-	writel(upper_32_bits(desc[1]), elsp);
-	writel(lower_32_bits(desc[1]), elsp);
+	for (n = ARRAY_SIZE(engine->execlist_port); n--; ) {
+		struct drm_i915_gem_request *rq;
+		unsigned int count;
+		u64 desc;
+
+		rq = port_unpack(&port[n], &count);
+		if (rq) {
+			GEM_BUG_ON(count > !n);
+			if (!count++)
+				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+			port_set(&port[n], port_pack(rq, count));
+			desc = execlists_update_context(rq);
+			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
+		} else {
+			GEM_BUG_ON(!n);
+			desc = 0;
+		}
 
-	writel(upper_32_bits(desc[0]), elsp);
-	/* The context is automatically loaded after the following */
-	writel(lower_32_bits(desc[0]), elsp);
+		writel(upper_32_bits(desc), elsp);
+		writel(lower_32_bits(desc), elsp);
+	}
 }
 
 static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
@@ -390,6 +383,17 @@ static bool can_merge_ctx(const struct i915_gem_context *prev,
 	return true;
 }
 
+static void port_assign(struct execlist_port *port,
+			struct drm_i915_gem_request *rq)
+{
+	GEM_BUG_ON(rq == port_request(port));
+
+	if (port_isset(port))
+		i915_gem_request_put(port_request(port));
+
+	port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *last;
@@ -397,7 +401,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	struct rb_node *rb;
 	bool submit = false;
 
-	last = port->request;
+	last = port_request(port);
 	if (last)
 		/* WaIdleLiteRestore:bdw,skl
 		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
@@ -407,7 +411,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 */
 		last->tail = last->wa_tail;
 
-	GEM_BUG_ON(port[1].request);
+	GEM_BUG_ON(port_isset(&port[1]));
 
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -464,7 +468,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 			GEM_BUG_ON(last->ctx == cursor->ctx);
 
-			i915_gem_request_assign(&port->request, last);
+			if (submit)
+				port_assign(port, last);
 			port++;
 		}
 
@@ -474,12 +479,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		cursor->priotree.priority = INT_MAX;
 
 		__i915_gem_request_submit(cursor);
-		trace_i915_gem_request_in(cursor, port - engine->execlist_port);
+		trace_i915_gem_request_in(cursor, port_index(port, engine));
 		last = cursor;
 		submit = true;
 	}
 	if (submit) {
-		i915_gem_request_assign(&port->request, last);
+		port_assign(port, last);
 		engine->execlist_first = rb;
 	}
 	spin_unlock_irq(&engine->timeline->lock);
@@ -488,16 +493,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		execlists_submit_ports(engine);
 }
 
-static bool execlists_elsp_idle(struct intel_engine_cs *engine)
-{
-	return !engine->execlist_port[0].request;
-}
-
 static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
 {
 	const struct execlist_port *port = engine->execlist_port;
 
-	return port[0].count + port[1].count < 2;
+	return port_count(&port[0]) + port_count(&port[1]) < 2;
 }
 
 /*
@@ -547,7 +547,9 @@ static void intel_lrc_irq_handler(unsigned long data)
 		tail = GEN8_CSB_WRITE_PTR(head);
 		head = GEN8_CSB_READ_PTR(head);
 		while (head != tail) {
+			struct drm_i915_gem_request *rq;
 			unsigned int status;
+			unsigned int count;
 
 			if (++head == GEN8_CSB_ENTRIES)
 				head = 0;
@@ -575,22 +577,26 @@ static void intel_lrc_irq_handler(unsigned long data)
 
 			/* Check the context/desc id for this event matches */
 			GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
-					 port[0].context_id);
+					 port->context_id);
 
-			GEM_BUG_ON(port[0].count == 0);
-			if (--port[0].count == 0) {
+			rq = port_unpack(port, &count);
+			GEM_BUG_ON(count == 0);
+			if (--count == 0) {
 				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-				GEM_BUG_ON(!i915_gem_request_completed(port[0].request));
-				execlists_context_status_change(port[0].request,
-								INTEL_CONTEXT_SCHEDULE_OUT);
+				GEM_BUG_ON(!i915_gem_request_completed(rq));
+				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+
+				trace_i915_gem_request_out(rq);
+				i915_gem_request_put(rq);
 
-				trace_i915_gem_request_out(port[0].request);
-				i915_gem_request_put(port[0].request);
 				port[0] = port[1];
 				memset(&port[1], 0, sizeof(port[1]));
+			} else {
+				port_set(port, port_pack(rq, count));
 			}
 
-			GEM_BUG_ON(port[0].count == 0 &&
+			/* After the final element, the hw should be idle */
+			GEM_BUG_ON(port_count(port) == 0 &&
 				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 		}
 
@@ -1147,6 +1153,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct execlist_port *port = engine->execlist_port;
 	unsigned int n;
+	bool submit;
 	int ret;
 
 	ret = intel_mocs_init_engine(engine);
@@ -1168,19 +1175,21 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	/* After a GPU reset, we may have requests to replay */
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 
+	submit = false;
 	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
-		if (!port[n].request)
+		if (!port_isset(&port[n]))
 			break;
 
 		DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n",
 				 engine->name, n,
-				 port[n].request->global_seqno);
+				 port_request(&port[n])->global_seqno);
 
 		/* Discard the current inflight count */
-		port[n].count = 0;
+		port_set(&port[n], port_request(&port[n]));
+		submit = true;
 	}
 
-	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine))
+	if (submit && !i915.enable_guc_submission)
 		execlists_submit_ports(engine);
 
 	return 0;
@@ -1258,13 +1267,13 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	intel_ring_update_space(request->ring);
 
 	/* Catch up with any missed context-switch interrupts */
-	if (request->ctx != port[0].request->ctx) {
-		i915_gem_request_put(port[0].request);
+	if (request->ctx != port_request(port)->ctx) {
+		i915_gem_request_put(port_request(port));
 		port[0] = port[1];
 		memset(&port[1], 0, sizeof(port[1]));
 	}
 
-	GEM_BUG_ON(request->ctx != port[0].request->ctx);
+	GEM_BUG_ON(request->ctx != port_request(port)->ctx);
 
 	/* Reset WaIdleLiteRestore:bdw,skl as well */
 	request->tail =
author	Chris Wilson	2017-05-17 14:10:00 +0200
committer	Chris Wilson	2017-05-17 14:38:06 +0200
commit	77f0d0e925e8a0f17a927a1f4e266d1f0e95cb72 (patch)
tree	28700f9afc77e5efd73163b00c18f3a24f9fa4e1 /drivers/gpu/drm/i915/intel_lrc.c
parent	drm/i915: Redefine ptr_pack_bits() and friends (diff)
download	kernel-qcow2-linux-77f0d0e925e8a0f17a927a1f4e266d1f0e95cb72.tar.gz kernel-qcow2-linux-77f0d0e925e8a0f17a927a1f4e266d1f0e95cb72.tar.xz kernel-qcow2-linux-77f0d0e925e8a0f17a927a1f4e266d1f0e95cb72.zip