diff options
author | Chris Wilson | 2017-05-17 14:10:00 +0200 |
---|---|---|
committer | Chris Wilson | 2017-05-17 14:38:06 +0200 |
commit | 77f0d0e925e8a0f17a927a1f4e266d1f0e95cb72 (patch) | |
tree | 28700f9afc77e5efd73163b00c18f3a24f9fa4e1 /drivers/gpu/drm/i915/intel_lrc.c | |
parent | drm/i915: Redefine ptr_pack_bits() and friends (diff) | |
download | kernel-qcow2-linux-77f0d0e925e8a0f17a927a1f4e266d1f0e95cb72.tar.gz kernel-qcow2-linux-77f0d0e925e8a0f17a927a1f4e266d1f0e95cb72.tar.xz kernel-qcow2-linux-77f0d0e925e8a0f17a927a1f4e266d1f0e95cb72.zip |
drm/i915/execlists: Pack the count into the low bits of the port.request
add/remove: 1/1 grow/shrink: 5/4 up/down: 391/-578 (-187)
function old new delta
execlists_submit_ports 262 471 +209
port_assign.isra - 136 +136
capture 6344 6359 +15
reset_common_ring 438 452 +14
execlists_submit_request 228 238 +10
gen8_init_common_ring 334 341 +7
intel_engine_is_idle 106 105 -1
i915_engine_info 2314 2290 -24
__i915_gem_set_wedged_BKL 485 411 -74
intel_lrc_irq_handler 1789 1604 -185
execlists_update_context 294 - -294
The most important change there is the improve to the
intel_lrc_irq_handler and excclist_submit_ports (net improvement since
execlists_update_context is now inlined).
v2: Use the port_api() for guc as well (even though currently we do not
pack any counters in there, yet) and hide all port->request_count inside
the helpers.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-5-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c | 121 |
1 files changed, 65 insertions, 56 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9a1192d61538..53ec0d5713ad 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -337,39 +337,32 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) static void execlists_submit_ports(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; struct execlist_port *port = engine->execlist_port; u32 __iomem *elsp = - dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); - u64 desc[2]; - - GEM_BUG_ON(port[0].count > 1); - if (!port[0].count) - execlists_context_status_change(port[0].request, - INTEL_CONTEXT_SCHEDULE_IN); - desc[0] = execlists_update_context(port[0].request); - GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0])); - port[0].count++; - - if (port[1].request) { - GEM_BUG_ON(port[1].count); - execlists_context_status_change(port[1].request, - INTEL_CONTEXT_SCHEDULE_IN); - desc[1] = execlists_update_context(port[1].request); - GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1])); - port[1].count = 1; - } else { - desc[1] = 0; - } - GEM_BUG_ON(desc[0] == desc[1]); + engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + unsigned int n; - /* You must always write both descriptors in the order below. */ - writel(upper_32_bits(desc[1]), elsp); - writel(lower_32_bits(desc[1]), elsp); + for (n = ARRAY_SIZE(engine->execlist_port); n--; ) { + struct drm_i915_gem_request *rq; + unsigned int count; + u64 desc; + + rq = port_unpack(&port[n], &count); + if (rq) { + GEM_BUG_ON(count > !n); + if (!count++) + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); + port_set(&port[n], port_pack(rq, count)); + desc = execlists_update_context(rq); + GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); + } else { + GEM_BUG_ON(!n); + desc = 0; + } - writel(upper_32_bits(desc[0]), elsp); - /* The context is automatically loaded after the following */ - writel(lower_32_bits(desc[0]), elsp); + writel(upper_32_bits(desc), elsp); + writel(lower_32_bits(desc), elsp); + } } static bool ctx_single_port_submission(const struct i915_gem_context *ctx) @@ -390,6 +383,17 @@ static bool can_merge_ctx(const struct i915_gem_context *prev, return true; } +static void port_assign(struct execlist_port *port, + struct drm_i915_gem_request *rq) +{ + GEM_BUG_ON(rq == port_request(port)); + + if (port_isset(port)) + i915_gem_request_put(port_request(port)); + + port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct drm_i915_gem_request *last; @@ -397,7 +401,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct rb_node *rb; bool submit = false; - last = port->request; + last = port_request(port); if (last) /* WaIdleLiteRestore:bdw,skl * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL @@ -407,7 +411,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ last->tail = last->wa_tail; - GEM_BUG_ON(port[1].request); + GEM_BUG_ON(port_isset(&port[1])); /* Hardware submission is through 2 ports. Conceptually each port * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is @@ -464,7 +468,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(last->ctx == cursor->ctx); - i915_gem_request_assign(&port->request, last); + if (submit) + port_assign(port, last); port++; } @@ -474,12 +479,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine) cursor->priotree.priority = INT_MAX; __i915_gem_request_submit(cursor); - trace_i915_gem_request_in(cursor, port - engine->execlist_port); + trace_i915_gem_request_in(cursor, port_index(port, engine)); last = cursor; submit = true; } if (submit) { - i915_gem_request_assign(&port->request, last); + port_assign(port, last); engine->execlist_first = rb; } spin_unlock_irq(&engine->timeline->lock); @@ -488,16 +493,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine) execlists_submit_ports(engine); } -static bool execlists_elsp_idle(struct intel_engine_cs *engine) -{ - return !engine->execlist_port[0].request; -} - static bool execlists_elsp_ready(const struct intel_engine_cs *engine) { const struct execlist_port *port = engine->execlist_port; - return port[0].count + port[1].count < 2; + return port_count(&port[0]) + port_count(&port[1]) < 2; } /* @@ -547,7 +547,9 @@ static void intel_lrc_irq_handler(unsigned long data) tail = GEN8_CSB_WRITE_PTR(head); head = GEN8_CSB_READ_PTR(head); while (head != tail) { + struct drm_i915_gem_request *rq; unsigned int status; + unsigned int count; if (++head == GEN8_CSB_ENTRIES) head = 0; @@ -575,22 +577,26 @@ static void intel_lrc_irq_handler(unsigned long data) /* Check the context/desc id for this event matches */ GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) != - port[0].context_id); + port->context_id); - GEM_BUG_ON(port[0].count == 0); - if (--port[0].count == 0) { + rq = port_unpack(port, &count); + GEM_BUG_ON(count == 0); + if (--count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); - GEM_BUG_ON(!i915_gem_request_completed(port[0].request)); - execlists_context_status_change(port[0].request, - INTEL_CONTEXT_SCHEDULE_OUT); + GEM_BUG_ON(!i915_gem_request_completed(rq)); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + + trace_i915_gem_request_out(rq); + i915_gem_request_put(rq); - trace_i915_gem_request_out(port[0].request); - i915_gem_request_put(port[0].request); port[0] = port[1]; memset(&port[1], 0, sizeof(port[1])); + } else { + port_set(port, port_pack(rq, count)); } - GEM_BUG_ON(port[0].count == 0 && + /* After the final element, the hw should be idle */ + GEM_BUG_ON(port_count(port) == 0 && !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); } @@ -1147,6 +1153,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; struct execlist_port *port = engine->execlist_port; unsigned int n; + bool submit; int ret; ret = intel_mocs_init_engine(engine); @@ -1168,19 +1175,21 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) /* After a GPU reset, we may have requests to replay */ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + submit = false; for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) { - if (!port[n].request) + if (!port_isset(&port[n])) break; DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n", engine->name, n, - port[n].request->global_seqno); + port_request(&port[n])->global_seqno); /* Discard the current inflight count */ - port[n].count = 0; + port_set(&port[n], port_request(&port[n])); + submit = true; } - if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) + if (submit && !i915.enable_guc_submission) execlists_submit_ports(engine); return 0; @@ -1258,13 +1267,13 @@ static void reset_common_ring(struct intel_engine_cs *engine, intel_ring_update_space(request->ring); /* Catch up with any missed context-switch interrupts */ - if (request->ctx != port[0].request->ctx) { - i915_gem_request_put(port[0].request); + if (request->ctx != port_request(port)->ctx) { + i915_gem_request_put(port_request(port)); port[0] = port[1]; memset(&port[1], 0, sizeof(port[1])); } - GEM_BUG_ON(request->ctx != port[0].request->ctx); + GEM_BUG_ON(request->ctx != port_request(port)->ctx); /* Reset WaIdleLiteRestore:bdw,skl as well */ request->tail = |