author     Chris Wilson  2019-03-22 10:23:25 +0100
committer  Chris Wilson  2019-03-22 14:12:38 +0100
commit     ea593dbba4c8ed841630fa5445202627e1046ba6
tree       99a69fa921000464f8b5993618fd67ca2aecf5ec /drivers/gpu/drm/i915/i915_request.c
parent     drm/i915: Extend CONTEXT_CREATE to set parameters upon construction
drm/i915: Allow contexts to share a single timeline across all engines
Previously, our view has always been to run the engines independently within a context. (Multiple engines happened before we had contexts and timelines, so they always operated independently and that behaviour persisted into contexts.) However, at the user level the context often represents a single timeline (e.g. GL contexts), and userspace must ensure that the individual engines are serialised to present that ordering to the client (or forgets about this detail entirely and hopes no one notices - a fair ploy if the client can only directly control one engine itself ;)

In the next patch, we will want to construct a set of engines that operate as one and have a single timeline interwoven between them, to present a single virtual engine to the user. (Userspace submits to the virtual engine, and we then decide which physical engine to execute on.)

To that end, we want to be able to create contexts which have a single timeline (fence context) shared between all engines, rather than multiple timelines.

v2: Move the specialised timeline ordering to its own function.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190322092325.5883-4-chris@chris-wilson.co.uk
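For context, userspace opts into this behaviour at context creation. A minimal sketch, assuming the I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE flag and the DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT ioctl that this series exposes, with fd being an already-open i915 DRM file descriptor:

#include <errno.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Illustrative sketch only: create a context whose engines all share
 * one timeline (fence context), so requests submitted to any of its
 * engines are implicitly ordered against each other. */
static int create_single_timeline_context(int fd, __u32 *ctx_id)
{
	struct drm_i915_gem_context_create_ext arg;

	memset(&arg, 0, sizeof(arg));
	arg.flags = I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE;

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
		return -errno;

	*ctx_id = arg.ctx_id;
	return 0;
}

With such a context, batches submitted to different engines execute in submission order without userspace having to chain fences between them.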
Diffstat (limited to 'drivers/gpu/drm/i915/i915_request.c')
-rw-r--r--	drivers/gpu/drm/i915/i915_request.c	80	++++++++++++++++++++---------
 1 file changed, 55 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 1529824d7c61..e9c2094ab8ea 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -992,6 +992,60 @@ void i915_request_skip(struct i915_request *rq, int error)
 	memset(vaddr + head, 0, rq->postfix - head);
 }
+static struct i915_request *
+__i915_request_add_to_timeline(struct i915_request *rq)
+{
+	struct i915_timeline *timeline = rq->timeline;
+	struct i915_request *prev;
+
+	/*
+	 * Dependency tracking and request ordering along the timeline
+	 * is special cased so that we can eliminate redundant ordering
+	 * operations while building the request (we know that the timeline
+	 * itself is ordered, and here we guarantee it).
+	 *
+	 * As we know we will need to emit tracking along the timeline,
+	 * we embed the hooks into our request struct -- at the cost of
+	 * having to have specialised no-allocation interfaces (which will
+	 * be beneficial elsewhere).
+	 *
+	 * A second benefit to open-coding i915_request_await_request is
+	 * that we can apply a slight variant of the rules specialised
+	 * for timelines that jump between engines (such as virtual engines).
+	 * If we consider the case of a virtual engine, we must emit a
+	 * dma-fence to prevent scheduling of the second request until the
+	 * first is complete (to maximise our greedy late load balancing),
+	 * and this precludes optimising the serialisation of a single
+	 * timeline across engines to use semaphores.
+	 */
+	prev = i915_active_request_raw(&timeline->last_request,
+				       &rq->i915->drm.struct_mutex);
+	if (prev && !i915_request_completed(prev)) {
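+		/*
+		 * Each physical engine sets exactly one bit in ->mask, so
+		 * the union of the two masks is a power of two only when
+		 * both requests run on the same engine; the lighter
+		 * submit-fence chaining then suffices. Otherwise the
+		 * timeline hops between engines and we must await the
+		 * full dma-fence.
+		 */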
+		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
+			i915_sw_fence_await_sw_fence(&rq->submit,
+						     &prev->submit,
+						     &rq->submitq);
+		else
+			__i915_sw_fence_await_dma_fence(&rq->submit,
+							&prev->fence,
+							&rq->dmaq);
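+		/*
+		 * Also record the ordering constraint with the scheduler,
+		 * so that a priority bump on this request propagates back
+		 * along the timeline to its predecessor.
+		 */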
+		if (rq->engine->schedule)
+			__i915_sched_node_add_dependency(&rq->sched,
+							 &prev->sched,
+							 &rq->dep,
+							 0);
+	}
+
+	spin_lock_irq(&timeline->lock);
+	list_add_tail(&rq->link, &timeline->requests);
+	spin_unlock_irq(&timeline->lock);
+
+	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
+	__i915_active_request_set(&timeline->last_request, rq);
+
+	return prev;
+}
+
 /*
  * NB: This function is not allowed to fail. Doing so would mean that the
  * request is not being tracked for completion but the work itself is
@@ -1036,31 +1090,7 @@ void i915_request_add(struct i915_request *request)
 	GEM_BUG_ON(IS_ERR(cs));
 	request->postfix = intel_ring_offset(request, cs);
-	/*
-	 * Seal the request and mark it as pending execution. Note that
-	 * we may inspect this state, without holding any locks, during
-	 * hangcheck. Hence we apply the barrier to ensure that we do not
-	 * see a more recent value in the hws than we are tracking.
-	 */
-
-	prev = i915_active_request_raw(&timeline->last_request,
-				       &request->i915->drm.struct_mutex);
-	if (prev && !i915_request_completed(prev)) {
-		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
-					     &request->submitq);
-		if (engine->schedule)
-			__i915_sched_node_add_dependency(&request->sched,
-							 &prev->sched,
-							 &request->dep,
-							 0);
-	}
-
-	spin_lock_irq(&timeline->lock);
-	list_add_tail(&request->link, &timeline->requests);
-	spin_unlock_irq(&timeline->lock);
-
-	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
-	__i915_active_request_set(&timeline->last_request, request);
+	prev = __i915_request_add_to_timeline(request);
 	list_add_tail(&request->ring_link, &ring->request_list);
 	if (list_is_first(&request->ring_link, &ring->request_list))
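As an aside, the ordering decision in __i915_request_add_to_timeline() hinges entirely on the engine-mask test. Below is a minimal standalone sketch of that predicate, assuming (as holds for physical engines here) that each engine contributes exactly one bit to ->mask; same_engine() is a hypothetical name used only for illustration:

#include <stdbool.h>
#include <stdio.h>

/* is_power_of_2(x): true when exactly one bit is set (and x != 0),
 * mirroring the kernel helper used in the patch. */
static bool is_power_of_2(unsigned long x)
{
	return x && !(x & (x - 1));
}

/* The union of two single-bit engine masks is a power of two only
 * when the bits coincide, i.e. both requests run on the same engine. */
static bool same_engine(unsigned long prev_mask, unsigned long rq_mask)
{
	return is_power_of_2(prev_mask | rq_mask);
}

int main(void)
{
	printf("%d\n", same_engine(0x1, 0x1)); /* 1: same engine, cheap submit-fence */
	printf("%d\n", same_engine(0x1, 0x2)); /* 0: timeline hops engines, dma-fence */
	return 0;
}

When the test fails, the new request waits for the previous fence to complete rather than merely to be submitted, which is exactly the behaviour the virtual-engine load balancer relies on.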