author     Chris Wilson  2016-10-28 14:58:27 +0200
committer  Chris Wilson  2016-10-28 21:53:43 +0200
commit     e95433c73a11759203af1cae5958f998c9673370 (patch)
tree       021dc483a7beff3d2924089cf1c6d20105f11b8a /drivers/gpu/drm/i915/i915_gem.c
parent     drm/i915: Remove superfluous wait_for_error() from throttle-ioctl (diff)
drm/i915: Rearrange i915_wait_request() accounting with callers
Our low-level wait routine has evolved from our generic wait interface,
which handled unlocked waits with RPS boosting and time tracking. If we
push our GEM fence tracking to use reservation_objects (required for
handling multiple timelines), we lose the ability to pass the required
information down to i915_wait_request(). However, if we push the extra
functionality from i915_wait_request() out to the individual callsites
that make use of those extras (i915_gem_object_wait_rendering and
i915_gem_wait_ioctl), we can both simplify our low-level wait and
prepare for extending the GEM interface to use reservation_objects.

v2: Rewrite i915_wait_request() kerneldocs

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-4-chris@chris-wilson.co.uk
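For illustration, a minimal sketch of the new calling convention for a locked caller, assuming the context of i915_gem.c (the function name example_flush_object is hypothetical; the flags, timeout and return semantics are taken from the hunks below):

/* Hypothetical caller: the old boolean "readonly" argument becomes an
 * explicit flags mask, the timeout is now passed in jiffies, and
 * failures are reported as a negative errno.
 */
static int example_flush_object(struct drm_i915_gem_object *obj)
{
	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* Previously: i915_gem_object_wait_rendering(obj, false) */
	return i915_gem_object_wait(obj,
				    I915_WAIT_INTERRUPTIBLE |
				    I915_WAIT_LOCKED |
				    I915_WAIT_ALL,
				    MAX_SCHEDULE_TIMEOUT,
				    NULL);
}

Unlocked callers (the pread/pwrite and wait ioctls) drop I915_WAIT_LOCKED and pass their intel_rps_client so any waitboost is charged to the right process.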
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 309
1 file changed, 231 insertions(+), 78 deletions(-)
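One behavioural detail worth calling out before the diff: i915_gem_wait_ioctl() now converts the user-supplied nanosecond timeout into a jiffies timeout up front and deducts the elapsed wall time from args->timeout_ns on return. A hedged sketch of that bookkeeping, using the names from the patch:

/* Sketch of the timeout accounting in i915_gem_wait_ioctl() below;
 * "args" is the user's struct drm_i915_gem_wait. A negative
 * args->timeout_ns means wait indefinitely, zero means poll.
 */
ktime_t start = ktime_get();
long ret;

ret = i915_gem_object_wait(obj,
			   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
			   to_wait_timeout(args->timeout_ns),
			   to_rps_client(file));

if (args->timeout_ns > 0) {
	/* Report the remaining budget back to userspace. */
	args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
	if (args->timeout_ns < 0)
		args->timeout_ns = 0;
}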
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1254143ab121..537f502123ea 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -292,7 +292,12 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
* must wait for all rendering to complete to the object (as unbinding
* must anyway), and retire the requests.
*/
- ret = i915_gem_object_wait_rendering(obj, false);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret)
return ret;
@@ -311,88 +316,172 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
return ret;
}
-/**
- * Ensures that all rendering to the object has completed and the object is
- * safe to unbind from the GTT or access from the CPU.
- * @obj: i915 gem object
- * @readonly: waiting for just read access or read-write access
- */
-int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
- bool readonly)
+static long
+i915_gem_object_wait_fence(struct dma_fence *fence,
+ unsigned int flags,
+ long timeout,
+ struct intel_rps_client *rps)
{
- struct reservation_object *resv;
- struct i915_gem_active *active;
- unsigned long active_mask;
- int idx;
+ struct drm_i915_gem_request *rq;
- lockdep_assert_held(&obj->base.dev->struct_mutex);
+ BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
- if (!readonly) {
- active = obj->last_read;
- active_mask = i915_gem_object_get_active(obj);
- } else {
- active_mask = 1;
- active = &obj->last_write;
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+ return timeout;
+
+ if (!dma_fence_is_i915(fence))
+ return dma_fence_wait_timeout(fence,
+ flags & I915_WAIT_INTERRUPTIBLE,
+ timeout);
+
+ rq = to_request(fence);
+ if (i915_gem_request_completed(rq))
+ goto out;
+
+ /* This client is about to stall waiting for the GPU. In many cases
+ * this is undesirable and limits the throughput of the system, as
+ * many clients cannot continue processing user input/output whilst
+ * blocked. RPS autotuning may take tens of milliseconds to respond
+ * to the GPU load and thus incurs additional latency for the client.
+ * We can circumvent that by promoting the GPU frequency to maximum
+ * before we wait. This makes the GPU throttle up much more quickly
+ * (good for benchmarks and user experience, e.g. window animations),
+ * but at a cost of spending more power processing the workload
+ * (bad for battery). Not all clients even want their results
+ * immediately and for them we should just let the GPU select its own
+ * frequency to maximise efficiency. To prevent a single client from
+ * forcing the clocks too high for the whole system, we only allow
+ * each client to waitboost once in a busy period.
+ */
+ if (rps) {
+ if (INTEL_GEN(rq->i915) >= 6)
+ gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies);
+ else
+ rps = NULL;
}
- for_each_active(active_mask, idx) {
+ timeout = i915_wait_request(rq, flags, timeout);
+
+out:
+ if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
+ i915_gem_request_retire_upto(rq);
+
+ if (rps && rq->fence.seqno == rq->engine->last_submitted_seqno) {
+ /* The GPU is now idle and this client has stalled.
+ * Since no other client has submitted a request in the
+ * meantime, assume that this client is the only one
+ * supplying work to the GPU but is unable to keep that
+ * work supplied because it is waiting. Since the GPU is
+ * then never kept fully busy, RPS autoclocking will
+ * keep the clocks relatively low, causing further delays.
+ * Compensate by giving the synchronous client credit for
+ * a waitboost next time.
+ */
+ spin_lock(&rq->i915->rps.client_lock);
+ list_del_init(&rps->link);
+ spin_unlock(&rq->i915->rps.client_lock);
+ }
+
+ return timeout;
+}
+
+static long
+i915_gem_object_wait_reservation(struct reservation_object *resv,
+ unsigned int flags,
+ long timeout,
+ struct intel_rps_client *rps)
+{
+ struct dma_fence *excl;
+
+ if (flags & I915_WAIT_ALL) {
+ struct dma_fence **shared;
+ unsigned int count, i;
int ret;
- ret = i915_gem_active_wait(&active[idx],
- &obj->base.dev->struct_mutex);
+ ret = reservation_object_get_fences_rcu(resv,
+ &excl, &count, &shared);
if (ret)
return ret;
- }
- resv = i915_gem_object_get_dmabuf_resv(obj);
- if (resv) {
- long err;
+ for (i = 0; i < count; i++) {
+ timeout = i915_gem_object_wait_fence(shared[i],
+ flags, timeout,
+ rps);
+ if (timeout <= 0)
+ break;
+
+ dma_fence_put(shared[i]);
+ }
- err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
- MAX_SCHEDULE_TIMEOUT);
- if (err < 0)
- return err;
+ for (; i < count; i++)
+ dma_fence_put(shared[i]);
+ kfree(shared);
+ } else {
+ excl = reservation_object_get_excl_rcu(resv);
}
- return 0;
+ if (excl && timeout > 0)
+ timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps);
+
+ dma_fence_put(excl);
+
+ return timeout;
}
-/* A nonblocking variant of the above wait. Must be called prior to
- * acquiring the mutex for the object, as the object state may change
- * during this call. A reference must be held by the caller for the object.
+/**
+ * Waits for rendering to the object to be completed
+ * @obj: i915 gem object
+ * @flags: how to wait (under a lock, for all rendering or just for writes etc)
+ * @timeout: how long to wait
+ * @rps: client (user process) to charge for any waitboosting
*/
-static __must_check int
-__unsafe_wait_rendering(struct drm_i915_gem_object *obj,
- struct intel_rps_client *rps,
- bool readonly)
+int
+i915_gem_object_wait(struct drm_i915_gem_object *obj,
+ unsigned int flags,
+ long timeout,
+ struct intel_rps_client *rps)
{
+ struct reservation_object *resv;
struct i915_gem_active *active;
unsigned long active_mask;
int idx;
- active_mask = __I915_BO_ACTIVE(obj);
- if (!active_mask)
- return 0;
+ might_sleep();
+#if IS_ENABLED(CONFIG_LOCKDEP)
+ GEM_BUG_ON(debug_locks &&
+ !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
+ !!(flags & I915_WAIT_LOCKED));
+#endif
+ GEM_BUG_ON(timeout < 0);
- if (!readonly) {
+ if (flags & I915_WAIT_ALL) {
active = obj->last_read;
+ active_mask = i915_gem_object_get_active(obj);
} else {
active_mask = 1;
active = &obj->last_write;
}
for_each_active(active_mask, idx) {
- int ret;
-
- ret = i915_gem_active_wait_unlocked(&active[idx],
- I915_WAIT_INTERRUPTIBLE,
- NULL, rps);
- if (ret)
- return ret;
+ struct drm_i915_gem_request *request;
+
+ request = i915_gem_active_get_unlocked(&active[idx]);
+ if (request) {
+ timeout = i915_gem_object_wait_fence(&request->fence,
+ flags, timeout,
+ rps);
+ i915_gem_request_put(request);
+ }
+ if (timeout < 0)
+ return timeout;
}
- return 0;
+ resv = i915_gem_object_get_dmabuf_resv(obj);
+ if (resv)
+ timeout = i915_gem_object_wait_reservation(resv,
+ flags, timeout,
+ rps);
+ return timeout < 0 ? timeout : 0;
}
static struct intel_rps_client *to_rps_client(struct drm_file *file)
@@ -449,12 +538,18 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
struct drm_device *dev = obj->base.dev;
void *vaddr = obj->phys_handle->vaddr + args->offset;
char __user *user_data = u64_to_user_ptr(args->data_ptr);
- int ret = 0;
+ int ret;
/* We manually control the domain here and pretend that it
* remains coherent i.e. in the GTT domain, like shmem_pwrite.
*/
- ret = i915_gem_object_wait_rendering(obj, false);
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT,
+ to_rps_client(file_priv));
if (ret)
return ret;
@@ -614,12 +709,17 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
{
int ret;
- *needs_clflush = 0;
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+ *needs_clflush = 0;
if (!i915_gem_object_has_struct_page(obj))
return -ENODEV;
- ret = i915_gem_object_wait_rendering(obj, true);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED,
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret)
return ret;
@@ -661,11 +761,18 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
{
int ret;
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
*needs_clflush = 0;
if (!i915_gem_object_has_struct_page(obj))
return -ENODEV;
- ret = i915_gem_object_wait_rendering(obj, false);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret)
return ret;
@@ -1051,7 +1158,10 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
trace_i915_gem_object_pread(obj, args->offset, args->size);
- ret = __unsafe_wait_rendering(obj, to_rps_client(file), true);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT,
+ to_rps_client(file));
if (ret)
goto err;
@@ -1449,7 +1559,11 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
trace_i915_gem_object_pwrite(obj, args->offset, args->size);
- ret = __unsafe_wait_rendering(obj, to_rps_client(file), false);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT,
+ to_rps_client(file));
if (ret)
goto err;
@@ -1536,7 +1650,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
* We will repeat the flush holding the lock in the normal manner
* to catch cases where we are gazumped.
*/
- ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ (write_domain ? I915_WAIT_ALL : 0),
+ MAX_SCHEDULE_TIMEOUT,
+ to_rps_client(file));
if (ret)
goto err;
@@ -1772,7 +1890,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
* repeat the flush holding the lock in the normal manner to catch cases
* where we are gazumped.
*/
- ret = __unsafe_wait_rendering(obj, NULL, !write);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret)
goto err;
@@ -2817,6 +2938,17 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
mutex_unlock(&obj->base.dev->struct_mutex);
}
+static unsigned long to_wait_timeout(s64 timeout_ns)
+{
+ if (timeout_ns < 0)
+ return MAX_SCHEDULE_TIMEOUT;
+
+ if (timeout_ns == 0)
+ return 0;
+
+ return nsecs_to_jiffies_timeout(timeout_ns);
+}
+
/**
* i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
* @dev: drm device pointer
@@ -2845,10 +2977,9 @@ int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct drm_i915_gem_wait *args = data;
- struct intel_rps_client *rps = to_rps_client(file);
struct drm_i915_gem_object *obj;
- unsigned long active;
- int idx, ret = 0;
+ ktime_t start;
+ long ret;
if (args->flags != 0)
return -EINVAL;
@@ -2857,14 +2988,17 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
if (!obj)
return -ENOENT;
- active = __I915_BO_ACTIVE(obj);
- for_each_active(active, idx) {
- s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
- ret = i915_gem_active_wait_unlocked(&obj->last_read[idx],
- I915_WAIT_INTERRUPTIBLE,
- timeout, rps);
- if (ret)
- break;
+ start = ktime_get();
+
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
+ to_wait_timeout(args->timeout_ns),
+ to_rps_client(file));
+
+ if (args->timeout_ns > 0) {
+ args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
+ if (args->timeout_ns < 0)
+ args->timeout_ns = 0;
}
i915_gem_object_put_unlocked(obj);
@@ -3283,7 +3417,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
uint32_t old_write_domain, old_read_domains;
int ret;
- ret = i915_gem_object_wait_rendering(obj, !write);
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ (write ? I915_WAIT_ALL : 0),
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret)
return ret;
@@ -3400,7 +3540,12 @@ restart:
* If we wait upon the object, we know that all the bound
* VMA are no longer active.
*/
- ret = i915_gem_object_wait_rendering(obj, false);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret)
return ret;
@@ -3647,7 +3792,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
uint32_t old_write_domain, old_read_domains;
int ret;
- ret = i915_gem_object_wait_rendering(obj, !write);
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ (write ? I915_WAIT_ALL : 0),
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret)
return ret;
@@ -3703,7 +3854,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
struct drm_i915_file_private *file_priv = file->driver_priv;
unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
struct drm_i915_gem_request *request, *target = NULL;
- int ret;
+ long ret;
/* ABI: return -EIO if already wedged */
if (i915_terminally_wedged(&dev_priv->gpu_error))
@@ -3730,10 +3881,12 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
if (target == NULL)
return 0;
- ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
+ ret = i915_wait_request(target,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
i915_gem_request_put(target);
- return ret;
+ return ret < 0 ? ret : 0;
}
static bool