summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gem_context.h
diff options
context:
space:
mode:
authorChris Wilson2017-06-16 16:05:16 +0200
committerChris Wilson2017-06-16 17:54:04 +0200
commit4ff4b44cbb70c269259958cbcc48d7b8a2cb9ec8 (patch)
tree19b5eda36527b1f71caa77ada46083263fc01e69 /drivers/gpu/drm/i915/i915_gem_context.h
parentdrm/i915: Fix retrieval of hangcheck stats (diff)
downloadkernel-qcow2-linux-4ff4b44cbb70c269259958cbcc48d7b8a2cb9ec8.tar.gz
kernel-qcow2-linux-4ff4b44cbb70c269259958cbcc48d7b8a2cb9ec8.tar.xz
kernel-qcow2-linux-4ff4b44cbb70c269259958cbcc48d7b8a2cb9ec8.zip
drm/i915: Store a direct lookup from object handle to vma
The advent of full-ppgtt lead to an extra indirection between the object and its binding. That extra indirection has a noticeable impact on how fast we can convert from the user handles to our internal vma for execbuffer. In order to bypass the extra indirection, we use a resizable hashtable to jump from the object to the per-ctx vma. rhashtable was considered but we don't need the online resizing feature and the extra complexity proved to undermine its usefulness. Instead, we simply reallocate the hastable on demand in a background task and serialize it before iterating. In non-full-ppgtt modes, multiple files and multiple contexts can share the same vma. This leads to having multiple possible handle->vma links, so we only use the first to establish the fast path. The majority of buffers are not shared and so we should still be able to realise speedups with multiple clients. v2: Prettier names, more magic. v3: Many style tweaks, most notably hiding the misuse of execobj[].rsvd2 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_context.h')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_context.h26
1 files changed, 26 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 4af2ab94558b..82c99ba92ad3 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -143,6 +143,32 @@ struct i915_gem_context {
/** ggtt_offset_bias: placement restriction for context objects */
u32 ggtt_offset_bias;
+ struct i915_gem_context_vma_lut {
+ /** ht_size: last request size to allocate the hashtable for. */
+ unsigned int ht_size;
+#define I915_CTX_RESIZE_IN_PROGRESS BIT(0)
+ /** ht_bits: real log2(size) of hashtable. */
+ unsigned int ht_bits;
+ /** ht_count: current number of entries inside the hashtable */
+ unsigned int ht_count;
+
+ /** ht: the array of buckets comprising the simple hashtable */
+ struct hlist_head *ht;
+
+ /**
+ * resize: After an execbuf completes, we check the load factor
+ * of the hashtable. If the hashtable is too full, or too empty,
+ * we schedule a task to resize the hashtable. During the
+ * resize, the entries are moved between different buckets and
+ * so we cannot simultaneously read the hashtable as it is
+ * being resized (unlike rhashtable). Therefore we treat the
+ * active work as a strong barrier, pausing a subsequent
+ * execbuf to wait for the resize worker to complete, if
+ * required.
+ */
+ struct work_struct resize;
+ } vma_lut;
+
/** engine: per-engine logical HW state */
struct intel_context {
struct i915_vma *state;