author		Chris Wilson	2017-08-16 10:52:08 +0200
committer	Chris Wilson	2017-08-18 12:59:02 +0200
commit		d1b48c1e7184d9bc4ae6d7f9fe2eed9efed11ffc (patch)
tree		cbb1cb9f87c16ba6c17cbd47279979c823555d70 /drivers/gpu/drm/i915/i915_gem_execbuffer.c
parent		drm/i915: Simplify eb_lookup_vmas() (diff)
drm/i915: Replace execbuf vma ht with an idr
This was the competing idea long ago, but it was only with the rewrite of the
idr as a radixtree and using the radixtree directly ourselves, along with the
realisation that we can store the vma directly in the radixtree and only need
a list for the reverse mapping, that made the patch performant enough to
displace using a hashtable. Though the vma ht is fast and doesn't require any
extra allocation (as we can embed the node inside the vma), it does require a
thread for resizing and serialization and will have the occasional slow
lookup. That is hairy enough to investigate alternatives and favour them if
equivalent in peak performance.

One advantage of allocating an indirection entry is that we can support a
single shared bo between many clients, something that was done on a
first-come, first-served basis for shared GGTT vma previously. To offset the
extra allocations, we create yet another kmem_cache for them.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170816085210.4199-5-chris@chris-wilson.co.uk
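In outline, the scheme the diff below introduces looks like this. The sketch
is condensed, kernel-style C rather than standalone-compilable code: the
struct layout is inferred from the fields the hunks touch (the real
definition lands elsewhere in this series), and the eb_lookup_one() wrapper
together with its obj/vma parameters is hypothetical, standing in for the
handle-to-object resolution and reference handling that the hunks show in
full.

	/* One entry per (context, handle) mapping; the indirection that
	 * lets a single bo be shared between many contexts. */
	struct i915_lut_handle {
		struct list_head obj_link;	/* chained on obj->lut_list */
		struct list_head ctx_link;	/* chained on ctx->handles_list */
		struct i915_gem_context *ctx;
		u32 handle;
	};

	static struct i915_vma *
	eb_lookup_one(struct i915_execbuffer *eb, u32 handle,
		      struct drm_i915_gem_object *obj, struct i915_vma *vma)
	{
		struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
		struct i915_lut_handle *lut;
		struct i915_vma *cached;
		int err;

		/* Fast path: a plain radix-tree walk, no hashing, no resize
		 * worker to flush and no occasional slow bucket scan. */
		cached = radix_tree_lookup(handles_vma, handle);
		if (likely(cached))
			return cached;

		/* Miss: memoise the vma for every later execbuf on this ctx.
		 * The entry comes from its own kmem_cache to offset the cost
		 * of the extra allocation. */
		lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
		if (unlikely(!lut))
			return ERR_PTR(-ENOMEM);

		err = radix_tree_insert(handles_vma, handle, vma);
		if (unlikely(err)) {
			kfree(lut);	/* never became visible to anyone */
			return ERR_PTR(err);
		}

		/* Reverse mappings: closing either the object or the context
		 * can find and delete the radix-tree slot without a scan. */
		list_add(&lut->obj_link, &obj->lut_list);
		list_add(&lut->ctx_link, &eb->ctx->handles_list);
		lut->ctx = eb->ctx;
		lut->handle = handle;

		return vma;
	}

The trade against the old vma ht is one allocation per first-seen handle in
exchange for dropping the resize worker and its serialisation, and the
indirection entry is what removes the old first-come, first-served
restriction on shared GGTT vma.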
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--	drivers/gpu/drm/i915/i915_gem_execbuffer.c	79
1 file changed, 24 insertions, 55 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 95e461259d24..9f1057c4cf1c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -450,9 +450,7 @@ eb_validate_vma(struct i915_execbuffer *eb,
 }
 
 static int
-eb_add_vma(struct i915_execbuffer *eb,
-	   unsigned int i, struct i915_vma *vma,
-	   unsigned int flags)
+eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 	int err;
@@ -482,7 +480,7 @@ eb_add_vma(struct i915_execbuffer *eb,
 	 * to find the right target VMA when doing relocations.
 	 */
 	eb->vma[i] = vma;
-	eb->flags[i] = entry->flags | flags;
+	eb->flags[i] = entry->flags;
 	vma->exec_flags = &eb->flags[i];
 
 	err = 0;
@@ -647,19 +645,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
 	} while (1);
 }
 
-static inline struct hlist_head *
-ht_head(const struct i915_gem_context_vma_lut *lut, u32 handle)
-{
-	return &lut->ht[hash_32(handle, lut->ht_bits)];
-}
-
-static inline bool
-ht_needs_resize(const struct i915_gem_context_vma_lut *lut)
-{
-	return (4*lut->ht_count > 3*lut->ht_size ||
-		4*lut->ht_count + 1 < lut->ht_size);
-}
-
 static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
 {
 	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
@@ -688,7 +673,7 @@ static int eb_select_context(struct i915_execbuffer *eb)
 
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
-	struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
+	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
 	struct drm_i915_gem_object *uninitialized_var(obj);
 	unsigned int i;
 	int err;
@@ -702,24 +687,14 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	INIT_LIST_HEAD(&eb->relocs);
 	INIT_LIST_HEAD(&eb->unbound);
 
-	if (unlikely(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS))
-		flush_work(&lut->resize);
-	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
-
 	for (i = 0; i < eb->buffer_count; i++) {
 		u32 handle = eb->exec[i].handle;
-		struct hlist_head *hl = ht_head(lut, handle);
-		unsigned int flags = 0;
+		struct i915_lut_handle *lut;
 		struct i915_vma *vma;
 
-		hlist_for_each_entry(vma, hl, ctx_node) {
-			GEM_BUG_ON(vma->ctx != eb->ctx);
-
-			if (vma->ctx_handle != handle)
-				continue;
-
+		vma = radix_tree_lookup(handles_vma, handle);
+		if (likely(vma))
 			goto add_vma;
-		}
 
 		obj = i915_gem_object_lookup(eb->file, handle);
 		if (unlikely(!obj)) {
@@ -733,26 +708,28 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 			goto err_obj;
 		}
 
-		/* First come, first served */
-		if (!vma->ctx) {
-			vma->ctx = eb->ctx;
-			vma->ctx_handle = handle;
-			hlist_add_head(&vma->ctx_node, hl);
-			lut->ht_count++;
-			lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
-			if (i915_vma_is_ggtt(vma)) {
-				GEM_BUG_ON(obj->vma_hashed);
-				obj->vma_hashed = vma;
-			}
+		lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
+		if (unlikely(!lut)) {
+			err = -ENOMEM;
+			goto err_obj;
+		}
 
-			/* transfer ref to ctx */
-			obj = NULL;
-		} else {
-			flags = __EXEC_OBJECT_HAS_REF;
+		err = radix_tree_insert(handles_vma, handle, vma);
+		if (unlikely(err)) {
+			kfree(lut);
+			goto err_obj;
 		}
 
+		list_add(&lut->obj_link, &obj->lut_list);
+		list_add(&lut->ctx_link, &eb->ctx->handles_list);
+		lut->ctx = eb->ctx;
+		lut->handle = handle;
+
+		/* transfer ref to ctx */
+		obj = NULL;
+
 add_vma:
-		err = eb_add_vma(eb, i, vma, flags);
+		err = eb_add_vma(eb, i, vma);
 		if (unlikely(err))
 			goto err_obj;
 
@@ -760,13 +737,6 @@ add_vma:
 		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 	}
 
-	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
-		if (ht_needs_resize(lut))
-			queue_work(system_highpri_wq, &lut->resize);
-		else
-			lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
-	}
-
 	/* take note of the batch buffer before we might reorder the lists */
 	i = eb_batch_index(eb);
 	eb->batch = eb->vma[i];
@@ -794,7 +764,6 @@ err_obj:
 	i915_gem_object_put(obj);
 err_vma:
 	eb->vma[i] = NULL;
-	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	return err;
 }
 