path: root/drivers/gpu/drm/i915/i915_gem_execbuffer.c
author		Chris Wilson	2012-03-26 10:10:27 +0200
committer	Daniel Vetter	2012-03-27 13:16:17 +0200
commit		dabdfe021ab1e985e6566009c774fb03f14b568e (patch)
tree		12372e0f40a428cef1e86f02886b978d37fc30af /drivers/gpu/drm/i915/i915_gem_execbuffer.c
parent		drm/i915: clear the entire gtt when using gem (diff)
drm/i915: Avoid using mappable space for relocation processing through the CPU
We try to avoid writing the relocations through the uncached GTT, if the buffer is currently in the CPU write domain and so will be flushed out to main memory afterwards anyway. Also on SandyBridge we can safely write to the pages in cacheable memory, so long as the buffer is LLC mapped. In either of these cases, we therefore do not need to force the reallocation of the buffer into the mappable region of the GTT, reducing the aperture pressure.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
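A minimal standalone sketch of the decision this patch introduces (the types, constants and helper names below are simplified stand-ins, not the kernel's drm_i915_gem_object API): a relocation can be written with a plain CPU store whenever the object is already in the CPU write domain or is mapped cacheable (LLC), so only the remaining objects need mappable GTT space for relocation processing.

	#include <stdbool.h>

	/* Simplified stand-ins for the relevant object fields (assumption,
	 * not the kernel's definitions). */
	#define DOMAIN_CPU	0x1

	enum cache_level_sketch { CACHE_NONE, CACHE_LLC };

	struct gem_object_sketch {
		unsigned int write_domain;		/* e.g. DOMAIN_CPU */
		enum cache_level_sketch cache_level;	/* CACHE_LLC when LLC mapped */
	};

	/* Mirrors the logic of use_cpu_reloc(): the CPU path is safe if the
	 * object is in the CPU write domain (it will be flushed to memory
	 * anyway) or its mapping is cacheable/coherent (LLC). */
	static bool can_reloc_via_cpu(const struct gem_object_sketch *obj)
	{
		return obj->write_domain == DOMAIN_CPU ||
		       obj->cache_level != CACHE_NONE;
	}

	/* Only objects whose relocations must still be written through the
	 * GTT need a binding in the mappable aperture. */
	static bool needs_mappable_for_reloc(const struct gem_object_sketch *obj,
					     unsigned int relocation_count)
	{
		return relocation_count && !can_reloc_via_cpu(obj);
	}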
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--	drivers/gpu/drm/i915/i915_gem_execbuffer.c	36
1 file changed, 25 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1fa01313d89f..eb85860001ec 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -266,6 +266,12 @@ eb_destroy(struct eb_objects *eb)
kfree(eb);
}
+static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
+{
+ return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
+ obj->cache_level != I915_CACHE_NONE);
+}
+
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
struct eb_objects *eb,
@@ -354,11 +360,19 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
return ret;
}
+ /* We can't wait for rendering with pagefaults disabled */
+ if (obj->active && in_atomic())
+ return -EFAULT;
+
reloc->delta += target_offset;
- if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
+ if (use_cpu_reloc(obj)) {
uint32_t page_offset = reloc->offset & ~PAGE_MASK;
char *vaddr;
+ ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+ if (ret)
+ return ret;
+
vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
*(uint32_t *)(vaddr + page_offset) = reloc->delta;
kunmap_atomic(vaddr);
@@ -367,10 +381,6 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
uint32_t __iomem *reloc_entry;
void __iomem *reloc_page;
- /* We can't wait for rendering with pagefaults disabled */
- if (obj->active && in_atomic())
- return -EFAULT;
-
ret = i915_gem_object_set_to_gtt_domain(obj, 1);
if (ret)
return ret;
@@ -493,6 +503,13 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
#define __EXEC_OBJECT_HAS_FENCE (1<<31)
static int
+need_reloc_mappable(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
+ return entry->relocation_count && !use_cpu_reloc(obj);
+}
+
+static int
pin_and_fence_object(struct drm_i915_gem_object *obj,
struct intel_ring_buffer *ring)
{
@@ -505,8 +522,7 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
- need_mappable =
- entry->relocation_count ? true : need_fence;
+ need_mappable = need_fence || need_reloc_mappable(obj);
ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
if (ret)
@@ -563,8 +579,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
- need_mappable =
- entry->relocation_count ? true : need_fence;
+ need_mappable = need_fence || need_reloc_mappable(obj);
if (need_mappable)
list_move(&obj->exec_list, &ordered_objects);
@@ -604,8 +619,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
- need_mappable =
- entry->relocation_count ? true : need_fence;
+ need_mappable = need_fence || need_reloc_mappable(obj);
if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
(need_mappable && !obj->map_and_fenceable))
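The three hunks above all make the same change to the pinning decision. A before/after comparison of that predicate, in simplified form (the function names and the cpu_reloc_ok parameter are illustrative assumptions, with cpu_reloc_ok standing in for use_cpu_reloc(obj)):

	#include <stdbool.h>

	/* Old rule (illustrative, not the kernel code): any relocation forced
	 * a binding in the mappable aperture. */
	static bool need_mappable_old(bool need_fence, unsigned int relocation_count)
	{
		return relocation_count ? true : need_fence;
	}

	/* New rule (illustrative): only relocations that cannot be written
	 * through the CPU still require mappable space; otherwise mappable
	 * space is needed only for fencing. */
	static bool need_mappable_new(bool need_fence, unsigned int relocation_count,
				      bool cpu_reloc_ok)
	{
		return need_fence || (relocation_count && !cpu_reloc_ok);
	}

With the new rule, a CPU-domain or LLC-cached buffer that carries relocations no longer competes for the mappable region of the GTT, which is the aperture-pressure reduction described in the commit message.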