From 5c693b2b8ae4ec51f0890b7a1368425f8898f0bb Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 13 Dec 2016 16:05:10 +0000 Subject: drm/i915: s/gen8_setup_page_directory/gen8_setup_pdpe/ The function name gen8_setup_page_directory is misleading, and only serves to confuse the reader, it's not setting up a pd, but rather encoding a specific pdpe with a given pd. Cc: Joonas Lahtinen Cc: Chris Wilson Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_gtt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index f698006fe883..f0bbf1df029e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -626,10 +626,10 @@ static void gen8_initialize_pml4(struct i915_address_space *vm, } static void -gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt, - struct i915_page_directory_pointer *pdp, - struct i915_page_directory *pd, - int index) +gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt, + struct i915_page_directory_pointer *pdp, + struct i915_page_directory *pd, + int index) { gen8_ppgtt_pdpe_t *page_directorypo; @@ -1366,7 +1366,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, kunmap_px(ppgtt, page_directory); __set_bit(pdpe, pdp->used_pdpes); - gen8_setup_page_directory(ppgtt, pdp, pd, pdpe); + gen8_setup_pdpe(ppgtt, pdp, pd, pdpe); } free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); -- cgit v1.2.3-55-g7522 From 5684310760536867cf89efc3162dd2e057fed2ac Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 13 Dec 2016 16:05:11 +0000 Subject: drm/i915: s/gen8_setup_page_directory_pointer/gen8_setup_pml4e/ The function name gen8_setup_page_directory_pointer is misleading, and only serves to confuse the reader, it's not setting up a pdp, but rather encoding a specific pml4e with a given pdp. Cc: Joonas Lahtinen Cc: Chris Wilson Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_gtt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index f0bbf1df029e..aa1820d709a5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -642,10 +642,10 @@ gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt, } static void -gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt, - struct i915_pml4 *pml4, - struct i915_page_directory_pointer *pdp, - int index) +gen8_setup_pml4e(struct i915_hw_ppgtt *ppgtt, + struct i915_pml4 *pml4, + struct i915_page_directory_pointer *pdp, + int index) { gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); @@ -1425,7 +1425,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, if (ret) goto err_out; - gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e); + gen8_setup_pml4e(ppgtt, pml4, pdp, pml4e); } bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, -- cgit v1.2.3-55-g7522 From 9e65a37872174bd3615b16fa556377ebf5a3f0cd Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 13 Dec 2016 16:05:12 +0000 Subject: drm/i915: don't open code the pdpe/pml4e clearing Now that it's obvious what the helpers do, we can simplify the code somewhat by using them when clearing the pdpe/pml4e with the relevant scratch entry. Cc: Joonas Lahtinen Cc: Chris Wilson Signed-off-by: Matthew Auld Reviewed-by: Michał Winiarski Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161213160512.7008-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index aa1820d709a5..2d7ab1d35e47 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -793,9 +793,6 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd; uint64_t pdpe; - gen8_ppgtt_pdpe_t *pdpe_vaddr; - gen8_ppgtt_pdpe_t scratch_pdpe = - gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { if (WARN_ON(!pdp->page_directory[pdpe])) @@ -803,11 +800,7 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, if (gen8_ppgtt_clear_pd(vm, pd, start, length)) { __clear_bit(pdpe, pdp->used_pdpes); - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - pdpe_vaddr = kmap_px(pdp); - pdpe_vaddr[pdpe] = scratch_pdpe; - kunmap_px(ppgtt, pdpe_vaddr); - } + gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe); free_pd(vm->i915, pd); } } @@ -832,9 +825,6 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory_pointer *pdp; uint64_t pml4e; - gen8_ppgtt_pml4e_t *pml4e_vaddr; - gen8_ppgtt_pml4e_t scratch_pml4e = - gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC); GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); @@ -844,9 +834,7 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { __clear_bit(pml4e, pml4->used_pml4es); - pml4e_vaddr = kmap_px(pml4); - pml4e_vaddr[pml4e] = scratch_pml4e; - kunmap_px(ppgtt, pml4e_vaddr); + gen8_setup_pml4e(ppgtt, pml4, vm->scratch_pdp, pml4e); free_pdp(vm->i915, pdp); } } -- cgit v1.2.3-55-g7522 From 6649a0b6501d78042fd0fffaaefab1aeee27e75d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Jan 2017 16:16:08 +0000 Subject: drm/i915: Extract tile_row_size for fencing Computing the tile row size of a tiled object (for use with fence registers) is repeated, so extract it to a common helper. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170109161613.11881-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 7 +------ drivers/gpu/drm/i915/i915_gem_fence_reg.c | 6 ++---- drivers/gpu/drm/i915/i915_gem_object.h | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ed3cd1a5f9bb..e8e278f6312e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1696,12 +1696,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, static unsigned int tile_row_pages(struct drm_i915_gem_object *obj) { - u64 size; - - size = i915_gem_object_get_stride(obj); - size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8; - - return size >> PAGE_SHIFT; + return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT; } /** diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 775059e19ab9..399ae7f73184 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -77,16 +77,14 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, val = 0; if (vma) { - unsigned int tiling = i915_gem_object_get_tiling(vma->obj); - bool is_y_tiled = tiling == I915_TILING_Y; unsigned int stride = i915_gem_object_get_stride(vma->obj); - u32 row_size = stride * (is_y_tiled ? 32 : 8); + u32 row_size = i915_gem_object_get_tile_row_size(vma->obj); u32 size = rounddown((u32)vma->node.size, row_size); val = ((vma->node.start + size - 4096) & 0xfffff000) << 32; val |= vma->node.start & 0xfffff000; val |= (u64)((stride / 128) - 1) << fence_pitch_shift; - if (is_y_tiled) + if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y) val |= BIT(I965_FENCE_TILING_Y_SHIFT); val |= I965_FENCE_REG_VALID; } diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 6a368de9d81e..ed3f4d5fd49f 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -317,6 +317,26 @@ i915_gem_object_get_stride(struct drm_i915_gem_object *obj) return obj->tiling_and_stride & STRIDE_MASK; } +static inline unsigned int +i915_gem_tile_height(unsigned int tiling) +{ + GEM_BUG_ON(!tiling); + return tiling == I915_TILING_Y ? 32 : 8; +} + +static inline unsigned int +i915_gem_object_get_tile_height(struct drm_i915_gem_object *obj) +{ + return i915_gem_tile_height(i915_gem_object_get_tiling(obj)); +} + +static inline unsigned int +i915_gem_object_get_tile_row_size(struct drm_i915_gem_object *obj) +{ + return (i915_gem_object_get_stride(obj) * + i915_gem_object_get_tile_height(obj)); +} + static inline struct intel_engine_cs * i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) { -- cgit v1.2.3-55-g7522 From 5b30694b474d00f8588fa367f9562d8f2e4c7075 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Jan 2017 16:16:09 +0000 Subject: drm/i915: Align GGTT sizes to a fence tile row Ensure the view occupies the full tile row so that reads/writes into the VMA do not escape (via fenced detiling) into neighbouring objects - we will pad the object with scratch pages to satisfy the fence. This applies the lazy-tiling we employed on gen2/3 to gen4+. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170109161613.11881-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 5 +++-- drivers/gpu/drm/i915/i915_gem.c | 27 +++++++++++++++++++-------- drivers/gpu/drm/i915/i915_gem_tiling.c | 18 +++++++++--------- drivers/gpu/drm/i915/i915_vma.c | 10 ++++++++-- 4 files changed, 39 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 52d01be956cc..8185229f370f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3361,9 +3361,10 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file); void i915_gem_release(struct drm_device *dev, struct drm_file *file); u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, u64 size, - int tiling_mode); + int tiling_mode, unsigned int stride); u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, - int tiling_mode, bool fenced); + int tiling_mode, unsigned int stride, + bool fenced); int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e8e278f6312e..07cc0d01915f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2021,21 +2021,29 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) * @dev_priv: i915 device * @size: object size * @tiling_mode: tiling mode + * @stride: tiling stride * * Return the required global GTT size for an object, taking into account * potential fence register mapping. */ u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, - u64 size, int tiling_mode) + u64 size, int tiling_mode, unsigned int stride) { u64 ggtt_size; - GEM_BUG_ON(size == 0); + GEM_BUG_ON(!size); - if (INTEL_GEN(dev_priv) >= 4 || - tiling_mode == I915_TILING_NONE) + if (tiling_mode == I915_TILING_NONE) return size; + GEM_BUG_ON(!stride); + + if (INTEL_GEN(dev_priv) >= 4) { + stride *= i915_gem_tile_height(tiling_mode); + GEM_BUG_ON(stride & 4095); + return roundup(size, stride); + } + /* Previous chips need a power-of-two fence region when tiling */ if (IS_GEN3(dev_priv)) ggtt_size = 1024*1024; @@ -2053,15 +2061,17 @@ u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, * @dev_priv: i915 device * @size: object size * @tiling_mode: tiling mode + * @stride: tiling stride * @fenced: is fenced alignment required or not * * Return the required global GTT alignment for an object, taking into account * potential fence register mapping. */ u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, - int tiling_mode, bool fenced) + int tiling_mode, unsigned int stride, + bool fenced) { - GEM_BUG_ON(size == 0); + GEM_BUG_ON(!size); /* * Minimum alignment is 4k (GTT page size), but might be greater @@ -2076,7 +2086,7 @@ u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, * Previous chips need to be aligned to the size of the smallest * fence register that can contain the object. */ - return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode); + return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode, stride); } static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) @@ -3696,7 +3706,8 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, u32 fence_size; fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size, - i915_gem_object_get_tiling(obj)); + i915_gem_object_get_tiling(obj), + i915_gem_object_get_stride(obj)); /* If the required space is larger than the available * aperture, we will not able to find a slot for the * object and unbinding the object now will be in diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 62ad375de6ca..51b8d71876b7 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -117,7 +117,8 @@ i915_tiling_ok(struct drm_i915_private *dev_priv, return true; } -static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode) +static bool i915_vma_fence_prepare(struct i915_vma *vma, + int tiling_mode, unsigned int stride) { struct drm_i915_private *dev_priv = vma->vm->i915; u32 size; @@ -133,7 +134,7 @@ static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode) return false; } - size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode); + size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode, stride); if (vma->node.size < size) return false; @@ -145,20 +146,17 @@ static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode) /* Make the current GTT allocation valid for the change in tiling. */ static int -i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode) +i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, + int tiling_mode, unsigned int stride) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_vma *vma; int ret; if (tiling_mode == I915_TILING_NONE) return 0; - if (INTEL_GEN(dev_priv) >= 4) - return 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (i915_vma_fence_prepare(vma, tiling_mode)) + if (i915_vma_fence_prepare(vma, tiling_mode, stride)) continue; ret = i915_vma_unbind(vma); @@ -255,7 +253,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, * whilst executing a fenced command for an untiled object. */ - err = i915_gem_object_fence_prepare(obj, args->tiling_mode); + err = i915_gem_object_fence_prepare(obj, + args->tiling_mode, + args->stride); if (!err) { struct i915_vma *vma; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 58f2483362ad..734f77b7697f 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -284,11 +284,14 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size, - i915_gem_object_get_tiling(obj)); + i915_gem_object_get_tiling(obj), + i915_gem_object_get_stride(obj)); fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, vma->size, i915_gem_object_get_tiling(obj), + i915_gem_object_get_stride(obj), true); + GEM_BUG_ON(!is_power_of_2(fence_alignment)); fenceable = (vma->node.size == fence_size && (vma->node.start & (fence_alignment - 1)) == 0); @@ -370,12 +373,15 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) size = max(size, vma->size); if (flags & PIN_MAPPABLE) size = i915_gem_get_ggtt_size(dev_priv, size, - i915_gem_object_get_tiling(obj)); + i915_gem_object_get_tiling(obj), + i915_gem_object_get_stride(obj)); alignment = max(max(alignment, vma->display_alignment), i915_gem_get_ggtt_alignment(dev_priv, size, i915_gem_object_get_tiling(obj), + i915_gem_object_get_stride(obj), flags & PIN_MAPPABLE)); + GEM_BUG_ON(!is_power_of_2(alignment)); start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; -- cgit v1.2.3-55-g7522 From 0d4e8f1dbcab9cf68fec951e7e5dbb6d5d8e3425 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Jan 2017 16:16:10 +0000 Subject: drm/i915: Replace WARNs in fence register writes with extensive asserts All of these conditions are prechecked by i915_tiling_ok() before we allow setting the tiling/stride on the object and so we should never fail asserting those conditions before writing the register. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170109161613.11881-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 50 ++++++++++++++----------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 399ae7f73184..26f242359fbd 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -81,8 +81,13 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, u32 row_size = i915_gem_object_get_tile_row_size(vma->obj); u32 size = rounddown((u32)vma->node.size, row_size); - val = ((vma->node.start + size - 4096) & 0xfffff000) << 32; - val |= vma->node.start & 0xfffff000; + GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); + GEM_BUG_ON(vma->node.start & 4095); + GEM_BUG_ON(vma->node.size & 4095); + GEM_BUG_ON(stride & 127); + + val = (vma->node.start + size - 4096) << 32; + val |= vma->node.start; val |= (u64)((stride / 128) - 1) << fence_pitch_shift; if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y) val |= BIT(I965_FENCE_TILING_Y_SHIFT); @@ -120,31 +125,24 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence, unsigned int tiling = i915_gem_object_get_tiling(vma->obj); bool is_y_tiled = tiling == I915_TILING_Y; unsigned int stride = i915_gem_object_get_stride(vma->obj); - int pitch_val; - int tile_width; - WARN((vma->node.start & ~I915_FENCE_START_MASK) || - !is_power_of_2(vma->node.size) || - (vma->node.start & (vma->node.size - 1)), - "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n", - vma->node.start, - i915_vma_is_map_and_fenceable(vma), - vma->node.size); + GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); + GEM_BUG_ON(vma->node.start & ~I915_FENCE_START_MASK); + GEM_BUG_ON(!is_power_of_2(vma->node.size)); + GEM_BUG_ON(vma->node.start & (vma->node.size - 1)); if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) - tile_width = 128; + stride /= 128; else - tile_width = 512; - - /* Note: pitch better be a power of two tile widths */ - pitch_val = stride / tile_width; - pitch_val = ffs(pitch_val) - 1; + stride /= 512; + GEM_BUG_ON(!is_power_of_2(stride)); val = vma->node.start; if (is_y_tiled) val |= BIT(I830_FENCE_TILING_Y_SHIFT); val |= I915_FENCE_SIZE_BITS(vma->node.size); - val |= pitch_val << I830_FENCE_PITCH_SHIFT; + val |= ilog2(stride) << I830_FENCE_PITCH_SHIFT; + val |= I830_FENCE_REG_VALID; } @@ -167,22 +165,18 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence, unsigned int tiling = i915_gem_object_get_tiling(vma->obj); bool is_y_tiled = tiling == I915_TILING_Y; unsigned int stride = i915_gem_object_get_stride(vma->obj); - u32 pitch_val; - - WARN((vma->node.start & ~I830_FENCE_START_MASK) || - !is_power_of_2(vma->node.size) || - (vma->node.start & (vma->node.size - 1)), - "object 0x%08llx not 512K or pot-size 0x%08llx aligned\n", - vma->node.start, vma->node.size); - pitch_val = stride / 128; - pitch_val = ffs(pitch_val) - 1; + GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); + GEM_BUG_ON(vma->node.start & ~I830_FENCE_START_MASK); + GEM_BUG_ON(!is_power_of_2(vma->node.size)); + GEM_BUG_ON(!is_power_of_2(stride / 128)); + GEM_BUG_ON(vma->node.start & (vma->node.size - 1)); val = vma->node.start; if (is_y_tiled) val |= BIT(I830_FENCE_TILING_Y_SHIFT); val |= I830_FENCE_SIZE_BITS(vma->node.size); - val |= pitch_val << I830_FENCE_PITCH_SHIFT; + val |= ilog2(stride / 128) << I830_FENCE_PITCH_SHIFT; val |= I830_FENCE_REG_VALID; } -- cgit v1.2.3-55-g7522 From 944397f04f24eaf05125896dcb601c0e1c917879 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Jan 2017 16:16:11 +0000 Subject: drm/i915: Store required fence size/alignment for GGTT vma The fence size/alignment is a combination of the vma size plus object tiling parameters. Those parameters are rarely changed, making the fence size/alignemnt roughly constant for the lifetime of the VMA. We can simplify subsequent calculations by precalculating the size/alignment required for GGTT vma taking fencing into account (with an update if we do change the tiling or stride). Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170109161613.11881-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 7 ++-- drivers/gpu/drm/i915/i915_gem.c | 27 +++++--------- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 20 +++++----- drivers/gpu/drm/i915/i915_gem_tiling.c | 36 ++++++++++-------- drivers/gpu/drm/i915/i915_vma.c | 61 +++++++++++++++---------------- drivers/gpu/drm/i915/i915_vma.h | 3 ++ 6 files changed, 73 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8185229f370f..5ee76628be98 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3360,11 +3360,10 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int i915_gem_open(struct drm_device *dev, struct drm_file *file); void i915_gem_release(struct drm_device *dev, struct drm_file *file); -u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, u64 size, +u32 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, u32 size, int tiling_mode, unsigned int stride); -u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, - int tiling_mode, unsigned int stride, - bool fenced); +u32 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u32 size, + int tiling_mode, unsigned int stride); int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 07cc0d01915f..1f9496e587dc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2026,10 +2026,10 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) * Return the required global GTT size for an object, taking into account * potential fence register mapping. */ -u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, - u64 size, int tiling_mode, unsigned int stride) +u32 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, + u32 size, int tiling_mode, unsigned int stride) { - u64 ggtt_size; + u32 ggtt_size; GEM_BUG_ON(!size); @@ -2062,14 +2062,12 @@ u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, * @size: object size * @tiling_mode: tiling mode * @stride: tiling stride - * @fenced: is fenced alignment required or not * * Return the required global GTT alignment for an object, taking into account * potential fence register mapping. */ -u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, - int tiling_mode, unsigned int stride, - bool fenced) +u32 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u32 size, + int tiling_mode, unsigned int stride) { GEM_BUG_ON(!size); @@ -2077,9 +2075,7 @@ u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, * Minimum alignment is 4k (GTT page size), but might be greater * if a fence register is needed for the object. */ - if (INTEL_GEN(dev_priv) >= 4 || - (!fenced && (IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))) || - tiling_mode == I915_TILING_NONE) + if (INTEL_GEN(dev_priv) >= 4 || tiling_mode == I915_TILING_NONE) return 4096; /* @@ -3558,7 +3554,7 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) return; if (--vma->obj->pin_display == 0) - vma->display_alignment = 0; + vma->display_alignment = 4096; /* Bump the LRU to try and avoid premature eviction whilst flipping */ if (!i915_vma_is_active(vma)) @@ -3703,11 +3699,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return ERR_PTR(-ENOSPC); if (flags & PIN_MAPPABLE) { - u32 fence_size; - - fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size, - i915_gem_object_get_tiling(obj), - i915_gem_object_get_stride(obj)); /* If the required space is larger than the available * aperture, we will not able to find a slot for the * object and unbinding the object now will be in @@ -3715,7 +3706,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, * the object in and out of the Global GTT and * waste a lot of cycles under the mutex. */ - if (fence_size > dev_priv->ggtt.mappable_end) + if (vma->fence_size > dev_priv->ggtt.mappable_end) return ERR_PTR(-E2BIG); /* If NONBLOCK is set the caller is optimistically @@ -3734,7 +3725,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, * we could try to minimise harm to others. */ if (flags & PIN_NONBLOCK && - fence_size > dev_priv->ggtt.mappable_end / 2) + vma->fence_size > dev_priv->ggtt.mappable_end / 2) return ERR_PTR(-ENOSPC); } diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 26f242359fbd..8b37c4cb311a 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -79,11 +79,11 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, if (vma) { unsigned int stride = i915_gem_object_get_stride(vma->obj); u32 row_size = i915_gem_object_get_tile_row_size(vma->obj); - u32 size = rounddown((u32)vma->node.size, row_size); + u32 size = rounddown((u32)vma->fence_size, row_size); GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); GEM_BUG_ON(vma->node.start & 4095); - GEM_BUG_ON(vma->node.size & 4095); + GEM_BUG_ON(vma->fence_size & 4095); GEM_BUG_ON(stride & 127); val = (vma->node.start + size - 4096) << 32; @@ -128,8 +128,8 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence, GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); GEM_BUG_ON(vma->node.start & ~I915_FENCE_START_MASK); - GEM_BUG_ON(!is_power_of_2(vma->node.size)); - GEM_BUG_ON(vma->node.start & (vma->node.size - 1)); + GEM_BUG_ON(!is_power_of_2(vma->fence_size)); + GEM_BUG_ON(vma->node.start & (vma->fence_size - 1)); if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) stride /= 128; @@ -140,7 +140,7 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence, val = vma->node.start; if (is_y_tiled) val |= BIT(I830_FENCE_TILING_Y_SHIFT); - val |= I915_FENCE_SIZE_BITS(vma->node.size); + val |= I915_FENCE_SIZE_BITS(vma->fence_size); val |= ilog2(stride) << I830_FENCE_PITCH_SHIFT; val |= I830_FENCE_REG_VALID; @@ -162,20 +162,18 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence, val = 0; if (vma) { - unsigned int tiling = i915_gem_object_get_tiling(vma->obj); - bool is_y_tiled = tiling == I915_TILING_Y; unsigned int stride = i915_gem_object_get_stride(vma->obj); GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); GEM_BUG_ON(vma->node.start & ~I830_FENCE_START_MASK); - GEM_BUG_ON(!is_power_of_2(vma->node.size)); + GEM_BUG_ON(!is_power_of_2(vma->fence_size)); GEM_BUG_ON(!is_power_of_2(stride / 128)); - GEM_BUG_ON(vma->node.start & (vma->node.size - 1)); + GEM_BUG_ON(vma->node.start & (vma->fence_size - 1)); val = vma->node.start; - if (is_y_tiled) + if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y) val |= BIT(I830_FENCE_TILING_Y_SHIFT); - val |= I830_FENCE_SIZE_BITS(vma->node.size); + val |= I830_FENCE_SIZE_BITS(vma->fence_size); val |= ilog2(stride / 128) << I830_FENCE_PITCH_SHIFT; val |= I830_FENCE_REG_VALID; } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 51b8d71876b7..23a896cd934f 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -120,25 +120,18 @@ i915_tiling_ok(struct drm_i915_private *dev_priv, static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode, unsigned int stride) { - struct drm_i915_private *dev_priv = vma->vm->i915; - u32 size; + struct drm_i915_private *i915 = vma->vm->i915; + u32 size, alignment; if (!i915_vma_is_map_and_fenceable(vma)) return true; - if (INTEL_GEN(dev_priv) == 3) { - if (vma->node.start & ~I915_FENCE_START_MASK) - return false; - } else { - if (vma->node.start & ~I830_FENCE_START_MASK) - return false; - } - - size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode, stride); + size = i915_gem_get_ggtt_size(i915, vma->size, tiling_mode, stride); if (vma->node.size < size) return false; - if (vma->node.start & (size - 1)) + alignment = i915_gem_get_ggtt_alignment(i915, vma->size, tiling_mode, stride); + if (vma->node.start & (alignment - 1)) return false; return true; @@ -156,6 +149,9 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, return 0; list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!i915_vma_is_ggtt(vma)) + break; + if (i915_vma_fence_prepare(vma, tiling_mode, stride)) continue; @@ -277,10 +273,18 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, mutex_unlock(&obj->mm.lock); list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!vma->fence) - continue; - - vma->fence->dirty = true; + if (!i915_vma_is_ggtt(vma)) + break; + + vma->fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size, + args->tiling_mode, + args->stride); + vma->fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, vma->size, + args->tiling_mode, + args->stride); + + if (vma->fence) + vma->fence->dirty = true; } obj->tiling_and_stride = args->stride | args->tiling_mode; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 734f77b7697f..a605d735662c 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -91,6 +91,7 @@ __i915_vma_create(struct drm_i915_gem_object *obj, vma->vm = vm; vma->obj = obj; vma->size = obj->base.size; + vma->display_alignment = 4096; if (view) { vma->ggtt_view = *view; @@ -110,6 +111,17 @@ __i915_vma_create(struct drm_i915_gem_object *obj, } if (i915_is_ggtt(vm)) { + GEM_BUG_ON(overflows_type(vma->size, u32)); + vma->fence_size = i915_gem_get_ggtt_size(vm->i915, vma->size, + i915_gem_object_get_tiling(obj), + i915_gem_object_get_stride(obj)); + GEM_BUG_ON(vma->fence_size & 4095); + + vma->fence_alignment = i915_gem_get_ggtt_alignment(vm->i915, vma->size, + i915_gem_object_get_tiling(obj), + i915_gem_object_get_stride(obj)); + GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); + vma->flags |= I915_VMA_GGTT; list_add(&vma->obj_link, &obj->vma_list); } else { @@ -277,34 +289,24 @@ i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) { - struct drm_i915_gem_object *obj = vma->obj; - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); bool mappable, fenceable; - u32 fence_size, fence_alignment; - fence_size = i915_gem_get_ggtt_size(dev_priv, - vma->size, - i915_gem_object_get_tiling(obj), - i915_gem_object_get_stride(obj)); - fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, - vma->size, - i915_gem_object_get_tiling(obj), - i915_gem_object_get_stride(obj), - true); - GEM_BUG_ON(!is_power_of_2(fence_alignment)); - - fenceable = (vma->node.size == fence_size && - (vma->node.start & (fence_alignment - 1)) == 0); - - mappable = (vma->node.start + fence_size <= - dev_priv->ggtt.mappable_end); + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON(!vma->fence_size); /* * Explicitly disable for rotated VMA since the display does not * need the fence and the VMA is not accessible to other users. */ - if (mappable && fenceable && - vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED) + if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) + return; + + fenceable = (vma->node.size >= vma->fence_size && + (vma->node.start & (vma->fence_alignment - 1)) == 0); + + mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end; + + if (mappable && fenceable) vma->flags |= I915_VMA_CAN_FENCE; else vma->flags &= ~I915_VMA_CAN_FENCE; @@ -371,17 +373,12 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); size = max(size, vma->size); - if (flags & PIN_MAPPABLE) - size = i915_gem_get_ggtt_size(dev_priv, size, - i915_gem_object_get_tiling(obj), - i915_gem_object_get_stride(obj)); - - alignment = max(max(alignment, vma->display_alignment), - i915_gem_get_ggtt_alignment(dev_priv, size, - i915_gem_object_get_tiling(obj), - i915_gem_object_get_stride(obj), - flags & PIN_MAPPABLE)); - GEM_BUG_ON(!is_power_of_2(alignment)); + alignment = max(alignment, vma->display_alignment); + if (flags & PIN_MAPPABLE) { + size = max_t(typeof(size), size, vma->fence_size); + alignment = max_t(typeof(alignment), + alignment, vma->fence_alignment); + } start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index e3b2b3b1e056..a969bbb65871 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -55,6 +55,9 @@ struct i915_vma { u64 size; u64 display_alignment; + u32 fence_size; + u32 fence_alignment; + unsigned int flags; /** * How many users have pinned this object in GTT space. The following -- cgit v1.2.3-55-g7522 From cea84d16c3da717e99ea43108ca50bcc72ff50a1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Jan 2017 16:16:12 +0000 Subject: drm/i915: Remove the rounding down of the gen4+ fence region Restricting the fence to the end of the previous tile-row breaks access to the final portion of the object. On gen2/3 we employed lazy fencing to pad out the fence with scratch page to provide access to the tail, and now we also pad out the object on gen4+ we can apply the same fix. Fixes: af1a7301c7cf ("drm/i915: Only fence tiled region of object.") Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170109161613.11881-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 8b37c4cb311a..9e65696a960c 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -78,15 +78,13 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, val = 0; if (vma) { unsigned int stride = i915_gem_object_get_stride(vma->obj); - u32 row_size = i915_gem_object_get_tile_row_size(vma->obj); - u32 size = rounddown((u32)vma->fence_size, row_size); GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); GEM_BUG_ON(vma->node.start & 4095); GEM_BUG_ON(vma->fence_size & 4095); GEM_BUG_ON(stride & 127); - val = (vma->node.start + size - 4096) << 32; + val = (vma->node.start + vma->fence_size - 4096) << 32; val |= vma->node.start; val |= (u64)((stride / 128) - 1) << fence_pitch_shift; if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y) -- cgit v1.2.3-55-g7522 From 91d4e0aa923e13ef832e9d793b6d080b6318f2d9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Jan 2017 16:16:13 +0000 Subject: drm/i915: Move ggtt fence/alignment to i915_gem_tiling.c Rename i915_gem_get_ggtt_size() and i915_gem_get_ggtt_alignment() to i915_gem_fence_size() and i915_gem_fence_alignment() respectively to better match usage. Similarly move the pair of functions into i915_gem_tiling.c next to the fence restrictions. Suggested-by: Joonas Lahtinen Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170109161613.11881-6-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 10 ++-- drivers/gpu/drm/i915/i915_gem.c | 69 --------------------------- drivers/gpu/drm/i915/i915_gem_tiling.c | 85 ++++++++++++++++++++++++++++++---- drivers/gpu/drm/i915/i915_vma.c | 12 ++--- 4 files changed, 88 insertions(+), 88 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5ee76628be98..2b325032fedc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3360,11 +3360,6 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int i915_gem_open(struct drm_device *dev, struct drm_file *file); void i915_gem_release(struct drm_device *dev, struct drm_file *file); -u32 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, u32 size, - int tiling_mode, unsigned int stride); -u32 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u32 size, - int tiling_mode, unsigned int stride); - int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level); @@ -3531,6 +3526,11 @@ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_objec i915_gem_object_is_tiled(obj); } +u32 i915_gem_fence_size(struct drm_i915_private *dev_priv, u32 size, + unsigned int tiling, unsigned int stride); +u32 i915_gem_fence_alignment(struct drm_i915_private *dev_priv, u32 size, + unsigned int tiling, unsigned int stride); + /* i915_debugfs.c */ #ifdef CONFIG_DEBUG_FS int i915_debugfs_register(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1f9496e587dc..d1cf0ed6821d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2016,75 +2016,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) } } -/** - * i915_gem_get_ggtt_size - return required global GTT size for an object - * @dev_priv: i915 device - * @size: object size - * @tiling_mode: tiling mode - * @stride: tiling stride - * - * Return the required global GTT size for an object, taking into account - * potential fence register mapping. - */ -u32 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, - u32 size, int tiling_mode, unsigned int stride) -{ - u32 ggtt_size; - - GEM_BUG_ON(!size); - - if (tiling_mode == I915_TILING_NONE) - return size; - - GEM_BUG_ON(!stride); - - if (INTEL_GEN(dev_priv) >= 4) { - stride *= i915_gem_tile_height(tiling_mode); - GEM_BUG_ON(stride & 4095); - return roundup(size, stride); - } - - /* Previous chips need a power-of-two fence region when tiling */ - if (IS_GEN3(dev_priv)) - ggtt_size = 1024*1024; - else - ggtt_size = 512*1024; - - while (ggtt_size < size) - ggtt_size <<= 1; - - return ggtt_size; -} - -/** - * i915_gem_get_ggtt_alignment - return required global GTT alignment - * @dev_priv: i915 device - * @size: object size - * @tiling_mode: tiling mode - * @stride: tiling stride - * - * Return the required global GTT alignment for an object, taking into account - * potential fence register mapping. - */ -u32 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u32 size, - int tiling_mode, unsigned int stride) -{ - GEM_BUG_ON(!size); - - /* - * Minimum alignment is 4k (GTT page size), but might be greater - * if a fence register is needed for the object. - */ - if (INTEL_GEN(dev_priv) >= 4 || tiling_mode == I915_TILING_NONE) - return 4096; - - /* - * Previous chips need to be aligned to the size of the smallest - * fence register that can contain the object. - */ - return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode, stride); -} - static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 23a896cd934f..30cb869759fb 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -58,6 +58,75 @@ * invovlement. */ +/** + * i915_gem_fence_size - required global GTT size for a fence + * @i915: i915 device + * @size: object size + * @tiling: tiling mode + * @stride: tiling stride + * + * Return the required global GTT size for a fence (view of a tiled object), + * taking into account potential fence register mapping. + */ +u32 i915_gem_fence_size(struct drm_i915_private *i915, + u32 size, unsigned int tiling, unsigned int stride) +{ + u32 ggtt_size; + + GEM_BUG_ON(!size); + + if (tiling == I915_TILING_NONE) + return size; + + GEM_BUG_ON(!stride); + + if (INTEL_GEN(i915) >= 4) { + stride *= i915_gem_tile_height(tiling); + GEM_BUG_ON(stride & 4095); + return roundup(size, stride); + } + + /* Previous chips need a power-of-two fence region when tiling */ + if (IS_GEN3(i915)) + ggtt_size = 1024*1024; + else + ggtt_size = 512*1024; + + while (ggtt_size < size) + ggtt_size <<= 1; + + return ggtt_size; +} + +/** + * i915_gem_fence_alignment - required global GTT alignment for a fence + * @i915: i915 device + * @size: object size + * @tiling: tiling mode + * @stride: tiling stride + * + * Return the required global GTT alignment for a fence (a view of a tiled + * object), taking into account potential fence register mapping. + */ +u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size, + unsigned int tiling, unsigned int stride) +{ + GEM_BUG_ON(!size); + + /* + * Minimum alignment is 4k (GTT page size), but might be greater + * if a fence register is needed for the object. + */ + if (INTEL_GEN(i915) >= 4 || tiling == I915_TILING_NONE) + return 4096; + + /* + * Previous chips need to be aligned to the size of the smallest + * fence register that can contain the object. + */ + return i915_gem_fence_size(i915, size, tiling, stride); +} + /* Check pitch constriants for all chips & tiling formats */ static bool i915_tiling_ok(struct drm_i915_private *dev_priv, @@ -126,11 +195,11 @@ static bool i915_vma_fence_prepare(struct i915_vma *vma, if (!i915_vma_is_map_and_fenceable(vma)) return true; - size = i915_gem_get_ggtt_size(i915, vma->size, tiling_mode, stride); + size = i915_gem_fence_size(i915, vma->size, tiling_mode, stride); if (vma->node.size < size) return false; - alignment = i915_gem_get_ggtt_alignment(i915, vma->size, tiling_mode, stride); + alignment = i915_gem_fence_alignment(i915, vma->size, tiling_mode, stride); if (vma->node.start & (alignment - 1)) return false; @@ -276,12 +345,12 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, if (!i915_vma_is_ggtt(vma)) break; - vma->fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size, - args->tiling_mode, - args->stride); - vma->fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, vma->size, - args->tiling_mode, - args->stride); + vma->fence_size = i915_gem_fence_size(dev_priv, vma->size, + args->tiling_mode, + args->stride); + vma->fence_alignment = i915_gem_fence_alignment(dev_priv, vma->size, + args->tiling_mode, + args->stride); if (vma->fence) vma->fence->dirty = true; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index a605d735662c..f137475fab51 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -112,14 +112,14 @@ __i915_vma_create(struct drm_i915_gem_object *obj, if (i915_is_ggtt(vm)) { GEM_BUG_ON(overflows_type(vma->size, u32)); - vma->fence_size = i915_gem_get_ggtt_size(vm->i915, vma->size, - i915_gem_object_get_tiling(obj), - i915_gem_object_get_stride(obj)); + vma->fence_size = i915_gem_fence_size(vm->i915, vma->size, + i915_gem_object_get_tiling(obj), + i915_gem_object_get_stride(obj)); GEM_BUG_ON(vma->fence_size & 4095); - vma->fence_alignment = i915_gem_get_ggtt_alignment(vm->i915, vma->size, - i915_gem_object_get_tiling(obj), - i915_gem_object_get_stride(obj)); + vma->fence_alignment = i915_gem_fence_alignment(vm->i915, vma->size, + i915_gem_object_get_tiling(obj), + i915_gem_object_get_stride(obj)); GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); vma->flags |= I915_VMA_GGTT; -- cgit v1.2.3-55-g7522 From dbb28b5c3d3cb945a63030fab8d3894cf335ce19 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Fri, 16 Dec 2016 17:42:24 +0200 Subject: drm/i915/DMC/GLK: Load DMC on GLK This patch loads the DMC on GLK. There is a single firmware image for all steppings on a GLK. Cc: Rodrigo Vivi Signed-off-by: Anusha Srivatsa Reviewed-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1481902946-18593-1-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_csr.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 9cbb8d8363b4..0085bc745f6a 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -34,6 +34,10 @@ * low-power state and comes back to normal. */ +#define I915_CSR_GLK "i915/glk_dmc_ver1_01.bin" +MODULE_FIRMWARE(I915_CSR_GLK); +#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 1) + #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" MODULE_FIRMWARE(I915_CSR_KBL); #define KBL_CSR_VERSION_REQUIRED CSR_VERSION(1, 1) @@ -286,7 +290,9 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, csr->version = css_header->version; - if (IS_KABYLAKE(dev_priv)) { + if (IS_GEMINILAKE(dev_priv)) { + required_version = GLK_CSR_VERSION_REQUIRED; + } else if (IS_KABYLAKE(dev_priv)) { required_version = KBL_CSR_VERSION_REQUIRED; } else if (IS_SKYLAKE(dev_priv)) { required_version = SKL_CSR_VERSION_REQUIRED; @@ -435,7 +441,9 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) if (!HAS_CSR(dev_priv)) return; - if (IS_KABYLAKE(dev_priv)) + if (IS_GEMINILAKE(dev_priv)) + csr->fw_path = I915_CSR_GLK; + else if (IS_KABYLAKE(dev_priv)) csr->fw_path = I915_CSR_KBL; else if (IS_SKYLAKE(dev_priv)) csr->fw_path = I915_CSR_SKL; -- cgit v1.2.3-55-g7522 From b9fd799e408f992060cb3e03150479661eb7a14d Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 16 Dec 2016 17:42:25 +0200 Subject: drm/i915/glk: Add missing bits to allow runtime pm suspend on GLK. Besides having the DMC firmware in place and loaded let's handle runtime suspend and dc9 as we do for Broxton. Cc: Ander Conselvan de Oliveira Signed-off-by: Rodrigo Vivi Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1481902946-18593-2-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 4d22b4b479b8..8001fd1165e0 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1471,7 +1471,7 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) intel_display_set_init_power(dev_priv, false); - fw_csr = !IS_BROXTON(dev_priv) && + fw_csr = !IS_GEN9_LP(dev_priv) && suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload; /* * In case of firmware assisted context save/restore don't manually @@ -1484,7 +1484,7 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) intel_power_domains_suspend(dev_priv); ret = 0; - if (IS_BROXTON(dev_priv)) + if (IS_GEN9_LP(dev_priv)) bxt_enable_dc9(dev_priv); else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) hsw_enable_pc8(dev_priv); @@ -1692,7 +1692,7 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_uncore_early_sanitize(dev_priv, true); - if (IS_BROXTON(dev_priv)) { + if (IS_GEN9_LP(dev_priv)) { if (!dev_priv->suspended_to_idle) gen9_sanitize_dc_state(dev_priv); bxt_disable_dc9(dev_priv); @@ -1702,7 +1702,7 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_uncore_sanitize(dev_priv); - if (IS_BROXTON(dev_priv) || + if (IS_GEN9_LP(dev_priv) || !(dev_priv->suspended_to_idle && dev_priv->csr.dmc_payload)) intel_power_domains_init_hw(dev_priv, true); @@ -2326,7 +2326,7 @@ static int intel_runtime_suspend(struct device *kdev) intel_runtime_pm_disable_interrupts(dev_priv); ret = 0; - if (IS_BROXTON(dev_priv)) { + if (IS_GEN9_LP(dev_priv)) { bxt_display_core_uninit(dev_priv); bxt_enable_dc9(dev_priv); } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { @@ -2411,7 +2411,7 @@ static int intel_runtime_resume(struct device *kdev) if (IS_GEN6(dev_priv)) intel_init_pch_refclk(dev_priv); - if (IS_BROXTON(dev_priv)) { + if (IS_GEN9_LP(dev_priv)) { bxt_disable_dc9(dev_priv); bxt_display_core_init(dev_priv, true); if (dev_priv->csr.dmc_payload && -- cgit v1.2.3-55-g7522 From 254e0931f5b98119cc87208308213024b8c4da9c Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Mon, 9 Jan 2017 16:51:35 +0200 Subject: drm/i915/glk: Convert a few more IS_BROXTON() to IS_GEN9_LP() Commit cc3f90f0633c ("drm/i915/glk: Reuse broxton code for geminilake") missed a few of occurences of IS_BROXTON() that should have been coverted to IS_GEN9_LP(). v2: Cite the right commit. (Ander) Fixes: cc3f90f0633c ("drm/i915/glk: Reuse broxton code for geminilake") Cc: Rodrigo Vivi Cc: Ander Conselvan de Oliveira Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Michel Thierry Signed-off-by: Tomasz Lis Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Tomasz Lis (v1) Reviewed-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1483973495-15138-1-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_sysfs.c | 2 +- drivers/gpu/drm/i915/intel_device_info.c | 2 +- drivers/gpu/drm/i915/intel_dp.c | 2 +- drivers/gpu/drm/i915/intel_guc_loader.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 40c0ac70d79d..376ac957cd1c 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -58,7 +58,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv, if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH) units <<= 8; - } else if (IS_BROXTON(dev_priv)) { + } else if (IS_GEN9_LP(dev_priv)) { units = 1; div = 1200; /* 833.33ns */ } diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index f642f6ded4ae..fcf81815daff 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -192,7 +192,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && hweight8(sseu->slice_mask) > 1; sseu->has_subslice_pg = - IS_BROXTON(dev_priv) && sseu_subslice_total(sseu) > 1; + IS_GEN9_LP(dev_priv) && sseu_subslice_total(sseu) > 1; sseu->has_eu_pg = sseu->eu_per_subslice > 2; if (IS_BROXTON(dev_priv)) { diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index fb12896bafee..8c18f723b061 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3412,7 +3412,7 @@ intel_dp_set_signal_levels(struct intel_dp *intel_dp) if (HAS_DDI(dev_priv)) { signal_levels = ddi_signal_levels(intel_dp); - if (IS_BROXTON(dev_priv)) + if (IS_GEN9_LP(dev_priv)) signal_levels = 0; else mask = DDI_BUF_EMP_MASK; diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 35d5690f47a2..aa2b866474be 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -339,7 +339,7 @@ static u32 guc_wopcm_size(struct drm_i915_private *dev_priv) u32 wopcm_size = GUC_WOPCM_TOP; /* On BXT, the top of WOPCM is reserved for RC6 context */ - if (IS_BROXTON(dev_priv)) + if (IS_GEN9_LP(dev_priv)) wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED; return wopcm_size; @@ -388,7 +388,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) I915_WRITE(GEN6_GFXPAUSE, 0x30FFF); - if (IS_BROXTON(dev_priv)) + if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE); else I915_WRITE(GEN9_GT_PM_CONFIG, GT_DOORBELL_ENABLE); -- cgit v1.2.3-55-g7522 From 2d4281bb93046fc97fb8ad5dc17ea291a1fb38e2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Jan 2017 09:56:32 +0000 Subject: drm/i915: Extract compute_partial_view() In order to reuse the partial view for selftesting, extract the common function for computing the view. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170110095633.6612-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 47 ++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d1cf0ed6821d..341f7f025828 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1749,6 +1749,31 @@ int i915_gem_mmap_gtt_version(void) return 1; } +static inline struct i915_ggtt_view +compute_partial_view(struct drm_i915_gem_object *obj, + struct vm_area_struct *area, + pgoff_t page_offset, + unsigned int chunk) +{ + struct i915_ggtt_view view; + + if (i915_gem_object_is_tiled(obj)) + chunk = roundup(chunk, tile_row_pages(obj)); + + memset(&view, 0, sizeof(view)); + view.type = I915_GGTT_VIEW_PARTIAL; + view.params.partial.offset = rounddown(page_offset, chunk); + view.params.partial.size = + min_t(unsigned int, chunk, + vma_pages(area) - view.params.partial.offset); + + /* If the partial covers the entire object, just create a normal VMA. */ + if (chunk >= obj->base.size >> PAGE_SHIFT) + view.type = I915_GGTT_VIEW_NORMAL; + + return view; +} + /** * i915_gem_fault - fault a page into the GTT * @area: CPU VMA in question @@ -1825,26 +1850,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) /* Now pin it into the GTT as needed */ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags); if (IS_ERR(vma)) { - struct i915_ggtt_view view; - unsigned int chunk_size; - /* Use a partial view if it is bigger than available space */ - chunk_size = MIN_CHUNK_PAGES; - if (i915_gem_object_is_tiled(obj)) - chunk_size = roundup(chunk_size, tile_row_pages(obj)); - - memset(&view, 0, sizeof(view)); - view.type = I915_GGTT_VIEW_PARTIAL; - view.params.partial.offset = rounddown(page_offset, chunk_size); - view.params.partial.size = - min_t(unsigned int, chunk_size, - vma_pages(area) - view.params.partial.offset); - - /* If the partial covers the entire object, just create a - * normal VMA. - */ - if (chunk_size >= obj->base.size >> PAGE_SHIFT) - view.type = I915_GGTT_VIEW_NORMAL; + struct i915_ggtt_view view = + compute_partial_view(obj, area, + page_offset, MIN_CHUNK_PAGES); /* Userspace is now writing through an untracked VMA, abandon * all hope that the hardware is able to track future writes. -- cgit v1.2.3-55-g7522 From 8201c1fad4f4d5b2952255af11189ec676ebf340 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Jan 2017 09:56:33 +0000 Subject: drm/i915: Clip the partial view against the object not vma The VMA is later clipped against the vm_area_struct before insertion of the faulting PTE so we are free to create the partial view as we desire. If we use the object as the extents rather than the area, this partial can then be used for other areas. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170110095633.6612-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 341f7f025828..e02afd8728f1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1751,7 +1751,6 @@ int i915_gem_mmap_gtt_version(void) static inline struct i915_ggtt_view compute_partial_view(struct drm_i915_gem_object *obj, - struct vm_area_struct *area, pgoff_t page_offset, unsigned int chunk) { @@ -1765,7 +1764,7 @@ compute_partial_view(struct drm_i915_gem_object *obj, view.params.partial.offset = rounddown(page_offset, chunk); view.params.partial.size = min_t(unsigned int, chunk, - vma_pages(area) - view.params.partial.offset); + (obj->base.size >> PAGE_SHIFT) - view.params.partial.offset); /* If the partial covers the entire object, just create a normal VMA. */ if (chunk >= obj->base.size >> PAGE_SHIFT) @@ -1852,8 +1851,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) if (IS_ERR(vma)) { /* Use a partial view if it is bigger than available space */ struct i915_ggtt_view view = - compute_partial_view(obj, area, - page_offset, MIN_CHUNK_PAGES); + compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); /* Userspace is now writing through an untracked VMA, abandon * all hope that the hardware is able to track future writes. -- cgit v1.2.3-55-g7522 From 111dbcab3d5baf5ef7a377471dbd10c23484f11d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Jan 2017 12:10:44 +0000 Subject: drm/i915: Include ioctl in set-tiling and get-tiling function names Make it clear that these functions are the user entry points for the tiling/fence registers. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170110121045.27144-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.h | 8 ++++---- drivers/gpu/drm/i915/i915_gem_tiling.c | 16 ++++++++-------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 8001fd1165e0..aefab9a1a68e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2549,8 +2549,8 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, 0), DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2b325032fedc..a7fcf9aa5075 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3101,10 +3101,10 @@ int i915_gem_throttle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -int i915_gem_set_tiling(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int i915_gem_get_tiling(struct drm_device *dev, void *data, - struct drm_file *file_priv); +int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); void i915_gem_init_userptr(struct drm_i915_private *dev_priv); int i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 30cb869759fb..59c010d8f47b 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -34,8 +34,8 @@ /** * DOC: buffer object tiling * - * i915_gem_set_tiling() and i915_gem_get_tiling() is the userspace interface to - * declare fence register requirements. + * i915_gem_set_tiling_ioctl() and i915_gem_get_tiling_ioctl() is the userspace + * interface to declare fence register requirements. * * In principle GEM doesn't care at all about the internal data layout of an * object, and hence it also doesn't care about tiling or swizzling. There's two @@ -233,7 +233,7 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, } /** - * i915_gem_set_tiling - IOCTL handler to set tiling mode + * i915_gem_set_tiling_ioctl - IOCTL handler to set tiling mode * @dev: DRM device * @data: data pointer for the ioctl * @file: DRM file for the ioctl call @@ -247,8 +247,8 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, * Zero on success, negative errno on failure. */ int -i915_gem_set_tiling(struct drm_device *dev, void *data, - struct drm_file *file) +i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) { struct drm_i915_gem_set_tiling *args = data; struct drm_i915_private *dev_priv = to_i915(dev); @@ -385,7 +385,7 @@ err: } /** - * i915_gem_get_tiling - IOCTL handler to get tiling mode + * i915_gem_get_tiling_ioctl - IOCTL handler to get tiling mode * @dev: DRM device * @data: data pointer for the ioctl * @file: DRM file for the ioctl call @@ -398,8 +398,8 @@ err: * Zero on success, negative errno on failure. */ int -i915_gem_get_tiling(struct drm_device *dev, void *data, - struct drm_file *file) +i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) { struct drm_i915_gem_get_tiling *args = data; struct drm_i915_private *dev_priv = to_i915(dev); -- cgit v1.2.3-55-g7522 From 957870f9341201b176e41eb5fa8a750b13e501aa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Jan 2017 12:10:45 +0000 Subject: drm/i915: Split out i915_gem_object_set_tiling() Expose an interface for changing the tiling and stride on an object, that includes the complexity of checking for conflicting bindings and fence registers. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170110121045.27144-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_object.h | 3 + drivers/gpu/drm/i915/i915_gem_tiling.c | 237 +++++++++++++++++---------------- 2 files changed, 127 insertions(+), 113 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index ed3f4d5fd49f..290eaa7fc9eb 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -337,6 +337,9 @@ i915_gem_object_get_tile_row_size(struct drm_i915_gem_object *obj) i915_gem_object_get_tile_height(obj)); } +int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, + unsigned int tiling, unsigned int stride); + static inline struct intel_engine_cs * i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 59c010d8f47b..4f83e331f598 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -129,61 +129,56 @@ u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size, /* Check pitch constriants for all chips & tiling formats */ static bool -i915_tiling_ok(struct drm_i915_private *dev_priv, - int stride, int size, int tiling_mode) +i915_tiling_ok(struct drm_i915_gem_object *obj, + unsigned int tiling, unsigned int stride) { - int tile_width; + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int tile_width; /* Linear is always fine */ - if (tiling_mode == I915_TILING_NONE) + if (tiling == I915_TILING_NONE) return true; - if (tiling_mode > I915_TILING_LAST) + if (tiling > I915_TILING_LAST) return false; - if (IS_GEN2(dev_priv) || - (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev_priv))) - tile_width = 128; - else - tile_width = 512; - /* check maximum stride & object size */ /* i965+ stores the end address of the gtt mapping in the fence * reg, so dont bother to check the size */ - if (INTEL_GEN(dev_priv) >= 7) { + if (INTEL_GEN(i915) >= 7) { if (stride / 128 > GEN7_FENCE_MAX_PITCH_VAL) return false; - } else if (INTEL_GEN(dev_priv) >= 4) { + } else if (INTEL_GEN(i915) >= 4) { if (stride / 128 > I965_FENCE_MAX_PITCH_VAL) return false; } else { if (stride > 8192) return false; - if (IS_GEN3(dev_priv)) { - if (size > I830_FENCE_MAX_SIZE_VAL << 20) + if (IS_GEN3(i915)) { + if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 20) return false; } else { - if (size > I830_FENCE_MAX_SIZE_VAL << 19) + if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 19) return false; } } - if (stride < tile_width) + if (IS_GEN2(i915) || + (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(i915))) + tile_width = 128; + else + tile_width = 512; + + if (stride & (tile_width - 1)) return false; /* 965+ just needs multiples of tile width */ - if (INTEL_GEN(dev_priv) >= 4) { - if (stride & (tile_width - 1)) - return false; + if (INTEL_GEN(i915) >= 4) return true; - } /* Pre-965 needs power of two tile widths */ - if (stride & (stride - 1)) - return false; - - return true; + return is_power_of_2(stride); } static bool i915_vma_fence_prepare(struct i915_vma *vma, @@ -232,6 +227,98 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, return 0; } +int +i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, + unsigned int tiling, unsigned int stride) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_vma *vma; + int err; + + /* Make sure we don't cross-contaminate obj->tiling_and_stride */ + BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK); + + GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride)); + GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE)); + lockdep_assert_held(&i915->drm.struct_mutex); + + if ((tiling | stride) == obj->tiling_and_stride) + return 0; + + if (obj->framebuffer_references) + return -EBUSY; + + /* We need to rebind the object if its current allocation + * no longer meets the alignment restrictions for its new + * tiling mode. Otherwise we can just leave it alone, but + * need to ensure that any fence register is updated before + * the next fenced (either through the GTT or by the BLT unit + * on older GPUs) access. + * + * After updating the tiling parameters, we then flag whether + * we need to update an associated fence register. Note this + * has to also include the unfenced register the GPU uses + * whilst executing a fenced command for an untiled object. + */ + + err = i915_gem_object_fence_prepare(obj, tiling, stride); + if (err) + return err; + + /* If the memory has unknown (i.e. varying) swizzling, we pin the + * pages to prevent them being swapped out and causing corruption + * due to the change in swizzling. + */ + mutex_lock(&obj->mm.lock); + if (obj->mm.pages && + obj->mm.madv == I915_MADV_WILLNEED && + i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { + if (tiling == I915_TILING_NONE) { + GEM_BUG_ON(!obj->mm.quirked); + __i915_gem_object_unpin_pages(obj); + obj->mm.quirked = false; + } + if (!i915_gem_object_is_tiled(obj)) { + GEM_BUG_ON(!obj->mm.quirked); + __i915_gem_object_pin_pages(obj); + obj->mm.quirked = true; + } + } + mutex_unlock(&obj->mm.lock); + + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!i915_vma_is_ggtt(vma)) + break; + + vma->fence_size = + i915_gem_fence_size(i915, vma->size, tiling, stride); + vma->fence_alignment = + i915_gem_fence_alignment(i915, + vma->size, tiling, stride); + + if (vma->fence) + vma->fence->dirty = true; + } + + obj->tiling_and_stride = tiling | stride; + + /* Force the fence to be reacquired for GTT access */ + i915_gem_release_mmap(obj); + + /* Try to preallocate memory required to save swizzling on put-pages */ + if (i915_gem_object_needs_bit17_swizzle(obj)) { + if (!obj->bit_17) { + obj->bit_17 = kcalloc(BITS_TO_LONGS(obj->base.size >> PAGE_SHIFT), + sizeof(long), GFP_KERNEL); + } + } else { + kfree(obj->bit_17); + obj->bit_17 = NULL; + } + + return 0; +} + /** * i915_gem_set_tiling_ioctl - IOCTL handler to set tiling mode * @dev: DRM device @@ -251,26 +338,15 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_set_tiling *args = data; - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; - int err = 0; - - /* Make sure we don't cross-contaminate obj->tiling_and_stride */ - BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK); + int err; obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; - if (!i915_tiling_ok(dev_priv, - args->stride, obj->base.size, args->tiling_mode)) { - i915_gem_object_put(obj); - return -EINVAL; - } - - mutex_lock(&dev->struct_mutex); - if (obj->pin_display || obj->framebuffer_references) { - err = -EBUSY; + if (!i915_tiling_ok(obj, args->tiling_mode, args->stride)) { + err = -EINVAL; goto err; } @@ -279,9 +355,9 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, args->stride = 0; } else { if (args->tiling_mode == I915_TILING_X) - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; + args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_x; else - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_y; /* Hide bit 17 swizzling from the user. This prevents old Mesa * from aborting the application on sw fallbacks to bit 17, @@ -303,84 +379,19 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, } } - if (args->tiling_mode != i915_gem_object_get_tiling(obj) || - args->stride != i915_gem_object_get_stride(obj)) { - /* We need to rebind the object if its current allocation - * no longer meets the alignment restrictions for its new - * tiling mode. Otherwise we can just leave it alone, but - * need to ensure that any fence register is updated before - * the next fenced (either through the GTT or by the BLT unit - * on older GPUs) access. - * - * After updating the tiling parameters, we then flag whether - * we need to update an associated fence register. Note this - * has to also include the unfenced register the GPU uses - * whilst executing a fenced command for an untiled object. - */ + err = mutex_lock_interruptible(&dev->struct_mutex); + if (err) + goto err; - err = i915_gem_object_fence_prepare(obj, - args->tiling_mode, - args->stride); - if (!err) { - struct i915_vma *vma; - - mutex_lock(&obj->mm.lock); - if (obj->mm.pages && - obj->mm.madv == I915_MADV_WILLNEED && - dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { - if (args->tiling_mode == I915_TILING_NONE) { - GEM_BUG_ON(!obj->mm.quirked); - __i915_gem_object_unpin_pages(obj); - obj->mm.quirked = false; - } - if (!i915_gem_object_is_tiled(obj)) { - GEM_BUG_ON(!obj->mm.quirked); - __i915_gem_object_pin_pages(obj); - obj->mm.quirked = true; - } - } - mutex_unlock(&obj->mm.lock); - - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!i915_vma_is_ggtt(vma)) - break; - - vma->fence_size = i915_gem_fence_size(dev_priv, vma->size, - args->tiling_mode, - args->stride); - vma->fence_alignment = i915_gem_fence_alignment(dev_priv, vma->size, - args->tiling_mode, - args->stride); - - if (vma->fence) - vma->fence->dirty = true; - } - obj->tiling_and_stride = - args->stride | args->tiling_mode; - - /* Force the fence to be reacquired for GTT access */ - i915_gem_release_mmap(obj); - } - } - /* we have to maintain this existing ABI... */ + err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride); + mutex_unlock(&dev->struct_mutex); + + /* We have to maintain this existing ABI... */ args->stride = i915_gem_object_get_stride(obj); args->tiling_mode = i915_gem_object_get_tiling(obj); - /* Try to preallocate memory required to save swizzling on put-pages */ - if (i915_gem_object_needs_bit17_swizzle(obj)) { - if (obj->bit_17 == NULL) { - obj->bit_17 = kcalloc(BITS_TO_LONGS(obj->base.size >> PAGE_SHIFT), - sizeof(long), GFP_KERNEL); - } - } else { - kfree(obj->bit_17); - obj->bit_17 = NULL; - } - err: i915_gem_object_put(obj); - mutex_unlock(&dev->struct_mutex); - return err; } -- cgit v1.2.3-55-g7522 From 8c6b709d96cb99777d06c35142e9c3d0dfd8ddb6 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Tue, 10 Jan 2017 14:43:04 +0100 Subject: drm/i915: Use new CRC debugfs API The core provides now an ABI to userspace for generation of frame CRCs, so implement the ->set_crc_source() callback and reuse as much code as possible with the previous ABI implementation. When handling the pageflip interrupt, we skip 1 or 2 frames depending on the HW because they contain wrong values. For the legacy ABI for generating frame CRCs, this was done in userspace but now that we have a generic ABI it's better if it's not exposed by the kernel. v2: - Leave the legacy implementation in place as the ABI implementation in the core is incompatible with it. v3: - Use the "cooked" vblank counter so we have a whole 32 bits. - Make sure we don't mess with the state of the legacy CRC capture ABI implementation. v4: - Keep use of get_vblank_counter as in the legacy code, will be changed in a followup commit. v5: - Skip first frame or two as it's known that they contain wrong data. - A few fixes suggested by Emil Velikov. v6: - Rework programming of the HW registers to preserve previous behavior. v7: - Address whitespace issue. - Added a comment on why in the implementation of the new ABI we skip the 1st or 2nd frames. v9: - Add stub for intel_crtc_set_crc_source. v12: - Rebased. - Remove stub for intel_crtc_set_crc_source and instead set the callback to NULL (Jani Nikula). v15: - Rebased. Signed-off-by: Tomeu Vizoso Reviewed-by: Emil Velikov Reviewed-by: Robert Foss irq Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170110134305.26326-2-tomeu.vizoso@collabora.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_irq.c | 77 ++++++++++++++++++---------- drivers/gpu/drm/i915/intel_display.c | 1 + drivers/gpu/drm/i915/intel_drv.h | 6 +++ drivers/gpu/drm/i915/intel_pipe_crc.c | 94 ++++++++++++++++++++++++++++++----- 5 files changed, 142 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a7fcf9aa5075..58c44085a30f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1809,6 +1809,7 @@ struct intel_pipe_crc { enum intel_pipe_crc_source source; int head, tail; wait_queue_head_t wq; + int skipped; }; struct i915_frontbuffer_tracking { diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a0e70f5b3aad..b9beb5955dae 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1553,41 +1553,68 @@ static void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, { struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe]; struct intel_pipe_crc_entry *entry; + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + struct drm_driver *driver = dev_priv->drm.driver; + uint32_t crcs[5]; int head, tail; + u32 frame; spin_lock(&pipe_crc->lock); + if (pipe_crc->source) { + if (!pipe_crc->entries) { + spin_unlock(&pipe_crc->lock); + DRM_DEBUG_KMS("spurious interrupt\n"); + return; + } - if (!pipe_crc->entries) { - spin_unlock(&pipe_crc->lock); - DRM_DEBUG_KMS("spurious interrupt\n"); - return; - } - - head = pipe_crc->head; - tail = pipe_crc->tail; + head = pipe_crc->head; + tail = pipe_crc->tail; - if (CIRC_SPACE(head, tail, INTEL_PIPE_CRC_ENTRIES_NR) < 1) { - spin_unlock(&pipe_crc->lock); - DRM_ERROR("CRC buffer overflowing\n"); - return; - } + if (CIRC_SPACE(head, tail, INTEL_PIPE_CRC_ENTRIES_NR) < 1) { + spin_unlock(&pipe_crc->lock); + DRM_ERROR("CRC buffer overflowing\n"); + return; + } - entry = &pipe_crc->entries[head]; + entry = &pipe_crc->entries[head]; - entry->frame = dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, - pipe); - entry->crc[0] = crc0; - entry->crc[1] = crc1; - entry->crc[2] = crc2; - entry->crc[3] = crc3; - entry->crc[4] = crc4; + entry->frame = driver->get_vblank_counter(&dev_priv->drm, pipe); + entry->crc[0] = crc0; + entry->crc[1] = crc1; + entry->crc[2] = crc2; + entry->crc[3] = crc3; + entry->crc[4] = crc4; - head = (head + 1) & (INTEL_PIPE_CRC_ENTRIES_NR - 1); - pipe_crc->head = head; + head = (head + 1) & (INTEL_PIPE_CRC_ENTRIES_NR - 1); + pipe_crc->head = head; - spin_unlock(&pipe_crc->lock); + spin_unlock(&pipe_crc->lock); - wake_up_interruptible(&pipe_crc->wq); + wake_up_interruptible(&pipe_crc->wq); + } else { + /* + * For some not yet identified reason, the first CRC is + * bonkers. So let's just wait for the next vblank and read + * out the buggy result. + * + * On CHV sometimes the second CRC is bonkers as well, so + * don't trust that one either. + */ + if (pipe_crc->skipped == 0 || + (IS_CHERRYVIEW(dev_priv) && pipe_crc->skipped == 1)) { + pipe_crc->skipped++; + spin_unlock(&pipe_crc->lock); + return; + } + spin_unlock(&pipe_crc->lock); + crcs[0] = crc0; + crcs[1] = crc1; + crcs[2] = crc2; + crcs[3] = crc3; + crcs[4] = crc4; + frame = driver->get_vblank_counter(&dev_priv->drm, pipe); + drm_crtc_add_crc_entry(&crtc->base, true, frame, crcs); + } } #else static inline void diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e2150a64860c..56047018391c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14737,6 +14737,7 @@ static const struct drm_crtc_funcs intel_crtc_funcs = { .page_flip = intel_crtc_page_flip, .atomic_duplicate_state = intel_crtc_duplicate_state, .atomic_destroy_state = intel_crtc_destroy_state, + .set_crc_source = intel_crtc_set_crc_source, }; /** diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 6b02dac6ea26..84258df3e8f1 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1880,5 +1880,11 @@ void lspcon_wait_pcon_mode(struct intel_lspcon *lspcon); /* intel_pipe_crc.c */ int intel_pipe_crc_create(struct drm_minor *minor); void intel_pipe_crc_cleanup(struct drm_minor *minor); +#ifdef CONFIG_DEBUG_FS +int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name, + size_t *values_cnt); +#else +#define intel_crtc_set_crc_source NULL +#endif extern const struct file_operations i915_display_crc_ctl_fops; #endif /* __INTEL_DRV_H__ */ diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index ef0c0e195164..0f1da810cff0 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -613,6 +613,22 @@ static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, return 0; } +static int get_new_crc_ctl_reg(struct drm_i915_private *dev_priv, + enum pipe pipe, + enum intel_pipe_crc_source *source, u32 *val) +{ + if (IS_GEN2(dev_priv)) + return i8xx_pipe_crc_ctl_reg(source, val); + else if (INTEL_GEN(dev_priv) < 5) + return i9xx_pipe_crc_ctl_reg(dev_priv, pipe, source, val); + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + return vlv_pipe_crc_ctl_reg(dev_priv, pipe, source, val); + else if (IS_GEN5(dev_priv) || IS_GEN6(dev_priv)) + return ilk_pipe_crc_ctl_reg(source, val); + else + return ivb_pipe_crc_ctl_reg(dev_priv, pipe, source, val); +} + static int pipe_crc_set_source(struct drm_i915_private *dev_priv, enum pipe pipe, enum intel_pipe_crc_source source) @@ -636,17 +652,7 @@ static int pipe_crc_set_source(struct drm_i915_private *dev_priv, return -EIO; } - if (IS_GEN2(dev_priv)) - ret = i8xx_pipe_crc_ctl_reg(&source, &val); - else if (INTEL_GEN(dev_priv) < 5) - ret = i9xx_pipe_crc_ctl_reg(dev_priv, pipe, &source, &val); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - ret = vlv_pipe_crc_ctl_reg(dev_priv, pipe, &source, &val); - else if (IS_GEN5(dev_priv) || IS_GEN6(dev_priv)) - ret = ilk_pipe_crc_ctl_reg(&source, &val); - else - ret = ivb_pipe_crc_ctl_reg(dev_priv, pipe, &source, &val); - + ret = get_new_crc_ctl_reg(dev_priv, pipe, &source, &val); if (ret != 0) goto out; @@ -687,7 +693,7 @@ static int pipe_crc_set_source(struct drm_i915_private *dev_priv, POSTING_READ(PIPE_CRC_CTL(pipe)); /* real source -> none transition */ - if (source == INTEL_PIPE_CRC_SOURCE_NONE) { + if (!source) { struct intel_pipe_crc_entry *entries; struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); @@ -809,6 +815,11 @@ display_crc_ctl_parse_source(const char *buf, enum intel_pipe_crc_source *s) { int i; + if (!buf) { + *s = INTEL_PIPE_CRC_SOURCE_NONE; + return 0; + } + for (i = 0; i < ARRAY_SIZE(pipe_crc_sources); i++) if (!strcmp(buf, pipe_crc_sources[i])) { *s = i; @@ -937,3 +948,62 @@ void intel_pipe_crc_cleanup(struct drm_minor *minor) drm_debugfs_remove_files(info_list, 1, minor); } } + +int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name, + size_t *values_cnt) +{ + struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[crtc->index]; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + enum intel_display_power_domain power_domain; + enum intel_pipe_crc_source source; + u32 val = 0; /* shut up gcc */ + int ret = 0; + + if (display_crc_ctl_parse_source(source_name, &source) < 0) { + DRM_DEBUG_DRIVER("unknown source %s\n", source_name); + return -EINVAL; + } + + power_domain = POWER_DOMAIN_PIPE(crtc->index); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) { + DRM_DEBUG_KMS("Trying to capture CRC while pipe is off\n"); + return -EIO; + } + + ret = get_new_crc_ctl_reg(dev_priv, crtc->index, &source, &val); + if (ret != 0) + goto out; + + if (source) { + /* + * When IPS gets enabled, the pipe CRC changes. Since IPS gets + * enabled and disabled dynamically based on package C states, + * user space can't make reliable use of the CRCs, so let's just + * completely disable it. + */ + hsw_disable_ips(intel_crtc); + } + + I915_WRITE(PIPE_CRC_CTL(crtc->index), val); + POSTING_READ(PIPE_CRC_CTL(crtc->index)); + + if (!source) { + if (IS_G4X(dev_priv)) + g4x_undo_pipe_scramble_reset(dev_priv, crtc->index); + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + vlv_undo_pipe_scramble_reset(dev_priv, crtc->index); + else if (IS_HASWELL(dev_priv) && crtc->index == PIPE_A) + hsw_trans_edp_pipe_A_crc_wa(dev_priv, false); + + hsw_enable_ips(intel_crtc); + } + + pipe_crc->skipped = 0; + *values_cnt = 5; + +out: + intel_display_power_put(dev_priv, power_domain); + + return ret; +} -- cgit v1.2.3-55-g7522 From 246ee524a2846fc0f32b8ac9d846bc93e38851e8 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Tue, 10 Jan 2017 14:43:05 +0100 Subject: drm/i915: Put "cooked" vlank counters in frame CRC lines Use drm_accurate_vblank_count so we have the full 32 bit to represent the frame counter and userspace has a simpler way of knowing when the counter wraps around. Signed-off-by: Tomeu Vizoso Reviewed-by: Emil Velikov Reviewed-by: Robert Foss Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170110134305.26326-3-tomeu.vizoso@collabora.com --- drivers/gpu/drm/i915/i915_irq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b9beb5955dae..75fb1f66cc0c 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1557,7 +1557,6 @@ static void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, struct drm_driver *driver = dev_priv->drm.driver; uint32_t crcs[5]; int head, tail; - u32 frame; spin_lock(&pipe_crc->lock); if (pipe_crc->source) { @@ -1612,8 +1611,9 @@ static void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, crcs[2] = crc2; crcs[3] = crc3; crcs[4] = crc4; - frame = driver->get_vblank_counter(&dev_priv->drm, pipe); - drm_crtc_add_crc_entry(&crtc->base, true, frame, crcs); + drm_crtc_add_crc_entry(&crtc->base, true, + drm_accurate_vblank_count(&crtc->base), + crcs); } } #else -- cgit v1.2.3-55-g7522 From 2edc6e0df1c15c0cc665a8aa61dcf2c540e90a70 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Jan 2017 17:22:42 +0000 Subject: drm/i915: Consolidate reset_request() Always reset the requests of the guilty context, including the hung request that we tell the hardware to skip. This should help if the reprogram fails entirely, but more importantly makes the guilty path more uniform (and simplifies the subsequent patch to tweak the cancelled requests). Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170110172246.27297-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 91d726f8bdfa..fb2433175a3d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2658,13 +2658,13 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) ring_hung = false; } - if (ring_hung) + if (ring_hung) { i915_gem_context_mark_guilty(hung_ctx); - else + reset_request(request); + } else { i915_gem_context_mark_innocent(hung_ctx); - - if (!ring_hung) return; + } DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", engine->name, request->global_seqno); -- cgit v1.2.3-55-g7522 From c0d5f32c5043e8568ffdbcff60dfba26f408fa10 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Jan 2017 17:22:43 +0000 Subject: drm/i915: Set guilty-flag on fence after detecting a hang The struct dma_fence carries a status field exposed to userspace by sync_file. This is inspected after the fence is signaled and can convey whether or not the request completed successfully, or in our case if we detected a hang during the request (signaled via -EIO in SYNC_IOC_FILE_INFO). v2: Mark all cancelled requests as failed. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170110172246.27297-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fb2433175a3d..324a49813668 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2626,6 +2626,8 @@ static void reset_request(struct drm_i915_gem_request *request) head = 0; } memset(vaddr + head, 0, request->postfix - head); + + dma_fence_set_error(&request->fence, -EIO); } void i915_gem_reset_prepare(struct drm_i915_private *dev_priv) -- cgit v1.2.3-55-g7522 From 3c1b284759876785e31335d0f2278bfd507a1862 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Jan 2017 17:22:44 +0000 Subject: drm/i915: Set an error status for a resubmitted request Let userspace know if its request was resubmitted due to it being executed at the time of a global reset. In this case, the reset was for a guilty request on another engine, and this request was an innocent victim that will be re-executed upon restarting. However, since it was running at the time of the reset, we can not guarantee that it suffered no ill-effects from the reset (e.g. some context state may be lost, or some self-modifying fragment shaders will be restarted from the final state not their initial state), to let userspace know that it has been corrupted set a special value on the fence->error, -EAGAIN. If the request does hang on resubmission, the error will be overwritten with -EIO. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170110172246.27297-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 324a49813668..94ad9eb83a5c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2665,6 +2665,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) reset_request(request); } else { i915_gem_context_mark_innocent(hung_ctx); + dma_fence_set_error(&request->fence, -EAGAIN); return; } -- cgit v1.2.3-55-g7522 From 3cd9442f66dc116089c5ac6257cf51652d8112bd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Jan 2017 17:22:45 +0000 Subject: drm/i915: Mark all incomplete requests as -EIO when wedged Similarly to a normal reset, after we mark the GPU as wedged (completely fubar and no more requests can be executed), set the error status on all the in flight requests. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170110172246.27297-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 94ad9eb83a5c..7f73a35c7725 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2730,12 +2730,16 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) static void nop_submit_request(struct drm_i915_gem_request *request) { + dma_fence_set_error(&request->fence, -EIO); i915_gem_request_submit(request); intel_engine_init_global_seqno(request->engine, request->global_seqno); } static void i915_gem_cleanup_engine(struct intel_engine_cs *engine) { + struct drm_i915_gem_request *request; + unsigned long flags; + /* We need to be sure that no thread is running the old callback as * we install the nop handler (otherwise we would submit a request * to hardware that will never complete). In order to prevent this @@ -2744,6 +2748,12 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine) */ engine->submit_request = nop_submit_request; + /* Mark all executing requests as skipped */ + spin_lock_irqsave(&engine->timeline->lock, flags); + list_for_each_entry(request, &engine->timeline->requests, link) + dma_fence_set_error(&request->fence, -EIO); + spin_unlock_irqrestore(&engine->timeline->lock, flags); + /* Mark all pending requests as complete so that any concurrent * (lockless) lookup doesn't try and wait upon the request as we * reset it. -- cgit v1.2.3-55-g7522 From 2a20d6f8582d6783bd2e2ef9c7536f1b8a6c2fbe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Jan 2017 17:22:46 +0000 Subject: drm/i915: Rename i915_gem_engine_cleanup() to engine_set_wedged() It has been some time since i915_gem_engine_cleanup was only called from the module unload path, and now it is only called when the GPU is wedged. Mika complained that the name is confusing, especially in light of the existence of i915_gem_cleanup_engines(). Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170110172246.27297-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7f73a35c7725..e5d96de61c14 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2735,7 +2735,7 @@ static void nop_submit_request(struct drm_i915_gem_request *request) intel_engine_init_global_seqno(request->engine, request->global_seqno); } -static void i915_gem_cleanup_engine(struct intel_engine_cs *engine) +static void engine_set_wedged(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; unsigned long flags; @@ -2789,7 +2789,7 @@ static int __i915_gem_set_wedged_BKL(void *data) enum intel_engine_id id; for_each_engine(engine, i915, id) - i915_gem_cleanup_engine(engine); + engine_set_wedged(engine); return 0; } -- cgit v1.2.3-55-g7522 From f51455d442c0fa97e4600960f19bf23b66f0b386 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Jan 2017 14:47:34 +0000 Subject: drm/i915: Replace 4096 with PAGE_SIZE or I915_GTT_PAGE_SIZE Start converting over from the byte count to its semantic macro, either we want to allocate the size of a physical page in main memory or we want the size of a virtual page in the GTT. 4096 could mean either, but PAGE_SIZE and I915_GTT_PAGE_SIZE are explicit and should help improve code comprehension and future changes. In the future, we may want to use variable GTT page sizes and so have the challenge of knowing which hardcoded values were used to represent a physical page vs the virtual page. v2: Look for a few more 4096s to convert, discover IS_ALIGNED(). v3: 4096ul paranoia, make fence alignment a distinct value of 4096, keep bdw stolen w/a as 4096 until we know better. v4: Add asserts that i915_vma_insert() start/end are aligned to GTT page sizes. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170110144734.26052-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 7 ++++--- drivers/gpu/drm/i915/i915_gem_evict.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 ++--- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 12 ++++++------ drivers/gpu/drm/i915/i915_gem_fence_reg.h | 2 ++ drivers/gpu/drm/i915/i915_gem_gtt.c | 10 +++++----- drivers/gpu/drm/i915/i915_gem_gtt.h | 5 ++++- drivers/gpu/drm/i915/i915_gem_render_state.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 5 +++-- drivers/gpu/drm/i915/i915_gem_tiling.c | 13 ++++++++----- drivers/gpu/drm/i915/i915_vma.c | 21 ++++++++++++++------- drivers/gpu/drm/i915/intel_lrc.c | 5 +++-- drivers/gpu/drm/i915/intel_lrc.h | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 +++++----- 15 files changed, 62 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e5d96de61c14..3bf517e2430a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3496,7 +3496,7 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) return; if (--vma->obj->pin_display == 0) - vma->display_alignment = 4096; + vma->display_alignment = I915_GTT_MIN_ALIGNMENT; /* Bump the LRU to try and avoid premature eviction whilst flipping */ if (!i915_vma_is_active(vma)) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 40a6939e3956..ed31133b3ce3 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -97,7 +97,7 @@ * part. It should be safe to decrease this, but it's more future proof as is. */ #define GEN6_CONTEXT_ALIGN (64<<10) -#define GEN7_CONTEXT_ALIGN 4096 +#define GEN7_CONTEXT_ALIGN I915_GTT_MIN_ALIGNMENT static size_t get_context_alignment(struct drm_i915_private *dev_priv) { @@ -341,7 +341,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, if (HAS_GUC(dev_priv) && i915.enable_guc_loading) ctx->ggtt_offset_bias = GUC_WOPCM_TOP; else - ctx->ggtt_offset_bias = 4096; + ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE; return ctx; @@ -456,7 +456,8 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) dev_priv->hw_context_size = 0; } else if (HAS_HW_CONTEXTS(dev_priv)) { dev_priv->hw_context_size = - round_up(get_context_size(dev_priv), 4096); + round_up(get_context_size(dev_priv), + I915_GTT_PAGE_SIZE); if (dev_priv->hw_context_size > (1<<20)) { DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n", dev_priv->hw_context_size); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 026ebc5a452a..6a5415e31acf 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -264,9 +264,9 @@ int i915_gem_evict_for_vma(struct i915_vma *target, unsigned int flags) if (check_color) { /* Expand search to cover neighbouring guard pages (or lack!) */ if (start > target->vm->start) - start -= 4096; + start -= I915_GTT_PAGE_SIZE; if (end < target->vm->start + target->vm->total) - end += 4096; + end += I915_GTT_PAGE_SIZE; } drm_mm_for_each_node_in_range(node, &target->vm->mm, start, end) { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a5fe299da1d3..259fe4aa8d41 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -438,7 +438,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, memset(&cache->node, 0, sizeof(cache->node)); ret = drm_mm_insert_node_in_range_generic (&ggtt->base.mm, &cache->node, - 4096, 0, I915_COLOR_UNEVICTABLE, + PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_SEARCH_DEFAULT, DRM_MM_CREATE_DEFAULT); @@ -851,8 +851,7 @@ eb_vma_misplaced(struct i915_vma *vma) WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !i915_vma_is_ggtt(vma)); - if (entry->alignment && - vma->node.start & (entry->alignment - 1)) + if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment)) return true; if (vma->node.size < entry->pad_to_size) diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 9e65696a960c..fadbe8f4c745 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -80,11 +80,11 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, unsigned int stride = i915_gem_object_get_stride(vma->obj); GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); - GEM_BUG_ON(vma->node.start & 4095); - GEM_BUG_ON(vma->fence_size & 4095); - GEM_BUG_ON(stride & 127); + GEM_BUG_ON(!IS_ALIGNED(vma->node.start, I965_FENCE_PAGE)); + GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I965_FENCE_PAGE)); + GEM_BUG_ON(!IS_ALIGNED(stride, 128)); - val = (vma->node.start + vma->fence_size - 4096) << 32; + val = (vma->node.start + vma->fence_size - I965_FENCE_PAGE) << 32; val |= vma->node.start; val |= (u64)((stride / 128) - 1) << fence_pitch_shift; if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y) @@ -127,7 +127,7 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence, GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); GEM_BUG_ON(vma->node.start & ~I915_FENCE_START_MASK); GEM_BUG_ON(!is_power_of_2(vma->fence_size)); - GEM_BUG_ON(vma->node.start & (vma->fence_size - 1)); + GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size)); if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) stride /= 128; @@ -166,7 +166,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence, GEM_BUG_ON(vma->node.start & ~I830_FENCE_START_MASK); GEM_BUG_ON(!is_power_of_2(vma->fence_size)); GEM_BUG_ON(!is_power_of_2(stride / 128)); - GEM_BUG_ON(vma->node.start & (vma->fence_size - 1)); + GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size)); val = vma->node.start; if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y) diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h index 22c4a2d01adf..99a31ded4dfd 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h @@ -30,6 +30,8 @@ struct drm_i915_private; struct i915_vma; +#define I965_FENCE_PAGE 4096UL + struct drm_i915_fence_reg { struct list_head link; struct drm_i915_private *i915; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2d7ab1d35e47..8aca11f5f446 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -329,7 +329,7 @@ static int __setup_page_dma(struct drm_i915_private *dev_priv, return -ENOMEM; p->daddr = dma_map_page(kdev, - p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); + p->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); if (dma_mapping_error(kdev, p->daddr)) { __free_page(p->page); @@ -353,7 +353,7 @@ static void cleanup_page_dma(struct drm_i915_private *dev_priv, if (WARN_ON(!p->page)) return; - dma_unmap_page(&pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL); + dma_unmap_page(&pdev->dev, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); __free_page(p->page); memset(p, 0, sizeof(*p)); } @@ -2711,11 +2711,11 @@ static void i915_gtt_color_adjust(const struct drm_mm_node *node, u64 *end) { if (node->color != color) - *start += 4096; + *start += I915_GTT_PAGE_SIZE; node = list_next_entry(node, node_list); if (node->allocated && node->color != color) - *end -= 4096; + *end -= I915_GTT_PAGE_SIZE; } int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) @@ -2742,7 +2742,7 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) /* Reserve a mappable slot for our lockless error capture */ ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, &ggtt->error_capture, - 4096, 0, + PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, 0, 0); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 9e91d7e6149c..34a4fd560fa2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -40,6 +40,9 @@ #include "i915_gem_timeline.h" #include "i915_gem_request.h" +#define I915_GTT_PAGE_SIZE 4096UL +#define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE + #define I915_FENCE_REG_NONE -1 #define I915_MAX_NUM_FENCES 32 /* 32 fences + sign bit for FENCE_REG_NONE */ @@ -543,6 +546,6 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, #define PIN_HIGH BIT(9) #define PIN_OFFSET_BIAS BIT(10) #define PIN_OFFSET_FIXED BIT(11) -#define PIN_OFFSET_MASK (~4095) +#define PIN_OFFSET_MASK (-I915_GTT_PAGE_SIZE) #endif diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 5af19b0bf713..63ae7e813335 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -187,14 +187,14 @@ int i915_gem_render_state_init(struct intel_engine_cs *engine) if (!rodata) return 0; - if (rodata->batch_items * 4 > 4096) + if (rodata->batch_items * 4 > PAGE_SIZE) return -EINVAL; so = kmalloc(sizeof(*so), GFP_KERNEL); if (!so) return -ENOMEM; - obj = i915_gem_object_create_internal(engine->i915, 4096); + obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto err_free; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index f1a1d33febcd..e5be8e04bf3b 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -647,8 +647,9 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv stolen_offset, gtt_offset, size); /* KISS and expect everything to be page-aligned */ - if (WARN_ON(size == 0) || WARN_ON(size & 4095) || - WARN_ON(stolen_offset & 4095)) + if (WARN_ON(size == 0) || + WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) || + WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT))) return NULL; stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 4f83e331f598..b1361cfd4c5c 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -82,7 +82,7 @@ u32 i915_gem_fence_size(struct drm_i915_private *i915, if (INTEL_GEN(i915) >= 4) { stride *= i915_gem_tile_height(tiling); - GEM_BUG_ON(stride & 4095); + GEM_BUG_ON(!IS_ALIGNED(stride, I965_FENCE_PAGE)); return roundup(size, stride); } @@ -117,8 +117,11 @@ u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size, * Minimum alignment is 4k (GTT page size), but might be greater * if a fence register is needed for the object. */ - if (INTEL_GEN(i915) >= 4 || tiling == I915_TILING_NONE) - return 4096; + if (tiling == I915_TILING_NONE) + return I915_GTT_MIN_ALIGNMENT; + + if (INTEL_GEN(i915) >= 4) + return I965_FENCE_PAGE; /* * Previous chips need to be aligned to the size of the smallest @@ -170,7 +173,7 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, else tile_width = 512; - if (stride & (tile_width - 1)) + if (!IS_ALIGNED(stride, tile_width)) return false; /* 965+ just needs multiples of tile width */ @@ -195,7 +198,7 @@ static bool i915_vma_fence_prepare(struct i915_vma *vma, return false; alignment = i915_gem_fence_alignment(i915, vma->size, tiling_mode, stride); - if (vma->node.start & (alignment - 1)) + if (!IS_ALIGNED(vma->node.start, alignment)) return false; return true; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index f137475fab51..490914f89663 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -91,7 +91,7 @@ __i915_vma_create(struct drm_i915_gem_object *obj, vma->vm = vm; vma->obj = obj; vma->size = obj->base.size; - vma->display_alignment = 4096; + vma->display_alignment = I915_GTT_MIN_ALIGNMENT; if (view) { vma->ggtt_view = *view; @@ -115,7 +115,7 @@ __i915_vma_create(struct drm_i915_gem_object *obj, vma->fence_size = i915_gem_fence_size(vm->i915, vma->size, i915_gem_object_get_tiling(obj), i915_gem_object_get_stride(obj)); - GEM_BUG_ON(vma->fence_size & 4095); + GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I915_GTT_MIN_ALIGNMENT)); vma->fence_alignment = i915_gem_fence_alignment(vm->i915, vma->size, i915_gem_object_get_tiling(obj), @@ -270,7 +270,8 @@ i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) if (vma->node.size < size) return true; - if (alignment && vma->node.start & (alignment - 1)) + GEM_BUG_ON(alignment && !is_power_of_2(alignment)); + if (alignment && !IS_ALIGNED(vma->node.start, alignment)) return true; if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma)) @@ -302,7 +303,7 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) return; fenceable = (vma->node.size >= vma->fence_size && - (vma->node.start & (vma->fence_alignment - 1)) == 0); + IS_ALIGNED(vma->node.start, vma->fence_alignment)); mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end; @@ -380,13 +381,19 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) alignment, vma->fence_alignment); } + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT)); + GEM_BUG_ON(!is_power_of_2(alignment)); + start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; + GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); end = vma->vm->total; if (flags & PIN_MAPPABLE) end = min_t(u64, end, dev_priv->ggtt.mappable_end); if (flags & PIN_ZONE_4G) - end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); + end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE); + GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); /* If binding the object/GGTT view requires more space than the entire * aperture has, reject it early before evicting everything in a vain @@ -406,7 +413,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) if (flags & PIN_OFFSET_FIXED) { u64 offset = flags & PIN_OFFSET_MASK; - if (offset & (alignment - 1) || + if (!IS_ALIGNED(offset, alignment) || range_overflows(offset, size, end)) { ret = -EINVAL; goto err_unpin; @@ -440,7 +447,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) * with zero alignment, so where possible use the optimal * path. */ - if (alignment <= 4096) + if (alignment <= I915_GTT_MIN_ALIGNMENT) alignment = 0; search_free: diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6db246ad2f13..81665a9eb43f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1927,7 +1927,7 @@ int logical_render_ring_init(struct intel_engine_cs *engine) engine->emit_breadcrumb = gen8_emit_breadcrumb_render; engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_render_sz; - ret = intel_engine_create_scratch(engine, 4096); + ret = intel_engine_create_scratch(engine, PAGE_SIZE); if (ret) return ret; @@ -2209,7 +2209,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, WARN_ON(ce->state); - context_size = round_up(intel_lr_context_size(engine), 4096); + context_size = round_up(intel_lr_context_size(engine), + I915_GTT_PAGE_SIZE); /* One extra page as the sharing data between driver and GuC */ context_size += PAGE_SIZE * LRC_PPHWSP_PN; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 01ba36ea125e..0c852c024227 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -26,7 +26,7 @@ #include "intel_ringbuffer.h" -#define GEN8_LR_CONTEXT_ALIGN 4096 +#define GEN8_LR_CONTEXT_ALIGN I915_GTT_MIN_ALIGNMENT /* Execlists regs */ #define RING_ELSP(engine) _MMIO((engine)->mmio_base + 0x230) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0971ac396b60..ab83fc22d207 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1736,7 +1736,7 @@ static int init_status_page(struct intel_engine_cs *engine) void *vaddr; int ret; - obj = i915_gem_object_create_internal(engine->i915, 4096); + obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate status page\n"); return PTR_ERR(obj); @@ -1777,7 +1777,7 @@ static int init_status_page(struct intel_engine_cs *engine) engine->status_page.vma = vma; engine->status_page.ggtt_offset = i915_ggtt_offset(vma); - engine->status_page.page_addr = memset(vaddr, 0, 4096); + engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", engine->name, i915_ggtt_offset(vma)); @@ -2049,7 +2049,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) } /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - ret = intel_ring_pin(ring, 4096); + ret = intel_ring_pin(ring, I915_GTT_PAGE_SIZE); if (ret) { intel_ring_free(ring); goto error; @@ -2466,7 +2466,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) { struct i915_vma *vma; - obj = i915_gem_object_create(dev_priv, 4096); + obj = i915_gem_object_create(dev_priv, PAGE_SIZE); if (IS_ERR(obj)) goto err; @@ -2683,7 +2683,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) return ret; if (INTEL_GEN(dev_priv) >= 6) { - ret = intel_engine_create_scratch(engine, 4096); + ret = intel_engine_create_scratch(engine, PAGE_SIZE); if (ret) return ret; } else if (HAS_BROKEN_CS_TLB(dev_priv)) { -- cgit v1.2.3-55-g7522 From d0ce9062912062fd1f6fafd35e89aef1b852511b Mon Sep 17 00:00:00 2001 From: Nagaraju, Vathsala Date: Mon, 2 Jan 2017 17:00:54 +0530 Subject: drm : adds Y-coordinate and Colorimetry Format PSR2 vsc revision number hb2( as per table 6-11)is updated to 4 or 5 based on Y cordinate and Colorimetry Format as below 04h = 3D stereo + PSR/PSR2 + Y-coordinate. 05h = -3D stereo- + PSR/PSR2 + Y-coordinate + Pixel Encoding/Colorimetry Format indication. A DP Source device is allowed to indicate the pixel encoding/colorimetry format to the DP Sink device with VSC SDP only when the DP Sink device supports it ( i.e.,VSC_SDP_EXTENSION_FOR_COLORIMETRY_SUPPORTED bit in the DPRX_FEATURE_ENUMERATION_LIST register (DPCD Address 02210h, bit 3; is set to 1). v2: (Jani) - Change DP_PSR_Y_COORDINATE to DP_PSR2_SU_Y_COORDINATE_REQUIRED. - Add DP_PSR2_SU_GRANULARITY_REQUIRED. - Change DPRX_FEATURE_ENUMERATION_LIST to DP_DPRX. - Add GTC_CAP and AV_SYNC_CAP, other bits in DPRX_FEATURE_ENUMERATION_LIST. v3: (Jani) - Add support for bits 7:4 and 1 as per DP v1.4 for DPRX_FEATURE_ENUMERATION_LIST. Cc: Rodrigo Vivi Cc: Jim Bride Signed-off-by: Vathsala Nagaraju Signed-off-by: Patil Deepti Reviewed-by: Jani Nikula Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1483356663-32668-2-git-send-email-vathsala.nagaraju@intel.com --- include/drm/drm_dp_helper.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 55bbeb0ff594..04681359a6f5 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -194,7 +194,8 @@ # define DP_PSR_SETUP_TIME_0 (6 << 1) # define DP_PSR_SETUP_TIME_MASK (7 << 1) # define DP_PSR_SETUP_TIME_SHIFT 1 - +# define DP_PSR2_SU_Y_COORDINATE_REQUIRED (1 << 4) /* eDP 1.4a */ +# define DP_PSR2_SU_GRANULARITY_REQUIRED (1 << 5) /* eDP 1.4b */ /* * 0x80-0x8f describe downstream port capabilities, but there are two layouts * based on whether DP_DETAILED_CAP_INFO_AVAILABLE was set. If it was not, @@ -568,6 +569,16 @@ #define DP_RECEIVER_ALPM_STATUS 0x200b /* eDP 1.4 */ # define DP_ALPM_LOCK_TIMEOUT_ERROR (1 << 0) +#define DP_DPRX_FEATURE_ENUMERATION_LIST 0x2210 /* DP 1.3 */ +# define DP_GTC_CAP (1 << 0) /* DP 1.3 */ +# define DP_SST_SPLIT_SDP_CAP (1 << 1) /* DP 1.4 */ +# define DP_AV_SYNC_CAP (1 << 2) /* DP 1.3 */ +# define DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED (1 << 3) /* DP 1.3 */ +# define DP_VSC_EXT_VESA_SDP_SUPPORTED (1 << 4) /* DP 1.4 */ +# define DP_VSC_EXT_VESA_SDP_CHAINING_SUPPORTED (1 << 5) /* DP 1.4 */ +# define DP_VSC_EXT_CEA_SDP_SUPPORTED (1 << 6) /* DP 1.4 */ +# define DP_VSC_EXT_CEA_SDP_CHAINING_SUPPORTED (1 << 7) /* DP 1.4 */ + /* DP 1.2 Sideband message defines */ /* peer device type - DP 1.2a Table 2-92 */ #define DP_PEER_DEVICE_NONE 0x0 -- cgit v1.2.3-55-g7522 From 97da2ef449c44f4fffe02c2a9db6dec706cfa918 Mon Sep 17 00:00:00 2001 From: Nagaraju, Vathsala Date: Mon, 2 Jan 2017 17:00:55 +0530 Subject: drm/i915/psr: program vsc header for psr2 Function hsw_psr_setup handles vsc header setup for psr1 and skl_psr_setup_vsc handles vsc header setup for psr2. Setup VSC header in function skl_psr_setup_vsc for psr2 support, as per edp 1.4 spec, table 6-11:VSC SDP HEADER Extension for psr2 operation. v2: (Jani) - Initialize variables to 0 - intel_dp_get_y_cord_status and intel_dp_get_y_cord_status made static - Correct indentation for continuation lines - Change DP_PSR_Y_COORDINATE to DP_PSR2_SU_Y_COORDINATE_REQUIRED - Change DPRX_FEATURE_ENUMERATION_LIST to DP_DPRX_* - Change VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED to DP_VSC_* Cc: Rodrigo Vivi Cc: Jim Bride Signed-off-by: Vathsala Nagaraju Signed-off-by: Patil Deepti Reviewed-by: Jim Bride Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1483356663-32668-3-git-send-email-vathsala.nagaraju@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/intel_dp.c | 26 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_psr.c | 17 +++++++++++++++-- 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 58c44085a30f..2827dab61edf 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1154,6 +1154,8 @@ struct i915_psr { bool psr2_support; bool aux_frame_sync; bool link_standby; + bool y_cord_support; + bool colorimetry_support; }; enum intel_pch { diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 8c18f723b061..343e1d9fa761 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3042,6 +3042,24 @@ intel_dp_get_link_status(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_ DP_LINK_STATUS_SIZE) == DP_LINK_STATUS_SIZE; } +static bool intel_dp_get_y_cord_status(struct intel_dp *intel_dp) +{ + uint8_t psr_caps = 0; + + drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_CAPS, &psr_caps); + return psr_caps & DP_PSR2_SU_Y_COORDINATE_REQUIRED; +} + +static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp) +{ + uint8_t dprx = 0; + + drm_dp_dpcd_readb(&intel_dp->aux, + DP_DPRX_FEATURE_ENUMERATION_LIST, + &dprx); + return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED; +} + /* These are source-specific values. */ uint8_t intel_dp_voltage_max(struct intel_dp *intel_dp) @@ -3620,6 +3638,14 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; DRM_DEBUG_KMS("PSR2 %s on sink", dev_priv->psr.psr2_support ? "supported" : "not supported"); + + if (dev_priv->psr.psr2_support) { + dev_priv->psr.y_cord_support = + intel_dp_get_y_cord_status(intel_dp); + dev_priv->psr.colorimetry_support = + intel_dp_get_colorimetry_status(intel_dp); + } + } /* Read the eDP Display control capabilities registers */ diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 6aca8ff14989..c3aa64959621 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -122,13 +122,26 @@ static void vlv_psr_setup_vsc(struct intel_dp *intel_dp) static void skl_psr_setup_su_vsc(struct intel_dp *intel_dp) { struct edp_vsc_psr psr_vsc; + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct drm_device *dev = intel_dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); /* Prepare VSC Header for SU as per EDP 1.4 spec, Table 6.11 */ memset(&psr_vsc, 0, sizeof(psr_vsc)); psr_vsc.sdp_header.HB0 = 0; psr_vsc.sdp_header.HB1 = 0x7; - psr_vsc.sdp_header.HB2 = 0x3; - psr_vsc.sdp_header.HB3 = 0xb; + if (dev_priv->psr.colorimetry_support && + dev_priv->psr.y_cord_support) { + psr_vsc.sdp_header.HB2 = 0x5; + psr_vsc.sdp_header.HB3 = 0x13; + } else if (dev_priv->psr.y_cord_support) { + psr_vsc.sdp_header.HB2 = 0x4; + psr_vsc.sdp_header.HB3 = 0xe; + } else { + psr_vsc.sdp_header.HB2 = 0x3; + psr_vsc.sdp_header.HB3 = 0xc; + } + intel_psr_write_vsc(intel_dp, &psr_vsc); } -- cgit v1.2.3-55-g7522 From acf45d11050abd751dcec986ab121cb2367dcbba Mon Sep 17 00:00:00 2001 From: Nagaraju, Vathsala Date: Tue, 10 Jan 2017 12:32:26 +0530 Subject: drm/i915/psr: disable psr2 for resolution greater than 32X20 PSR2 is restricted to work with panel resolutions upto 3200x2000, move the check to intel_psr_match_conditions and fully block psr. Cc: Rodrigo Vivi Cc: Jim Bride Suggested-by: Rodrigo Vivi Signed-off-by: Vathsala Nagaraju Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1484031746-20874-1-git-send-email-vathsala.nagaraju@intel.com --- drivers/gpu/drm/i915/intel_psr.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index c3aa64959621..707cae8dc980 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -400,6 +400,13 @@ static bool intel_psr_match_conditions(struct intel_dp *intel_dp) return false; } + /* PSR2 is restricted to work with panel resolutions upto 3200x2000 */ + if (intel_crtc->config->pipe_src_w > 3200 || + intel_crtc->config->pipe_src_h > 2000) { + dev_priv->psr.psr2_support = false; + return false; + } + dev_priv->psr.source_ok = true; return true; } @@ -438,7 +445,6 @@ void intel_psr_enable(struct intel_dp *intel_dp) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc); if (!HAS_PSR(dev_priv)) { DRM_DEBUG_KMS("PSR not supported on this platform\n"); @@ -465,12 +471,7 @@ void intel_psr_enable(struct intel_dp *intel_dp) hsw_psr_setup_vsc(intel_dp); if (dev_priv->psr.psr2_support) { - /* PSR2 is restricted to work with panel resolutions upto 3200x2000 */ - if (crtc->config->pipe_src_w > 3200 || - crtc->config->pipe_src_h > 2000) - dev_priv->psr.psr2_support = false; - else - skl_psr_setup_su_vsc(intel_dp); + skl_psr_setup_su_vsc(intel_dp); } /* -- cgit v1.2.3-55-g7522 From e007b19d7ba7424735fd4f17a355b145ae153e4c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Jan 2017 11:23:10 +0000 Subject: drm/i915: Use the MRU stack search after evicting When we evict from the GTT to make room for an object, the hole we create is put onto the MRU stack inside the drm_mm range manager. On the next search pass, we can speed up a PIN_HIGH allocation by referencing that stack for the new hole. v2: Pull together the 3 identical implements (ahem, a couple were outdated) into a common routine for allocating a node and evicting as necessary. v3: Detect invalid calls to i915_gem_gtt_insert() v4: kerneldoc Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170111112312.31493-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/aperture_gm.c | 33 +++------ drivers/gpu/drm/i915/i915_gem_gtt.c | 121 +++++++++++++++++++++++++++------ drivers/gpu/drm/i915/i915_gem_gtt.h | 5 ++ drivers/gpu/drm/i915/i915_vma.c | 40 ++--------- 4 files changed, 119 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 7d33b607bc89..1bb7a5b80d47 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -48,47 +48,34 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm) { struct intel_gvt *gvt = vgpu->gvt; struct drm_i915_private *dev_priv = gvt->dev_priv; - u32 alloc_flag, search_flag; + unsigned int flags; u64 start, end, size; struct drm_mm_node *node; - int retried = 0; int ret; if (high_gm) { - search_flag = DRM_MM_SEARCH_BELOW; - alloc_flag = DRM_MM_CREATE_TOP; node = &vgpu->gm.high_gm_node; size = vgpu_hidden_sz(vgpu); start = gvt_hidden_gmadr_base(gvt); end = gvt_hidden_gmadr_end(gvt); + flags = PIN_HIGH; } else { - search_flag = DRM_MM_SEARCH_DEFAULT; - alloc_flag = DRM_MM_CREATE_DEFAULT; node = &vgpu->gm.low_gm_node; size = vgpu_aperture_sz(vgpu); start = gvt_aperture_gmadr_base(gvt); end = gvt_aperture_gmadr_end(gvt); + flags = PIN_MAPPABLE; } mutex_lock(&dev_priv->drm.struct_mutex); -search_again: - ret = drm_mm_insert_node_in_range_generic(&dev_priv->ggtt.base.mm, - node, size, 4096, - I915_COLOR_UNEVICTABLE, - start, end, search_flag, - alloc_flag); - if (ret) { - ret = i915_gem_evict_something(&dev_priv->ggtt.base, - size, 4096, - I915_COLOR_UNEVICTABLE, - start, end, 0); - if (ret == 0 && ++retried < 3) - goto search_again; - - gvt_err("fail to alloc %s gm space from host, retried %d\n", - high_gm ? "high" : "low", retried); - } + ret = i915_gem_gtt_insert(&dev_priv->ggtt.base, node, + size, 4096, I915_COLOR_UNEVICTABLE, + start, end, flags); mutex_unlock(&dev_priv->drm.struct_mutex); + if (ret) + gvt_err("fail to alloc %s gm space from host\n", + high_gm ? "high" : "low"); + return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8aca11f5f446..f0f17b2b7215 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -23,10 +23,13 @@ * */ +#include #include #include + #include #include + #include "i915_drv.h" #include "i915_vgpu.h" #include "i915_trace.h" @@ -2032,7 +2035,6 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) struct i915_address_space *vm = &ppgtt->base; struct drm_i915_private *dev_priv = ppgtt->base.i915; struct i915_ggtt *ggtt = &dev_priv->ggtt; - bool retried = false; int ret; /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The @@ -2045,29 +2047,14 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; -alloc: - ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, &ppgtt->node, - GEN6_PD_SIZE, GEN6_PD_ALIGN, - I915_COLOR_UNEVICTABLE, - 0, ggtt->base.total, - DRM_MM_TOPDOWN); - if (ret == -ENOSPC && !retried) { - ret = i915_gem_evict_something(&ggtt->base, - GEN6_PD_SIZE, GEN6_PD_ALIGN, - I915_COLOR_UNEVICTABLE, - 0, ggtt->base.total, - 0); - if (ret) - goto err_out; - - retried = true; - goto alloc; - } - + ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node, + GEN6_PD_SIZE, GEN6_PD_ALIGN, + I915_COLOR_UNEVICTABLE, + 0, ggtt->base.total, + PIN_HIGH); if (ret) goto err_out; - if (ppgtt->node.start < ggtt->mappable_end) DRM_DEBUG("Forced to use aperture for PDEs\n"); @@ -3567,3 +3554,95 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) return ret; } +/** + * i915_gem_gtt_insert - insert a node into an address_space (GTT) + * @vm - the &struct i915_address_space + * @node - the &struct drm_mm_node (typically i915_vma.node) + * @size - how much space to allocate inside the GTT, + * must be #I915_GTT_PAGE_SIZE aligned + * @alignment - required alignment of starting offset, may be 0 but + * if specified, this must be a power-of-two and at least + * #I915_GTT_MIN_ALIGNMENT + * @color - color to apply to node + * @start - start of any range restriction inside GTT (0 for all), + * must be #I915_GTT_PAGE_SIZE aligned + * @end - end of any range restriction inside GTT (U64_MAX for all), + * must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX + * @flags - control search and eviction behaviour + * + * i915_gem_gtt_insert() first searches for an available hole into which + * is can insert the node. The hole address is aligned to @alignment and + * its @size must then fit entirely within the [@start, @end] bounds. The + * nodes on either side of the hole must match @color, or else a guard page + * will be inserted between the two nodes (or the node evicted). If no + * suitable hole is found, then the LRU list of objects within the GTT + * is scanned to find the first set of replacement nodes to create the hole. + * Those old overlapping nodes are evicted from the GTT (and so must be + * rebound before any future use). Any node that is currently pinned cannot + * be evicted (see i915_vma_pin()). Similar if the node's VMA is currently + * active and #PIN_NONBLOCK is specified, that node is also skipped when + * searching for an eviction candidate. See i915_gem_evict_something() for + * the gory details on the eviction algorithm. + * + * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if + * asked to wait for eviction and interrupted. + */ +int i915_gem_gtt_insert(struct i915_address_space *vm, + struct drm_mm_node *node, + u64 size, u64 alignment, unsigned long color, + u64 start, u64 end, unsigned int flags) +{ + u32 search_flag, alloc_flag; + int err; + + lockdep_assert_held(&vm->i915->drm.struct_mutex); + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + GEM_BUG_ON(alignment && !is_power_of_2(alignment)); + GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT)); + GEM_BUG_ON(start >= end); + GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); + GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); + + if (unlikely(range_overflows(start, size, end))) + return -ENOSPC; + + if (unlikely(round_up(start, alignment) > round_down(end - size, alignment))) + return -ENOSPC; + + if (flags & PIN_HIGH) { + search_flag = DRM_MM_SEARCH_BELOW; + alloc_flag = DRM_MM_CREATE_TOP; + } else { + search_flag = DRM_MM_SEARCH_DEFAULT; + alloc_flag = DRM_MM_CREATE_DEFAULT; + } + + /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, + * so we know that we always have a minimum alignment of 4096. + * The drm_mm range manager is optimised to return results + * with zero alignment, so where possible use the optimal + * path. + */ + BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE); + if (alignment <= I915_GTT_MIN_ALIGNMENT) + alignment = 0; + + err = drm_mm_insert_node_in_range_generic(&vm->mm, node, + size, alignment, color, + start, end, + search_flag, alloc_flag); + if (err != -ENOSPC) + return err; + + err = i915_gem_evict_something(vm, size, alignment, color, + start, end, flags); + if (err) + return err; + + search_flag = DRM_MM_SEARCH_DEFAULT; + return drm_mm_insert_node_in_range_generic(&vm->mm, node, + size, alignment, color, + start, end, + search_flag, alloc_flag); +} diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 34a4fd560fa2..79198352a491 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -532,6 +532,11 @@ int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, struct sg_table *pages); +int i915_gem_gtt_insert(struct i915_address_space *vm, + struct drm_mm_node *node, + u64 size, u64 alignment, unsigned long color, + u64 start, u64 end, unsigned int flags); + /* Flags used by pin/bind&friends. */ #define PIN_NONBLOCK BIT(0) #define PIN_MAPPABLE BIT(1) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 490914f89663..df3750d4c907 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -431,43 +431,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) goto err_unpin; } } else { - u32 search_flag, alloc_flag; - - if (flags & PIN_HIGH) { - search_flag = DRM_MM_SEARCH_BELOW; - alloc_flag = DRM_MM_CREATE_TOP; - } else { - search_flag = DRM_MM_SEARCH_DEFAULT; - alloc_flag = DRM_MM_CREATE_DEFAULT; - } - - /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, - * so we know that we always have a minimum alignment of 4096. - * The drm_mm range manager is optimised to return results - * with zero alignment, so where possible use the optimal - * path. - */ - if (alignment <= I915_GTT_MIN_ALIGNMENT) - alignment = 0; - -search_free: - ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm, - &vma->node, - size, alignment, - obj->cache_level, - start, end, - search_flag, - alloc_flag); - if (ret) { - ret = i915_gem_evict_something(vma->vm, size, alignment, - obj->cache_level, - start, end, - flags); - if (ret == 0) - goto search_free; - + ret = i915_gem_gtt_insert(vma->vm, &vma->node, + size, alignment, obj->cache_level, + start, end, flags); + if (ret) goto err_unpin; - } GEM_BUG_ON(vma->node.start < start); GEM_BUG_ON(vma->node.start + vma->node.size > end); -- cgit v1.2.3-55-g7522 From 625d988acc28f3fe1d44f3798426561c17387a59 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Jan 2017 11:23:11 +0000 Subject: drm/i915: Extract reserving space in the GTT to a helper Extract drm_mm_reserve_node + calling i915_gem_evict_for_node into its own routine so that it can be shared rather than duplicated. v2: Kerneldoc Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: igvt-g-dev@lists.01.org Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170111112312.31493-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 5 ++-- drivers/gpu/drm/i915/i915_gem_evict.c | 33 +++++++++++---------- drivers/gpu/drm/i915/i915_gem_gtt.c | 52 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 5 ++++ drivers/gpu/drm/i915/i915_gem_stolen.c | 7 ++--- drivers/gpu/drm/i915/i915_trace.h | 16 +++++------ drivers/gpu/drm/i915/i915_vgpu.c | 33 ++++++++------------- drivers/gpu/drm/i915/i915_vma.c | 16 ++++------- 8 files changed, 106 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2827dab61edf..e9b4ece689d0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3469,8 +3469,9 @@ int __must_check i915_gem_evict_something(struct i915_address_space *vm, unsigned cache_level, u64 start, u64 end, unsigned flags); -int __must_check i915_gem_evict_for_vma(struct i915_vma *vma, - unsigned int flags); +int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, + struct drm_mm_node *node, + unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); /* belongs in i915_gem_gtt.h */ diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 6a5415e31acf..50b4645bf627 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -231,7 +231,8 @@ found: /** * i915_gem_evict_for_vma - Evict vmas to make room for binding a new one - * @target: address space and range to evict for + * @vm: address space to evict from + * @target: range (and color) to evict for * @flags: additional flags to control the eviction algorithm * * This function will try to evict vmas that overlap the target node. @@ -239,18 +240,20 @@ found: * To clarify: This is for freeing up virtual address space, not for freeing * memory in e.g. the shrinker. */ -int i915_gem_evict_for_vma(struct i915_vma *target, unsigned int flags) +int i915_gem_evict_for_node(struct i915_address_space *vm, + struct drm_mm_node *target, + unsigned int flags) { LIST_HEAD(eviction_list); struct drm_mm_node *node; - u64 start = target->node.start; - u64 end = start + target->node.size; + u64 start = target->start; + u64 end = start + target->size; struct i915_vma *vma, *next; bool check_color; int ret = 0; - lockdep_assert_held(&target->vm->i915->drm.struct_mutex); - trace_i915_gem_evict_vma(target, flags); + lockdep_assert_held(&vm->i915->drm.struct_mutex); + trace_i915_gem_evict_node(vm, target, flags); /* Retire before we search the active list. Although we have * reasonable accuracy in our retirement lists, we may have @@ -258,18 +261,18 @@ int i915_gem_evict_for_vma(struct i915_vma *target, unsigned int flags) * retiring. */ if (!(flags & PIN_NONBLOCK)) - i915_gem_retire_requests(target->vm->i915); + i915_gem_retire_requests(vm->i915); - check_color = target->vm->mm.color_adjust; + check_color = vm->mm.color_adjust; if (check_color) { /* Expand search to cover neighbouring guard pages (or lack!) */ - if (start > target->vm->start) + if (start > vm->start) start -= I915_GTT_PAGE_SIZE; - if (end < target->vm->start + target->vm->total) + if (end < vm->start + vm->total) end += I915_GTT_PAGE_SIZE; } - drm_mm_for_each_node_in_range(node, &target->vm->mm, start, end) { + drm_mm_for_each_node_in_range(node, &vm->mm, start, end) { /* If we find any non-objects (!vma), we cannot evict them */ if (node->color == I915_COLOR_UNEVICTABLE) { ret = -ENOSPC; @@ -285,12 +288,12 @@ int i915_gem_evict_for_vma(struct i915_vma *target, unsigned int flags) * those as well to make room for our guard pages. */ if (check_color) { - if (vma->node.start + vma->node.size == target->node.start) { - if (vma->node.color == target->node.color) + if (vma->node.start + vma->node.size == node->start) { + if (vma->node.color == node->color) continue; } - if (vma->node.start == target->node.start + target->node.size) { - if (vma->node.color == target->node.color) + if (vma->node.start == node->start + node->size) { + if (vma->node.color == node->color) continue; } } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index f0f17b2b7215..09a5eca494b3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3554,6 +3554,58 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) return ret; } +/** + * i915_gem_gtt_reserve - reserve a node in an address_space (GTT) + * @vm - the &struct i915_address_space + * @node - the &struct drm_mm_node (typically i915_vma.mode) + * @size - how much space to allocate inside the GTT, + * must be #I915_GTT_PAGE_SIZE aligned + * @offset - where to insert inside the GTT, + * must be #I915_GTT_MIN_ALIGNMENT aligned, and the node + * (@offset + @size) must fit within the address space + * @color - color to apply to node, if this node is not from a VMA, + * color must be #I915_COLOR_UNEVICTABLE + * @flags - control search and eviction behaviour + * + * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside + * the address space (using @size and @color). If the @node does not fit, it + * tries to evict any overlapping nodes from the GTT, including any + * neighbouring nodes if the colors do not match (to ensure guard pages between + * differing domains). See i915_gem_evict_for_node() for the gory details + * on the eviction algorithm. #PIN_NONBLOCK may used to prevent waiting on + * evicting active overlapping objects, and any overlapping node that is pinned + * or marked as unevictable will also result in failure. + * + * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if + * asked to wait for eviction and interrupted. + */ +int i915_gem_gtt_reserve(struct i915_address_space *vm, + struct drm_mm_node *node, + u64 size, u64 offset, unsigned long color, + unsigned int flags) +{ + int err; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT)); + GEM_BUG_ON(range_overflows(offset, size, vm->total)); + + node->size = size; + node->start = offset; + node->color = color; + + err = drm_mm_reserve_node(&vm->mm, node); + if (err != -ENOSPC) + return err; + + err = i915_gem_evict_for_node(vm, node, flags); + if (err == 0) + err = drm_mm_reserve_node(&vm->mm, node); + + return err; +} + /** * i915_gem_gtt_insert - insert a node into an address_space (GTT) * @vm - the &struct i915_address_space diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 79198352a491..3e031a057f78 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -532,6 +532,11 @@ int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, struct sg_table *pages); +int i915_gem_gtt_reserve(struct i915_address_space *vm, + struct drm_mm_node *node, + u64 size, u64 offset, unsigned long color, + unsigned int flags); + int i915_gem_gtt_insert(struct i915_address_space *vm, struct drm_mm_node *node, u64 size, u64 alignment, unsigned long color, diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index e5be8e04bf3b..52dbb9bab268 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -694,10 +694,9 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv * setting up the GTT space. The actual reservation will occur * later. */ - vma->node.start = gtt_offset; - vma->node.size = size; - - ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node); + ret = i915_gem_gtt_reserve(&ggtt->base, &vma->node, + size, gtt_offset, obj->cache_level, + 0); if (ret) { DRM_DEBUG_KMS("failed to allocate stolen GTT space\n"); goto err_pages; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 18ae37c411fd..4461df5a94fe 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -450,9 +450,9 @@ TRACE_EVENT(i915_gem_evict_vm, TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm) ); -TRACE_EVENT(i915_gem_evict_vma, - TP_PROTO(struct i915_vma *vma, unsigned int flags), - TP_ARGS(vma, flags), +TRACE_EVENT(i915_gem_evict_node, + TP_PROTO(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags), + TP_ARGS(vm, node, flags), TP_STRUCT__entry( __field(u32, dev) @@ -464,11 +464,11 @@ TRACE_EVENT(i915_gem_evict_vma, ), TP_fast_assign( - __entry->dev = vma->vm->i915->drm.primary->index; - __entry->vm = vma->vm; - __entry->start = vma->node.start; - __entry->size = vma->node.size; - __entry->color = vma->node.color; + __entry->dev = vm->i915->drm.primary->index; + __entry->vm = vm; + __entry->start = node->start; + __entry->size = node->size; + __entry->color = node->color; __entry->flags = flags; ), diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index dae340cfc6c7..f1ad4fbb5ba7 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -116,22 +116,20 @@ void intel_vgt_deballoon(struct drm_i915_private *dev_priv) memset(&bl_info, 0, sizeof(bl_info)); } -static int vgt_balloon_space(struct drm_mm *mm, +static int vgt_balloon_space(struct i915_ggtt *ggtt, struct drm_mm_node *node, unsigned long start, unsigned long end) { unsigned long size = end - start; - if (start == end) + if (start <= end) return -EINVAL; DRM_INFO("balloon space: range [ 0x%lx - 0x%lx ] %lu KiB.\n", start, end, size / 1024); - - node->start = start; - node->size = size; - - return drm_mm_reserve_node(mm, node); + return i915_gem_gtt_reserve(&ggtt->base, node, + size, start, I915_COLOR_UNEVICTABLE, + 0); } /** @@ -214,10 +212,8 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) /* Unmappable graphic memory ballooning */ if (unmappable_base > ggtt->mappable_end) { - ret = vgt_balloon_space(&ggtt->base.mm, - &bl_info.space[2], - ggtt->mappable_end, - unmappable_base); + ret = vgt_balloon_space(ggtt, &bl_info.space[2], + ggtt->mappable_end, unmappable_base); if (ret) goto err; @@ -228,18 +224,15 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) * because it is reserved to the guard page. */ if (unmappable_end < ggtt_end - PAGE_SIZE) { - ret = vgt_balloon_space(&ggtt->base.mm, - &bl_info.space[3], - unmappable_end, - ggtt_end - PAGE_SIZE); + ret = vgt_balloon_space(ggtt, &bl_info.space[3], + unmappable_end, ggtt_end - PAGE_SIZE); if (ret) goto err; } /* Mappable graphic memory ballooning */ if (mappable_base > ggtt->base.start) { - ret = vgt_balloon_space(&ggtt->base.mm, - &bl_info.space[0], + ret = vgt_balloon_space(ggtt, &bl_info.space[0], ggtt->base.start, mappable_base); if (ret) @@ -247,10 +240,8 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) } if (mappable_end < ggtt->mappable_end) { - ret = vgt_balloon_space(&ggtt->base.mm, - &bl_info.space[1], - mappable_end, - ggtt->mappable_end); + ret = vgt_balloon_space(ggtt, &bl_info.space[1], + mappable_end, ggtt->mappable_end); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index df3750d4c907..b74eeb73ae41 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -419,17 +419,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) goto err_unpin; } - vma->node.start = offset; - vma->node.size = size; - vma->node.color = obj->cache_level; - ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); - if (ret) { - ret = i915_gem_evict_for_vma(vma, flags); - if (ret == 0) - ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); - if (ret) - goto err_unpin; - } + ret = i915_gem_gtt_reserve(vma->vm, &vma->node, + size, offset, obj->cache_level, + flags); + if (ret) + goto err_unpin; } else { ret = i915_gem_gtt_insert(vma->vm, &vma->node, size, alignment, obj->cache_level, -- cgit v1.2.3-55-g7522 From 606fec956c0e8c53bbad19c918e43147daa76429 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Jan 2017 11:23:12 +0000 Subject: drm/i915: Prefer random replacement before eviction search Performing an eviction search can be very, very slow especially for a range restricted replacement. For example, a workload like gem_concurrent_blit will populate the entire GTT and then cause aperture thrashing. Since the GTT is a mix of active and inactive tiny objects, we have to search through almost 400k objects before finding anything inside the mappable region, and as this search is required before every operation performance falls off a cliff. Instead of performing the full search, we do a trial replacement of the node at a random location fitting the specified restrictions. We lose the strict LRU property of the GTT in exchange for avoiding the slow search (several orders of runtime improvement for gem_concurrent_blit 4KiB-global-gtt, e.g. from 5000s to 20s). The loss of LRU replacement is (later) mitigated firstly by only doing replacement if we find no freespace and secondly by execbuf doing a PIN_NONBLOCK search first before it starts thrashing (i.e. the random replacement will only occur from the already inactive set of objects). v2: Ascii-art, and check preconditionst v3: Rephrase final sentence in comment to explain why we don't bother with if (i915_is_ggtt(vm)) for preferring random replacement. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170111112312.31493-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 59 ++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 09a5eca494b3..0ed99adfd0da 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -24,6 +24,7 @@ */ #include +#include #include #include @@ -3606,6 +3607,31 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm, return err; } +static u64 random_offset(u64 start, u64 end, u64 len, u64 align) +{ + u64 range, addr; + + GEM_BUG_ON(range_overflows(start, len, end)); + GEM_BUG_ON(round_up(start, align) > round_down(end - len, align)); + + range = round_down(end - len, align) - round_up(start, align); + if (range) { + if (sizeof(unsigned long) == sizeof(u64)) { + addr = get_random_long(); + } else { + addr = get_random_int(); + if (range > U32_MAX) { + addr <<= 32; + addr |= get_random_int(); + } + } + div64_u64_rem(addr, range, &addr); + start += addr; + } + + return round_up(start, align); +} + /** * i915_gem_gtt_insert - insert a node into an address_space (GTT) * @vm - the &struct i915_address_space @@ -3627,7 +3653,8 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm, * its @size must then fit entirely within the [@start, @end] bounds. The * nodes on either side of the hole must match @color, or else a guard page * will be inserted between the two nodes (or the node evicted). If no - * suitable hole is found, then the LRU list of objects within the GTT + * suitable hole is found, first a victim is randomly selected and tested + * for eviction, otherwise then the LRU list of objects within the GTT * is scanned to find the first set of replacement nodes to create the hole. * Those old overlapping nodes are evicted from the GTT (and so must be * rebound before any future use). Any node that is currently pinned cannot @@ -3645,6 +3672,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, u64 start, u64 end, unsigned int flags) { u32 search_flag, alloc_flag; + u64 offset; int err; lockdep_assert_held(&vm->i915->drm.struct_mutex); @@ -3687,6 +3715,35 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, if (err != -ENOSPC) return err; + /* No free space, pick a slot at random. + * + * There is a pathological case here using a GTT shared between + * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt): + * + * |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->| + * (64k objects) (448k objects) + * + * Now imagine that the eviction LRU is ordered top-down (just because + * pathology meets real life), and that we need to evict an object to + * make room inside the aperture. The eviction scan then has to walk + * the 448k list before it finds one within range. And now imagine that + * it has to search for a new hole between every byte inside the memcpy, + * for several simultaneous clients. + * + * On a full-ppgtt system, if we have run out of available space, there + * will be lots and lots of objects in the eviction list! Again, + * searching that LRU list may be slow if we are also applying any + * range restrictions (e.g. restriction to low 4GiB) and so, for + * simplicity and similarilty between different GTT, try the single + * random replacement first. + */ + offset = random_offset(start, end, + size, alignment ?: I915_GTT_MIN_ALIGNMENT); + err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags); + if (err != -ENOSPC) + return err; + + /* Randomly selected placement is pinned, do a search */ err = i915_gem_evict_something(vm, size, alignment, color, start, end, flags); if (err) -- cgit v1.2.3-55-g7522 From c781c978e784c50dcd7cb312fe17f5281923f55b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Jan 2017 14:08:58 +0000 Subject: drm/i915: Add a sanity check that no request is submitted in the middle It is an error to start a new request on the same timeline (ringbuffer) as the current one before the current is submitted. If there are two requests emitting to the ringbuffer at the same time, the operation is undefined. We can catch this by checking for the timeline having a later seqno than ours when we come to submit our request. Currently we have this check at the end of __i915_add_request, but having an early check as well isolates a failure in the caller versus a failure in sealing the request (i.e. from inside __i915_add_request itself). For example, CI is currently tripping over this late assertion on ctg/ilk: [ 100.329399] [IGT] gem_cs_tlb: starting subtest basic-default [ 100.336333] ------------[ cut here ]------------ [ 100.336341] kernel BUG at drivers/gpu/drm/i915/i915_gem_request.c:908! [ 100.336347] invalid opcode: 0000 [#1] PREEMPT SMP [ 100.336351] Modules linked in: snd_hda_intel i915 snd_hda_codec_generic snd_hda_codec snd_hwdep snd_hda_core snd_pcm coretemp mei_me lpc_ich mei e1000e ptp pps_core [last unloaded: i915] [ 100.336373] CPU: 0 PID: 6308 Comm: gem_cs_tlb Tainted: G U 4.10.0-rc3-CI-CI_DRM_2045+ #1 [ 100.336380] Hardware name: LENOVO 7465CTO/7465CTO, BIOS 6DET44WW (2.08 ) 04/22/2009 [ 100.336386] task: ffff88012b738040 task.stack: ffffc90000560000 [ 100.336441] RIP: 0010:__i915_add_request+0x4aa/0x510 [i915] [ 100.336445] RSP: 0018:ffffc90000563ac0 EFLAGS: 00010212 [ 100.336451] RAX: 0000000000005d52 RBX: ffff880133bb84c0 RCX: 0000000000000001 [ 100.336456] RDX: 0000000080000001 RSI: ffff88012b738860 RDI: 00000000ffffffff [ 100.336461] RBP: ffffc90000563b00 R08: ffff880133bb8780 R09: 0000000000000000 [ 100.336466] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88012f53d950 [ 100.336472] R13: ffff88012a2b0af8 R14: ffff88012a5b0008 R15: ffff88012f53d960 [ 100.336477] FS: 00007f0d19da38c0(0000) GS:ffff88013bc00000(0000) knlGS:0000000000000000 [ 100.336483] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 100.336488] CR2: 00007f0d17706000 CR3: 000000012aa3e000 CR4: 00000000000406f0 [ 100.336496] Call Trace: [ 100.336527] i915_gem_switch_to_kernel_context+0x131/0x1b0 [i915] [ 100.336559] i915_gem_evict_vm+0x202/0x2b0 [i915] [ 100.336590] i915_gem_execbuffer_reserve.isra.9+0x3ae/0x440 [i915] [ 100.336623] i915_gem_do_execbuffer.isra.15+0x6d9/0x1b20 [i915] [ 100.336656] i915_gem_execbuffer2+0xc0/0x250 [i915] [ 100.336666] drm_ioctl+0x200/0x450 [ 100.336697] ? i915_gem_execbuffer+0x330/0x330 [i915] [ 100.336708] do_vfs_ioctl+0x90/0x6e0 [ 100.336716] ? up_read+0x1a/0x40 [ 100.336723] ? trace_hardirqs_on_caller+0x122/0x1b0 [ 100.336730] SyS_ioctl+0x3c/0x70 [ 100.336738] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 100.336745] RIP: 0033:0x7f0d187cb357 [ 100.336750] RSP: 002b:00007ffe0b2f7c28 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 100.336761] RAX: ffffffffffffffda RBX: 00007ffe0b2f7d60 RCX: 00007f0d187cb357 [ 100.336768] RDX: 00007ffe0b2f7d00 RSI: 0000000040406469 RDI: 0000000000000003 [ 100.336775] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000022 [ 100.336782] R10: 0000000000000007 R11: 0000000000000246 R12: 0000000000000002 [ 100.336789] R13: 0000000000419101 R14: 00007ffe0b2f7d60 R15: 00007ffe0b2f7d50 [ 100.336797] Code: 5f 74 1e e9 d4 fb ff ff e8 bc 1e 9c e0 e9 ae fb ff ff 4c 89 e7 e8 77 22 fd ff e9 88 fd ff ff 0f 0b e8 a3 1e 9c e0 e9 b1 fb ff ff <0f> 0b 0f 0b e8 fd af ab e0 85 c0 75 c2 48 c7 c2 80 2c 71 a0 be [ 100.336877] RIP: __i915_add_request+0x4aa/0x510 [i915] RSP: ffffc90000563ac0 [ 100.336886] ---[ end trace 22b36545479e5eb7 ]--- Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170111140858.1922-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_request.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 99056b948eda..e614398abe2f 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -851,6 +851,13 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) lockdep_assert_held(&request->i915->drm.struct_mutex); trace_i915_gem_request_add(request); + /* Make sure that no request gazumped us - if it was allocated after + * our i915_gem_request_alloc() and called __i915_add_request() before + * us, the timeline will hold its seqno which is later than ours. + */ + GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, + request->fence.seqno)); + /* * To ensure that this call will not fail, space for its emissions * should already have been reserved in the ring buffer. Let the ring -- cgit v1.2.3-55-g7522 From 16ee20619f636abc4c72b4215f5cb9cf2f3e1b5b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Jan 2017 18:15:16 +0000 Subject: drm/i915: Detect vma reserved for execbuf in evict-for-node The vma->exec_list is still the only means we have for both reserving an object in execbuf, and for constructing the eviction list. So during the construction of the eviction list, we must treat anything already on the exec_list as being pinned. Yes, this sharing of two semantically different lists will be fixed! But in the meantime, we have the issue that this is tripping up CI since we started using i915_gem_gtt_reserve_node() + i915_gem_evict_for_node() from the regular execbuf reservation path in commit 606fec956c0e ("drm/i915: Prefer random replacement before eviction search"): [ 108.424063] kernel BUG at drivers/gpu/drm/i915/i915_vma.h:254! [ 108.424072] invalid opcode: 0000 [#1] PREEMPT SMP [ 108.424079] Modules linked in: snd_hda_intel i915 intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_hdmi snd_hda_codec_generic snd_hda_codec snd_hwdep snd_hda_core mei_me snd_pcm lpc_ich mei sdhci_pci sdhci mmc_core e1000e ptp pps_core [last unloaded: i915] [ 108.424132] CPU: 1 PID: 6865 Comm: gem_cs_tlb Tainted: G U 4.10.0-rc3-CI-CI_DRM_2049+ #1 [ 108.424143] Hardware name: Hewlett-Packard HP EliteBook 8440p/172A, BIOS 68CCU Ver. F.24 09/13/2013 [ 108.424154] task: ffff88012ae22600 task.stack: ffffc90000a14000 [ 108.424220] RIP: 0010:i915_gem_evict_for_node+0x237/0x410 [i915] [ 108.424229] RSP: 0018:ffffc90000a17a58 EFLAGS: 00010202 [ 108.424237] RAX: 0000000000005871 RBX: ffff88012d1ad778 RCX: 0000000000000000 [ 108.424246] RDX: 000000007ffff000 RSI: ffffc90000a17a68 RDI: ffff880127e694d8 [ 108.424255] RBP: ffffc90000a17aa0 R08: ffffc90000a17a68 R09: 0000000000000000 [ 108.424264] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000080000000 [ 108.424273] R13: ffffc90000a17a68 R14: ffff880127e694d8 R15: ffffffffa0387330 [ 108.424283] FS: 00007f8236e3d8c0(0000) GS:ffff880137c40000(0000) knlGS:0000000000000000 [ 108.424293] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 108.424305] CR2: 00007f82347a2000 CR3: 000000012c866000 CR4: 00000000000006e0 [ 108.424317] Call Trace: [ 108.424368] i915_gem_gtt_reserve+0x67/0x80 [i915] [ 108.424424] __i915_vma_do_pin+0x248/0x620 [i915] [ 108.424487] ? __i915_vma_do_pin+0x162/0x620 [i915] [ 108.424540] i915_gem_execbuffer_reserve_vma.isra.8+0x153/0x1f0 [i915] [ 108.424591] i915_gem_execbuffer_reserve.isra.9+0x40e/0x440 [i915] [ 108.424643] i915_gem_do_execbuffer.isra.15+0x6d9/0x1b20 [i915] [ 108.424696] i915_gem_execbuffer2+0xc0/0x250 [i915] [ 108.424712] drm_ioctl+0x200/0x450 [ 108.424760] ? i915_gem_execbuffer+0x330/0x330 [i915] [ 108.424776] do_vfs_ioctl+0x90/0x6e0 [ 108.424789] ? up_read+0x1a/0x40 [ 108.424800] ? trace_hardirqs_on_caller+0x122/0x1b0 [ 108.424813] SyS_ioctl+0x3c/0x70 [ 108.424828] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 108.424839] RIP: 0033:0x7f8235867357 [ 108.424848] RSP: 002b:00007ffdc14504c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 108.424866] RAX: ffffffffffffffda RBX: 00007ffdc1450600 RCX: 00007f8235867357 [ 108.424878] RDX: 00007ffdc14505a0 RSI: 0000000040406469 RDI: 0000000000000003 [ 108.424890] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000022 [ 108.424903] R10: 0000000000000007 R11: 0000000000000246 R12: 0000000000000002 [ 108.424915] R13: 0000000000419101 R14: 00007ffdc1450600 R15: 00007ffdc14505f0 [ 108.424928] Code: 45 b8 8b 4d c0 4c 89 f2 48 89 de ff d0 49 8b 07 4c 8b 45 b8 48 85 c0 75 dd 65 ff 0d d4 a1 c8 5f 0f 84 47 01 00 00 e9 0d fe ff ff <0f> 0b 45 31 f6 4c 8b 65 c8 49 8b 04 24 4d 39 ec 49 8d 9c 24 28 [ 108.425055] RIP: i915_gem_evict_for_node+0x237/0x410 [i915] RSP: ffffc90000a17a58 Fixes: 172ae5b4c8c1 ("drm/i915: Fix i915_gem_evict_for_vma (soft-pinning)") Fixes: 606fec956c0e ("drm/i915: Prefer random replacement before eviction search") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170111182132.19174-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_evict.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 50b4645bf627..a43e44e18042 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -305,7 +305,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, } /* Overlap of objects in the same batch? */ - if (i915_vma_is_pinned(vma)) { + if (i915_vma_is_pinned(vma) || !list_empty(&vma->exec_list)) { ret = -ENOSPC; if (vma->exec_entry && vma->exec_entry->flags & EXEC_OBJECT_PINNED) -- cgit v1.2.3-55-g7522 From 34869776c76b0aa3bb882ec8a4b6f1ebb6d6f937 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Mon, 9 Jan 2017 21:14:53 +0800 Subject: drm/i915: check ppgtt validity when init reg state Check if ppgtt is valid for context when init reg state. For gvt context which has no i915 allocated ppgtt, failed to check that would cause kernel null ptr reference error. v2: remove !48bit ppgtt case as we'll always update before submit (Chris) Signed-off-by: Zhenyu Wang Reviewed-by: Chris Wilson Signed-off-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170109131453.3943-1-zhenyuw@linux.intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 81665a9eb43f..db714dcf92a6 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2103,19 +2103,12 @@ static void execlists_init_reg_state(u32 *reg_state, ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0); - if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { + if (ppgtt && USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { /* 64b PPGTT (48bit canonical) * PDP0_DESCRIPTOR contains the base address to PML4 and * other PDP Descriptors are ignored. */ ASSIGN_CTX_PML4(ppgtt, reg_state); - } else { - /* 32b PPGTT - * PDP*_DESCRIPTOR contains the base address of space supported. - * With dynamic page allocation, PDPs may not be allocated at - * this point. Point the unallocated PDPs to the scratch page - */ - execlists_update_context_pdps(ppgtt, reg_state); } if (engine->id == RCS) { -- cgit v1.2.3-55-g7522 From 13800f397ee3b4996f316b9caa8482cb90edef0d Mon Sep 17 00:00:00 2001 From: Libin Yang Date: Thu, 12 Jan 2017 16:04:52 +0800 Subject: ALSA: hda - add DP mst verb support Add snd_hda_get_dev_select() and snd_hda_set_dev_select() functions for DP MST audio support. Reviewed-by: Takashi Iwai Signed-off-by: Libin Yang Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1484208294-8637-2-git-send-email-libin.yang@intel.com --- sound/pci/hda/hda_codec.c | 72 ++++++++++++++++++++++++++++++++++++++++++++--- sound/pci/hda/hda_codec.h | 3 ++ 2 files changed, 71 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 9913be8532ab..9dc847db1cc4 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -311,9 +311,15 @@ int snd_hda_get_conn_index(struct hda_codec *codec, hda_nid_t mux, } EXPORT_SYMBOL_GPL(snd_hda_get_conn_index); - -/* return DEVLIST_LEN parameter of the given widget */ -static unsigned int get_num_devices(struct hda_codec *codec, hda_nid_t nid) +/** + * snd_hda_get_num_devices - get DEVLIST_LEN parameter of the given widget + * @codec: the HDA codec + * @nid: NID of the pin to parse + * + * Get the device entry number on the given widget. This is a feature of + * DP MST audio. Each pin can have several device entries in it. + */ +unsigned int snd_hda_get_num_devices(struct hda_codec *codec, hda_nid_t nid) { unsigned int wcaps = get_wcaps(codec, nid); unsigned int parm; @@ -327,6 +333,7 @@ static unsigned int get_num_devices(struct hda_codec *codec, hda_nid_t nid) parm = 0; return parm & AC_DEV_LIST_LEN_MASK; } +EXPORT_SYMBOL_GPL(snd_hda_get_num_devices); /** * snd_hda_get_devices - copy device list without cache @@ -344,7 +351,7 @@ int snd_hda_get_devices(struct hda_codec *codec, hda_nid_t nid, unsigned int parm; int i, dev_len, devices; - parm = get_num_devices(codec, nid); + parm = snd_hda_get_num_devices(codec, nid); if (!parm) /* not multi-stream capable */ return 0; @@ -368,6 +375,63 @@ int snd_hda_get_devices(struct hda_codec *codec, hda_nid_t nid, return devices; } +/** + * snd_hda_get_dev_select - get device entry select on the pin + * @codec: the HDA codec + * @nid: NID of the pin to get device entry select + * + * Get the devcie entry select on the pin. Return the device entry + * id selected on the pin. Return 0 means the first device entry + * is selected or MST is not supported. + */ +int snd_hda_get_dev_select(struct hda_codec *codec, hda_nid_t nid) +{ + /* not support dp_mst will always return 0, using first dev_entry */ + if (!codec->dp_mst) + return 0; + + return snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_DEVICE_SEL, 0); +} +EXPORT_SYMBOL_GPL(snd_hda_get_dev_select); + +/** + * snd_hda_set_dev_select - set device entry select on the pin + * @codec: the HDA codec + * @nid: NID of the pin to set device entry select + * @dev_id: device entry id to be set + * + * Set the device entry select on the pin nid. + */ +int snd_hda_set_dev_select(struct hda_codec *codec, hda_nid_t nid, int dev_id) +{ + int ret, num_devices; + + /* not support dp_mst will always return 0, using first dev_entry */ + if (!codec->dp_mst) + return 0; + + /* AC_PAR_DEVLIST_LEN is 0 based. */ + num_devices = snd_hda_get_num_devices(codec, nid) + 1; + /* If Device List Length is 0 (num_device = 1), + * the pin is not multi stream capable. + * Do nothing in this case. + */ + if (num_devices == 1) + return 0; + + /* Behavior of setting index being equal to or greater than + * Device List Length is not predictable + */ + if (num_devices <= dev_id) + return -EINVAL; + + ret = snd_hda_codec_write(codec, nid, 0, + AC_VERB_SET_DEVICE_SEL, dev_id); + + return ret; +} +EXPORT_SYMBOL_GPL(snd_hda_set_dev_select); + /* * read widget caps for each widget and store in cache */ diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h index 373fcad840ea..f17f25245e52 100644 --- a/sound/pci/hda/hda_codec.h +++ b/sound/pci/hda/hda_codec.h @@ -347,8 +347,11 @@ int snd_hda_override_conn_list(struct hda_codec *codec, hda_nid_t nid, int nums, const hda_nid_t *list); int snd_hda_get_conn_index(struct hda_codec *codec, hda_nid_t mux, hda_nid_t nid, int recursive); +unsigned int snd_hda_get_num_devices(struct hda_codec *codec, hda_nid_t nid); int snd_hda_get_devices(struct hda_codec *codec, hda_nid_t nid, u8 *dev_list, int max_devices); +int snd_hda_get_dev_select(struct hda_codec *codec, hda_nid_t nid); +int snd_hda_set_dev_select(struct hda_codec *codec, hda_nid_t nid, int dev_id); struct hda_verb { hda_nid_t nid; -- cgit v1.2.3-55-g7522 From 9152085defb6426ce8f9989ca27e4450daefbd89 Mon Sep 17 00:00:00 2001 From: Libin Yang Date: Thu, 12 Jan 2017 16:04:53 +0800 Subject: ALSA: hda - add DP MST audio support This patch adds the DP MST audio support on i915 platform and it will enable dyn_pcm_assign feature. DP MST supports several device entry on the same port and each device entry can map to one pcm stream. For example, on i915, there are 3 pins, and each pin has 3 device entries. This means there should be 3x3 pcms. However, there is only 3 pipe lines in i915. This means 3 pcms are actived at most at the same moment. We will create 5 pcms (pin number + dev entry num - 1) in this case. For the details, please refer commit a76056f2e57e ("ALSA: hda - hdmi dynamically bind PCM to pin when monitor hotplug") Each device entry is a virtual pin. It is described by pin_nid and dev_id in struct hdmi_spec_per_pin. Reviewed-by: Takashi Iwai Signed-off-by: Libin Yang Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1484208294-8637-3-git-send-email-libin.yang@intel.com --- sound/pci/hda/hda_codec.c | 4 + sound/pci/hda/patch_hdmi.c | 245 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 197 insertions(+), 52 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 9dc847db1cc4..8fd745cb3f36 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -467,6 +467,10 @@ static int read_pin_defaults(struct hda_codec *codec) pin->nid = nid; pin->cfg = snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_CONFIG_DEFAULT, 0); + /* + * all device entries are the same widget control so far + * fixme: if any codec is different, need fix here + */ pin->ctrl = snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_PIN_WIDGET_CONTROL, 0); diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index cf9bc042fe96..32105cee56ca 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -76,6 +76,7 @@ struct hdmi_spec_per_cvt { struct hdmi_spec_per_pin { hda_nid_t pin_nid; + int dev_id; /* pin idx, different device entries on the same pin use the same idx */ int pin_nid_idx; int num_mux_nids; @@ -130,7 +131,23 @@ struct hdmi_spec { struct snd_array cvts; /* struct hdmi_spec_per_cvt */ hda_nid_t cvt_nids[4]; /* only for haswell fix */ + /* + * num_pins is the number of virtual pins + * for example, there are 3 pins, and each pin + * has 4 device entries, then the num_pins is 12 + */ int num_pins; + /* + * num_nids is the number of real pins + * In the above example, num_nids is 3 + */ + int num_nids; + /* + * dev_num is the number of device entries + * on each pin. + * In the above example, dev_num is 4 + */ + int dev_num; struct snd_array pins; /* struct hdmi_spec_per_pin */ struct hdmi_pcm pcm_rec[16]; struct mutex pcm_lock; @@ -217,14 +234,26 @@ union audio_infoframe { /* obtain hda_pcm object assigned to idx */ #define get_pcm_rec(spec, idx) (get_hdmi_pcm(spec, idx)->pcm) -static int pin_nid_to_pin_index(struct hda_codec *codec, hda_nid_t pin_nid) +static int pin_id_to_pin_index(struct hda_codec *codec, + hda_nid_t pin_nid, int dev_id) { struct hdmi_spec *spec = codec->spec; int pin_idx; + struct hdmi_spec_per_pin *per_pin; + + /* + * (dev_id == -1) means it is NON-MST pin + * return the first virtual pin on this port + */ + if (dev_id == -1) + dev_id = 0; - for (pin_idx = 0; pin_idx < spec->num_pins; pin_idx++) - if (get_pin(spec, pin_idx)->pin_nid == pin_nid) + for (pin_idx = 0; pin_idx < spec->num_pins; pin_idx++) { + per_pin = get_pin(spec, pin_idx); + if ((per_pin->pin_nid == pin_nid) && + (per_pin->dev_id == dev_id)) return pin_idx; + } codec_warn(codec, "HDMI: pin nid %d not registered\n", pin_nid); return -EINVAL; @@ -724,10 +753,11 @@ static void hdmi_setup_audio_infoframe(struct hda_codec *codec, static bool hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll); -static void check_presence_and_report(struct hda_codec *codec, hda_nid_t nid) +static void check_presence_and_report(struct hda_codec *codec, hda_nid_t nid, + int dev_id) { struct hdmi_spec *spec = codec->spec; - int pin_idx = pin_nid_to_pin_index(codec, nid); + int pin_idx = pin_id_to_pin_index(codec, nid, dev_id); if (pin_idx < 0) return; @@ -738,7 +768,8 @@ static void check_presence_and_report(struct hda_codec *codec, hda_nid_t nid) static void jack_callback(struct hda_codec *codec, struct hda_jack_callback *jack) { - check_presence_and_report(codec, jack->nid); + /* hda_jack don't support DP MST */ + check_presence_and_report(codec, jack->nid, 0); } static void hdmi_intrinsic_event(struct hda_codec *codec, unsigned int res) @@ -747,6 +778,12 @@ static void hdmi_intrinsic_event(struct hda_codec *codec, unsigned int res) struct hda_jack_tbl *jack; int dev_entry = (res & AC_UNSOL_RES_DE) >> AC_UNSOL_RES_DE_SHIFT; + /* + * assume DP MST uses dyn_pcm_assign and acomp and + * never comes here + * if DP MST supports unsol event, below code need + * consider dev_entry + */ jack = snd_hda_jack_tbl_get_from_tag(codec, tag); if (!jack) return; @@ -757,7 +794,8 @@ static void hdmi_intrinsic_event(struct hda_codec *codec, unsigned int res) codec->addr, jack->nid, dev_entry, !!(res & AC_UNSOL_RES_IA), !!(res & AC_UNSOL_RES_PD), !!(res & AC_UNSOL_RES_ELDV)); - check_presence_and_report(codec, jack->nid); + /* hda_jack don't support DP MST */ + check_presence_and_report(codec, jack->nid, 0); } static void hdmi_non_intrinsic_event(struct hda_codec *codec, unsigned int res) @@ -970,28 +1008,60 @@ static int intel_cvt_id_to_mux_idx(struct hdmi_spec *spec, * by any other pins. */ static void intel_not_share_assigned_cvt(struct hda_codec *codec, - hda_nid_t pin_nid, int mux_idx) + hda_nid_t pin_nid, + int dev_id, int mux_idx) { struct hdmi_spec *spec = codec->spec; hda_nid_t nid; int cvt_idx, curr; struct hdmi_spec_per_cvt *per_cvt; + struct hdmi_spec_per_pin *per_pin; + int pin_idx; + + /* configure the pins connections */ + for (pin_idx = 0; pin_idx < spec->num_pins; pin_idx++) { + int dev_id_saved; + int dev_num; - /* configure all pins, including "no physical connection" ones */ - for_each_hda_codec_node(nid, codec) { - unsigned int wid_caps = get_wcaps(codec, nid); - unsigned int wid_type = get_wcaps_type(wid_caps); + per_pin = get_pin(spec, pin_idx); + /* + * pin not connected to monitor + * no need to operate on it + */ + if (!per_pin->pcm) + continue; - if (wid_type != AC_WID_PIN) + if ((per_pin->pin_nid == pin_nid) && + (per_pin->dev_id == dev_id)) continue; - if (nid == pin_nid) + /* + * if per_pin->dev_id >= dev_num, + * snd_hda_get_dev_select() will fail, + * and the following operation is unpredictable. + * So skip this situation. + */ + dev_num = snd_hda_get_num_devices(codec, per_pin->pin_nid) + 1; + if (per_pin->dev_id >= dev_num) continue; + nid = per_pin->pin_nid; + + /* + * Calling this function should not impact + * on the device entry selection + * So let's save the dev id for each pin, + * and restore it when return + */ + dev_id_saved = snd_hda_get_dev_select(codec, nid); + snd_hda_set_dev_select(codec, nid, per_pin->dev_id); curr = snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_CONNECT_SEL, 0); - if (curr != mux_idx) + if (curr != mux_idx) { + snd_hda_set_dev_select(codec, nid, dev_id_saved); continue; + } + /* choose an unassigned converter. The conveters in the * connection list are in the same order as in the codec. @@ -1008,12 +1078,13 @@ static void intel_not_share_assigned_cvt(struct hda_codec *codec, break; } } + snd_hda_set_dev_select(codec, nid, dev_id_saved); } } /* A wrapper of intel_not_share_asigned_cvt() */ static void intel_not_share_assigned_cvt_nid(struct hda_codec *codec, - hda_nid_t pin_nid, hda_nid_t cvt_nid) + hda_nid_t pin_nid, int dev_id, hda_nid_t cvt_nid) { int mux_idx; struct hdmi_spec *spec = codec->spec; @@ -1025,7 +1096,7 @@ static void intel_not_share_assigned_cvt_nid(struct hda_codec *codec, */ mux_idx = intel_cvt_id_to_mux_idx(spec, cvt_nid); if (mux_idx >= 0) - intel_not_share_assigned_cvt(codec, pin_nid, mux_idx); + intel_not_share_assigned_cvt(codec, pin_nid, dev_id, mux_idx); } /* skeleton caller of pin_cvt_fixup ops */ @@ -1140,6 +1211,7 @@ static int hdmi_pcm_open(struct hda_pcm_stream *hinfo, per_pin->cvt_nid = per_cvt->cvt_nid; hinfo->nid = per_cvt->cvt_nid; + snd_hda_set_dev_select(codec, per_pin->pin_nid, per_pin->dev_id); snd_hda_codec_write_cache(codec, per_pin->pin_nid, 0, AC_VERB_SET_CONNECT_SEL, per_pin->mux_idx); @@ -1198,6 +1270,7 @@ static int hdmi_read_pin_conn(struct hda_codec *codec, int pin_idx) return -EINVAL; } + /* all the device entries on the same pin have the same conn list */ per_pin->num_mux_nids = snd_hda_get_connections(codec, pin_nid, per_pin->mux_nids, HDA_MAX_CONNECTIONS); @@ -1215,13 +1288,13 @@ static int hdmi_find_pcm_slot(struct hdmi_spec *spec, return per_pin->pin_nid_idx; /* have a second try; check the "reserved area" over num_pins */ - for (i = spec->num_pins; i < spec->pcm_used; i++) { + for (i = spec->num_nids; i < spec->pcm_used; i++) { if (!test_bit(i, &spec->pcm_bitmap)) return i; } /* the last try; check the empty slots in pins */ - for (i = 0; i < spec->num_pins; i++) { + for (i = 0; i < spec->num_nids; i++) { if (!test_bit(i, &spec->pcm_bitmap)) return i; } @@ -1296,10 +1369,13 @@ static void hdmi_pcm_setup_pin(struct hdmi_spec *spec, per_pin->cvt_nid = hinfo->nid; mux_idx = hdmi_get_pin_cvt_mux(spec, per_pin, hinfo->nid); - if (mux_idx < per_pin->num_mux_nids) + if (mux_idx < per_pin->num_mux_nids) { + snd_hda_set_dev_select(codec, per_pin->pin_nid, + per_pin->dev_id); snd_hda_codec_write_cache(codec, per_pin->pin_nid, 0, AC_VERB_SET_CONNECT_SEL, mux_idx); + } snd_hda_spdif_ctls_assign(codec, per_pin->pcm_idx, hinfo->nid); non_pcm = check_non_pcm_per_cvt(codec, hinfo->nid); @@ -1467,6 +1543,11 @@ static struct snd_jack *pin_idx_to_jack(struct hda_codec *codec, if (per_pin->pcm_idx >= 0 && spec->dyn_pcm_assign) jack = spec->pcm_rec[per_pin->pcm_idx].jack; else if (!spec->dyn_pcm_assign) { + /* + * jack tbl doesn't support DP MST + * DP MST will use dyn_pcm_assign, + * so DP MST will never come here + */ jack_tbl = snd_hda_jack_tbl_get(codec, per_pin->pin_nid); if (jack_tbl) jack = jack_tbl->jack; @@ -1485,9 +1566,9 @@ static void sync_eld_via_acomp(struct hda_codec *codec, mutex_lock(&per_pin->lock); eld->monitor_present = false; - size = snd_hdac_acomp_get_eld(&codec->core, per_pin->pin_nid, -1, - &eld->monitor_present, eld->eld_buffer, - ELD_MAX_SIZE); + size = snd_hdac_acomp_get_eld(&codec->core, per_pin->pin_nid, + per_pin->dev_id, &eld->monitor_present, + eld->eld_buffer, ELD_MAX_SIZE); if (size > 0) { size = min(size, ELD_MAX_SIZE); if (snd_hdmi_parse_eld(codec, &eld->info, @@ -1565,38 +1646,81 @@ static int hdmi_add_pin(struct hda_codec *codec, hda_nid_t pin_nid) int pin_idx; struct hdmi_spec_per_pin *per_pin; int err; + int dev_num, i; caps = snd_hda_query_pin_caps(codec, pin_nid); if (!(caps & (AC_PINCAP_HDMI | AC_PINCAP_DP))) return 0; + /* + * For DP MST audio, Configuration Default is the same for + * all device entries on the same pin + */ config = snd_hda_codec_get_pincfg(codec, pin_nid); if (get_defcfg_connect(config) == AC_JACK_PORT_NONE) return 0; - if (is_haswell_plus(codec)) - intel_haswell_fixup_connect_list(codec, pin_nid); - - pin_idx = spec->num_pins; - per_pin = snd_array_new(&spec->pins); - if (!per_pin) - return -ENOMEM; - - per_pin->pin_nid = pin_nid; - per_pin->non_pcm = false; - if (spec->dyn_pcm_assign) - per_pin->pcm_idx = -1; - else { - per_pin->pcm = get_hdmi_pcm(spec, pin_idx); - per_pin->pcm_idx = pin_idx; + /* + * To simplify the implementation, malloc all + * the virtual pins in the initialization statically + */ + if (is_haswell_plus(codec)) { + /* + * On Intel platforms, device entries number is + * changed dynamically. If there is a DP MST + * hub connected, the device entries number is 3. + * Otherwise, it is 1. + * Here we manually set dev_num to 3, so that + * we can initialize all the device entries when + * bootup statically. + */ + dev_num = 3; + spec->dev_num = 3; + } else if (spec->dyn_pcm_assign && codec->dp_mst) { + dev_num = snd_hda_get_num_devices(codec, pin_nid) + 1; + /* + * spec->dev_num is the maxinum number of device entries + * among all the pins + */ + spec->dev_num = (spec->dev_num > dev_num) ? + spec->dev_num : dev_num; + } else { + /* + * If the platform doesn't support DP MST, + * manually set dev_num to 1. This means + * the pin has only one device entry. + */ + dev_num = 1; + spec->dev_num = 1; } - per_pin->pin_nid_idx = pin_idx; - err = hdmi_read_pin_conn(codec, pin_idx); - if (err < 0) - return err; + for (i = 0; i < dev_num; i++) { + pin_idx = spec->num_pins; + per_pin = snd_array_new(&spec->pins); - spec->num_pins++; + if (!per_pin) + return -ENOMEM; + + if (spec->dyn_pcm_assign) { + per_pin->pcm = NULL; + per_pin->pcm_idx = -1; + } else { + per_pin->pcm = get_hdmi_pcm(spec, pin_idx); + per_pin->pcm_idx = pin_idx; + } + per_pin->pin_nid = pin_nid; + per_pin->pin_nid_idx = spec->num_nids; + per_pin->dev_id = i; + per_pin->non_pcm = false; + snd_hda_set_dev_select(codec, pin_nid, i); + if (is_haswell_plus(codec)) + intel_haswell_fixup_connect_list(codec, pin_nid); + err = hdmi_read_pin_conn(codec, pin_idx); + if (err < 0) + return err; + spec->num_pins++; + } + spec->num_nids++; return 0; } @@ -1744,7 +1868,7 @@ static int generic_hdmi_playback_pcm_prepare(struct hda_pcm_stream *hinfo, /* Call sync_audio_rate to set the N/CTS/M manually if necessary */ /* Todo: add DP1.2 MST audio support later */ if (codec_has_acomp(codec)) - snd_hdac_sync_audio_rate(&codec->core, pin_nid, -1, + snd_hdac_sync_audio_rate(&codec->core, pin_nid, per_pin->dev_id, runtime->rate); non_pcm = check_non_pcm_per_cvt(codec, cvt_nid); @@ -1762,6 +1886,7 @@ static int generic_hdmi_playback_pcm_prepare(struct hda_pcm_stream *hinfo, pinctl | PIN_OUT); } + /* snd_hda_set_dev_select() has been called before */ err = spec->ops.setup_stream(codec, cvt_nid, pin_nid, stream_tag, format); mutex_unlock(&spec->pcm_lock); @@ -1897,17 +2022,23 @@ static bool is_hdmi_pcm_attached(struct hdac_device *hdac, int pcm_idx) static int generic_hdmi_build_pcms(struct hda_codec *codec) { struct hdmi_spec *spec = codec->spec; - int pin_idx; + int idx; - for (pin_idx = 0; pin_idx < spec->num_pins; pin_idx++) { + /* + * for non-mst mode, pcm number is the same as before + * for DP MST mode, pcm number is (nid number + dev_num - 1) + * dev_num is the device entry number in a pin + * + */ + for (idx = 0; idx < spec->num_nids + spec->dev_num - 1; idx++) { struct hda_pcm *info; struct hda_pcm_stream *pstr; - info = snd_hda_codec_pcm_new(codec, "HDMI %d", pin_idx); + info = snd_hda_codec_pcm_new(codec, "HDMI %d", idx); if (!info) return -ENOMEM; - spec->pcm_rec[pin_idx].pcm = info; + spec->pcm_rec[idx].pcm = info; spec->pcm_used++; info->pcm_type = HDA_PCM_TYPE_HDMI; info->own_chmap = true; @@ -1915,6 +2046,9 @@ static int generic_hdmi_build_pcms(struct hda_codec *codec) pstr = &info->stream[SNDRV_PCM_STREAM_PLAYBACK]; pstr->substreams = 1; pstr->ops = generic_ops; + /* pcm number is less than 16 */ + if (spec->pcm_used >= 16) + break; /* other pstr fields are set in open */ } @@ -2070,7 +2204,9 @@ static int generic_hdmi_init(struct hda_codec *codec) for (pin_idx = 0; pin_idx < spec->num_pins; pin_idx++) { struct hdmi_spec_per_pin *per_pin = get_pin(spec, pin_idx); hda_nid_t pin_nid = per_pin->pin_nid; + int dev_id = per_pin->dev_id; + snd_hda_set_dev_select(codec, pin_nid, dev_id); hdmi_init_pin(codec, pin_nid); if (!codec_has_acomp(codec)) snd_hda_jack_detect_enable_callback(codec, pin_nid, @@ -2178,6 +2314,7 @@ static int alloc_generic_hdmi(struct hda_codec *codec) return -ENOMEM; spec->ops = generic_standard_hdmi_ops; + spec->dev_num = 1; /* initialize to 1 */ mutex_init(&spec->pcm_lock); snd_hdac_register_chmap_ops(&codec->core, &spec->chmap); @@ -2295,6 +2432,7 @@ static void intel_pin_eld_notify(void *audio_ptr, int port, int pipe) { struct hda_codec *codec = audio_ptr; int pin_nid; + int dev_id = pipe; /* we assume only from port-B to port-D */ if (port < 1 || port > 3) @@ -2321,7 +2459,7 @@ static void intel_pin_eld_notify(void *audio_ptr, int port, int pipe) return; snd_hdac_i915_set_bclk(&codec->bus->core); - check_presence_and_report(codec, pin_nid); + check_presence_and_report(codec, pin_nid, dev_id); } /* register i915 component pin_eld_notify callback */ @@ -2354,11 +2492,13 @@ static void i915_pin_cvt_fixup(struct hda_codec *codec, hda_nid_t cvt_nid) { if (per_pin) { + snd_hda_set_dev_select(codec, per_pin->pin_nid, + per_pin->dev_id); intel_verify_pin_cvt_connect(codec, per_pin); intel_not_share_assigned_cvt(codec, per_pin->pin_nid, - per_pin->mux_idx); + per_pin->dev_id, per_pin->mux_idx); } else { - intel_not_share_assigned_cvt_nid(codec, 0, cvt_nid); + intel_not_share_assigned_cvt_nid(codec, 0, 0, cvt_nid); } } @@ -2378,6 +2518,8 @@ static int patch_i915_hsw_hdmi(struct hda_codec *codec) if (err < 0) return err; spec = codec->spec; + codec->dp_mst = true; + spec->dyn_pcm_assign = true; intel_haswell_enable_all_pins(codec, true); intel_haswell_fixup_enable_dp12(codec); @@ -2389,7 +2531,6 @@ static int patch_i915_hsw_hdmi(struct hda_codec *codec) codec->core.link_power_control = 1; codec->patch_ops.set_power_state = haswell_set_power_state; - codec->dp_mst = true; codec->depop_delay = 0; codec->auto_runtime_pm = 1; -- cgit v1.2.3-55-g7522 From dd48e8ed3bfdee879ce60508230bed7fb2ca5fa8 Mon Sep 17 00:00:00 2001 From: Libin Yang Date: Thu, 12 Jan 2017 16:04:54 +0800 Subject: ALSA: Documentation about HDA DP MST pin init and connection Add the documentation about HD-audio DP MST: 1. pin initialization 2. device entry connection list Reviewed-by: Takashi Iwai Signed-off-by: Libin Yang Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1484208294-8637-4-git-send-email-libin.yang@intel.com --- Documentation/sound/hd-audio/dp-mst.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/sound/hd-audio/dp-mst.rst b/Documentation/sound/hd-audio/dp-mst.rst index 58b72437e6c3..1617459e332f 100644 --- a/Documentation/sound/hd-audio/dp-mst.rst +++ b/Documentation/sound/hd-audio/dp-mst.rst @@ -19,6 +19,23 @@ PCM === To be added +Pin Initialization +================== +Each pin may have several device entries (virtual pins). On Intel platform, +the device entries number is dynamically changed. If DP MST hub is connected, +it is in DP MST mode, and the device entries number is 3. Otherwise, the +device entries number is 1. + +To simplify the implementation, all the device entries will be initialized +when bootup no matter whether it is in DP MST mode or not. + +Connection list +=============== +DP MST reuses connection list code. The code can be reused because +device entries on the same pin have the same connection list. + +This means DP MST gets the device entry connection list without the +device entry setting. Jack ==== -- cgit v1.2.3-55-g7522 From 83796f2652bc31170eee5563e37c6af92db41de9 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Wed, 11 Jan 2017 16:17:39 +0100 Subject: drm/i915/guc: Make sure vma containing firmware is GuC mappable Since commit 4741da925fa3 ("drm/i915/guc: Assert that all GGTT offsets used by the GuC are mappable"), we're asserting that GuC firmware is in the GuC mappable range. Except we're not pinning the object with bias, which means it's possible to trigger this assert. Let's add a proper bias. Fixes: 4741da925fa3 ("drm/i915/guc: Assert that all GGTT offsets used by the GuC are mappable") Cc: Chris Wilson Cc: Daniele Ceraolo Spurio Signed-off-by: Michał Winiarski Reviewed-by: Chris Wilson Tested-by: Tomi Sarvela Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170111151739.28965-1-michal.winiarski@intel.com --- drivers/gpu/drm/i915/intel_guc_loader.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index aa2b866474be..5a6ab8728d48 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -360,7 +360,8 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) return ret; } - vma = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0); + vma = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP); if (IS_ERR(vma)) { DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma)); return PTR_ERR(vma); -- cgit v1.2.3-55-g7522 From 8726f2faa371514fba2f594d799db95203dfeee0 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 12 Jan 2017 12:44:54 +0200 Subject: drm/i915: Remove WaDisableLSQCROPERFforOCL KBL workaround. The WaDisableLSQCROPERFforOCL workaround has the side effect of disabling an L3SQ optimization that has huge performance implications and is unlikely to be necessary for the correct functioning of usual graphic workloads. Userspace is free to re-enable the workaround on demand, and is generally in a better position to determine whether the workaround is necessary than the DRM is (e.g. only during the execution of compute kernels that rely on both L3 fences and HDC R/W requests). The same workaround seems to apply to BDW (at least to production stepping G1) and SKL as well (the internal workaround database claims that it does for all steppings, while the BSpec workaround table only mentions pre-production steppings), but the DRM doesn't do anything beyond whitelisting the L3SQCREG4 register so userspace can enable it when it sees fit. Do the same on KBL platforms. Improves performance of the GFXBench4 gl_manhattan31 benchmark by 60%, and gl_4 (AKA car chase) by 14% on a KBL GT2 running Mesa master -- This is followed by a regression of 35% and 10% respectively for the same benchmarks and platform caused by my recent patch series switching userspace to use the dataport constant cache instead of the sampler to implement uniform pull constant loads, which caused us to hit more heavily the L3 cache (and on platforms other than KBL had the opposite effect of improving performance of the same two benchmarks). The overall effect on KBL of this change combined with the recent userspace change is respectively 4.6% and 2.6%. SynMark2 OglShMapPcf was affected by the constant cache changes (though it improved as it did on other platforms rather than regressing), but is not significantly affected by this patch (with statistical significance of 5% and sample size 20). v2: Drop some more code to avoid unused variable warning. Fixes: 738fa1b3123f ("drm/i915/kbl: Add WaDisableLSQCROPERFforOCL") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99256 Signed-off-by: Francisco Jerez Cc: Matthew Auld Cc: Eero Tamminen Cc: Jani Nikula Cc: Mika Kuoppala Cc: beignet@lists.freedesktop.org Cc: # v4.7+ Reviewed-by: Mika Kuoppala [Removed double Fixes tag] Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1484217894-20505-1-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 10 ---------- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 -------- 2 files changed, 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index db714dcf92a6..8acab875fcfc 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -970,18 +970,8 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, uint32_t *batch, uint32_t index) { - struct drm_i915_private *dev_priv = engine->i915; uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES); - /* - * WaDisableLSQCROPERFforOCL:kbl - * This WA is implemented in skl_init_clock_gating() but since - * this batch updates GEN8_L3SQCREG4 with default value we need to - * set this bit here to retain the WA during flush. - */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0)) - l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS; - wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT)); wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ab83fc22d207..49fa8006c6a2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1095,14 +1095,6 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(HDC_CHICKEN0, HDC_FENCE_DEST_SLM_DISABLE); - /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes - * involving this register should also be added to WA batch as required. - */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0)) - /* WaDisableLSQCROPERFforOCL:kbl */ - I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | - GEN8_LQSC_RO_PERF_DIS); - /* WaToEnableHwFixForPushConstHWBug:kbl */ if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, -- cgit v1.2.3-55-g7522 From 87c390b67bdf8aa64c30b0e84cfc83d8753e0909 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Wed, 11 Jan 2017 20:18:08 -0800 Subject: drm/i915: Keep i915_handle_error kerneldoc parameters together And before the function description. Tidy up from commit 14bb2c11796d70b ("drm/i915: Fix a buch of kerneldoc warnings"), all others kerneldoc blocks look ok. Cc: Tvrtko Ursulin Reviewed-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Signed-off-by: Michel Thierry Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170112041817.1102-2-michel.thierry@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 75fb1f66cc0c..ce5663d94839 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2730,12 +2730,13 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv) * i915_handle_error - handle a gpu error * @dev_priv: i915 device private * @engine_mask: mask representing engines that are hung + * @fmt: Error message format string + * * Do some basic checking of register state at error time and * dump it to the syslog. Also call i915_capture_error_state() to make * sure we get a record and make it available in debugfs. Fire a uevent * so userspace knows something bad happened (should trigger collection * of a ring dump etc.). - * @fmt: Error message format string */ void i915_handle_error(struct drm_i915_private *dev_priv, u32 engine_mask, -- cgit v1.2.3-55-g7522 From df2105749a8a0b7f7da8b2fb0b9cee1b3008586b Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Wed, 11 Jan 2017 20:18:09 -0800 Subject: drm/i915: Update i915_reset parameter for kerneldoc Since commit c033666a94b57 ("drm/i915: Store a i915 backpointer from engine, and use it") i915_reset receives dev_priv, but the kerneldoc was not updated. Signed-off-by: Michel Thierry Reviewed-by: Mika Kuoppala Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170112041817.1102-3-michel.thierry@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index aefab9a1a68e..4e5ea5898e06 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1746,7 +1746,7 @@ static void enable_engines_irq(struct drm_i915_private *dev_priv) /** * i915_reset - reset chip after a hang - * @dev: drm device to reset + * @dev_priv: device private to reset * * Reset the chip. Useful if a hang is detected. Marks the device as wedged * on failure. -- cgit v1.2.3-55-g7522 From 1d4258db3e0bf14d4c782028ab7be721fb6619ba Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 12 Jan 2017 10:43:45 +0100 Subject: drm/i915: Remove useless casts to intel_plane_state The visible member used to be in intel_plane_state->visible, but has been moved to drm_plane_state->visible. In the conversion some casts were left in that are now useless. to_intel_plane_state(x)->base.visible is the same as x->visible, so use the latter to clear up the code a little. Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1484214225-30328-1-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Chris Wilson Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_display.c | 12 ++++++------ drivers/gpu/drm/i915/intel_fbc.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 56047018391c..fd5fbc83c69e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2796,7 +2796,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * simplest solution is to just disable the primary plane now and * pretend the BIOS never had it enabled. */ - to_intel_plane_state(plane_state)->base.visible = false; + plane_state->visible = false; crtc_state->plane_mask &= ~(1 << drm_plane_index(primary)); intel_pre_disable_primary_noatomic(&intel_crtc->base); intel_plane->disable_plane(primary, &intel_crtc->base); @@ -6883,13 +6883,13 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) if (!intel_crtc->active) return; - if (to_intel_plane_state(crtc->primary->state)->base.visible) { + if (crtc->primary->state->visible) { WARN_ON(intel_crtc->flip_work); intel_pre_disable_primary_noatomic(crtc); intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary)); - to_intel_plane_state(crtc->primary->state)->base.visible = false; + crtc->primary->state->visible = false; } state = drm_atomic_state_alloc(crtc->dev); @@ -12463,7 +12463,7 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, } was_visible = old_plane_state->base.visible; - visible = to_intel_plane_state(plane_state)->base.visible; + visible = plane_state->visible; if (!was_crtc_enabled && WARN_ON(was_visible)) was_visible = false; @@ -12479,7 +12479,7 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, * only combine the results from all planes in the current place? */ if (!is_crtc_enabled) - to_intel_plane_state(plane_state)->base.visible = visible = false; + plane_state->visible = visible = false; if (!was_visible && !visible) return 0; @@ -16841,7 +16841,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) * Temporarily change the plane mapping and disable everything * ... */ plane = crtc->plane; - to_intel_plane_state(crtc->base.primary->state)->base.visible = true; + crtc->base.primary->state->visible = true; crtc->plane = !plane; intel_crtc_disable_noatomic(&crtc->base); crtc->plane = plane; diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 26a81a9e9c1d..98cb85c88aff 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -1296,7 +1296,7 @@ void intel_fbc_init_pipe_state(struct drm_i915_private *dev_priv) for_each_intel_crtc(&dev_priv->drm, crtc) if (intel_crtc_active(crtc) && - to_intel_plane_state(crtc->base.primary->state)->base.visible) + crtc->base.primary->state->visible) dev_priv->fbc.visible_pipes_mask |= (1 << crtc->pipe); } -- cgit v1.2.3-55-g7522 From 7c3f86b6dc51b38ee30aaac00cdf39d20b2e7b38 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Jan 2017 11:00:49 +0000 Subject: drm/i915: Invalidate the guc ggtt TLB upon insertion Move the GuC invalidation of its ggtt TLB to where we perform the ggtt modification rather than proliferate it into all the callers of the insert (which may or may not in fact have to do the insertion). v2: Just do the guc invalidate unconditionally, (afaict) it has no impact without the guc loaded on gen8+ v3: Conditionally invalidate the guc - just in case that register has not been validated for other modes. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Daniel Vetter Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170112110050.25333-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 78 +++++++++++++++++++----------- drivers/gpu/drm/i915/i915_gem_gtt.h | 3 ++ drivers/gpu/drm/i915/i915_guc_submission.c | 3 -- drivers/gpu/drm/i915/intel_guc_loader.c | 7 +-- drivers/gpu/drm/i915/intel_lrc.c | 6 --- 5 files changed, 57 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0ed99adfd0da..ed120a1e7f93 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -110,6 +110,30 @@ const struct i915_ggtt_view i915_ggtt_view_rotated = { .type = I915_GGTT_VIEW_ROTATED, }; +static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv) +{ + /* Note that as an uncached mmio write, this should flush the + * WCB of the writes into the GGTT before it triggers the invalidate. + */ + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); +} + +static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv) +{ + gen6_ggtt_invalidate(dev_priv); + I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); +} + +static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv) +{ + intel_gtt_chipset_flush(); +} + +static inline void i915_ggtt_invalidate(struct drm_i915_private *i915) +{ + i915->ggtt.invalidate(i915); +} + int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, int enable_ppgtt) { @@ -2307,16 +2331,6 @@ void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); } -static void i915_ggtt_flush(struct drm_i915_private *dev_priv) -{ - if (INTEL_INFO(dev_priv)->gen < 6) { - intel_gtt_chipset_flush(); - } else { - I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); - POSTING_READ(GFX_FLSH_CNTL_GEN6); - } -} - void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; @@ -2331,7 +2345,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); - i915_ggtt_flush(dev_priv); + i915_ggtt_invalidate(dev_priv); } int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, @@ -2370,15 +2384,13 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, enum i915_cache_level level, u32 unused) { - struct drm_i915_private *dev_priv = vm->i915; + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); gen8_pte_t __iomem *pte = - (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + - (offset >> PAGE_SHIFT); + (gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT); gen8_set_pte(pte, gen8_pte_encode(addr, level)); - I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); - POSTING_READ(GFX_FLSH_CNTL_GEN6); + ggtt->invalidate(vm->i915); } static void gen8_ggtt_insert_entries(struct i915_address_space *vm, @@ -2386,7 +2398,6 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, uint64_t start, enum i915_cache_level level, u32 unused) { - struct drm_i915_private *dev_priv = vm->i915; struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct sgt_iter sgt_iter; gen8_pte_t __iomem *gtt_entries; @@ -2415,8 +2426,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, * want to flush the TLBs only after we're certain all the PTE updates * have finished. */ - I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); - POSTING_READ(GFX_FLSH_CNTL_GEN6); + ggtt->invalidate(vm->i915); } struct insert_entries { @@ -2451,15 +2461,13 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm, enum i915_cache_level level, u32 flags) { - struct drm_i915_private *dev_priv = vm->i915; + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); gen6_pte_t __iomem *pte = - (gen6_pte_t __iomem *)dev_priv->ggtt.gsm + - (offset >> PAGE_SHIFT); + (gen6_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT); iowrite32(vm->pte_encode(addr, level, flags), pte); - I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); - POSTING_READ(GFX_FLSH_CNTL_GEN6); + ggtt->invalidate(vm->i915); } /* @@ -2473,7 +2481,6 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, uint64_t start, enum i915_cache_level level, u32 flags) { - struct drm_i915_private *dev_priv = vm->i915; struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct sgt_iter sgt_iter; gen6_pte_t __iomem *gtt_entries; @@ -2501,8 +2508,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, * want to flush the TLBs only after we're certain all the PTE updates * have finished. */ - I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); - POSTING_READ(GFX_FLSH_CNTL_GEN6); + ggtt->invalidate(vm->i915); } static void nop_clear_range(struct i915_address_space *vm, @@ -3062,6 +3068,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) if (IS_CHERRYVIEW(dev_priv)) ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; + ggtt->invalidate = gen6_ggtt_invalidate; + return ggtt_probe_common(ggtt, size); } @@ -3099,6 +3107,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.unbind_vma = ggtt_unbind_vma; ggtt->base.cleanup = gen6_gmch_remove; + ggtt->invalidate = gen6_ggtt_invalidate; + if (HAS_EDRAM(dev_priv)) ggtt->base.pte_encode = iris_pte_encode; else if (IS_HASWELL(dev_priv)) @@ -3142,6 +3152,8 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.unbind_vma = ggtt_unbind_vma; ggtt->base.cleanup = i915_gmch_remove; + ggtt->invalidate = gmch_ggtt_invalidate; + if (unlikely(ggtt->do_idle_maps)) DRM_INFO("applying Ironlake quirks for intel_iommu\n"); @@ -3260,6 +3272,16 @@ int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) return 0; } +void i915_ggtt_enable_guc(struct drm_i915_private *i915) +{ + i915->ggtt.invalidate = guc_ggtt_invalidate; +} + +void i915_ggtt_disable_guc(struct drm_i915_private *i915) +{ + i915->ggtt.invalidate = gen6_ggtt_invalidate; +} + void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; @@ -3323,7 +3345,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) } } - i915_ggtt_flush(dev_priv); + i915_ggtt_invalidate(dev_priv); } struct i915_vma * diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 3e031a057f78..6c40088f8cf4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -336,6 +336,7 @@ struct i915_ggtt { /** "Graphics Stolen Memory" holds the global PTEs */ void __iomem *gsm; + void (*invalidate)(struct drm_i915_private *dev_priv); bool do_idle_maps; @@ -504,6 +505,8 @@ i915_vm_to_ggtt(struct i915_address_space *vm) int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); +void i915_ggtt_enable_guc(struct drm_i915_private *i915); +void i915_ggtt_disable_guc(struct drm_i915_private *i915); int i915_gem_init_ggtt(struct drm_i915_private *dev_priv); void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 710fbb9fc63f..913d87358972 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -579,9 +579,6 @@ static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size) goto err; } - /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */ - I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); - return vma; err: diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 5a6ab8728d48..b8891914287e 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -367,9 +367,6 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) return PTR_ERR(vma); } - /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */ - I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); /* init WOPCM */ @@ -487,6 +484,9 @@ int intel_guc_setup(struct drm_i915_private *dev_priv) guc_interrupts_release(dev_priv); gen9_reset_guc_interrupts(dev_priv); + /* We need to notify the guc whenever we change the GGTT */ + i915_ggtt_enable_guc(dev_priv); + guc_fw->guc_fw_load_status = GUC_FIRMWARE_PENDING; DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n", @@ -548,6 +548,7 @@ fail: guc_interrupts_release(dev_priv); i915_guc_submission_disable(dev_priv); i915_guc_submission_fini(dev_priv); + i915_ggtt_disable_guc(dev_priv); /* * We've failed to load the firmware :( diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8acab875fcfc..8f8dcd9a9524 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -811,12 +811,6 @@ static int execlists_context_pin(struct intel_engine_cs *engine, ce->state->obj->mm.dirty = true; - /* Invalidate GuC TLB. */ - if (i915.enable_guc_submission) { - struct drm_i915_private *dev_priv = ctx->i915; - I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); - } - i915_gem_context_get(ctx); return 0; -- cgit v1.2.3-55-g7522 From fcd46e53449c4d659ffbedcd2823ea2f73e39927 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Jan 2017 13:04:31 +0000 Subject: drm/i915: Declare i915_gem_object_create_internal() as taking phys_addr_t size The internal object is a collection of struct pages and so is intrinsically linked to the available physical memory on the machine, and not an arbitrary type from the uabi. Use phys_addr_t so the link between size and memory consumption is clear, and then double check that we don't overflow the maximum object size. v2: Also assert that size is not zero - a mistake I made a few times while writing selftests. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170112130431.1844-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem_internal.c | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e9b4ece689d0..234c5890134f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3505,7 +3505,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv /* i915_gem_internal.c */ struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *dev_priv, - unsigned int size); + phys_addr_t size); /* i915_gem_shrinker.c */ unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index 2222863e505f..9b39472396ef 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -151,10 +151,15 @@ static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { */ struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *i915, - unsigned int size) + phys_addr_t size) { struct drm_i915_gem_object *obj; + GEM_BUG_ON(!size); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + obj = i915_gem_object_alloc(i915); if (!obj) return ERR_PTR(-ENOMEM); -- cgit v1.2.3-55-g7522 From 21976853fcb0bb8a507264a0cf2f44db29ecca82 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Jan 2017 11:21:08 +0000 Subject: drm/i915: Expand ggtt_view parameters for debugfs When dumping the VMA, include the parameters of the different GGTT views so that we can distinguish them. v2: Contract output and add MISSING_CASE for any unknown types. v3: Show both stride and offset for rotated planes. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170112112108.31632-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9d7b5a8c8dea..e367f06f5883 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -159,8 +159,35 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (%sgtt offset: %08llx, size: %08llx", i915_vma_is_ggtt(vma) ? "g" : "pp", vma->node.start, vma->node.size); - if (i915_vma_is_ggtt(vma)) - seq_printf(m, ", type: %u", vma->ggtt_view.type); + if (i915_vma_is_ggtt(vma)) { + switch (vma->ggtt_view.type) { + case I915_GGTT_VIEW_NORMAL: + seq_puts(m, ", normal"); + break; + + case I915_GGTT_VIEW_PARTIAL: + seq_printf(m, ", partial [%08llx+%x]", + vma->ggtt_view.params.partial.offset << PAGE_SHIFT, + vma->ggtt_view.params.partial.size << PAGE_SHIFT); + break; + + case I915_GGTT_VIEW_ROTATED: + seq_printf(m, ", rotated [(%ux%u, stride=%u, offset=%u), (%ux%u, stride=%u, offset=%u)]", + vma->ggtt_view.params.rotated.plane[0].width, + vma->ggtt_view.params.rotated.plane[0].height, + vma->ggtt_view.params.rotated.plane[0].stride, + vma->ggtt_view.params.rotated.plane[0].offset, + vma->ggtt_view.params.rotated.plane[1].width, + vma->ggtt_view.params.rotated.plane[1].height, + vma->ggtt_view.params.rotated.plane[1].stride, + vma->ggtt_view.params.rotated.plane[1].offset); + break; + + default: + MISSING_CASE(vma->ggtt_view.type); + break; + } + } if (vma->fence) seq_printf(m, " , fence: %d%s", vma->fence->id, -- cgit v1.2.3-55-g7522 From a4dbf7cf17a537c0788819bfe575b2067f0310bc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Jan 2017 16:45:59 +0000 Subject: drm/i915: Fix up kerneldoc parameters for i915_gem_gtt_*() Parameter: good. Parameter - bad. One day I'll learn the syntax. Fixes: 625d988acc28 ("drm/i915: Extract reserving space in the GTT to a helper") Fixes: e007b19d7ba7 ("drm/i915: Use the MRU stack search after evicting") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170112164559.27232-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_gem_gtt.c | 44 ++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ed120a1e7f93..6d2ff20ec973 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3579,16 +3579,16 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) /** * i915_gem_gtt_reserve - reserve a node in an address_space (GTT) - * @vm - the &struct i915_address_space - * @node - the &struct drm_mm_node (typically i915_vma.mode) - * @size - how much space to allocate inside the GTT, - * must be #I915_GTT_PAGE_SIZE aligned - * @offset - where to insert inside the GTT, - * must be #I915_GTT_MIN_ALIGNMENT aligned, and the node - * (@offset + @size) must fit within the address space - * @color - color to apply to node, if this node is not from a VMA, - * color must be #I915_COLOR_UNEVICTABLE - * @flags - control search and eviction behaviour + * @vm: the &struct i915_address_space + * @node: the &struct drm_mm_node (typically i915_vma.mode) + * @size: how much space to allocate inside the GTT, + * must be #I915_GTT_PAGE_SIZE aligned + * @offset: where to insert inside the GTT, + * must be #I915_GTT_MIN_ALIGNMENT aligned, and the node + * (@offset + @size) must fit within the address space + * @color: color to apply to node, if this node is not from a VMA, + * color must be #I915_COLOR_UNEVICTABLE + * @flags: control search and eviction behaviour * * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside * the address space (using @size and @color). If the @node does not fit, it @@ -3656,19 +3656,19 @@ static u64 random_offset(u64 start, u64 end, u64 len, u64 align) /** * i915_gem_gtt_insert - insert a node into an address_space (GTT) - * @vm - the &struct i915_address_space - * @node - the &struct drm_mm_node (typically i915_vma.node) - * @size - how much space to allocate inside the GTT, + * @vm: the &struct i915_address_space + * @node: the &struct drm_mm_node (typically i915_vma.node) + * @size: how much space to allocate inside the GTT, + * must be #I915_GTT_PAGE_SIZE aligned + * @alignment: required alignment of starting offset, may be 0 but + * if specified, this must be a power-of-two and at least + * #I915_GTT_MIN_ALIGNMENT + * @color: color to apply to node + * @start: start of any range restriction inside GTT (0 for all), * must be #I915_GTT_PAGE_SIZE aligned - * @alignment - required alignment of starting offset, may be 0 but - * if specified, this must be a power-of-two and at least - * #I915_GTT_MIN_ALIGNMENT - * @color - color to apply to node - * @start - start of any range restriction inside GTT (0 for all), - * must be #I915_GTT_PAGE_SIZE aligned - * @end - end of any range restriction inside GTT (U64_MAX for all), - * must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX - * @flags - control search and eviction behaviour + * @end: end of any range restriction inside GTT (U64_MAX for all), + * must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX + * @flags: control search and eviction behaviour * * i915_gem_gtt_insert() first searches for an available hole into which * is can insert the node. The hole address is aligned to @alignment and -- cgit v1.2.3-55-g7522 From 3fcb0ca1d8dbfdc4759f5816e39443cfe0f298f7 Mon Sep 17 00:00:00 2001 From: Nagaraju, Vathsala Date: Thu, 12 Jan 2017 23:30:59 +0530 Subject: drm/i915/psr: fix blank screen issue for psr2 Psr1 and psr2 are mutually exclusive,ie when psr2 is enabled, psr1 should be disabled.When psr2 is exited , bit 31 of reg PSR2_CTL must be set to 0 but currently bit 31 of SRD_CTL (psr1 control register)is set to 0. Also ,PSR2_IDLE state is looked up from SRD_STATUS(psr1 register) instead of PSR2_STATUS register, which has wrong data, resulting in blankscreen. hsw_enable_source is split into hsw_enable_source_psr1 and hsw_enable_source_psr2 for easier code review and maintenance, as suggested by rodrigo and jim. v2: (Rodrigo) - Rename hsw_enable_source_psr* to intel_enable_source_psr* v3: (Rodrigo) - In hsw_psr_disable , 1) for psr active case, handle psr2 followed by psr1. 2) psr inactive case, handle psr2 followed by psr1 v4:(Rodrigo) - move psr2 restriction(32X20) to match_conditions function returning false and fully blocking PSR to a new patch before this one. v5: in source_psr2, removed val = EDP_PSR_ENABLE Cc: Rodrigo Vivi Cc: Jim Bride Signed-off-by: Vathsala Nagaraju Signed-off-by: Patil Deepti Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1484244059-9201-1-git-send-email-vathsala.nagaraju@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 3 + drivers/gpu/drm/i915/intel_psr.c | 122 +++++++++++++++++++++++++++++---------- 2 files changed, 95 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 00970aa77afa..7830e6e5fc68 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3615,6 +3615,9 @@ enum { #define EDP_PSR2_FRAME_BEFORE_SU_MASK (0xf<<4) #define EDP_PSR2_IDLE_MASK 0xf +#define EDP_PSR2_STATUS_CTL _MMIO(0x6f940) +#define EDP_PSR2_STATUS_STATE_MASK (0xf<<28) + /* VGA port control */ #define ADPA _MMIO(0x61100) #define PCH_ADPA _MMIO(0xe1100) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 707cae8dc980..882764779520 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -261,7 +261,7 @@ static void vlv_psr_activate(struct intel_dp *intel_dp) VLV_EDP_PSR_ACTIVE_ENTRY); } -static void hsw_psr_enable_source(struct intel_dp *intel_dp) +static void intel_enable_source_psr1(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = dig_port->base.base.dev; @@ -312,14 +312,29 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp) val |= EDP_PSR_TP1_TP2_SEL; I915_WRITE(EDP_PSR_CTL, val); +} - if (!dev_priv->psr.psr2_support) - return; +static void intel_enable_source_psr2(struct intel_dp *intel_dp) +{ + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_device *dev = dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + /* + * Let's respect VBT in case VBT asks a higher idle_frame value. + * Let's use 6 as the minimum to cover all known cases including + * the off-by-one issue that HW has in some cases. Also there are + * cases where sink should be able to train + * with the 5 or 6 idle patterns. + */ + uint32_t idle_frames = max(6, dev_priv->vbt.psr.idle_frames); + uint32_t val; + + val = idle_frames << EDP_PSR_IDLE_FRAME_SHIFT; /* FIXME: selective update is probably totally broken because it doesn't * mesh at all with our frontbuffer tracking. And the hw alone isn't * good enough. */ - val = EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE; + val |= EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE; if (dev_priv->vbt.psr.tp2_tp3_wakeup_time > 5) val |= EDP_PSR2_TP2_TIME_2500; @@ -333,6 +348,19 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp) I915_WRITE(EDP_PSR2_CTL, val); } +static void hsw_psr_enable_source(struct intel_dp *intel_dp) +{ + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_device *dev = dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + + /* psr1 and psr2 are mutually exclusive.*/ + if (dev_priv->psr.psr2_support) + intel_enable_source_psr2(intel_dp); + else + intel_enable_source_psr1(intel_dp); +} + static bool intel_psr_match_conditions(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); @@ -417,7 +445,10 @@ static void intel_psr_activate(struct intel_dp *intel_dp) struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); + if (dev_priv->psr.psr2_support) + WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE); + else + WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); WARN_ON(dev_priv->psr.active); lockdep_assert_held(&dev_priv->psr.lock); @@ -468,10 +499,11 @@ void intel_psr_enable(struct intel_dp *intel_dp) dev_priv->psr.busy_frontbuffer_bits = 0; if (HAS_DDI(dev_priv)) { - hsw_psr_setup_vsc(intel_dp); - if (dev_priv->psr.psr2_support) { skl_psr_setup_su_vsc(intel_dp); + } else { + /* set up vsc header for psr1 */ + hsw_psr_setup_vsc(intel_dp); } /* @@ -558,20 +590,35 @@ static void hsw_psr_disable(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dev); if (dev_priv->psr.active) { - I915_WRITE(EDP_PSR_CTL, - I915_READ(EDP_PSR_CTL) & ~EDP_PSR_ENABLE); - - /* Wait till PSR is idle */ - if (intel_wait_for_register(dev_priv, - EDP_PSR_STATUS_CTL, - EDP_PSR_STATUS_STATE_MASK, - 0, - 2000)) + if (dev_priv->psr.psr2_support) { + I915_WRITE(EDP_PSR2_CTL, + I915_READ(EDP_PSR2_CTL) & + ~(EDP_PSR2_ENABLE | + EDP_SU_TRACK_ENABLE)); + /* Wait till PSR2 is idle */ + if (intel_wait_for_register(dev_priv, + EDP_PSR2_STATUS_CTL, + EDP_PSR2_STATUS_STATE_MASK, + 0, + 2000)) + DRM_ERROR("Timed out waiting for PSR2 Idle State\n"); + } else { + I915_WRITE(EDP_PSR_CTL, + I915_READ(EDP_PSR_CTL) & ~EDP_PSR_ENABLE); + /* Wait till PSR1 is idle */ + if (intel_wait_for_register(dev_priv, + EDP_PSR_STATUS_CTL, + EDP_PSR_STATUS_STATE_MASK, + 0, + 2000)) DRM_ERROR("Timed out waiting for PSR Idle State\n"); - + } dev_priv->psr.active = false; } else { - WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); + if (dev_priv->psr.psr2_support) + WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE); + else + WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); } } @@ -622,13 +669,24 @@ static void intel_psr_work(struct work_struct *work) * and be ready for re-enable. */ if (HAS_DDI(dev_priv)) { - if (intel_wait_for_register(dev_priv, - EDP_PSR_STATUS_CTL, - EDP_PSR_STATUS_STATE_MASK, - 0, - 50)) { - DRM_ERROR("Timed out waiting for PSR Idle for re-enable\n"); - return; + if (dev_priv->psr.psr2_support) { + if (intel_wait_for_register(dev_priv, + EDP_PSR2_STATUS_CTL, + EDP_PSR2_STATUS_STATE_MASK, + 0, + 50)) { + DRM_ERROR("Timed out waiting for PSR2 Idle for re-enable\n"); + return; + } + } else { + if (intel_wait_for_register(dev_priv, + EDP_PSR_STATUS_CTL, + EDP_PSR_STATUS_STATE_MASK, + 0, + 50)) { + DRM_ERROR("Timed out waiting for PSR Idle for re-enable\n"); + return; + } } } else { if (intel_wait_for_register(dev_priv, @@ -670,11 +728,15 @@ static void intel_psr_exit(struct drm_i915_private *dev_priv) return; if (HAS_DDI(dev_priv)) { - val = I915_READ(EDP_PSR_CTL); - - WARN_ON(!(val & EDP_PSR_ENABLE)); - - I915_WRITE(EDP_PSR_CTL, val & ~EDP_PSR_ENABLE); + if (dev_priv->psr.psr2_support) { + val = I915_READ(EDP_PSR2_CTL); + WARN_ON(!(val & EDP_PSR2_ENABLE)); + I915_WRITE(EDP_PSR2_CTL, val & ~EDP_PSR2_ENABLE); + } else { + val = I915_READ(EDP_PSR_CTL); + WARN_ON(!(val & EDP_PSR_ENABLE)); + I915_WRITE(EDP_PSR_CTL, val & ~EDP_PSR_ENABLE); + } } else { val = I915_READ(VLV_PSRCTL(pipe)); -- cgit v1.2.3-55-g7522 From f40c484b78a3f9d4e469a11e7c6047ea6cb6f3b6 Mon Sep 17 00:00:00 2001 From: Nagaraju, Vathsala Date: Wed, 11 Jan 2017 20:44:33 +0530 Subject: drm/i915/psr: disable aux_frame_sync on psr2 exit Screen freeze observed if AUX_FRAME_SYNC is not disabled on psr2 exit.AUX_FRAME_SYNC needed for psr2 is enabled during psr2 entry. It must be disabled on psr2 exit. v2: rebase Cc: Rodrigo Vivi Cc: Jim Bride Signed-off-by: Vathsala Nagaraju Signed-off-by: Patil Deepti Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1484147673-2044-1-git-send-email-vathsala.nagaraju@intel.com --- drivers/gpu/drm/i915/intel_psr.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 882764779520..d57ec780959a 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -590,6 +590,11 @@ static void hsw_psr_disable(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dev); if (dev_priv->psr.active) { + if (dev_priv->psr.aux_frame_sync) + drm_dp_dpcd_writeb(&intel_dp->aux, + DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF, + 0); + if (dev_priv->psr.psr2_support) { I915_WRITE(EDP_PSR2_CTL, I915_READ(EDP_PSR2_CTL) & @@ -728,6 +733,10 @@ static void intel_psr_exit(struct drm_i915_private *dev_priv) return; if (HAS_DDI(dev_priv)) { + if (dev_priv->psr.aux_frame_sync) + drm_dp_dpcd_writeb(&intel_dp->aux, + DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF, + 0); if (dev_priv->psr.psr2_support) { val = I915_READ(EDP_PSR2_CTL); WARN_ON(!(val & EDP_PSR2_ENABLE)); -- cgit v1.2.3-55-g7522 From 340c93c0a32acc916bb7655926398b8591e0a30b Mon Sep 17 00:00:00 2001 From: Nagaraju, Vathsala Date: Mon, 2 Jan 2017 17:00:58 +0530 Subject: drm/i915/psr: enable ALPM for psr2 As per edp1.4 spec , alpm is required for psr2 operation as it's used for all psr2 main link power down management and alpm enable bit must be set for psr2 operation. Cc: Rodrigo Vivi Cc: Jim Bride Signed-off-by: vathsala nagaraju Signed-off-by: Patil Deepti Reviewed-by: Jim Bride Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1483356663-32668-6-git-send-email-vathsala.nagaraju@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_dp.c | 10 ++++++++++ drivers/gpu/drm/i915/intel_psr.c | 6 +++++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 234c5890134f..1cd485c314e6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1156,6 +1156,7 @@ struct i915_psr { bool link_standby; bool y_cord_support; bool colorimetry_support; + bool alpm; }; enum intel_pch { diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 343e1d9fa761..4f33115f5ca3 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3060,6 +3060,14 @@ static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp) return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED; } +bool intel_dp_get_alpm_status(struct intel_dp *intel_dp) +{ + uint8_t alpm_caps = 0; + + drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, &alpm_caps); + return alpm_caps & DP_ALPM_CAP; +} + /* These are source-specific values. */ uint8_t intel_dp_voltage_max(struct intel_dp *intel_dp) @@ -3644,6 +3652,8 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) intel_dp_get_y_cord_status(intel_dp); dev_priv->psr.colorimetry_support = intel_dp_get_colorimetry_status(intel_dp); + dev_priv->psr.alpm = + intel_dp_get_alpm_status(intel_dp); } } diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index d57ec780959a..36c404511883 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -209,7 +209,11 @@ static void hsw_psr_enable_sink(struct intel_dp *intel_dp) drm_dp_dpcd_writeb(&intel_dp->aux, DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF, DP_AUX_FRAME_SYNC_ENABLE); - + /* Enable ALPM at sink for psr2 */ + if (dev_priv->psr.psr2_support && dev_priv->psr.alpm) + drm_dp_dpcd_writeb(&intel_dp->aux, + DP_RECEIVER_ALPM_CONFIG, + DP_ALPM_ENABLE); if (dev_priv->psr.link_standby) drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE); -- cgit v1.2.3-55-g7522 From 0c7eeda1af05693fbecd021eeab37477aa49c162 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Jan 2017 21:09:25 +0000 Subject: drm/i915: Move i915_ppgtt_close() into i915_gem_gtt.c Move it alongside its ppgtt counterparts, in order to make it available for the ppgtt selftests. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170111210937.29252-26-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_context.c | 21 --------------------- drivers/gpu/drm/i915/i915_gem_gtt.c | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index ed31133b3ce3..ae99c25397ca 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -205,27 +205,6 @@ alloc_context_obj(struct drm_i915_private *dev_priv, u64 size) return obj; } -static void i915_ppgtt_close(struct i915_address_space *vm) -{ - struct list_head *phases[] = { - &vm->active_list, - &vm->inactive_list, - &vm->unbound_list, - NULL, - }, **phase; - - GEM_BUG_ON(vm->closed); - vm->closed = true; - - for (phase = phases; *phase; phase++) { - struct i915_vma *vma, *vn; - - list_for_each_entry_safe(vma, vn, *phase, vm_link) - if (!i915_vma_is_closed(vma)) - i915_vma_close(vma); - } -} - static void context_close(struct i915_gem_context *ctx) { i915_gem_context_set_closed(ctx); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 6d2ff20ec973..7d21cdfc6b0e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2267,6 +2267,27 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv, return ppgtt; } +void i915_ppgtt_close(struct i915_address_space *vm) +{ + struct list_head *phases[] = { + &vm->active_list, + &vm->inactive_list, + &vm->unbound_list, + NULL, + }, **phase; + + GEM_BUG_ON(vm->closed); + vm->closed = true; + + for (phase = phases; *phase; phase++) { + struct i915_vma *vma, *vn; + + list_for_each_entry_safe(vma, vn, *phase, vm_link) + if (!i915_vma_is_closed(vma)) + i915_vma_close(vma); + } +} + void i915_ppgtt_release(struct kref *kref) { struct i915_hw_ppgtt *ppgtt = diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 6c40088f8cf4..9f04c9febe4d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -515,6 +515,7 @@ void i915_ppgtt_release(struct kref *kref); struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv, struct drm_i915_file_private *fpriv, const char *name); +void i915_ppgtt_close(struct i915_address_space *vm); static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt) { if (ppgtt) -- cgit v1.2.3-55-g7522 From 0325701a34f96ecc2502228d07f046d479b8d328 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Jan 2017 21:09:26 +0000 Subject: drm/i915: Assert that we have allocated the drm_mm_node upon pinning We currently check after the slow path that the vma is bound correctly, but we don't currently check after the fast path. This is important in case we accidentally take the fast path and leave the vma misplaced. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170111210937.29252-27-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_vma.c | 1 + drivers/gpu/drm/i915/i915_vma.h | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index b74eeb73ae41..379364b8fef9 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -476,6 +476,7 @@ int __i915_vma_do_pin(struct i915_vma *vma, if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) __i915_vma_set_map_and_fenceable(vma); + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); return 0; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index a969bbb65871..008cf115f38f 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -232,8 +232,11 @@ i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) /* Pin early to prevent the shrinker/eviction logic from destroying * our vma as we insert and bind. */ - if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) + if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) { + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); return 0; + } return __i915_vma_do_pin(vma, size, alignment, flags); } -- cgit v1.2.3-55-g7522 From d86f0482cd03c70482f2061bc320520b4e8de5ce Mon Sep 17 00:00:00 2001 From: Nagaraju, Vathsala Date: Fri, 13 Jan 2017 00:31:31 +0530 Subject: drm/i915/psr: set CHICKEN_TRANS for psr2 As per bpsec, CHICKEN_TRANS_EDP bit 12 ,15 must be programmed in psr2 enable sequence. bit 12 : Program Transcoder EDP VSC DIP header with a valid setting for PSR2 and Set CHICKEN_TRANS_EDP(0x420cc) bit 12 for programmable header packet. bit 15 : Set CHICKEN_TRANS_EDP(0x420cc) bit 15 if Y coordinate is supported v2: (Rodrigo) - move CHICKEN_TRANS_EDP bit set logic right after setup_vsc v3:(Rodrigo) - initialize chicken_trans to CHICKEN_TRANS_BIT12 instead of 0 v4:(chris wilson) - use BIT(12), remove CHICKEN_TRANS_BIT12 - remove unnecessary comments - update commit message v5: - rename bit 12 PSR2_VSC_ENABLE_PROG_HEADER - rename bit 15 PSR2_ADD_VERTICAL_LINE_COUNT v6:(Rodrigo) - remove TRANS_EDP=3, use cpu_transcoder Cc: Rodrigo Vivi Cc: Jim Bride Signed-off-by: vathsala nagaraju Signed-off-by: Patil Deepti Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1484247691-20930-1-git-send-email-vathsala.nagaraju@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 6 ++++++ drivers/gpu/drm/i915/intel_psr.c | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7830e6e5fc68..c9c1ccd4bc77 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6449,6 +6449,12 @@ enum { #define BDW_DPRS_MASK_VBLANK_SRD (1 << 0) #define CHICKEN_PIPESL_1(pipe) _MMIO_PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B) +#define CHICKEN_TRANS_A 0x420c0 +#define CHICKEN_TRANS_B 0x420c4 +#define CHICKEN_TRANS(trans) _MMIO_TRANS(trans, CHICKEN_TRANS_A, CHICKEN_TRANS_B) +#define PSR2_VSC_ENABLE_PROG_HEADER (1<<12) +#define PSR2_ADD_VERTICAL_LINE_COUNT (1<<15) + #define DISP_ARB_CTL _MMIO(0x45000) #define DISP_FBC_MEMORY_WAKE (1<<31) #define DISP_TILE_SURFACE_SWIZZLING (1<<13) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 36c404511883..935402e0f786 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -480,6 +480,9 @@ void intel_psr_enable(struct intel_dp *intel_dp) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc); + enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; + u32 chicken; if (!HAS_PSR(dev_priv)) { DRM_DEBUG_KMS("PSR not supported on this platform\n"); @@ -505,6 +508,10 @@ void intel_psr_enable(struct intel_dp *intel_dp) if (HAS_DDI(dev_priv)) { if (dev_priv->psr.psr2_support) { skl_psr_setup_su_vsc(intel_dp); + chicken = PSR2_VSC_ENABLE_PROG_HEADER; + if (dev_priv->psr.y_cord_support) + chicken |= PSR2_ADD_VERTICAL_LINE_COUNT; + I915_WRITE(CHICKEN_TRANS(cpu_transcoder), chicken); } else { /* set up vsc header for psr1 */ hsw_psr_setup_vsc(intel_dp); -- cgit v1.2.3-55-g7522 From 6433226b0f51cdd9f08e23fa8b0c376088c33357 Mon Sep 17 00:00:00 2001 From: Nagaraju, Vathsala Date: Fri, 13 Jan 2017 06:01:24 +0530 Subject: drm/i915/psr: set PSR_MASK bits for deep sleep Program EDP_PSR_DEBUG_CTL (PSR_MASK) to enable system to go to deep sleep while in psr2.PSR2_STATUS bit 31:28 should report value 8 , if system enters deep sleep state. Also, EDP_FRAMES_BEFORE_SU_ENTRY is set 1 , if not set, flickering is observed on psr2 panel. v2: (Ilia Mirkin) - Remove duplicate bit definition 25:27 v3: rebase v4: rebase v5: rebase Cc: Rodrigo Vivi Cc: Jim Bride Signed-off-by: Vathsala Nagaraju Signed-off-by: Patil Deepti Reviewed-by: Jim Bride Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1484267484-21843-1-git-send-email-vathsala.nagaraju@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 10 +++++++--- drivers/gpu/drm/i915/intel_psr.c | 30 ++++++++++++++++++++---------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c9c1ccd4bc77..ca76887120ee 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3597,9 +3597,12 @@ enum { #define EDP_PSR_PERF_CNT_MASK 0xffffff #define EDP_PSR_DEBUG_CTL _MMIO(dev_priv->psr_mmio_base + 0x60) -#define EDP_PSR_DEBUG_MASK_LPSP (1<<27) -#define EDP_PSR_DEBUG_MASK_MEMUP (1<<26) -#define EDP_PSR_DEBUG_MASK_HPD (1<<25) +#define EDP_PSR_DEBUG_MASK_MAX_SLEEP (1<<28) +#define EDP_PSR_DEBUG_MASK_LPSP (1<<27) +#define EDP_PSR_DEBUG_MASK_MEMUP (1<<26) +#define EDP_PSR_DEBUG_MASK_HPD (1<<25) +#define EDP_PSR_DEBUG_MASK_DISP_REG_WRITE (1<<16) +#define EDP_PSR_DEBUG_EXIT_ON_PIXEL_UNDERRUN (1<<15) #define EDP_PSR2_CTL _MMIO(0x6f900) #define EDP_PSR2_ENABLE (1<<31) @@ -3614,6 +3617,7 @@ enum { #define EDP_PSR2_FRAME_BEFORE_SU_SHIFT 4 #define EDP_PSR2_FRAME_BEFORE_SU_MASK (0xf<<4) #define EDP_PSR2_IDLE_MASK 0xf +#define EDP_FRAMES_BEFORE_SU_ENTRY (1<<4) #define EDP_PSR2_STATUS_CTL _MMIO(0x6f940) #define EDP_PSR2_STATUS_STATE_MASK (0xf<<28) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 935402e0f786..3611c42bff7f 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -338,7 +338,9 @@ static void intel_enable_source_psr2(struct intel_dp *intel_dp) /* FIXME: selective update is probably totally broken because it doesn't * mesh at all with our frontbuffer tracking. And the hw alone isn't * good enough. */ - val |= EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE; + val |= EDP_PSR2_ENABLE | + EDP_SU_TRACK_ENABLE | + EDP_FRAMES_BEFORE_SU_ENTRY; if (dev_priv->vbt.psr.tp2_tp3_wakeup_time > 5) val |= EDP_PSR2_TP2_TIME_2500; @@ -512,20 +514,28 @@ void intel_psr_enable(struct intel_dp *intel_dp) if (dev_priv->psr.y_cord_support) chicken |= PSR2_ADD_VERTICAL_LINE_COUNT; I915_WRITE(CHICKEN_TRANS(cpu_transcoder), chicken); + I915_WRITE(EDP_PSR_DEBUG_CTL, + EDP_PSR_DEBUG_MASK_MEMUP | + EDP_PSR_DEBUG_MASK_HPD | + EDP_PSR_DEBUG_MASK_LPSP | + EDP_PSR_DEBUG_MASK_MAX_SLEEP | + EDP_PSR_DEBUG_MASK_DISP_REG_WRITE); } else { /* set up vsc header for psr1 */ hsw_psr_setup_vsc(intel_dp); + /* + * Per Spec: Avoid continuous PSR exit by masking MEMUP + * and HPD. also mask LPSP to avoid dependency on other + * drivers that might block runtime_pm besides + * preventing other hw tracking issues now we can rely + * on frontbuffer tracking. + */ + I915_WRITE(EDP_PSR_DEBUG_CTL, + EDP_PSR_DEBUG_MASK_MEMUP | + EDP_PSR_DEBUG_MASK_HPD | + EDP_PSR_DEBUG_MASK_LPSP); } - /* - * Per Spec: Avoid continuous PSR exit by masking MEMUP and HPD. - * Also mask LPSP to avoid dependency on other drivers that - * might block runtime_pm besides preventing other hw tracking - * issues now we can rely on frontbuffer tracking. - */ - I915_WRITE(EDP_PSR_DEBUG_CTL, EDP_PSR_DEBUG_MASK_MEMUP | - EDP_PSR_DEBUG_MASK_HPD | EDP_PSR_DEBUG_MASK_LPSP); - /* Enable PSR on the panel */ hsw_psr_enable_sink(intel_dp); -- cgit v1.2.3-55-g7522 From 18b9bf3ee5a4fcce8b284a1bd4ddb593f1397daa Mon Sep 17 00:00:00 2001 From: Nagaraju, Vathsala Date: Thu, 12 Jan 2017 03:58:30 +0530 Subject: drm/i915/psr: enable psr2 for y cordinate panels Psr2 is enabled only for y cordinate panels.Once GTC (global time code) is implemented,this restriction is removed so that psr2 can work on panels without y cordinate support. v2: (Rodrigo) - Move the check to intel_psr_match_conditions v3: (Rodrigo) - add return false v4: rebase Cc: Rodrigo Vivi Cc: Jim Bride Signed-off-by: Vathsala Nagaraju Signed-off-by: Patil Deepti Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1484173710-3138-1-git-send-email-vathsala.nagaraju@intel.com --- drivers/gpu/drm/i915/intel_psr.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 3611c42bff7f..38419e57d2aa 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -441,6 +441,15 @@ static bool intel_psr_match_conditions(struct intel_dp *intel_dp) return false; } + /* + * FIXME:enable psr2 only for y-cordinate psr2 panels + * After gtc implementation , remove this restriction. + */ + if (!dev_priv->psr.y_cord_support && dev_priv->psr.psr2_support) { + DRM_DEBUG_KMS("PSR2 disabled, panel does not support Y coordinate\n"); + return false; + } + dev_priv->psr.source_ok = true; return true; } -- cgit v1.2.3-55-g7522 From 6ba1f9e1772f3ff9dc18e81fc01fa97c059b1d06 Mon Sep 17 00:00:00 2001 From: Nagaraju, Vathsala Date: Fri, 6 Jan 2017 22:02:32 +0530 Subject: drm/i915/psr: report live PSR2 State Reports live state of PSR2 form PSR2_STATUS register. bit field 31:28 gives the live state of PSR2. It can be used to check if system is in deep sleep, selective update or selective update standby. During video play back, we can use this to check if system is entering SU mode or not. when system is in idle state, DEEP_SLEEP(8) must be entered. When video playback is happening, system must be in SLEEP(3 / selective update) or SU_STANDBY( 6 / selective update standby) v2: (Rodrigo) - Remove EDP_PSR2_STATUS_TG_ON=a ,instead use ARRAY_SIZE Cc: Rodrigo Vivi Cc: Jim Bride Signed-off-by: Vathsala Nagaraju Signed-off-by: Patil Deepti Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1483720352-24761-1-git-send-email-vathsala.nagaraju@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 24 ++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 25 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e367f06f5883..32ede342ab94 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2633,6 +2633,30 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) seq_printf(m, "Performance_Counter: %u\n", psrperf); } + if (dev_priv->psr.psr2_support) { + static const char * const live_status[] = { + "IDLE", + "CAPTURE", + "CAPTURE_FS", + "SLEEP", + "BUFON_FW", + "ML_UP", + "SU_STANDBY", + "FAST_SLEEP", + "DEEP_SLEEP", + "BUF_ON", + "TG_ON" }; + u8 pos = (I915_READ(EDP_PSR2_STATUS_CTL) & + EDP_PSR2_STATUS_STATE_MASK) >> + EDP_PSR2_STATUS_STATE_SHIFT; + + seq_printf(m, "PSR2_STATUS_EDP: %x\n", + I915_READ(EDP_PSR2_STATUS_CTL)); + + if (pos < ARRAY_SIZE(live_status)) + seq_printf(m, "PSR2 live state %s\n", + live_status[pos]); + } mutex_unlock(&dev_priv->psr.lock); intel_runtime_pm_put(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ca76887120ee..72f9f36ae5ce 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3621,6 +3621,7 @@ enum { #define EDP_PSR2_STATUS_CTL _MMIO(0x6f940) #define EDP_PSR2_STATUS_STATE_MASK (0xf<<28) +#define EDP_PSR2_STATUS_STATE_SHIFT 28 /* VGA port control */ #define ADPA _MMIO(0x61100) -- cgit v1.2.3-55-g7522 From 7ff19c560fb279211670e0f5a06c8b3b594ecc2d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 14 Jan 2017 00:28:21 +0000 Subject: drm/i915: Name the anonymous structs inside i915_ggtt_view Naming this pair will become useful shortly... Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170114002827.31315-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 9f04c9febe4d..80f0cd534db6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -152,20 +152,22 @@ enum i915_ggtt_view_type { }; struct intel_rotation_info { - struct { + struct intel_rotation_plane_info { /* tiles */ unsigned int width, height, stride, offset; } plane[2]; }; +struct intel_partial_info { + u64 offset; + unsigned int size; +}; + struct i915_ggtt_view { enum i915_ggtt_view_type type; union { - struct { - u64 offset; - unsigned int size; - } partial; + struct intel_partial_info partial; struct intel_rotation_info rotated; } params; }; -- cgit v1.2.3-55-g7522 From 8d9046ad5d9b7e5ede7cdf566a190f136f239614 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 14 Jan 2017 00:28:22 +0000 Subject: drm/i915: Mark the ggtt_view structs as packed In the next few patches, we will depend upon there being no uninitialised bits inside the ggtt_view. To ensure this we add the __packed attribute and double check with a build bug that gcc hasn't expanded the struct to include some padding bytes. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170114002827.31315-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 80f0cd534db6..334b61b84376 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -156,12 +156,22 @@ struct intel_rotation_info { /* tiles */ unsigned int width, height, stride, offset; } plane[2]; -}; +} __packed; + +static inline void assert_intel_rotation_info_is_packed(void) +{ + BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 8*sizeof(unsigned int)); +} struct intel_partial_info { u64 offset; unsigned int size; -}; +} __packed; + +static inline void assert_intel_partial_info_is_packed(void) +{ + BUILD_BUG_ON(sizeof(struct intel_partial_info) != sizeof(u64) + sizeof(unsigned int)); +} struct i915_ggtt_view { enum i915_ggtt_view_type type; -- cgit v1.2.3-55-g7522 From 992e418dd945755fe947c8df7329e71e76b9e4f8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 14 Jan 2017 00:28:23 +0000 Subject: drm/i915: Compact memcmp in i915_vma_compare() In preparation for the next patch to convert to using an anonymous union and leaving the excess bytes in the union uninitialised, we first need to make sure we do not compare using those uninitialised bytes. We also want to preserve the compactness of the code, avoiding a second call to memcmp or introducing a switch, so we take advantage of using the type as an encoded size (as well as a unique identifier for each type of view). v2: Add the rationale for why we encode size into ggtt_view.type as a comment before the memcmp() v3: Use a switch to also assert that no two i915_ggtt_view_type have the same value. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170114002827.31315-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.h | 28 +++++++++++++++++++++------- drivers/gpu/drm/i915/i915_vma.h | 20 ++++++++++++++------ 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 334b61b84376..35ea4a18dc77 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -145,12 +145,6 @@ typedef uint64_t gen8_ppgtt_pml4e_t; struct sg_table; -enum i915_ggtt_view_type { - I915_GGTT_VIEW_NORMAL = 0, - I915_GGTT_VIEW_ROTATED, - I915_GGTT_VIEW_PARTIAL, -}; - struct intel_rotation_info { struct intel_rotation_plane_info { /* tiles */ @@ -173,10 +167,30 @@ static inline void assert_intel_partial_info_is_packed(void) BUILD_BUG_ON(sizeof(struct intel_partial_info) != sizeof(u64) + sizeof(unsigned int)); } +enum i915_ggtt_view_type { + I915_GGTT_VIEW_NORMAL = 0, + I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info), + I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info), +}; + +static inline void assert_i915_ggtt_view_type_is_unique(void) +{ + /* As we encode the size of each branch inside the union into its type, + * we have to be careful that each branch has a unique size. + */ + switch ((enum i915_ggtt_view_type)0) { + case I915_GGTT_VIEW_NORMAL: + case I915_GGTT_VIEW_PARTIAL: + case I915_GGTT_VIEW_ROTATED: + /* gcc complains if these are identical cases */ + break; + } +} + struct i915_ggtt_view { enum i915_ggtt_view_type type; - union { + /* Members need to contain no holes/padding */ struct intel_partial_info partial; struct intel_rotation_info rotated; } params; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 008cf115f38f..fdbacc036080 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -199,15 +199,23 @@ i915_vma_compare(struct i915_vma *vma, if (cmp) return cmp; + BUILD_BUG_ON(I915_GGTT_VIEW_NORMAL != 0); + cmp = vma->ggtt_view.type; if (!view) - return vma->ggtt_view.type; + return cmp; - if (vma->ggtt_view.type != view->type) - return vma->ggtt_view.type - view->type; + cmp -= view->type; + if (cmp) + return cmp; - return memcmp(&vma->ggtt_view.params, - &view->params, - sizeof(view->params)); + /* ggtt_view.type also encodes its size so that we both distinguish + * different views using it as a "type" and also use a compact (no + * accessing of uninitialised padding bytes) memcmp without storing + * an extra parameter or adding more code. + */ + BUILD_BUG_ON(I915_GGTT_VIEW_NORMAL >= I915_GGTT_VIEW_PARTIAL); + BUILD_BUG_ON(I915_GGTT_VIEW_PARTIAL >= I915_GGTT_VIEW_ROTATED); + return memcmp(&vma->ggtt_view.params, &view->params, view->type); } int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, -- cgit v1.2.3-55-g7522 From 3bf4d5751943cb7692ae2e0c01e420ed20629d5d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 14 Jan 2017 00:28:24 +0000 Subject: drm/i915: Stop clearing i915_ggtt_view As we now use a compact memcmp in i915_vma_compare(), we can forgo clearing the entire view and only set the precise parameters used in this view. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170114002827.31315-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3bf517e2430a..f034d8d2dd4c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1759,7 +1759,6 @@ compute_partial_view(struct drm_i915_gem_object *obj, if (i915_gem_object_is_tiled(obj)) chunk = roundup(chunk, tile_row_pages(obj)); - memset(&view, 0, sizeof(view)); view.type = I915_GGTT_VIEW_PARTIAL; view.params.partial.offset = rounddown(page_offset, chunk); view.params.partial.size = -- cgit v1.2.3-55-g7522 From 8bab1193c193fa1a695aa9bb881bb4cb0ea2ba85 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 14 Jan 2017 00:28:25 +0000 Subject: drm/i915: Convert i915_ggtt_view to use an anonymous union Reading the ggtt_views is much more pleasant without the extra characters from specifying the union (i.e. ggtt_view.partial rather than ggtt_view.params.partial). To make this work inside i915_vma_compare() with only a single memcmp requires us to ensure that there are no uninitialised bytes within each branch of the union (we make sure the structs are packed) and we need to store the size of each branch. v4: Rewrite changelog and add comments explaining the assert. Signed-off-by: Chris Wilson Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170114002827.31315-5-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c | 20 ++++++++++---------- drivers/gpu/drm/i915/i915_gem.c | 8 ++++---- drivers/gpu/drm/i915/i915_gem_gtt.c | 9 ++++----- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +- drivers/gpu/drm/i915/i915_vma.c | 9 ++++----- drivers/gpu/drm/i915/i915_vma.h | 9 ++++++++- drivers/gpu/drm/i915/intel_display.c | 2 +- 7 files changed, 32 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 32ede342ab94..01fdbbf0fd43 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -167,20 +167,20 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) case I915_GGTT_VIEW_PARTIAL: seq_printf(m, ", partial [%08llx+%x]", - vma->ggtt_view.params.partial.offset << PAGE_SHIFT, - vma->ggtt_view.params.partial.size << PAGE_SHIFT); + vma->ggtt_view.partial.offset << PAGE_SHIFT, + vma->ggtt_view.partial.size << PAGE_SHIFT); break; case I915_GGTT_VIEW_ROTATED: seq_printf(m, ", rotated [(%ux%u, stride=%u, offset=%u), (%ux%u, stride=%u, offset=%u)]", - vma->ggtt_view.params.rotated.plane[0].width, - vma->ggtt_view.params.rotated.plane[0].height, - vma->ggtt_view.params.rotated.plane[0].stride, - vma->ggtt_view.params.rotated.plane[0].offset, - vma->ggtt_view.params.rotated.plane[1].width, - vma->ggtt_view.params.rotated.plane[1].height, - vma->ggtt_view.params.rotated.plane[1].stride, - vma->ggtt_view.params.rotated.plane[1].offset); + vma->ggtt_view.rotated.plane[0].width, + vma->ggtt_view.rotated.plane[0].height, + vma->ggtt_view.rotated.plane[0].stride, + vma->ggtt_view.rotated.plane[0].offset, + vma->ggtt_view.rotated.plane[1].width, + vma->ggtt_view.rotated.plane[1].height, + vma->ggtt_view.rotated.plane[1].stride, + vma->ggtt_view.rotated.plane[1].offset); break; default: diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f034d8d2dd4c..d8622fd23f5d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1760,10 +1760,10 @@ compute_partial_view(struct drm_i915_gem_object *obj, chunk = roundup(chunk, tile_row_pages(obj)); view.type = I915_GGTT_VIEW_PARTIAL; - view.params.partial.offset = rounddown(page_offset, chunk); - view.params.partial.size = + view.partial.offset = rounddown(page_offset, chunk); + view.partial.size = min_t(unsigned int, chunk, - (obj->base.size >> PAGE_SHIFT) - view.params.partial.offset); + (obj->base.size >> PAGE_SHIFT) - view.partial.offset); /* If the partial covers the entire object, just create a normal VMA. */ if (chunk >= obj->base.size >> PAGE_SHIFT) @@ -1879,7 +1879,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, - area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT), + area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, min_t(u64, vma->size, area->vm_end - area->vm_start), &ggtt->mappable); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7d21cdfc6b0e..e24b961c30c6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3511,7 +3511,7 @@ intel_partial_pages(const struct i915_ggtt_view *view, { struct sg_table *st; struct scatterlist *sg, *iter; - unsigned int count = view->params.partial.size; + unsigned int count = view->partial.size; unsigned int offset; int ret = -ENOMEM; @@ -3523,9 +3523,7 @@ intel_partial_pages(const struct i915_ggtt_view *view, if (ret) goto err_sg_alloc; - iter = i915_gem_object_get_sg(obj, - view->params.partial.offset, - &offset); + iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset); GEM_BUG_ON(!iter); sg = st->sgl; @@ -3577,7 +3575,8 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) vma->pages = vma->obj->mm.pages; else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) vma->pages = - intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj); + intel_rotate_fb_obj_pages(&vma->ggtt_view.rotated, + vma->obj); else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); else diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 35ea4a18dc77..71e7e0a7e2b6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -193,7 +193,7 @@ struct i915_ggtt_view { /* Members need to contain no holes/padding */ struct intel_partial_info partial; struct intel_rotation_info rotated; - } params; + }; }; extern const struct i915_ggtt_view i915_ggtt_view_normal; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 379364b8fef9..fe93ed1e012f 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -97,15 +97,14 @@ __i915_vma_create(struct drm_i915_gem_object *obj, vma->ggtt_view = *view; if (view->type == I915_GGTT_VIEW_PARTIAL) { GEM_BUG_ON(range_overflows_t(u64, - view->params.partial.offset, - view->params.partial.size, + view->partial.offset, + view->partial.size, obj->base.size >> PAGE_SHIFT)); - vma->size = view->params.partial.size; + vma->size = view->partial.size; vma->size <<= PAGE_SHIFT; GEM_BUG_ON(vma->size >= obj->base.size); } else if (view->type == I915_GGTT_VIEW_ROTATED) { - vma->size = - intel_rotation_info_size(&view->params.rotated); + vma->size = intel_rotation_info_size(&view->rotated); vma->size <<= PAGE_SHIFT; } } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index fdbacc036080..86b60fb4e954 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -212,10 +212,17 @@ i915_vma_compare(struct i915_vma *vma, * different views using it as a "type" and also use a compact (no * accessing of uninitialised padding bytes) memcmp without storing * an extra parameter or adding more code. + * + * To ensure that the memcmp is valid for all branches of the union, + * even though the code looks like it is just comparing one branch, + * we assert above that all branches have the same address, and that + * each branch has a unique type/size. */ BUILD_BUG_ON(I915_GGTT_VIEW_NORMAL >= I915_GGTT_VIEW_PARTIAL); BUILD_BUG_ON(I915_GGTT_VIEW_PARTIAL >= I915_GGTT_VIEW_ROTATED); - return memcmp(&vma->ggtt_view.params, &view->params, view->type); + BUILD_BUG_ON(offsetof(typeof(*view), rotated) != + offsetof(typeof(*view), partial)); + return memcmp(&vma->ggtt_view.partial, &view->partial, view->type); } int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fd5fbc83c69e..f4be20f0198a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2139,7 +2139,7 @@ intel_fill_fb_ggtt_view(struct i915_ggtt_view *view, { if (drm_rotation_90_or_270(rotation)) { *view = i915_ggtt_view_rotated; - view->params.rotated = to_intel_framebuffer(fb)->rot_info; + view->rotated = to_intel_framebuffer(fb)->rot_info; } else { *view = i915_ggtt_view_normal; } -- cgit v1.2.3-55-g7522 From 7b92c047bae2210874d64ae8bbb56fbd18ab6731 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 14 Jan 2017 00:28:26 +0000 Subject: drm/i915: Eliminate superfluous i915_ggtt_view_rotated It is only being used to clear a struct and set the type, after which it is overwritten. Since we no longer check the unset bits of the union, skipping the clear is permissible. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170114002827.31315-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 --- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 - drivers/gpu/drm/i915/intel_display.c | 5 ++--- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e24b961c30c6..169d10d81334 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -106,9 +106,6 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma); const struct i915_ggtt_view i915_ggtt_view_normal = { .type = I915_GGTT_VIEW_NORMAL, }; -const struct i915_ggtt_view i915_ggtt_view_rotated = { - .type = I915_GGTT_VIEW_ROTATED, -}; static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 71e7e0a7e2b6..f673544e570e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -197,7 +197,6 @@ struct i915_ggtt_view { }; extern const struct i915_ggtt_view i915_ggtt_view_normal; -extern const struct i915_ggtt_view i915_ggtt_view_rotated; enum i915_cache_level; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f4be20f0198a..f523256ef77c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2137,11 +2137,10 @@ intel_fill_fb_ggtt_view(struct i915_ggtt_view *view, const struct drm_framebuffer *fb, unsigned int rotation) { + view->type = I915_GGTT_VIEW_NORMAL; if (drm_rotation_90_or_270(rotation)) { - *view = i915_ggtt_view_rotated; + view->type = I915_GGTT_VIEW_ROTATED; view->rotated = to_intel_framebuffer(fb)->rot_info; - } else { - *view = i915_ggtt_view_normal; } } -- cgit v1.2.3-55-g7522 From 47a8e3f6aebd4d6048572b3aeaef1dd51acb0d82 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 14 Jan 2017 00:28:27 +0000 Subject: drm/i915: Eliminate superfluous i915_ggtt_view_normal Since commit 058d88c4330f ("drm/i915: Track pinned VMA"), there is only one user of i915_ggtt_view_normal rodate. Just treat NULL as no special view in pin_to_display() like everywhere else. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170114002827.31315-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ---- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 -- drivers/gpu/drm/i915/intel_overlay.c | 3 +-- 4 files changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d8622fd23f5d..d4c59b53532e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3436,7 +3436,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * try to preserve the existing ABI). */ vma = ERR_PTR(-ENOSPC); - if (view->type == I915_GGTT_VIEW_NORMAL) + if (!view || view->type == I915_GGTT_VIEW_NORMAL) vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, PIN_MAPPABLE | PIN_NONBLOCK); if (IS_ERR(vma)) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 169d10d81334..4c88745b6078 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -103,10 +103,6 @@ static int i915_get_ggtt_vma_pages(struct i915_vma *vma); -const struct i915_ggtt_view i915_ggtt_view_normal = { - .type = I915_GGTT_VIEW_NORMAL, -}; - static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv) { /* Note that as an uncached mmio write, this should flush the diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f673544e570e..3c5ef5358cef 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -196,8 +196,6 @@ struct i915_ggtt_view { }; }; -extern const struct i915_ggtt_view i915_ggtt_view_normal; - enum i915_cache_level; struct i915_vma; diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 4473a611c664..0608fad7f593 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -811,8 +811,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, if (ret != 0) return ret; - vma = i915_gem_object_pin_to_display_plane(new_bo, 0, - &i915_ggtt_view_normal); + vma = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); -- cgit v1.2.3-55-g7522 From dda35931ef2ee74faa535d17368b5a26daad143f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 14 Jan 2017 10:51:12 +0000 Subject: drm/i915: Use __printf markup to silence compiler drivers/gpu/drm/i915/i915_gpu_error.c: In function ‘i915_error_vprintf’: drivers/gpu/drm/i915/i915_gpu_error.c:137:3: warning: function might be possible candidate for ‘gnu_printf’ format attribute [-Wsuggest-attribute=format] len = vsnprintf(NULL, 0, f, tmp); ^~~ drivers/gpu/drm/i915/i915_gpu_error.c:144:2: warning: function might be possible candidate for ‘gnu_printf’ format attribute [-Wsuggest-attribute=format] len = vsnprintf(e->buf + e->bytes, e->size - e->bytes, f, args); ^~~ Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170114105113.1231-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gpu_error.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 396c6f0fd033..9cd22cda17af 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -121,6 +121,7 @@ static void __i915_error_advance(struct drm_i915_error_state_buf *e, e->pos += len; } +__printf(2, 0) static void i915_error_vprintf(struct drm_i915_error_state_buf *e, const char *f, va_list args) { -- cgit v1.2.3-55-g7522 From a76f73dcd036d53616058aaeaf6986f57061dfa0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 14 Jan 2017 10:51:13 +0000 Subject: drm/i915/dp: Silence compiler for missing prototype drivers/gpu/drm/i915/intel_dp.c:3063:6: warning: no previous prototype for ‘intel_dp_get_alpm_status’ [-Wmissing-prototypes] bool intel_dp_get_alpm_status(struct intel_dp *intel_dp) Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170114105113.1231-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 4f33115f5ca3..e80d620846c8 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3060,7 +3060,7 @@ static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp) return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED; } -bool intel_dp_get_alpm_status(struct intel_dp *intel_dp) +static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp) { uint8_t alpm_caps = 0; -- cgit v1.2.3-55-g7522 From 6f13f29f2c19d1845698480ea5b7f6c77abb2b8a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 13 Jan 2017 21:43:35 +0000 Subject: drm/i915: Flush the change in debugobject before reallocation When marking the debugobject as freed, be sure that write is flushed before another CPU may see it on a reallocation path. Only seen once in CI: [ 159.240873] WARNING: CPU: 3 PID: 6735 at lib/debugobjects.c:263 debug_print_object+0x87/0xb0 [ 159.240897] ODEBUG: init destroyed (active state 0) object type: i915_sw_fence hint: submit_notify+0x0/0x4c [i915] [ 159.240902] Modules linked in: snd_hda_intel i915 x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul snd_hda_codec_realtek crc32_pclmul snd_hda_codec_generic snd_hda_codec_hdmi ghash_clmulni_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm mei_me lpc_ich mei e1000e ptp pps_core [last unloaded: i915] [ 159.240913] CPU: 3 PID: 6735 Comm: gem_exec_nop Tainted: G U 4.10.0-rc3-CI-Trybot_479+ #1 [ 159.240913] Hardware name: LENOVO 10AGS00601/SHARKBAY, BIOS FBKT34AUS 04/24/2013 [ 159.240914] Call Trace: [ 159.240916] dump_stack+0x67/0x92 [ 159.240919] __warn+0xc6/0xe0 [ 159.240920] warn_slowpath_fmt+0x4a/0x50 [ 159.240921] debug_print_object+0x87/0xb0 [ 159.240935] ? __i915_request_wait_for_execute+0x1d0/0x1d0 [i915] [ 159.240936] __debug_object_init+0xb2/0x410 [ 159.240950] ? __i915_request_wait_for_execute+0x1d0/0x1d0 [i915] [ 159.240951] debug_object_init+0x16/0x20 [ 159.240962] __i915_sw_fence_init+0x29/0x60 [i915] [ 159.240975] i915_gem_request_alloc+0x1fb/0x450 [i915] [ 159.240987] i915_gem_do_execbuffer.isra.15+0x798/0x1b20 [i915] [ 159.241000] i915_gem_execbuffer2+0xc0/0x250 [i915] [ 159.241003] drm_ioctl+0x200/0x450 [ 159.241016] ? i915_gem_execbuffer+0x330/0x330 [i915] [ 159.241018] do_vfs_ioctl+0x90/0x6e0 [ 159.241020] ? trace_hardirqs_on_caller+0x122/0x1b0 [ 159.241021] SyS_ioctl+0x3c/0x70 [ 159.241023] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 159.241024] RIP: 0033:0x7f9bc4f41357 [ 159.241025] RSP: 002b:00007ffc6cd5c568 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 159.241026] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bc4f41357 [ 159.241026] RDX: 00007ffc6cd5c640 RSI: 0000000040406469 RDI: 0000000000000003 [ 159.241027] RBP: 00007ffc6cd5c640 R08: 0000000000047508 R09: 0000000000000001 [ 159.241027] R10: 000b58552d323c3d R11: 0000000000000246 R12: 0000000040406469 [ 159.241028] R13: 0000000000000003 R14: 0000000000000004 R15: 0000000000000001 Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170113214335.5829-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_sw_fence.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index f5a88092dacf..40f4e5efaf83 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -63,6 +63,7 @@ static inline void debug_fence_destroy(struct i915_sw_fence *fence) static inline void debug_fence_free(struct i915_sw_fence *fence) { debug_object_free(fence, &i915_sw_fence_debug_descr); + smp_wmb(); /* flush the change in state before reallocation */ } static inline void debug_fence_assert(struct i915_sw_fence *fence) -- cgit v1.2.3-55-g7522 From 3fec7ec4450fee2f18fe20088209193999d8ff94 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 15 Jan 2017 13:47:46 +0000 Subject: drm/i915: Catch attempting to use the aliasing_gtt's drm_mm The aliasing_gtt is just that, an alias of the global GTT. We do not populate it directly, instead we always use the global GTT. Catch any attempt to incorrectly allocate ranges from the aliasing_gtt. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170115134746.29325-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4c88745b6078..786229137f91 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3626,6 +3626,7 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm, GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT)); GEM_BUG_ON(range_overflows(offset, size, vm->total)); + GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base); node->size = size; node->start = offset; @@ -3718,6 +3719,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, GEM_BUG_ON(start >= end); GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); + GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base); if (unlikely(range_overflows(start, size, end))) return -ENOSPC; -- cgit v1.2.3-55-g7522 From 9734ad13c2321e8efc1fab4665d40d88e947c560 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 15 Jan 2017 17:27:40 +0000 Subject: drm/i915: Assert we do not attempt to reuse an allocated node i915_gem_gtt_reserve() and i915_gem_gtt_insert() can only work on unallocated nodes. Check that the callers complies. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170115172740.28995-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 786229137f91..64f241bffc80 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3627,6 +3627,7 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm, GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT)); GEM_BUG_ON(range_overflows(offset, size, vm->total)); GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base); + GEM_BUG_ON(drm_mm_node_allocated(node)); node->size = size; node->start = offset; @@ -3720,6 +3721,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base); + GEM_BUG_ON(drm_mm_node_allocated(node)); if (unlikely(range_overflows(start, size, end))) return -ENOSPC; -- cgit v1.2.3-55-g7522 From 6ffb7d0756e34427a39f6ffdf861fe93b49fc0e8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 14 Jan 2017 16:23:33 +0000 Subject: drm/i915: Construct a request even if the GPU is currently hung As we now have the ability to directly reset the GPU from the waiter (and so do not need to drop the lock in order to let the reset proceed) and also do not lose requests over a reset, we can now simply queue the request to occur after the reset rather than roundtripping to userspace (or worse failing with EIO). Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170114162334.10271-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_request.c | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index e614398abe2f..72b7f7d9461d 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -307,26 +307,6 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) } while (tmp != req); } -static int i915_gem_check_wedge(struct drm_i915_private *dev_priv) -{ - struct i915_gpu_error *error = &dev_priv->gpu_error; - - if (i915_terminally_wedged(error)) - return -EIO; - - if (i915_reset_in_progress(error)) { - /* Non-interruptible callers can't handle -EAGAIN, hence return - * -EIO unconditionally for these. - */ - if (!dev_priv->mm.interruptible) - return -EIO; - - return -EAGAIN; - } - - return 0; -} - static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) { struct i915_gem_timeline *timeline = &i915->gt.global_timeline; @@ -521,12 +501,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, lockdep_assert_held(&dev_priv->drm.struct_mutex); /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex - * and restart. + * EIO if the GPU is already wedged. */ - ret = i915_gem_check_wedge(dev_priv); - if (ret) - return ERR_PTR(ret); + if (i915_terminally_wedged(&dev_priv->gpu_error)) + return ERR_PTR(-EIO); /* Pinning the contexts may generate requests in order to acquire * GGTT space, so do this first before we reserve a seqno for -- cgit v1.2.3-55-g7522 From f131e3562ef0e0aa418eeb60a8fd562c5b9b0122 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 29 Dec 2016 14:40:37 +0000 Subject: drm/i915: Skip switch to kernel context if already done Some engines are never user or already sitting idle in the kernel context and for those we can skip flushing the current context for i915_gem_switch_to_kernel_context(). We used to perform this optimisation but that was removed for convenience of converting over to multiple timelines and handling the pending request queues. From the perspective of writing selftests, reducing the number of background operations on the engines makes defining assertions easier. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170114162334.10271-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 3 ++- drivers/gpu/drm/i915/i915_gem_context.c | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d4c59b53532e..83cd2eff37af 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4140,7 +4140,8 @@ static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv) enum intel_engine_id id; for_each_engine(engine, dev_priv, id) - GEM_BUG_ON(!i915_gem_context_is_kernel(engine->last_retired_context)); + GEM_BUG_ON(engine->last_retired_context && + !i915_gem_context_is_kernel(engine->last_retired_context)); } int i915_gem_suspend(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index ae99c25397ca..0a4728fdecdc 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -877,6 +877,26 @@ int i915_switch_context(struct drm_i915_gem_request *req) return do_rcs_switch(req); } +static bool engine_has_kernel_context(struct intel_engine_cs *engine) +{ + struct i915_gem_timeline *timeline; + + list_for_each_entry(timeline, &engine->i915->gt.timelines, link) { + struct intel_timeline *tl; + + if (timeline == &engine->i915->gt.global_timeline) + continue; + + tl = &timeline->engine[engine->id]; + if (i915_gem_active_peek(&tl->last_request, + &engine->i915->drm.struct_mutex)) + return false; + } + + return (!engine->last_retired_context || + i915_gem_context_is_kernel(engine->last_retired_context)); +} + int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; @@ -885,10 +905,15 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); + i915_gem_retire_requests(dev_priv); + for_each_engine(engine, dev_priv, id) { struct drm_i915_gem_request *req; int ret; + if (engine_has_kernel_context(engine)) + continue; + req = i915_gem_request_alloc(engine, dev_priv->kernel_context); if (IS_ERR(req)) return PTR_ERR(req); -- cgit v1.2.3-55-g7522 From bf6b2030bed9371240127afa4a3219c78cf0119d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 16 Jan 2017 14:52:42 +0000 Subject: drm/i915: Assert internal objects are page aligned Internal objects must be passed a page-aligned size. Check it. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170116145242.13875-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_internal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index 9b39472396ef..17ce53d0d092 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -156,6 +156,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, struct drm_i915_gem_object *obj; GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); if (overflows_type(size, obj->base.size)) return ERR_PTR(-E2BIG); -- cgit v1.2.3-55-g7522 From b368f53378a4dc193239b5b8ffe8789bbb8b90a8 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Tue, 17 Jan 2017 22:06:11 +0800 Subject: drm/i915: Fix a typo in vgt_balloon_space() Commit 625d988acc28 ("drm/i915: Extract reserving space in the GTT to a helper") introduces this typo which can cause a driver loading failure in Linux GVT-g guest. Fixes: 625d988acc28 ("drm/i915: Extract reserving space in the GTT to a helper") Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Zhiyuan Lv Signed-off-by: Zhenyu Wang Signed-off-by: Zhi Wang Link: http://patchwork.freedesktop.org/patch/msgid/1484661972-9366-1-git-send-email-zhi.a.wang@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_vgpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index f1ad4fbb5ba7..d0abfd08a01c 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -122,7 +122,7 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt, { unsigned long size = end - start; - if (start <= end) + if (start >= end) return -EINVAL; DRM_INFO("balloon space: range [ 0x%lx - 0x%lx ] %lu KiB.\n", -- cgit v1.2.3-55-g7522 From a5e4c7d0aa6784d8abe95c3ceef0da9656d17468 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 7 Nov 2016 22:20:54 +0200 Subject: drm/i915: Ignore bogus plane coordinates on SKL when the plane is not visible When the plane is invisible we may have all sorts of bogus stuff in the coordinates, which we must ignore or else we might fail the plane update. This started to happen on SKL when I moved the plane offset computation to happen in the check phase. Previously we happily ignored it all since we never called the update_plane hook with an invisible plane. Cc: Sivakumar Thulasimani Cc: drm-intel-fixes@lists.freedesktop.org Fixes: b63a16f6cd89 ("drm/i915: Compute display surface offset in the plane check hook for SKL+") Signed-off-by: Ville Syrjälä Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98258 Testcase: igt/pm_rpm/legacy-planes Testcase: igt/pm_rpm/universal-planes Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/1478550057-24864-3-git-send-email-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f523256ef77c..31ca73d9ee49 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2965,6 +2965,9 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) unsigned int rotation = plane_state->base.rotation; int ret; + if (!plane_state->base.visible) + return 0; + /* Rotate src coordinates to match rotated GTT view */ if (drm_rotation_90_or_270(rotation)) drm_rect_rotate(&plane_state->base.src, -- cgit v1.2.3-55-g7522 From f9cda04867804e232987a2bc8832ee109907ed6f Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 13 Jan 2017 17:41:57 +0000 Subject: drm/i915/guc: Move GuC log related functions into dedicated file Functions supporting GuC logging capabilities were spread across many files, with unnecessary exposures and mixed with unrelated code. Dedicate file will make maintenance of all GuC functions easier as more functions are coming to support GuC submissions. Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Reviewed-by: Arkadiusz Hiler Signed-off-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170113174157.104492-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.c | 4 +- drivers/gpu/drm/i915/i915_guc_submission.c | 594 +------------------------- drivers/gpu/drm/i915/intel_guc_loader.c | 6 - drivers/gpu/drm/i915/intel_guc_log.c | 658 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_uc.c | 26 -- drivers/gpu/drm/i915/intel_uc.h | 13 +- 7 files changed, 673 insertions(+), 629 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_guc_log.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 5196509e71cf..40185fdd350f 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -56,6 +56,7 @@ i915-y += i915_cmd_parser.o \ # general-purpose microcontroller (GuC) support i915-y += intel_uc.o \ + intel_guc_log.o \ intel_guc_loader.o \ i915_guc_submission.o diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 4e5ea5898e06..350ee06c8d05 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1114,7 +1114,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) /* Reveal our presence to userspace */ if (drm_dev_register(dev, 0) == 0) { i915_debugfs_register(dev_priv); - i915_guc_register(dev_priv); + i915_guc_log_register(dev_priv); i915_setup_sysfs(dev_priv); /* Depends on sysfs having been initialized */ @@ -1158,7 +1158,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) i915_perf_unregister(dev_priv); i915_teardown_sysfs(dev_priv); - i915_guc_unregister(dev_priv); + i915_guc_log_unregister(dev_priv); i915_debugfs_unregister(dev_priv); drm_dev_unregister(&dev_priv->drm); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 913d87358972..73de5035017a 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -22,8 +22,6 @@ * */ #include -#include -#include #include "i915_drv.h" #include "intel_uc.h" @@ -545,7 +543,7 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq) */ /** - * guc_allocate_vma() - Allocate a GGTT VMA for GuC usage + * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage * @guc: the guc * @size: size of area to allocate (both virtual space and memory) * @@ -557,7 +555,7 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq) * * Return: A i915_vma if successful, otherwise an ERR_PTR. */ -static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size) +struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) { struct drm_i915_private *dev_priv = guc_to_i915(guc); struct drm_i915_gem_object *obj; @@ -718,7 +716,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv, } /* The first page is doorbell/proc_desc. Two followed pages are wq. */ - vma = guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE); + vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE); if (IS_ERR(vma)) goto err; @@ -774,488 +772,7 @@ err: return NULL; } -/* - * Sub buffer switch callback. Called whenever relay has to switch to a new - * sub buffer, relay stays on the same sub buffer if 0 is returned. - */ -static int subbuf_start_callback(struct rchan_buf *buf, - void *subbuf, - void *prev_subbuf, - size_t prev_padding) -{ - /* Use no-overwrite mode by default, where relay will stop accepting - * new data if there are no empty sub buffers left. - * There is no strict synchronization enforced by relay between Consumer - * and Producer. In overwrite mode, there is a possibility of getting - * inconsistent/garbled data, the producer could be writing on to the - * same sub buffer from which Consumer is reading. This can't be avoided - * unless Consumer is fast enough and can always run in tandem with - * Producer. - */ - if (relay_buf_full(buf)) - return 0; - - return 1; -} - -/* - * file_create() callback. Creates relay file in debugfs. - */ -static struct dentry *create_buf_file_callback(const char *filename, - struct dentry *parent, - umode_t mode, - struct rchan_buf *buf, - int *is_global) -{ - struct dentry *buf_file; - - /* This to enable the use of a single buffer for the relay channel and - * correspondingly have a single file exposed to User, through which - * it can collect the logs in order without any post-processing. - * Need to set 'is_global' even if parent is NULL for early logging. - */ - *is_global = 1; - - if (!parent) - return NULL; - - /* Not using the channel filename passed as an argument, since for each - * channel relay appends the corresponding CPU number to the filename - * passed in relay_open(). This should be fine as relay just needs a - * dentry of the file associated with the channel buffer and that file's - * name need not be same as the filename passed as an argument. - */ - buf_file = debugfs_create_file("guc_log", mode, - parent, buf, &relay_file_operations); - return buf_file; -} - -/* - * file_remove() default callback. Removes relay file in debugfs. - */ -static int remove_buf_file_callback(struct dentry *dentry) -{ - debugfs_remove(dentry); - return 0; -} - -/* relay channel callbacks */ -static struct rchan_callbacks relay_callbacks = { - .subbuf_start = subbuf_start_callback, - .create_buf_file = create_buf_file_callback, - .remove_buf_file = remove_buf_file_callback, -}; - -static void guc_log_remove_relay_file(struct intel_guc *guc) -{ - relay_close(guc->log.relay_chan); -} - -static int guc_log_create_relay_channel(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct rchan *guc_log_relay_chan; - size_t n_subbufs, subbuf_size; - - /* Keep the size of sub buffers same as shared log buffer */ - subbuf_size = guc->log.vma->obj->base.size; - - /* Store up to 8 snapshots, which is large enough to buffer sufficient - * boot time logs and provides enough leeway to User, in terms of - * latency, for consuming the logs from relay. Also doesn't take - * up too much memory. - */ - n_subbufs = 8; - - guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size, - n_subbufs, &relay_callbacks, dev_priv); - if (!guc_log_relay_chan) { - DRM_ERROR("Couldn't create relay chan for GuC logging\n"); - return -ENOMEM; - } - - GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); - guc->log.relay_chan = guc_log_relay_chan; - return 0; -} - -static int guc_log_create_relay_file(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct dentry *log_dir; - int ret; - - /* For now create the log file in /sys/kernel/debug/dri/0 dir */ - log_dir = dev_priv->drm.primary->debugfs_root; - - /* If /sys/kernel/debug/dri/0 location do not exist, then debugfs is - * not mounted and so can't create the relay file. - * The relay API seems to fit well with debugfs only, for availing relay - * there are 3 requirements which can be met for debugfs file only in a - * straightforward/clean manner :- - * i) Need the associated dentry pointer of the file, while opening the - * relay channel. - * ii) Should be able to use 'relay_file_operations' fops for the file. - * iii) Set the 'i_private' field of file's inode to the pointer of - * relay channel buffer. - */ - if (!log_dir) { - DRM_ERROR("Debugfs dir not available yet for GuC log file\n"); - return -ENODEV; - } - - ret = relay_late_setup_files(guc->log.relay_chan, "guc_log", log_dir); - if (ret) { - DRM_ERROR("Couldn't associate relay chan with file %d\n", ret); - return ret; - } - - return 0; -} - -static void guc_move_to_next_buf(struct intel_guc *guc) -{ - /* Make sure the updates made in the sub buffer are visible when - * Consumer sees the following update to offset inside the sub buffer. - */ - smp_wmb(); - - /* All data has been written, so now move the offset of sub buffer. */ - relay_reserve(guc->log.relay_chan, guc->log.vma->obj->base.size); - - /* Switch to the next sub buffer */ - relay_flush(guc->log.relay_chan); -} - -static void *guc_get_write_buffer(struct intel_guc *guc) -{ - if (!guc->log.relay_chan) - return NULL; - - /* Just get the base address of a new sub buffer and copy data into it - * ourselves. NULL will be returned in no-overwrite mode, if all sub - * buffers are full. Could have used the relay_write() to indirectly - * copy the data, but that would have been bit convoluted, as we need to - * write to only certain locations inside a sub buffer which cannot be - * done without using relay_reserve() along with relay_write(). So its - * better to use relay_reserve() alone. - */ - return relay_reserve(guc->log.relay_chan, 0); -} - -static bool -guc_check_log_buf_overflow(struct intel_guc *guc, - enum guc_log_buffer_type type, unsigned int full_cnt) -{ - unsigned int prev_full_cnt = guc->log.prev_overflow_count[type]; - bool overflow = false; - - if (full_cnt != prev_full_cnt) { - overflow = true; - - guc->log.prev_overflow_count[type] = full_cnt; - guc->log.total_overflow_count[type] += full_cnt - prev_full_cnt; - - if (full_cnt < prev_full_cnt) { - /* buffer_full_cnt is a 4 bit counter */ - guc->log.total_overflow_count[type] += 16; - } - DRM_ERROR_RATELIMITED("GuC log buffer overflow\n"); - } - - return overflow; -} - -static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) -{ - switch (type) { - case GUC_ISR_LOG_BUFFER: - return (GUC_LOG_ISR_PAGES + 1) * PAGE_SIZE; - case GUC_DPC_LOG_BUFFER: - return (GUC_LOG_DPC_PAGES + 1) * PAGE_SIZE; - case GUC_CRASH_DUMP_LOG_BUFFER: - return (GUC_LOG_CRASH_PAGES + 1) * PAGE_SIZE; - default: - MISSING_CASE(type); - } - - return 0; -} - -static void guc_read_update_log_buffer(struct intel_guc *guc) -{ - unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_cnt; - struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state; - struct guc_log_buffer_state log_buf_state_local; - enum guc_log_buffer_type type; - void *src_data, *dst_data; - bool new_overflow; - - if (WARN_ON(!guc->log.buf_addr)) - return; - - /* Get the pointer to shared GuC log buffer */ - log_buf_state = src_data = guc->log.buf_addr; - - /* Get the pointer to local buffer to store the logs */ - log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc); - - /* Actual logs are present from the 2nd page */ - src_data += PAGE_SIZE; - dst_data += PAGE_SIZE; - - for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { - /* Make a copy of the state structure, inside GuC log buffer - * (which is uncached mapped), on the stack to avoid reading - * from it multiple times. - */ - memcpy(&log_buf_state_local, log_buf_state, - sizeof(struct guc_log_buffer_state)); - buffer_size = guc_get_log_buffer_size(type); - read_offset = log_buf_state_local.read_ptr; - write_offset = log_buf_state_local.sampled_write_ptr; - full_cnt = log_buf_state_local.buffer_full_cnt; - - /* Bookkeeping stuff */ - guc->log.flush_count[type] += log_buf_state_local.flush_to_file; - new_overflow = guc_check_log_buf_overflow(guc, type, full_cnt); - - /* Update the state of shared log buffer */ - log_buf_state->read_ptr = write_offset; - log_buf_state->flush_to_file = 0; - log_buf_state++; - - if (unlikely(!log_buf_snapshot_state)) - continue; - - /* First copy the state structure in snapshot buffer */ - memcpy(log_buf_snapshot_state, &log_buf_state_local, - sizeof(struct guc_log_buffer_state)); - - /* The write pointer could have been updated by GuC firmware, - * after sending the flush interrupt to Host, for consistency - * set write pointer value to same value of sampled_write_ptr - * in the snapshot buffer. - */ - log_buf_snapshot_state->write_ptr = write_offset; - log_buf_snapshot_state++; - - /* Now copy the actual logs. */ - if (unlikely(new_overflow)) { - /* copy the whole buffer in case of overflow */ - read_offset = 0; - write_offset = buffer_size; - } else if (unlikely((read_offset > buffer_size) || - (write_offset > buffer_size))) { - DRM_ERROR("invalid log buffer state\n"); - /* copy whole buffer as offsets are unreliable */ - read_offset = 0; - write_offset = buffer_size; - } - - /* Just copy the newly written data */ - if (read_offset > write_offset) { - i915_memcpy_from_wc(dst_data, src_data, write_offset); - bytes_to_copy = buffer_size - read_offset; - } else { - bytes_to_copy = write_offset - read_offset; - } - i915_memcpy_from_wc(dst_data + read_offset, - src_data + read_offset, bytes_to_copy); - - src_data += buffer_size; - dst_data += buffer_size; - } - - if (log_buf_snapshot_state) - guc_move_to_next_buf(guc); - else { - /* Used rate limited to avoid deluge of messages, logs might be - * getting consumed by User at a slow rate. - */ - DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n"); - guc->log.capture_miss_count++; - } -} - -static void guc_capture_logs_work(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, guc.log.flush_work); - - i915_guc_capture_logs(dev_priv); -} - -static void guc_log_cleanup(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - /* First disable the flush interrupt */ - gen9_disable_guc_interrupts(dev_priv); - if (guc->log.flush_wq) - destroy_workqueue(guc->log.flush_wq); - - guc->log.flush_wq = NULL; - - if (guc->log.relay_chan) - guc_log_remove_relay_file(guc); - - guc->log.relay_chan = NULL; - - if (guc->log.buf_addr) - i915_gem_object_unpin_map(guc->log.vma->obj); - - guc->log.buf_addr = NULL; -} - -static int guc_log_create_extras(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - void *vaddr; - int ret; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - /* Nothing to do */ - if (i915.guc_log_level < 0) - return 0; - - if (!guc->log.buf_addr) { - /* Create a WC (Uncached for read) vmalloc mapping of log - * buffer pages, so that we can directly get the data - * (up-to-date) from memory. - */ - vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - DRM_ERROR("Couldn't map log buffer pages %d\n", ret); - return ret; - } - - guc->log.buf_addr = vaddr; - } - - if (!guc->log.relay_chan) { - /* Create a relay channel, so that we have buffers for storing - * the GuC firmware logs, the channel will be linked with a file - * later on when debugfs is registered. - */ - ret = guc_log_create_relay_channel(guc); - if (ret) - return ret; - } - - if (!guc->log.flush_wq) { - INIT_WORK(&guc->log.flush_work, guc_capture_logs_work); - - /* - * GuC log buffer flush work item has to do register access to - * send the ack to GuC and this work item, if not synced before - * suspend, can potentially get executed after the GFX device is - * suspended. - * By marking the WQ as freezable, we don't have to bother about - * flushing of this work item from the suspend hooks, the pending - * work item if any will be either executed before the suspend - * or scheduled later on resume. This way the handling of work - * item can be kept same between system suspend & rpm suspend. - */ - guc->log.flush_wq = alloc_ordered_workqueue("i915-guc_log", - WQ_HIGHPRI | WQ_FREEZABLE); - if (guc->log.flush_wq == NULL) { - DRM_ERROR("Couldn't allocate the wq for GuC logging\n"); - return -ENOMEM; - } - } - - return 0; -} - -static void guc_log_create(struct intel_guc *guc) -{ - struct i915_vma *vma; - unsigned long offset; - uint32_t size, flags; - - if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX) - i915.guc_log_level = GUC_LOG_VERBOSITY_MAX; - - /* The first page is to save log buffer state. Allocate one - * extra page for others in case for overlap */ - size = (1 + GUC_LOG_DPC_PAGES + 1 + - GUC_LOG_ISR_PAGES + 1 + - GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; - - vma = guc->log.vma; - if (!vma) { - /* We require SSE 4.1 for fast reads from the GuC log buffer and - * it should be present on the chipsets supporting GuC based - * submisssions. - */ - if (WARN_ON(!i915_has_memcpy_from_wc())) { - /* logging will not be enabled */ - i915.guc_log_level = -1; - return; - } - - vma = guc_allocate_vma(guc, size); - if (IS_ERR(vma)) { - /* logging will be off */ - i915.guc_log_level = -1; - return; - } - - guc->log.vma = vma; - - if (guc_log_create_extras(guc)) { - guc_log_cleanup(guc); - i915_vma_unpin_and_release(&guc->log.vma); - i915.guc_log_level = -1; - return; - } - } - - /* each allocated unit is a page */ - flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | - (GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) | - (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | - (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); - - offset = guc_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */ - guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; -} - -static int guc_log_late_setup(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - int ret; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - if (i915.guc_log_level < 0) - return -EINVAL; - - /* If log_level was set as -1 at boot time, then setup needed to - * handle log buffer flush interrupts would not have been done yet, - * so do that now. - */ - ret = guc_log_create_extras(guc); - if (ret) - goto err; - - ret = guc_log_create_relay_file(guc); - if (ret) - goto err; - - return 0; -err: - guc_log_cleanup(guc); - /* logging will remain off */ - i915.guc_log_level = -1; - return ret; -} static void guc_policies_init(struct guc_policies *policies) { @@ -1298,7 +815,7 @@ static void guc_addon_create(struct intel_guc *guc) vma = guc->ads_vma; if (!vma) { - vma = guc_allocate_vma(guc, PAGE_ALIGN(size)); + vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(size)); if (IS_ERR(vma)) return; @@ -1373,13 +890,13 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) if (guc->ctx_pool_vma) return 0; /* already allocated */ - vma = guc_allocate_vma(guc, gemsize); + vma = intel_guc_allocate_vma(guc, gemsize); if (IS_ERR(vma)) return PTR_ERR(vma); guc->ctx_pool_vma = vma; ida_init(&guc->ctx_ids); - guc_log_create(guc); + intel_guc_log_create(guc); guc_addon_create(guc); guc->execbuf_client = guc_client_alloc(dev_priv, @@ -1524,103 +1041,4 @@ int intel_guc_resume(struct drm_i915_private *dev_priv) return intel_guc_send(guc, data, ARRAY_SIZE(data)); } -void i915_guc_capture_logs(struct drm_i915_private *dev_priv) -{ - guc_read_update_log_buffer(&dev_priv->guc); - - /* Generally device is expected to be active only at this - * time, so get/put should be really quick. - */ - intel_runtime_pm_get(dev_priv); - intel_guc_log_flush_complete(&dev_priv->guc); - intel_runtime_pm_put(dev_priv); -} - -void i915_guc_flush_logs(struct drm_i915_private *dev_priv) -{ - if (!i915.enable_guc_submission || (i915.guc_log_level < 0)) - return; - - /* First disable the interrupts, will be renabled afterwards */ - gen9_disable_guc_interrupts(dev_priv); - - /* Before initiating the forceful flush, wait for any pending/ongoing - * flush to complete otherwise forceful flush may not actually happen. - */ - flush_work(&dev_priv->guc.log.flush_work); - - /* Ask GuC to update the log buffer state */ - intel_guc_log_flush(&dev_priv->guc); - - /* GuC would have updated log buffer by now, so capture it */ - i915_guc_capture_logs(dev_priv); -} - -void i915_guc_unregister(struct drm_i915_private *dev_priv) -{ - if (!i915.enable_guc_submission) - return; - mutex_lock(&dev_priv->drm.struct_mutex); - guc_log_cleanup(&dev_priv->guc); - mutex_unlock(&dev_priv->drm.struct_mutex); -} - -void i915_guc_register(struct drm_i915_private *dev_priv) -{ - if (!i915.enable_guc_submission) - return; - - mutex_lock(&dev_priv->drm.struct_mutex); - guc_log_late_setup(&dev_priv->guc); - mutex_unlock(&dev_priv->drm.struct_mutex); -} - -int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) -{ - union guc_log_control log_param; - int ret; - - log_param.value = control_val; - - if (log_param.verbosity < GUC_LOG_VERBOSITY_MIN || - log_param.verbosity > GUC_LOG_VERBOSITY_MAX) - return -EINVAL; - - /* This combination doesn't make sense & won't have any effect */ - if (!log_param.logging_enabled && (i915.guc_log_level < 0)) - return 0; - - ret = intel_guc_log_control(&dev_priv->guc, log_param.value); - if (ret < 0) { - DRM_DEBUG_DRIVER("guc_logging_control action failed %d\n", ret); - return ret; - } - - i915.guc_log_level = log_param.verbosity; - - /* If log_level was set as -1 at boot time, then the relay channel file - * wouldn't have been created by now and interrupts also would not have - * been enabled. - */ - if (!dev_priv->guc.log.relay_chan) { - ret = guc_log_late_setup(&dev_priv->guc); - if (!ret) - gen9_enable_guc_interrupts(dev_priv); - } else if (!log_param.logging_enabled) { - /* Once logging is disabled, GuC won't generate logs & send an - * interrupt. But there could be some data in the log buffer - * which is yet to be captured. So request GuC to update the log - * buffer state and then collect the left over logs. - */ - i915_guc_flush_logs(dev_priv); - - /* As logging is disabled, update log level to reflect that */ - i915.guc_log_level = -1; - } else { - /* In case interrupts were disabled, enable them now */ - gen9_enable_guc_interrupts(dev_priv); - } - - return ret; -} diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index b8891914287e..4bcef5d2c011 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -51,12 +51,6 @@ * 512K. In order to exclude 0-512K address space from GGTT, all gfx objects * used by GuC is pinned with PIN_OFFSET_BIAS along with size of WOPCM. * - * Firmware log: - * Firmware log is enabled by setting i915.guc_log_level to non-negative level. - * Log data is printed out via reading debugfs i915_guc_log_dump. Reading from - * i915_guc_load_status will print out firmware loading status and scratch - * registers value. - * */ #define SKL_FW_MAJOR 6 diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c new file mode 100644 index 000000000000..5c0f9a49da0e --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -0,0 +1,658 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ +#include +#include +#include "i915_drv.h" + +static void guc_log_capture_logs(struct intel_guc *guc); + +/** + * DOC: GuC firmware log + * + * Firmware log is enabled by setting i915.guc_log_level to non-negative level. + * Log data is printed out via reading debugfs i915_guc_log_dump. Reading from + * i915_guc_load_status will print out firmware loading status and scratch + * registers value. + * + */ + +static int guc_log_flush_complete(struct intel_guc *guc) +{ + u32 action[] = { + INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +static int guc_log_flush(struct intel_guc *guc) +{ + u32 action[] = { + INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH, + 0 + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +static int guc_log_control(struct intel_guc *guc, u32 control_val) +{ + u32 action[] = { + INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING, + control_val + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + + +/* + * Sub buffer switch callback. Called whenever relay has to switch to a new + * sub buffer, relay stays on the same sub buffer if 0 is returned. + */ +static int subbuf_start_callback(struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + size_t prev_padding) +{ + /* Use no-overwrite mode by default, where relay will stop accepting + * new data if there are no empty sub buffers left. + * There is no strict synchronization enforced by relay between Consumer + * and Producer. In overwrite mode, there is a possibility of getting + * inconsistent/garbled data, the producer could be writing on to the + * same sub buffer from which Consumer is reading. This can't be avoided + * unless Consumer is fast enough and can always run in tandem with + * Producer. + */ + if (relay_buf_full(buf)) + return 0; + + return 1; +} + +/* + * file_create() callback. Creates relay file in debugfs. + */ +static struct dentry *create_buf_file_callback(const char *filename, + struct dentry *parent, + umode_t mode, + struct rchan_buf *buf, + int *is_global) +{ + struct dentry *buf_file; + + /* This to enable the use of a single buffer for the relay channel and + * correspondingly have a single file exposed to User, through which + * it can collect the logs in order without any post-processing. + * Need to set 'is_global' even if parent is NULL for early logging. + */ + *is_global = 1; + + if (!parent) + return NULL; + + /* Not using the channel filename passed as an argument, since for each + * channel relay appends the corresponding CPU number to the filename + * passed in relay_open(). This should be fine as relay just needs a + * dentry of the file associated with the channel buffer and that file's + * name need not be same as the filename passed as an argument. + */ + buf_file = debugfs_create_file("guc_log", mode, + parent, buf, &relay_file_operations); + return buf_file; +} + +/* + * file_remove() default callback. Removes relay file in debugfs. + */ +static int remove_buf_file_callback(struct dentry *dentry) +{ + debugfs_remove(dentry); + return 0; +} + +/* relay channel callbacks */ +static struct rchan_callbacks relay_callbacks = { + .subbuf_start = subbuf_start_callback, + .create_buf_file = create_buf_file_callback, + .remove_buf_file = remove_buf_file_callback, +}; + +static void guc_log_remove_relay_file(struct intel_guc *guc) +{ + relay_close(guc->log.relay_chan); +} + +static int guc_log_create_relay_channel(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct rchan *guc_log_relay_chan; + size_t n_subbufs, subbuf_size; + + /* Keep the size of sub buffers same as shared log buffer */ + subbuf_size = guc->log.vma->obj->base.size; + + /* Store up to 8 snapshots, which is large enough to buffer sufficient + * boot time logs and provides enough leeway to User, in terms of + * latency, for consuming the logs from relay. Also doesn't take + * up too much memory. + */ + n_subbufs = 8; + + guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size, + n_subbufs, &relay_callbacks, dev_priv); + if (!guc_log_relay_chan) { + DRM_ERROR("Couldn't create relay chan for GuC logging\n"); + return -ENOMEM; + } + + GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); + guc->log.relay_chan = guc_log_relay_chan; + return 0; +} + +static int guc_log_create_relay_file(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct dentry *log_dir; + int ret; + + /* For now create the log file in /sys/kernel/debug/dri/0 dir */ + log_dir = dev_priv->drm.primary->debugfs_root; + + /* If /sys/kernel/debug/dri/0 location do not exist, then debugfs is + * not mounted and so can't create the relay file. + * The relay API seems to fit well with debugfs only, for availing relay + * there are 3 requirements which can be met for debugfs file only in a + * straightforward/clean manner :- + * i) Need the associated dentry pointer of the file, while opening the + * relay channel. + * ii) Should be able to use 'relay_file_operations' fops for the file. + * iii) Set the 'i_private' field of file's inode to the pointer of + * relay channel buffer. + */ + if (!log_dir) { + DRM_ERROR("Debugfs dir not available yet for GuC log file\n"); + return -ENODEV; + } + + ret = relay_late_setup_files(guc->log.relay_chan, "guc_log", log_dir); + if (ret) { + DRM_ERROR("Couldn't associate relay chan with file %d\n", ret); + return ret; + } + + return 0; +} + +static void guc_move_to_next_buf(struct intel_guc *guc) +{ + /* Make sure the updates made in the sub buffer are visible when + * Consumer sees the following update to offset inside the sub buffer. + */ + smp_wmb(); + + /* All data has been written, so now move the offset of sub buffer. */ + relay_reserve(guc->log.relay_chan, guc->log.vma->obj->base.size); + + /* Switch to the next sub buffer */ + relay_flush(guc->log.relay_chan); +} + +static void *guc_get_write_buffer(struct intel_guc *guc) +{ + if (!guc->log.relay_chan) + return NULL; + + /* Just get the base address of a new sub buffer and copy data into it + * ourselves. NULL will be returned in no-overwrite mode, if all sub + * buffers are full. Could have used the relay_write() to indirectly + * copy the data, but that would have been bit convoluted, as we need to + * write to only certain locations inside a sub buffer which cannot be + * done without using relay_reserve() along with relay_write(). So its + * better to use relay_reserve() alone. + */ + return relay_reserve(guc->log.relay_chan, 0); +} + +static bool guc_check_log_buf_overflow(struct intel_guc *guc, + enum guc_log_buffer_type type, + unsigned int full_cnt) +{ + unsigned int prev_full_cnt = guc->log.prev_overflow_count[type]; + bool overflow = false; + + if (full_cnt != prev_full_cnt) { + overflow = true; + + guc->log.prev_overflow_count[type] = full_cnt; + guc->log.total_overflow_count[type] += full_cnt - prev_full_cnt; + + if (full_cnt < prev_full_cnt) { + /* buffer_full_cnt is a 4 bit counter */ + guc->log.total_overflow_count[type] += 16; + } + DRM_ERROR_RATELIMITED("GuC log buffer overflow\n"); + } + + return overflow; +} + +static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) +{ + switch (type) { + case GUC_ISR_LOG_BUFFER: + return (GUC_LOG_ISR_PAGES + 1) * PAGE_SIZE; + case GUC_DPC_LOG_BUFFER: + return (GUC_LOG_DPC_PAGES + 1) * PAGE_SIZE; + case GUC_CRASH_DUMP_LOG_BUFFER: + return (GUC_LOG_CRASH_PAGES + 1) * PAGE_SIZE; + default: + MISSING_CASE(type); + } + + return 0; +} + +static void guc_read_update_log_buffer(struct intel_guc *guc) +{ + unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_cnt; + struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state; + struct guc_log_buffer_state log_buf_state_local; + enum guc_log_buffer_type type; + void *src_data, *dst_data; + bool new_overflow; + + if (WARN_ON(!guc->log.buf_addr)) + return; + + /* Get the pointer to shared GuC log buffer */ + log_buf_state = src_data = guc->log.buf_addr; + + /* Get the pointer to local buffer to store the logs */ + log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc); + + /* Actual logs are present from the 2nd page */ + src_data += PAGE_SIZE; + dst_data += PAGE_SIZE; + + for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { + /* Make a copy of the state structure, inside GuC log buffer + * (which is uncached mapped), on the stack to avoid reading + * from it multiple times. + */ + memcpy(&log_buf_state_local, log_buf_state, + sizeof(struct guc_log_buffer_state)); + buffer_size = guc_get_log_buffer_size(type); + read_offset = log_buf_state_local.read_ptr; + write_offset = log_buf_state_local.sampled_write_ptr; + full_cnt = log_buf_state_local.buffer_full_cnt; + + /* Bookkeeping stuff */ + guc->log.flush_count[type] += log_buf_state_local.flush_to_file; + new_overflow = guc_check_log_buf_overflow(guc, type, full_cnt); + + /* Update the state of shared log buffer */ + log_buf_state->read_ptr = write_offset; + log_buf_state->flush_to_file = 0; + log_buf_state++; + + if (unlikely(!log_buf_snapshot_state)) + continue; + + /* First copy the state structure in snapshot buffer */ + memcpy(log_buf_snapshot_state, &log_buf_state_local, + sizeof(struct guc_log_buffer_state)); + + /* The write pointer could have been updated by GuC firmware, + * after sending the flush interrupt to Host, for consistency + * set write pointer value to same value of sampled_write_ptr + * in the snapshot buffer. + */ + log_buf_snapshot_state->write_ptr = write_offset; + log_buf_snapshot_state++; + + /* Now copy the actual logs. */ + if (unlikely(new_overflow)) { + /* copy the whole buffer in case of overflow */ + read_offset = 0; + write_offset = buffer_size; + } else if (unlikely((read_offset > buffer_size) || + (write_offset > buffer_size))) { + DRM_ERROR("invalid log buffer state\n"); + /* copy whole buffer as offsets are unreliable */ + read_offset = 0; + write_offset = buffer_size; + } + + /* Just copy the newly written data */ + if (read_offset > write_offset) { + i915_memcpy_from_wc(dst_data, src_data, write_offset); + bytes_to_copy = buffer_size - read_offset; + } else { + bytes_to_copy = write_offset - read_offset; + } + i915_memcpy_from_wc(dst_data + read_offset, + src_data + read_offset, bytes_to_copy); + + src_data += buffer_size; + dst_data += buffer_size; + } + + if (log_buf_snapshot_state) + guc_move_to_next_buf(guc); + else { + /* Used rate limited to avoid deluge of messages, logs might be + * getting consumed by User at a slow rate. + */ + DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n"); + guc->log.capture_miss_count++; + } +} + +static void guc_log_cleanup(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + /* First disable the flush interrupt */ + gen9_disable_guc_interrupts(dev_priv); + + if (guc->log.flush_wq) + destroy_workqueue(guc->log.flush_wq); + + guc->log.flush_wq = NULL; + + if (guc->log.relay_chan) + guc_log_remove_relay_file(guc); + + guc->log.relay_chan = NULL; + + if (guc->log.buf_addr) + i915_gem_object_unpin_map(guc->log.vma->obj); + + guc->log.buf_addr = NULL; +} + +static void capture_logs_work(struct work_struct *work) +{ + struct intel_guc *guc = + container_of(work, struct intel_guc, log.flush_work); + + guc_log_capture_logs(guc); +} + +static int guc_log_create_extras(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + void *vaddr; + int ret; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + /* Nothing to do */ + if (i915.guc_log_level < 0) + return 0; + + if (!guc->log.buf_addr) { + /* Create a WC (Uncached for read) vmalloc mapping of log + * buffer pages, so that we can directly get the data + * (up-to-date) from memory. + */ + vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + DRM_ERROR("Couldn't map log buffer pages %d\n", ret); + return ret; + } + + guc->log.buf_addr = vaddr; + } + + if (!guc->log.relay_chan) { + /* Create a relay channel, so that we have buffers for storing + * the GuC firmware logs, the channel will be linked with a file + * later on when debugfs is registered. + */ + ret = guc_log_create_relay_channel(guc); + if (ret) + return ret; + } + + if (!guc->log.flush_wq) { + INIT_WORK(&guc->log.flush_work, capture_logs_work); + + /* + * GuC log buffer flush work item has to do register access to + * send the ack to GuC and this work item, if not synced before + * suspend, can potentially get executed after the GFX device is + * suspended. + * By marking the WQ as freezable, we don't have to bother about + * flushing of this work item from the suspend hooks, the pending + * work item if any will be either executed before the suspend + * or scheduled later on resume. This way the handling of work + * item can be kept same between system suspend & rpm suspend. + */ + guc->log.flush_wq = alloc_ordered_workqueue("i915-guc_log", + WQ_HIGHPRI | WQ_FREEZABLE); + if (guc->log.flush_wq == NULL) { + DRM_ERROR("Couldn't allocate the wq for GuC logging\n"); + return -ENOMEM; + } + } + + return 0; +} + +void intel_guc_log_create(struct intel_guc *guc) +{ + struct i915_vma *vma; + unsigned long offset; + uint32_t size, flags; + + if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX) + i915.guc_log_level = GUC_LOG_VERBOSITY_MAX; + + /* The first page is to save log buffer state. Allocate one + * extra page for others in case for overlap */ + size = (1 + GUC_LOG_DPC_PAGES + 1 + + GUC_LOG_ISR_PAGES + 1 + + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; + + vma = guc->log.vma; + if (!vma) { + /* We require SSE 4.1 for fast reads from the GuC log buffer and + * it should be present on the chipsets supporting GuC based + * submisssions. + */ + if (WARN_ON(!i915_has_memcpy_from_wc())) { + /* logging will not be enabled */ + i915.guc_log_level = -1; + return; + } + + vma = intel_guc_allocate_vma(guc, size); + if (IS_ERR(vma)) { + /* logging will be off */ + i915.guc_log_level = -1; + return; + } + + guc->log.vma = vma; + + if (guc_log_create_extras(guc)) { + guc_log_cleanup(guc); + i915_vma_unpin_and_release(&guc->log.vma); + i915.guc_log_level = -1; + return; + } + } + + /* each allocated unit is a page */ + flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | + (GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) | + (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | + (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); + + offset = guc_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */ + guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; +} + +static int guc_log_late_setup(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + int ret; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + if (i915.guc_log_level < 0) + return -EINVAL; + + /* If log_level was set as -1 at boot time, then setup needed to + * handle log buffer flush interrupts would not have been done yet, + * so do that now. + */ + ret = guc_log_create_extras(guc); + if (ret) + goto err; + + ret = guc_log_create_relay_file(guc); + if (ret) + goto err; + + return 0; +err: + guc_log_cleanup(guc); + /* logging will remain off */ + i915.guc_log_level = -1; + return ret; +} + +static void guc_log_capture_logs(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + guc_read_update_log_buffer(guc); + + /* Generally device is expected to be active only at this + * time, so get/put should be really quick. + */ + intel_runtime_pm_get(dev_priv); + guc_log_flush_complete(guc); + intel_runtime_pm_put(dev_priv); +} + +static void guc_flush_logs(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + if (!i915.enable_guc_submission || (i915.guc_log_level < 0)) + return; + + /* First disable the interrupts, will be renabled afterwards */ + gen9_disable_guc_interrupts(dev_priv); + + /* Before initiating the forceful flush, wait for any pending/ongoing + * flush to complete otherwise forceful flush may not actually happen. + */ + flush_work(&guc->log.flush_work); + + /* Ask GuC to update the log buffer state */ + guc_log_flush(guc); + + /* GuC would have updated log buffer by now, so capture it */ + guc_log_capture_logs(guc); +} + +int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) +{ + struct intel_guc *guc = &dev_priv->guc; + + union guc_log_control log_param; + int ret; + + log_param.value = control_val; + + if (log_param.verbosity < GUC_LOG_VERBOSITY_MIN || + log_param.verbosity > GUC_LOG_VERBOSITY_MAX) + return -EINVAL; + + /* This combination doesn't make sense & won't have any effect */ + if (!log_param.logging_enabled && (i915.guc_log_level < 0)) + return 0; + + ret = guc_log_control(guc, log_param.value); + if (ret < 0) { + DRM_DEBUG_DRIVER("guc_logging_control action failed %d\n", ret); + return ret; + } + + i915.guc_log_level = log_param.verbosity; + + /* If log_level was set as -1 at boot time, then the relay channel file + * wouldn't have been created by now and interrupts also would not have + * been enabled. + */ + if (!dev_priv->guc.log.relay_chan) { + ret = guc_log_late_setup(guc); + if (!ret) + gen9_enable_guc_interrupts(dev_priv); + } else if (!log_param.logging_enabled) { + /* Once logging is disabled, GuC won't generate logs & send an + * interrupt. But there could be some data in the log buffer + * which is yet to be captured. So request GuC to update the log + * buffer state and then collect the left over logs. + */ + guc_flush_logs(guc); + + /* As logging is disabled, update log level to reflect that */ + i915.guc_log_level = -1; + } else { + /* In case interrupts were disabled, enable them now */ + gen9_enable_guc_interrupts(dev_priv); + } + + return ret; +} + +void i915_guc_log_register(struct drm_i915_private *dev_priv) +{ + if (!i915.enable_guc_submission) + return; + + mutex_lock(&dev_priv->drm.struct_mutex); + guc_log_late_setup(&dev_priv->guc); + mutex_unlock(&dev_priv->drm.struct_mutex); +} + +void i915_guc_log_unregister(struct drm_i915_private *dev_priv) +{ + if (!i915.enable_guc_submission) + return; + + mutex_lock(&dev_priv->drm.struct_mutex); + guc_log_cleanup(&dev_priv->guc); + mutex_unlock(&dev_priv->drm.struct_mutex); +} diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index c6be35220955..c46bc8594f22 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -114,29 +114,3 @@ int intel_guc_sample_forcewake(struct intel_guc *guc) return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -int intel_guc_log_flush_complete(struct intel_guc *guc) -{ - u32 action[] = { INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE }; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); -} - -int intel_guc_log_flush(struct intel_guc *guc) -{ - u32 action[] = { - INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH, - 0 - }; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); -} - -int intel_guc_log_control(struct intel_guc *guc, u32 control_val) -{ - u32 action[] = { - INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING, - control_val - }; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); -} diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 9490a8e049c3..57f90885a321 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -174,9 +174,6 @@ struct intel_guc { void intel_uc_init_early(struct drm_i915_private *dev_priv); int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_sample_forcewake(struct intel_guc *guc); -int intel_guc_log_flush_complete(struct intel_guc *guc); -int intel_guc_log_flush(struct intel_guc *guc); -int intel_guc_log_control(struct intel_guc *guc, u32 control_val); /* intel_guc_loader.c */ extern void intel_guc_init(struct drm_i915_private *dev_priv); @@ -193,10 +190,12 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *rq); void i915_guc_wq_unreserve(struct drm_i915_gem_request *request); void i915_guc_submission_disable(struct drm_i915_private *dev_priv); void i915_guc_submission_fini(struct drm_i915_private *dev_priv); -void i915_guc_capture_logs(struct drm_i915_private *dev_priv); -void i915_guc_flush_logs(struct drm_i915_private *dev_priv); -void i915_guc_register(struct drm_i915_private *dev_priv); -void i915_guc_unregister(struct drm_i915_private *dev_priv); +struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); + +/* intel_guc_log.c */ +void intel_guc_log_create(struct intel_guc *guc); +void i915_guc_log_register(struct drm_i915_private *dev_priv); +void i915_guc_log_unregister(struct drm_i915_private *dev_priv); int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); static inline u32 guc_ggtt_offset(struct i915_vma *vma) -- cgit v1.2.3-55-g7522 From db0a091bcd141e4cd56df54d021440505e5307cb Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Fri, 13 Jan 2017 17:17:04 -0800 Subject: drm/i915/guc: Make the GuC fw loading helper functions general Rename some of the GuC fw loading code to make them more general. We will utilise them for HuC loading as well. s/intel_guc_fw/intel_uc_fw/g s/GUC_FIRMWARE/INTEL_UC_FIRMWARE/g Struct intel_guc_fw is renamed to intel_uc_fw. Prefix of tts members, such as 'guc' or 'guc_fw' either is renamed to 'uc' or removed for same purpose. v2: rebased on top of nightly. reapplied the search/replace as upstream code as changed. v3: removed G from messages in shared fw fetch function. v4: rebased.Updated dev to dev_priv in intel_guc_setup(), guc_fw_getch() and intel_guc_init(). v5: rebased. Remove uint32_t fw_type to patch 2. Add INTEL_ prefix for fields in enum intel_uc_fw_status. Remove uc_dev field since its never used.Rename uc_fw to just fw and guc_fw to fw to avoid redundency. v6: rebased. Remove sections of code that were commented and no longer required. v7: rebased. Remove uc_fw_ prefix from path and obj fields in intel_uc_fw struct as suggested by Michal. v8: rebased. Add declaration of intel_guc_wopcm_size() in this patch instead of patch 3. Cc: Michal Wajdeczko Cc: Arkadiusz Hiler Signed-off-by: Anusha Srivatsa Signed-off-by: Alex Dai Signed-off-by: Peter Antoine Reviewed-by: Arkadiusz Hiler Reviewed-by: Michal Wajdeczko Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484356631-16139-2-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 12 +-- drivers/gpu/drm/i915/i915_guc_submission.c | 4 +- drivers/gpu/drm/i915/intel_guc_loader.c | 156 ++++++++++++++--------------- drivers/gpu/drm/i915/intel_uc.h | 37 +++---- 4 files changed, 105 insertions(+), 104 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 01fdbbf0fd43..5c7cd9d3d27d 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2355,7 +2355,7 @@ static int i915_llc(struct seq_file *m, void *data) static int i915_guc_load_status_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; u32 tmp, i; if (!HAS_GUC_UCODE(dev_priv)) @@ -2363,15 +2363,15 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) seq_printf(m, "GuC firmware status:\n"); seq_printf(m, "\tpath: %s\n", - guc_fw->guc_fw_path); + guc_fw->path); seq_printf(m, "\tfetch: %s\n", - intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status)); + intel_uc_fw_status_repr(guc_fw->fetch_status)); seq_printf(m, "\tload: %s\n", - intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); + intel_uc_fw_status_repr(guc_fw->load_status)); seq_printf(m, "\tversion wanted: %d.%d\n", - guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted); + guc_fw->major_ver_wanted, guc_fw->minor_ver_wanted); seq_printf(m, "\tversion found: %d.%d\n", - guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found); + guc_fw->major_ver_found, guc_fw->minor_ver_found); seq_printf(m, "\theader: offset is %d; size = %d\n", guc_fw->header_offset, guc_fw->header_size); seq_printf(m, "\tuCode: offset is %d; size = %d\n", diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 73de5035017a..c3277858cd8c 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -998,7 +998,7 @@ int intel_guc_suspend(struct drm_i915_private *dev_priv) struct i915_gem_context *ctx; u32 data[3]; - if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS) + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) return 0; gen9_disable_guc_interrupts(dev_priv); @@ -1025,7 +1025,7 @@ int intel_guc_resume(struct drm_i915_private *dev_priv) struct i915_gem_context *ctx; u32 data[3]; - if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS) + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) return 0; if (i915.guc_log_level >= 0) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 4bcef5d2c011..90434b2c4575 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -75,16 +75,16 @@ MODULE_FIRMWARE(I915_BXT_GUC_UCODE); MODULE_FIRMWARE(I915_KBL_GUC_UCODE); /* User-friendly representation of an enum */ -const char *intel_guc_fw_status_repr(enum intel_guc_fw_status status) +const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) { switch (status) { - case GUC_FIRMWARE_FAIL: + case INTEL_UC_FIRMWARE_FAIL: return "FAIL"; - case GUC_FIRMWARE_NONE: + case INTEL_UC_FIRMWARE_NONE: return "NONE"; - case GUC_FIRMWARE_PENDING: + case INTEL_UC_FIRMWARE_PENDING: return "PENDING"; - case GUC_FIRMWARE_SUCCESS: + case INTEL_UC_FIRMWARE_SUCCESS: return "SUCCESS"; default: return "UNKNOWN!"; @@ -272,7 +272,7 @@ static inline bool guc_ucode_response(struct drm_i915_private *dev_priv, static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv, struct i915_vma *vma) { - struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; unsigned long offset; struct sg_table *sg = vma->pages; u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT]; @@ -344,17 +344,17 @@ static u32 guc_wopcm_size(struct drm_i915_private *dev_priv) */ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) { - struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; struct i915_vma *vma; int ret; - ret = i915_gem_object_set_to_gtt_domain(guc_fw->guc_fw_obj, false); + ret = i915_gem_object_set_to_gtt_domain(guc_fw->obj, false); if (ret) { DRM_DEBUG_DRIVER("set-domain failed %d\n", ret); return ret; } - vma = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, + vma = i915_gem_object_ggtt_pin(guc_fw->obj, NULL, 0, 0, PIN_OFFSET_BIAS | GUC_WOPCM_TOP); if (IS_ERR(vma)) { DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma)); @@ -442,14 +442,14 @@ static int guc_hw_reset(struct drm_i915_private *dev_priv) */ int intel_guc_setup(struct drm_i915_private *dev_priv) { - struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; - const char *fw_path = guc_fw->guc_fw_path; + struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; + const char *fw_path = guc_fw->path; int retries, ret, err; DRM_DEBUG_DRIVER("GuC fw status: path %s, fetch %s, load %s\n", fw_path, - intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status), - intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); + intel_uc_fw_status_repr(guc_fw->fetch_status), + intel_uc_fw_status_repr(guc_fw->load_status)); /* Loading forbidden, or no firmware to load? */ if (!i915.enable_guc_loading) { @@ -467,10 +467,10 @@ int intel_guc_setup(struct drm_i915_private *dev_priv) } /* Fetch failed, or already fetched but failed to load? */ - if (guc_fw->guc_fw_fetch_status != GUC_FIRMWARE_SUCCESS) { + if (guc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) { err = -EIO; goto fail; - } else if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_FAIL) { + } else if (guc_fw->load_status == INTEL_UC_FIRMWARE_FAIL) { err = -ENOEXEC; goto fail; } @@ -481,11 +481,11 @@ int intel_guc_setup(struct drm_i915_private *dev_priv) /* We need to notify the guc whenever we change the GGTT */ i915_ggtt_enable_guc(dev_priv); - guc_fw->guc_fw_load_status = GUC_FIRMWARE_PENDING; + guc_fw->load_status = INTEL_UC_FIRMWARE_PENDING; DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n", - intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status), - intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); + intel_uc_fw_status_repr(guc_fw->fetch_status), + intel_uc_fw_status_repr(guc_fw->load_status)); err = i915_guc_submission_init(dev_priv); if (err) @@ -517,11 +517,11 @@ int intel_guc_setup(struct drm_i915_private *dev_priv) "retry %d more time(s)\n", err, retries); } - guc_fw->guc_fw_load_status = GUC_FIRMWARE_SUCCESS; + guc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS; DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n", - intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status), - intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); + intel_uc_fw_status_repr(guc_fw->fetch_status), + intel_uc_fw_status_repr(guc_fw->load_status)); if (i915.enable_guc_submission) { if (i915.guc_log_level >= 0) @@ -536,8 +536,8 @@ int intel_guc_setup(struct drm_i915_private *dev_priv) return 0; fail: - if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_PENDING) - guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL; + if (guc_fw->load_status == INTEL_UC_FIRMWARE_PENDING) + guc_fw->load_status = INTEL_UC_FIRMWARE_FAIL; guc_interrupts_release(dev_priv); i915_guc_submission_disable(dev_priv); @@ -583,8 +583,8 @@ fail: return ret; } -static void guc_fw_fetch(struct drm_i915_private *dev_priv, - struct intel_guc_fw *guc_fw) +void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, + struct intel_uc_fw *uc_fw) { struct pci_dev *pdev = dev_priv->drm.pdev; struct drm_i915_gem_object *obj; @@ -593,17 +593,17 @@ static void guc_fw_fetch(struct drm_i915_private *dev_priv, size_t size; int err; - DRM_DEBUG_DRIVER("before requesting firmware: GuC fw fetch status %s\n", - intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status)); + DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n", + intel_uc_fw_status_repr(uc_fw->fetch_status)); - err = request_firmware(&fw, guc_fw->guc_fw_path, &pdev->dev); + err = request_firmware(&fw, uc_fw->path, &pdev->dev); if (err) goto fail; if (!fw) goto fail; - DRM_DEBUG_DRIVER("fetch GuC fw from %s succeeded, fw %p\n", - guc_fw->guc_fw_path, fw); + DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n", + uc_fw->path, fw); /* Check the size of the blob before examining buffer contents */ if (fw->size < sizeof(struct guc_css_header)) { @@ -614,36 +614,36 @@ static void guc_fw_fetch(struct drm_i915_private *dev_priv, css = (struct guc_css_header *)fw->data; /* Firmware bits always start from header */ - guc_fw->header_offset = 0; - guc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - + uc_fw->header_offset = 0; + uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - css->key_size_dw - css->exponent_size_dw) * sizeof(u32); - if (guc_fw->header_size != sizeof(struct guc_css_header)) { + if (uc_fw->header_size != sizeof(struct guc_css_header)) { DRM_NOTE("CSS header definition mismatch\n"); goto fail; } /* then, uCode */ - guc_fw->ucode_offset = guc_fw->header_offset + guc_fw->header_size; - guc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); + uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; + uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); /* now RSA */ if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { DRM_NOTE("RSA key size is bad\n"); goto fail; } - guc_fw->rsa_offset = guc_fw->ucode_offset + guc_fw->ucode_size; - guc_fw->rsa_size = css->key_size_dw * sizeof(u32); + uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; + uc_fw->rsa_size = css->key_size_dw * sizeof(u32); /* At least, it should have header, uCode and RSA. Size of all three. */ - size = guc_fw->header_size + guc_fw->ucode_size + guc_fw->rsa_size; + size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; if (fw->size < size) { DRM_NOTE("Missing firmware components\n"); goto fail; } /* Header and uCode will be loaded to WOPCM. Size of the two. */ - size = guc_fw->header_size + guc_fw->ucode_size; + size = uc_fw->header_size + uc_fw->ucode_size; if (size > guc_wopcm_size(dev_priv)) { DRM_NOTE("Firmware is too large to fit in WOPCM\n"); goto fail; @@ -655,21 +655,21 @@ static void guc_fw_fetch(struct drm_i915_private *dev_priv, * TWO bytes each (i.e. u16), although all pointers and offsets are defined * in terms of bytes (u8). */ - guc_fw->guc_fw_major_found = css->guc_sw_version >> 16; - guc_fw->guc_fw_minor_found = css->guc_sw_version & 0xFFFF; - - if (guc_fw->guc_fw_major_found != guc_fw->guc_fw_major_wanted || - guc_fw->guc_fw_minor_found < guc_fw->guc_fw_minor_wanted) { - DRM_NOTE("GuC firmware version %d.%d, required %d.%d\n", - guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found, - guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted); + uc_fw->major_ver_found = css->guc_sw_version >> 16; + uc_fw->minor_ver_found = css->guc_sw_version & 0xFFFF; + + if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || + uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { + DRM_NOTE("uC firmware version %d.%d, required %d.%d\n", + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); err = -ENOEXEC; goto fail; } DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", - guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found, - guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted); + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); mutex_lock(&dev_priv->drm.struct_mutex); obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); @@ -679,31 +679,31 @@ static void guc_fw_fetch(struct drm_i915_private *dev_priv, goto fail; } - guc_fw->guc_fw_obj = obj; - guc_fw->guc_fw_size = fw->size; + uc_fw->obj = obj; + uc_fw->size = fw->size; - DRM_DEBUG_DRIVER("GuC fw fetch status SUCCESS, obj %p\n", - guc_fw->guc_fw_obj); + DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n", + uc_fw->obj); release_firmware(fw); - guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_SUCCESS; + uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; return; fail: - DRM_WARN("Failed to fetch valid GuC firmware from %s (error %d)\n", - guc_fw->guc_fw_path, err); - DRM_DEBUG_DRIVER("GuC fw fetch status FAIL; err %d, fw %p, obj %p\n", - err, fw, guc_fw->guc_fw_obj); + DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n", + uc_fw->path, err); + DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", + err, fw, uc_fw->obj); mutex_lock(&dev_priv->drm.struct_mutex); - obj = guc_fw->guc_fw_obj; + obj = uc_fw->obj; if (obj) i915_gem_object_put(obj); - guc_fw->guc_fw_obj = NULL; + uc_fw->obj = NULL; mutex_unlock(&dev_priv->drm.struct_mutex); release_firmware(fw); /* OK even if fw is NULL */ - guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_FAIL; + uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; } /** @@ -717,7 +717,7 @@ fail: */ void intel_guc_init(struct drm_i915_private *dev_priv) { - struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; const char *fw_path; if (!HAS_GUC(dev_priv)) { @@ -735,23 +735,23 @@ void intel_guc_init(struct drm_i915_private *dev_priv) fw_path = NULL; } else if (IS_SKYLAKE(dev_priv)) { fw_path = I915_SKL_GUC_UCODE; - guc_fw->guc_fw_major_wanted = SKL_FW_MAJOR; - guc_fw->guc_fw_minor_wanted = SKL_FW_MINOR; + guc_fw->major_ver_wanted = SKL_FW_MAJOR; + guc_fw->minor_ver_wanted = SKL_FW_MINOR; } else if (IS_BROXTON(dev_priv)) { fw_path = I915_BXT_GUC_UCODE; - guc_fw->guc_fw_major_wanted = BXT_FW_MAJOR; - guc_fw->guc_fw_minor_wanted = BXT_FW_MINOR; + guc_fw->major_ver_wanted = BXT_FW_MAJOR; + guc_fw->minor_ver_wanted = BXT_FW_MINOR; } else if (IS_KABYLAKE(dev_priv)) { fw_path = I915_KBL_GUC_UCODE; - guc_fw->guc_fw_major_wanted = KBL_FW_MAJOR; - guc_fw->guc_fw_minor_wanted = KBL_FW_MINOR; + guc_fw->major_ver_wanted = KBL_FW_MAJOR; + guc_fw->minor_ver_wanted = KBL_FW_MINOR; } else { fw_path = ""; /* unknown device */ } - guc_fw->guc_fw_path = fw_path; - guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE; - guc_fw->guc_fw_load_status = GUC_FIRMWARE_NONE; + guc_fw->path = fw_path; + guc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; + guc_fw->load_status = INTEL_UC_FIRMWARE_NONE; /* Early (and silent) return if GuC loading is disabled */ if (!i915.enable_guc_loading) @@ -761,9 +761,9 @@ void intel_guc_init(struct drm_i915_private *dev_priv) if (*fw_path == '\0') return; - guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_PENDING; + guc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; DRM_DEBUG_DRIVER("GuC firmware pending, path %s\n", fw_path); - guc_fw_fetch(dev_priv, guc_fw); + intel_uc_fw_fetch(dev_priv, guc_fw); /* status must now be FAIL or SUCCESS */ } @@ -773,17 +773,17 @@ void intel_guc_init(struct drm_i915_private *dev_priv) */ void intel_guc_fini(struct drm_i915_private *dev_priv) { - struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; mutex_lock(&dev_priv->drm.struct_mutex); guc_interrupts_release(dev_priv); i915_guc_submission_disable(dev_priv); i915_guc_submission_fini(dev_priv); - if (guc_fw->guc_fw_obj) - i915_gem_object_put(guc_fw->guc_fw_obj); - guc_fw->guc_fw_obj = NULL; + if (guc_fw->obj) + i915_gem_object_put(guc_fw->obj); + guc_fw->obj = NULL; mutex_unlock(&dev_priv->drm.struct_mutex); - guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE; + guc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; } diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 57f90885a321..905ed445d559 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -93,28 +93,28 @@ struct i915_guc_client { uint64_t submissions[I915_NUM_ENGINES]; }; -enum intel_guc_fw_status { - GUC_FIRMWARE_FAIL = -1, - GUC_FIRMWARE_NONE = 0, - GUC_FIRMWARE_PENDING, - GUC_FIRMWARE_SUCCESS +enum intel_uc_fw_status { + INTEL_UC_FIRMWARE_FAIL = -1, + INTEL_UC_FIRMWARE_NONE = 0, + INTEL_UC_FIRMWARE_PENDING, + INTEL_UC_FIRMWARE_SUCCESS }; /* * This structure encapsulates all the data needed during the process * of fetching, caching, and loading the firmware image into the GuC. */ -struct intel_guc_fw { - const char * guc_fw_path; - size_t guc_fw_size; - struct drm_i915_gem_object * guc_fw_obj; - enum intel_guc_fw_status guc_fw_fetch_status; - enum intel_guc_fw_status guc_fw_load_status; - - uint16_t guc_fw_major_wanted; - uint16_t guc_fw_minor_wanted; - uint16_t guc_fw_major_found; - uint16_t guc_fw_minor_found; +struct intel_uc_fw { + const char *path; + size_t size; + struct drm_i915_gem_object *obj; + enum intel_uc_fw_status fetch_status; + enum intel_uc_fw_status load_status; + + uint16_t major_ver_wanted; + uint16_t minor_ver_wanted; + uint16_t major_ver_found; + uint16_t minor_ver_found; uint32_t header_size; uint32_t header_offset; @@ -141,7 +141,7 @@ struct intel_guc_log { }; struct intel_guc { - struct intel_guc_fw guc_fw; + struct intel_uc_fw fw; struct intel_guc_log log; /* intel_guc_recv interrupt related state */ @@ -179,9 +179,10 @@ int intel_guc_sample_forcewake(struct intel_guc *guc); extern void intel_guc_init(struct drm_i915_private *dev_priv); extern int intel_guc_setup(struct drm_i915_private *dev_priv); extern void intel_guc_fini(struct drm_i915_private *dev_priv); -extern const char *intel_guc_fw_status_repr(enum intel_guc_fw_status status); +extern const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status); extern int intel_guc_suspend(struct drm_i915_private *dev_priv); extern int intel_guc_resume(struct drm_i915_private *dev_priv); +u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); /* i915_guc_submission.c */ int i915_guc_submission_init(struct drm_i915_private *dev_priv); -- cgit v1.2.3-55-g7522 From fbbad73e1b78d2e3d7ce7f82613a8136f2116f13 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Fri, 13 Jan 2017 17:17:05 -0800 Subject: drm/i915/huc: Unified css_header struct for GuC and HuC HuC firmware css header has almost exactly same definition as GuC firmware except for the sw_version. Also, add a new member fw_type into intel_uc_fw to indicate what kind of fw it is. So, the loader will pull right sw_version from header. v2: rebased on-top of drm-intel-nightly v3: rebased. Rename device_id to guc_branch_client_version, make guc_sw_version a union. . Put UC_FW_TYPE_GUC and UC_FW_TYPE_HUC into an enum. v4: rebased on top of drm-tip.Update dev to dev_priv in intel_uc_fw_fetch. v5: rebased. Add INTEL_ prefix to an enum. Add fw_type declaration from patch 1.Combine two different unions for huc and guc version, reserved etc into one union with two structs. v6: rebased. Change fw_type to enum. v7: rebased. Rename the enum fw_type to intel_uc_fw_type. Cc: Michal Wajdeczko Tested-by: Xiang Haihao Signed-off-by: Anusha Srivatsa Signed-off-by: Alex Dai Signed-off-by: Peter Antoine Reviewed-by: Arkadiusz Hiler Reviewed-by: Michal Wajdeczko Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484356631-16139-3-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_guc_fwif.h | 23 ++++++++++++++---- drivers/gpu/drm/i915/intel_guc_loader.c | 41 ++++++++++++++++++++++----------- drivers/gpu/drm/i915/intel_uc.h | 6 +++++ 3 files changed, 53 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 3202b32b5638..ed1ab40854a4 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -145,7 +145,7 @@ * The GuC firmware layout looks like this: * * +-------------------------------+ - * | guc_css_header | + * | uc_css_header | * | | * | contains major/minor version | * +-------------------------------+ @@ -172,9 +172,16 @@ * 3. Length info of each component can be found in header, in dwords. * 4. Modulus and exponent key are not required by driver. They may not appear * in fw. So driver will load a truncated firmware in this case. + * + * HuC firmware layout is same as GuC firmware. + * + * HuC firmware css header is different. However, the only difference is where + * the version information is saved. The uc_css_header is unified to support + * both. Driver should get HuC version from uc_css_header.huc_sw_version, while + * uc_css_header.guc_sw_version for GuC. */ -struct guc_css_header { +struct uc_css_header { uint32_t module_type; /* header_size includes all non-uCode bits, including css_header, rsa * key, modulus key and exponent data. */ @@ -205,8 +212,16 @@ struct guc_css_header { char username[8]; char buildnumber[12]; - uint32_t device_id; - uint32_t guc_sw_version; + union { + struct { + uint32_t branch_client_version; + uint32_t sw_version; + } guc; + struct { + uint32_t sw_version; + uint32_t reserved; + } huc; + }; uint32_t prod_preprod_fw; uint32_t reserved[12]; uint32_t header_info; diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 90434b2c4575..2d523575d763 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -589,7 +589,7 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, struct pci_dev *pdev = dev_priv->drm.pdev; struct drm_i915_gem_object *obj; const struct firmware *fw = NULL; - struct guc_css_header *css; + struct uc_css_header *css; size_t size; int err; @@ -606,19 +606,19 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, uc_fw->path, fw); /* Check the size of the blob before examining buffer contents */ - if (fw->size < sizeof(struct guc_css_header)) { + if (fw->size < sizeof(struct uc_css_header)) { DRM_NOTE("Firmware header is missing\n"); goto fail; } - css = (struct guc_css_header *)fw->data; + css = (struct uc_css_header *)fw->data; /* Firmware bits always start from header */ uc_fw->header_offset = 0; uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - css->key_size_dw - css->exponent_size_dw) * sizeof(u32); - if (uc_fw->header_size != sizeof(struct guc_css_header)) { + if (uc_fw->header_size != sizeof(struct uc_css_header)) { DRM_NOTE("CSS header definition mismatch\n"); goto fail; } @@ -642,21 +642,36 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, goto fail; } - /* Header and uCode will be loaded to WOPCM. Size of the two. */ - size = uc_fw->header_size + uc_fw->ucode_size; - if (size > guc_wopcm_size(dev_priv)) { - DRM_NOTE("Firmware is too large to fit in WOPCM\n"); - goto fail; - } - /* * The GuC firmware image has the version number embedded at a well-known * offset within the firmware blob; note that major / minor version are * TWO bytes each (i.e. u16), although all pointers and offsets are defined * in terms of bytes (u8). */ - uc_fw->major_ver_found = css->guc_sw_version >> 16; - uc_fw->minor_ver_found = css->guc_sw_version & 0xFFFF; + switch (uc_fw->fw) { + case INTEL_UC_FW_TYPE_GUC: + /* Header and uCode will be loaded to WOPCM. Size of the two. */ + size = uc_fw->header_size + uc_fw->ucode_size; + + /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */ + if (size > guc_wopcm_size(dev_priv)) { + DRM_ERROR("Firmware is too large to fit in WOPCM\n"); + goto fail; + } + uc_fw->major_ver_found = css->guc.sw_version >> 16; + uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; + break; + + case INTEL_UC_FW_TYPE_HUC: + uc_fw->major_ver_found = css->huc.sw_version >> 16; + uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF; + break; + + default: + DRM_ERROR("Unknown firmware type %d\n", uc_fw->fw); + err = -ENOEXEC; + goto fail; + } if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 905ed445d559..bb7faa404352 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -100,6 +100,11 @@ enum intel_uc_fw_status { INTEL_UC_FIRMWARE_SUCCESS }; +enum intel_uc_fw_type { + INTEL_UC_FW_TYPE_GUC, + INTEL_UC_FW_TYPE_HUC +}; + /* * This structure encapsulates all the data needed during the process * of fetching, caching, and loading the firmware image into the GuC. @@ -116,6 +121,7 @@ struct intel_uc_fw { uint16_t major_ver_found; uint16_t minor_ver_found; + enum intel_uc_fw_type fw; uint32_t header_size; uint32_t header_offset; uint32_t rsa_size; -- cgit v1.2.3-55-g7522 From b86bef20e0f62dab61a1f9b33465061bb27cdb49 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 16 Jan 2017 13:06:21 +0000 Subject: drm/i915: Fix compiler warnings for i915_edp_psr_status() Appease both the poor reader and the compiler by rewriting the string lookup for EDP_PSR2_STATUS_CTL: drivers/gpu/drm/i915/i915_debugfs.c:2662 i915_edp_psr_status() warn: if statement not indented Fixes: 6ba1f9e1772f ("drm/i915/psr: report live PSR2 State") Signed-off-by: Chris Wilson Cc: Rodrigo Vivi Cc: Jim Bride Cc: Vathsala Nagaraju Cc: Patil Deepti Link: http://patchwork.freedesktop.org/patch/msgid/20170116130622.20369-1-chris@chris-wilson.co.uk Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_debugfs.c | 49 ++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5c7cd9d3d27d..129aab71fadf 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2559,6 +2559,29 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_control_fops, i915_guc_log_control_get, i915_guc_log_control_set, "%lld\n"); +static const char *psr2_live_status(u32 val) +{ + static const char * const live_status[] = { + "IDLE", + "CAPTURE", + "CAPTURE_FS", + "SLEEP", + "BUFON_FW", + "ML_UP", + "SU_STANDBY", + "FAST_SLEEP", + "DEEP_SLEEP", + "BUF_ON", + "TG_ON" + }; + + val = (val & EDP_PSR2_STATUS_STATE_MASK) >> EDP_PSR2_STATUS_STATE_SHIFT; + if (val < ARRAY_SIZE(live_status)) + return live_status[val]; + + return "unknown"; +} + static int i915_edp_psr_status(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -2634,28 +2657,10 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) seq_printf(m, "Performance_Counter: %u\n", psrperf); } if (dev_priv->psr.psr2_support) { - static const char * const live_status[] = { - "IDLE", - "CAPTURE", - "CAPTURE_FS", - "SLEEP", - "BUFON_FW", - "ML_UP", - "SU_STANDBY", - "FAST_SLEEP", - "DEEP_SLEEP", - "BUF_ON", - "TG_ON" }; - u8 pos = (I915_READ(EDP_PSR2_STATUS_CTL) & - EDP_PSR2_STATUS_STATE_MASK) >> - EDP_PSR2_STATUS_STATE_SHIFT; - - seq_printf(m, "PSR2_STATUS_EDP: %x\n", - I915_READ(EDP_PSR2_STATUS_CTL)); - - if (pos < ARRAY_SIZE(live_status)) - seq_printf(m, "PSR2 live state %s\n", - live_status[pos]); + u32 psr2 = I915_READ(EDP_PSR2_STATUS_CTL); + + seq_printf(m, "EDP_PSR2_STATUS_CTL: %x [%s]\n", + psr2, psr2_live_status(psr2)); } mutex_unlock(&dev_priv->psr.lock); -- cgit v1.2.3-55-g7522 From 77affa31722be7c584b4b1be792423079e64a47d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 16 Jan 2017 13:06:22 +0000 Subject: drm/i915/psr: Fix compiler warnings for hsw_psr_disable() drivers/gpu/drm/i915/intel_psr.c:634 hsw_psr_disable() warn: if statement not indented drivers/gpu/drm/i915/intel_psr.c:644 hsw_psr_disable() warn: if statement not indented Fixes: 3fcb0ca1d8db ("drm/i915/psr: fix blank screen issue for psr2") Signed-off-by: Chris Wilson Cc: Rodrigo Vivi Cc: Jim Bride Cc: Vathsala Nagaraju Cc: Patil Deepti Link: http://patchwork.freedesktop.org/patch/msgid/20170116130622.20369-2-chris@chris-wilson.co.uk Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_psr.c | 42 +++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 38419e57d2aa..c3780d0d2baf 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -620,34 +620,36 @@ static void hsw_psr_disable(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dev); if (dev_priv->psr.active) { + i915_reg_t psr_ctl; + u32 psr_status_mask; + if (dev_priv->psr.aux_frame_sync) drm_dp_dpcd_writeb(&intel_dp->aux, DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF, 0); if (dev_priv->psr.psr2_support) { - I915_WRITE(EDP_PSR2_CTL, - I915_READ(EDP_PSR2_CTL) & - ~(EDP_PSR2_ENABLE | - EDP_SU_TRACK_ENABLE)); - /* Wait till PSR2 is idle */ - if (intel_wait_for_register(dev_priv, - EDP_PSR2_STATUS_CTL, - EDP_PSR2_STATUS_STATE_MASK, - 0, - 2000)) - DRM_ERROR("Timed out waiting for PSR2 Idle State\n"); + psr_ctl = EDP_PSR2_CTL; + psr_status_mask = EDP_PSR2_STATUS_STATE_MASK; + + I915_WRITE(psr_ctl, + I915_READ(psr_ctl) & + ~(EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE)); + } else { - I915_WRITE(EDP_PSR_CTL, - I915_READ(EDP_PSR_CTL) & ~EDP_PSR_ENABLE); - /* Wait till PSR1 is idle */ - if (intel_wait_for_register(dev_priv, - EDP_PSR_STATUS_CTL, - EDP_PSR_STATUS_STATE_MASK, - 0, - 2000)) - DRM_ERROR("Timed out waiting for PSR Idle State\n"); + psr_ctl = EDP_PSR_STATUS_CTL; + psr_status_mask = EDP_PSR_STATUS_STATE_MASK; + + I915_WRITE(psr_ctl, + I915_READ(psr_ctl) & ~EDP_PSR_ENABLE); } + + /* Wait till PSR is idle */ + if (intel_wait_for_register(dev_priv, + psr_ctl, psr_status_mask, 0, + 2000)) + DRM_ERROR("Timed out waiting for PSR Idle State\n"); + dev_priv->psr.active = false; } else { if (dev_priv->psr.psr2_support) -- cgit v1.2.3-55-g7522 From 4c9655436522eaf4ba35572851150ccb71f3866e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 17 Jan 2017 17:59:01 +0200 Subject: drm/i915: Move engine reset preparation to i915_gem_reset_prepare() Now that we have prepare/finish routines for the GEM reset, move the disabling of the engine->irq_tasklet into them to reduce repetition. The device irq enable/disable is split out to ensure it is run first and last always (even if the GPU reset fails). Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1484668747-9120-1-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 21 ++------------------- drivers/gpu/drm/i915/i915_gem.c | 17 ++++++++++++----- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 350ee06c8d05..fbad2b64b890 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1728,22 +1728,6 @@ static int i915_resume_switcheroo(struct drm_device *dev) return i915_drm_resume(dev); } -static void disable_engines_irq(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* Ensure irq handler finishes, and not run again. */ - disable_irq(dev_priv->drm.irq); - for_each_engine(engine, dev_priv, id) - tasklet_kill(&engine->irq_tasklet); -} - -static void enable_engines_irq(struct drm_i915_private *dev_priv) -{ - enable_irq(dev_priv->drm.irq); -} - /** * i915_reset - reset chip after a hang * @dev_priv: device private to reset @@ -1776,12 +1760,10 @@ void i915_reset(struct drm_i915_private *dev_priv) error->reset_count++; pr_notice("drm/i915: Resetting chip after gpu hang\n"); + disable_irq(dev_priv->drm.irq); i915_gem_reset_prepare(dev_priv); - disable_engines_irq(dev_priv); ret = intel_gpu_reset(dev_priv, ALL_ENGINES); - enable_engines_irq(dev_priv); - if (ret) { if (ret != -ENODEV) DRM_ERROR("Failed to reset chip: %i\n", ret); @@ -1816,6 +1798,7 @@ void i915_reset(struct drm_i915_private *dev_priv) i915_queue_hangcheck(dev_priv); wakeup: + enable_irq(dev_priv->drm.irq); wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS); return; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 83cd2eff37af..249a6673b433 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2610,6 +2610,18 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) return NULL; } +void i915_gem_reset_prepare(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* Ensure irq handler finishes, and not run again. */ + for_each_engine(engine, dev_priv, id) + tasklet_kill(&engine->irq_tasklet); + + i915_gem_revoke_fences(dev_priv); +} + static void reset_request(struct drm_i915_gem_request *request) { void *vaddr = request->ring->vaddr; @@ -2629,11 +2641,6 @@ static void reset_request(struct drm_i915_gem_request *request) dma_fence_set_error(&request->fence, -EIO); } -void i915_gem_reset_prepare(struct drm_i915_private *dev_priv) -{ - i915_gem_revoke_fences(dev_priv); -} - static void i915_gem_reset_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; -- cgit v1.2.3-55-g7522 From 36193acd54bdf1b790bfebfb132e37ece4af4982 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 17 Jan 2017 17:59:02 +0200 Subject: drm/i915: Introduce engine_skip_context Add a new function for skipping all pending requests for a context in order to make engine reset flow more readable. Cc: Chris Wilson Cc: Tvrtko Ursulin Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1484668747-9120-2-git-send-email-mika.kuoppala@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 45 ++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 249a6673b433..a83a4cc52fc9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2604,6 +2604,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) if (__i915_gem_request_completed(request)) continue; + GEM_BUG_ON(request->engine != engine); return request; } @@ -2622,7 +2623,7 @@ void i915_gem_reset_prepare(struct drm_i915_private *dev_priv) i915_gem_revoke_fences(dev_priv); } -static void reset_request(struct drm_i915_gem_request *request) +static void skip_request(struct drm_i915_gem_request *request) { void *vaddr = request->ring->vaddr; u32 head; @@ -2641,12 +2642,33 @@ static void reset_request(struct drm_i915_gem_request *request) dma_fence_set_error(&request->fence, -EIO); } +static void engine_skip_context(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + struct i915_gem_context *hung_ctx = request->ctx; + struct intel_timeline *timeline; + unsigned long flags; + + timeline = i915_gem_context_lookup_timeline(hung_ctx, engine); + + spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock(&timeline->lock); + + list_for_each_entry_continue(request, &engine->timeline->requests, link) + if (request->ctx == hung_ctx) + skip_request(request); + + list_for_each_entry(request, &timeline->requests, link) + skip_request(request); + + spin_unlock(&timeline->lock); + spin_unlock_irqrestore(&engine->timeline->lock, flags); +} + static void i915_gem_reset_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; struct i915_gem_context *hung_ctx; - struct intel_timeline *timeline; - unsigned long flags; bool ring_hung; if (engine->irq_seqno_barrier) @@ -2668,7 +2690,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) if (ring_hung) { i915_gem_context_mark_guilty(hung_ctx); - reset_request(request); + skip_request(request); } else { i915_gem_context_mark_innocent(hung_ctx); dma_fence_set_error(&request->fence, -EAGAIN); @@ -2696,20 +2718,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) if (i915_gem_context_is_default(hung_ctx)) return; - timeline = i915_gem_context_lookup_timeline(hung_ctx, engine); - - spin_lock_irqsave(&engine->timeline->lock, flags); - spin_lock(&timeline->lock); - - list_for_each_entry_continue(request, &engine->timeline->requests, link) - if (request->ctx == hung_ctx) - reset_request(request); - - list_for_each_entry(request, &timeline->requests, link) - reset_request(request); - - spin_unlock(&timeline->lock); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + engine_skip_context(request); } void i915_gem_reset_finish(struct drm_i915_private *dev_priv) -- cgit v1.2.3-55-g7522 From 211b12afe68c5d3b01f828957420e224329a9d99 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 17 Jan 2017 17:59:03 +0200 Subject: drm/i915: Cleanup request skip decision Since we now only skip banned contexts, preventing the skip of default contexts is no longer sensible. For a similar argument as before 'commit 7ec73b7e36d0 ("drm/i915: Only skip requests once a context is banned")' we end up with an inconsistent API if we only mark future execbufs from the default ctx as banned but fail to mark those currently executing as failed. Cc: Chris Wilson Cc: Tvrtko Ursulin Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1484668747-9120-3-git-send-email-mika.kuoppala@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a83a4cc52fc9..0fe4875580fd 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2704,21 +2704,8 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) engine->reset_hw(engine, request); /* If this context is now banned, skip all of its pending requests. */ - if (!i915_gem_context_is_banned(hung_ctx)) - return; - - /* Users of the default context do not rely on logical state - * preserved between batches. They have to emit full state on - * every batch and so it is safe to execute queued requests following - * the hang. - * - * Other contexts preserve state, now corrupt. We want to skip all - * queued requests that reference the corrupt context. - */ - if (i915_gem_context_is_default(hung_ctx)) - return; - - engine_skip_context(request); + if (i915_gem_context_is_banned(hung_ctx)) + engine_skip_context(request); } void i915_gem_reset_finish(struct drm_i915_private *dev_priv) -- cgit v1.2.3-55-g7522 From bf2f04366c1e3920fb23b57226ecb39207b1a104 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 17 Jan 2017 17:59:04 +0200 Subject: drm/i915: Introduce engine_stalled helper Move the engine stalled/pardoned check into a helper function. Cc: Chris Wilson Cc: Tvrtko Ursulin Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1484668747-9120-4-git-send-email-mika.kuoppala@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0fe4875580fd..e2a6f482b80f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2611,6 +2611,20 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) return NULL; } +static bool engine_stalled(struct intel_engine_cs *engine) +{ + if (!engine->hangcheck.stalled) + return false; + + /* Check for possible seqno movement after hang declaration */ + if (engine->hangcheck.seqno != intel_engine_get_seqno(engine)) { + DRM_DEBUG_DRIVER("%s pardoned\n", engine->name); + return false; + } + + return true; +} + void i915_gem_reset_prepare(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; @@ -2669,7 +2683,6 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; struct i915_gem_context *hung_ctx; - bool ring_hung; if (engine->irq_seqno_barrier) engine->irq_seqno_barrier(engine); @@ -2680,15 +2693,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) hung_ctx = request->ctx; - ring_hung = engine->hangcheck.stalled; - if (engine->hangcheck.seqno != intel_engine_get_seqno(engine)) { - DRM_DEBUG_DRIVER("%s pardoned, was guilty? %s\n", - engine->name, - yesno(ring_hung)); - ring_hung = false; - } - - if (ring_hung) { + if (engine_stalled(engine)) { i915_gem_context_mark_guilty(hung_ctx); skip_request(request); } else { -- cgit v1.2.3-55-g7522 From 61da536204ca0de24f8fc725c8297019f1e4963d Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 17 Jan 2017 17:59:05 +0200 Subject: drm/i915: Tidy up engine reset logic Split engine reset for engine and request specific parts. Cc: Chris Wilson Cc: Tvrtko Ursulin Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1484668747-9120-5-git-send-email-mika.kuoppala@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e2a6f482b80f..832ad092f462 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2679,10 +2679,26 @@ static void engine_skip_context(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } +/* Returns true if the request was guilty of hang */ +static bool i915_gem_reset_request(struct drm_i915_gem_request *request) +{ + /* Read once and return the resolution */ + const bool guilty = engine_stalled(request->engine); + + if (guilty) { + i915_gem_context_mark_guilty(request->ctx); + skip_request(request); + } else { + i915_gem_context_mark_innocent(request->ctx); + dma_fence_set_error(&request->fence, -EAGAIN); + } + + return guilty; +} + static void i915_gem_reset_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; - struct i915_gem_context *hung_ctx; if (engine->irq_seqno_barrier) engine->irq_seqno_barrier(engine); @@ -2691,16 +2707,8 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) if (!request) return; - hung_ctx = request->ctx; - - if (engine_stalled(engine)) { - i915_gem_context_mark_guilty(hung_ctx); - skip_request(request); - } else { - i915_gem_context_mark_innocent(hung_ctx); - dma_fence_set_error(&request->fence, -EAGAIN); + if (!i915_gem_reset_request(request)) return; - } DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", engine->name, request->global_seqno); @@ -2709,7 +2717,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) engine->reset_hw(engine, request); /* If this context is now banned, skip all of its pending requests. */ - if (i915_gem_context_is_banned(hung_ctx)) + if (i915_gem_context_is_banned(request->ctx)) engine_skip_context(request); } -- cgit v1.2.3-55-g7522 From 0e178aef8f13ff11dc9dec82c2cd849981cb1ad1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 17 Jan 2017 17:59:06 +0200 Subject: drm/i915: Detect a failed GPU reset+recovery If we can't recover the GPU after the reset, mark it as wedged to cancel the outstanding tasks and to prevent new users from trying to use the broken GPU. v2: Check the same ring is hung again before declaring the reset broken. v3: use engine_stalled (Mika) Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1484668747-9120-6-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 7 ++++++- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 16 ++++++++++++++-- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index fbad2b64b890..348dec482502 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1761,7 +1761,12 @@ void i915_reset(struct drm_i915_private *dev_priv) pr_notice("drm/i915: Resetting chip after gpu hang\n"); disable_irq(dev_priv->drm.irq); - i915_gem_reset_prepare(dev_priv); + ret = i915_gem_reset_prepare(dev_priv); + if (ret) { + DRM_ERROR("GPU recovery failed\n"); + intel_gpu_reset(dev_priv, ALL_ENGINES); + goto error; + } ret = intel_gpu_reset(dev_priv, ALL_ENGINES); if (ret) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1cd485c314e6..e75e367f253a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3327,7 +3327,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) return READ_ONCE(error->reset_count); } -void i915_gem_reset_prepare(struct drm_i915_private *dev_priv); +int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 832ad092f462..3e10e8101014 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2625,16 +2625,28 @@ static bool engine_stalled(struct intel_engine_cs *engine) return true; } -void i915_gem_reset_prepare(struct drm_i915_private *dev_priv) +int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; + int err = 0; /* Ensure irq handler finishes, and not run again. */ - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + struct drm_i915_gem_request *request; + tasklet_kill(&engine->irq_tasklet); + if (engine_stalled(engine)) { + request = i915_gem_find_active_request(engine); + if (request && request->fence.error == -EIO) + err = -EIO; /* Previous reset failed! */ + } + } + i915_gem_revoke_fences(dev_priv); + + return err; } static void skip_request(struct drm_i915_gem_request *request) -- cgit v1.2.3-55-g7522 From 71895a085879d7bc490611994c7efb156162f33d Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 17 Jan 2017 17:59:07 +0200 Subject: drm/i915: Add comment how we treat hung contexts Explain in a comment how and why we treat hung context like we do. Cc: Chris Wilson Cc: Tvrtko Ursulin Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1484668747-9120-7-git-send-email-mika.kuoppala@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3e10e8101014..d6ec63997ca3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2697,6 +2697,27 @@ static bool i915_gem_reset_request(struct drm_i915_gem_request *request) /* Read once and return the resolution */ const bool guilty = engine_stalled(request->engine); + /* The guilty request will get skipped on a hung engine. + * + * Users of client default contexts do not rely on logical + * state preserved between batches so it is safe to execute + * queued requests following the hang. Non default contexts + * rely on preserved state, so skipping a batch loses the + * evolution of the state and it needs to be considered corrupted. + * Executing more queued batches on top of corrupted state is + * risky. But we take the risk by trying to advance through + * the queued requests in order to make the client behaviour + * more predictable around resets, by not throwing away random + * amount of batches it has prepared for execution. Sophisticated + * clients can use gem_reset_stats_ioctl and dma fence status + * (exported via sync_file info ioctl on explicit fences) to observe + * when it loses the context state and should rebuild accordingly. + * + * The context ban, and ultimately the client ban, mechanism are safety + * valves if client submission ends up resulting in nothing more than + * subsequent hangs. + */ + if (guilty) { i915_gem_context_mark_guilty(request->ctx); skip_request(request); -- cgit v1.2.3-55-g7522 From 3c5e37f169cb67cbd03c6116fbc93e0805815d29 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 15 Jan 2017 12:58:25 +0000 Subject: drm/i915: Avoid drm_atomic_state_put(NULL) in intel_display_resume intel_display_resume() may be called without an atomic state to restore, i.e. dev_priv->modeset_reset_restore state is NULL. One such case is following a lid open/close event and the forced modeset in intel_lid_notify(). Reported-by: Stefan Seyfried Tested-by: Stefan Seyfried Fixes: 0853695c3ba4 ("drm: Add reference counting to drm_atomic_state") Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Jani Nikula Cc: # v4.10-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170115125825.18597-1-chris@chris-wilson.co.uk Reviewed-by: Ander Conselvan de Oliveira --- drivers/gpu/drm/i915/intel_display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 31ca73d9ee49..e4ea4fad9eb9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -17221,7 +17221,8 @@ void intel_display_resume(struct drm_device *dev) if (ret) DRM_ERROR("Restoring old state failed with %i\n", ret); - drm_atomic_state_put(state); + if (state) + drm_atomic_state_put(state); } void intel_modeset_gem_init(struct drm_device *dev) -- cgit v1.2.3-55-g7522 From 1e5a15d65d4cecb553d7e81c3ac05b1ffbc4bb27 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Wed, 18 Jan 2017 14:34:28 +0200 Subject: drm/i915: Avoid drm_atomic_state_put(NULL) on error paths The error paths in hsw_trans_edp_pipe_A_crc_wa() and intel_prepare_reset() would potentially call drm_atomic_state_put with a NULL state, which would lead to a NULL pointer dereference. Found by coverity. v2: Improve the error paths. (Chris) Fixes: 0853695c3ba4 ("drm: Add reference counting to drm_atomic_state") Cc: Chris Wilson Cc: Daniel Vetter Cc: # v4.10-rc1+ Signed-off-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1484742868-9551-1-git-send-email-ander.conselvan.de.oliveira@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 10 +++------- drivers/gpu/drm/i915/intel_pipe_crc.c | 10 ++++++---- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e4ea4fad9eb9..bf7ebdb0b604 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3560,23 +3560,19 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) state = drm_atomic_helper_duplicate_state(dev, ctx); if (IS_ERR(state)) { ret = PTR_ERR(state); - state = NULL; DRM_ERROR("Duplicating state failed with %i\n", ret); - goto err; + return; } ret = drm_atomic_helper_disable_all(dev, ctx); if (ret) { DRM_ERROR("Suspending crtc's failed with %i\n", ret); - goto err; + drm_atomic_state_put(state); + return; } dev_priv->modeset_restore_state = state; state->acquire_ctx = ctx; - return; - -err: - drm_atomic_state_put(state); } void intel_finish_reset(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 0f1da810cff0..c0b1f99da37b 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -560,14 +560,14 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv, state = drm_atomic_state_alloc(dev); if (!state) { ret = -ENOMEM; - goto out; + goto unlock; } state->acquire_ctx = drm_modeset_legacy_acquire_ctx(&crtc->base); pipe_config = intel_atomic_get_crtc_state(state, crtc); if (IS_ERR(pipe_config)) { ret = PTR_ERR(pipe_config); - goto out; + goto put_state; } pipe_config->pch_pfit.force_thru = enable; @@ -576,10 +576,12 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv, pipe_config->base.connectors_changed = true; ret = drm_atomic_commit(state); -out: + +put_state: + drm_atomic_state_put(state); +unlock: WARN(ret, "Toggling workaround to %i returns %i\n", enable, ret); drm_modeset_unlock_all(dev); - drm_atomic_state_put(state); } static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, -- cgit v1.2.3-55-g7522 From 5b8cd0755f8a06a851c436a013e7be0823fb155a Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Wed, 18 Jan 2017 13:38:43 -0800 Subject: drm/i915: prevent crash with .disable_display parameter The .disable_display parameter was causing a fatal crash when fbdev was dereferenced during driver init. V1: protection in i915_drv.c V2: Moved protection to intel_fbdev.c Fixes: 43cee314345a ("drm/i915/fbdev: Limit the global async-domain synchronization") Testcase: igt/drv_module_reload/basic-no-display Cc: Chris Wilson Signed-off-by: Clint Taylor Link: http://patchwork.freedesktop.org/patch/msgid/1484775523-29428-1-git-send-email-clinton.a.taylor@intel.com Reviewed-by: Chris Wilson Cc: Lukas Wunner Cc: Daniel Vetter Cc: Jani Nikula Cc: # v4.8+ Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_fbdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index bdefa61f2e60..bb0e9bf80e45 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -742,6 +742,9 @@ void intel_fbdev_initial_config_async(struct drm_device *dev) { struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; + if (!ifbdev) + return; + ifbdev->cookie = async_schedule(intel_fbdev_initial_config, ifbdev); } -- cgit v1.2.3-55-g7522 From bd132858e9f9ad1c03a7d4982deaac7f0c0a4f0d Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Wed, 18 Jan 2017 08:05:53 -0800 Subject: drm/i915/huc: Add HuC fw loading support The HuC loading process is similar to GuC. The intel_uc_fw_fetch() is used for both cases. HuC loading needs to be before GuC loading. The WOPCM setting must be done early before loading any of them. v2: rebased on-top of drm-intel-nightly. removed if(HAS_GUC()) before the guc call. (D.Gordon) update huc_version number of format. v3: rebased to drm-intel-nightly, changed the file name format to match the one in the huc package. Changed dev->dev_private to to_i915() v4: moved function back to where it was. change wait_for_atomic to wait_for. v5: rebased. Changed the year in the copyright message to reflect the right year.Correct the comments,remove the unwanted WARN message, replace drm_gem_object_unreference() with i915_gem_object_put().Make the prototypes in intel_huc.h non-extern. v6: rebased. Update the file construction done by HuC. It is similar to GuC.Adopted the approach used in- https://patchwork.freedesktop.org/patch/104355/ v7: Change dev to dev_priv in macro definition. Corrected comments. v8: rebased on top of drm-tip. Updated functions intel_huc_load(), intel_huc_init() and intel_uc_fw_fetch() to accept dev_priv instead of dev. Moved contents of intel_huc.h to intel_uc.h. v9: change SKL_FW_ to SKL_HUC_FW_. Add intel_ prefix to guc_wopcm_size(). Remove unwanted checks in intel_uc.h. Rename huc_fw in struct intel_huc to simply fw to avoid redundency. v10: rebased. Correct comments. Make intel_huc_fini() accept dev_priv instead of dev like intel_huc_init() and intel_huc_load().Move definition to i915_guc_reg.h from intel_uc.h. Clean DMA_CTRL bits after HuC DMA transfer in huc_ucode_xfer() instead of guc_ucode_xfer(). Add suitable WARNs to give extra info. v11: rebased. Add proper bias for HuC and make sure there are asserts on failure by using guc_ggtt_offset_vma(). Introduce intel_huc.c and remove intel_huc_loader.c since it has functions that do more than just loading.Correct year in copyright. v12: remove invalidates that are not required anymore. Cc: Arkadiusz Hiler Cc: Michal Wajdeczko Tested-by: Xiang Haihao Signed-off-by: Anusha Srivatsa Signed-off-by: Alex Dai Signed-off-by: Peter Antoine Reviewed-by: Michal Wajdeczko Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484755558-1234-1-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.c | 3 + drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_guc_reg.h | 6 + drivers/gpu/drm/i915/intel_guc_loader.c | 7 +- drivers/gpu/drm/i915/intel_huc.c | 262 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_uc.h | 14 ++ 7 files changed, 292 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_huc.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 40185fdd350f..74ca2e8b2494 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -58,6 +58,7 @@ i915-y += i915_cmd_parser.o \ i915-y += intel_uc.o \ intel_guc_log.o \ intel_guc_loader.o \ + intel_huc.o \ i915_guc_submission.o # autogenerated null render state diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 348dec482502..d1197aa68af5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -599,6 +599,7 @@ static int i915_load_modeset_init(struct drm_device *dev) if (ret) goto cleanup_irq; + intel_huc_init(dev_priv); intel_guc_init(dev_priv); ret = i915_gem_init(dev_priv); @@ -627,6 +628,7 @@ cleanup_gem: i915_gem_fini(dev_priv); cleanup_irq: intel_guc_fini(dev_priv); + intel_huc_fini(dev_priv); drm_irq_uninstall(dev); intel_teardown_gmbus(dev_priv); cleanup_csr: @@ -1314,6 +1316,7 @@ void i915_driver_unload(struct drm_device *dev) drain_workqueue(dev_priv->wq); intel_guc_fini(dev_priv); + intel_huc_fini(dev_priv); i915_gem_fini(dev_priv); intel_fbc_cleanup_cfb(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e75e367f253a..0cb448d0e651 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2073,6 +2073,7 @@ struct drm_i915_private { struct intel_gvt *gvt; + struct intel_huc huc; struct intel_guc guc; struct intel_csr csr; @@ -2847,6 +2848,7 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_GUC(dev_priv) ((dev_priv)->info.has_guc) #define HAS_GUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) #define HAS_GUC_SCHED(dev_priv) (HAS_GUC(dev_priv)) +#define HAS_HUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) #define HAS_RESOURCE_STREAMER(dev_priv) ((dev_priv)->info.has_resource_streamer) diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h index 6a0adafe0523..35cf9918d09a 100644 --- a/drivers/gpu/drm/i915/i915_guc_reg.h +++ b/drivers/gpu/drm/i915/i915_guc_reg.h @@ -61,12 +61,18 @@ #define DMA_ADDRESS_SPACE_GTT (8 << 16) #define DMA_COPY_SIZE _MMIO(0xc310) #define DMA_CTRL _MMIO(0xc314) +#define HUC_UKERNEL (1<<9) #define UOS_MOVE (1<<4) #define START_DMA (1<<0) #define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340) +#define HUC_LOADING_AGENT_VCR (0<<1) +#define HUC_LOADING_AGENT_GUC (1<<1) #define GUC_WOPCM_OFFSET_VALUE 0x80000 /* 512KB */ #define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4) +#define HUC_STATUS2 _MMIO(0xD3B0) +#define HUC_FW_VERIFIED (1<<7) + /* Defines WOPCM space available to GuC firmware */ #define GUC_WOPCM_SIZE _MMIO(0xc050) /* GuC addresses below GUC_WOPCM_TOP don't map through the GTT */ diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 2d523575d763..8c96a998c867 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -328,7 +328,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv, return ret; } -static u32 guc_wopcm_size(struct drm_i915_private *dev_priv) +u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv) { u32 wopcm_size = GUC_WOPCM_TOP; @@ -364,7 +364,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); /* init WOPCM */ - I915_WRITE(GUC_WOPCM_SIZE, guc_wopcm_size(dev_priv)); + I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE); /* Enable MIA caching. GuC clock gating is disabled. */ @@ -506,6 +506,7 @@ int intel_guc_setup(struct drm_i915_private *dev_priv) if (err) goto fail; + intel_huc_load(dev_priv); err = guc_ucode_xfer(dev_priv); if (!err) break; @@ -654,7 +655,7 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, size = uc_fw->header_size + uc_fw->ucode_size; /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */ - if (size > guc_wopcm_size(dev_priv)) { + if (size > intel_guc_wopcm_size(dev_priv)) { DRM_ERROR("Firmware is too large to fit in WOPCM\n"); goto fail; } diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c new file mode 100644 index 000000000000..57471f26709b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -0,0 +1,262 @@ +/* + * Copyright © 2016-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ +#include +#include "i915_drv.h" +#include "intel_uc.h" + +/** + * DOC: HuC Firmware + * + * Motivation: + * GEN9 introduces a new dedicated firmware for usage in media HEVC (High + * Efficiency Video Coding) operations. Userspace can use the firmware + * capabilities by adding HuC specific commands to batch buffers. + * + * Implementation: + * The same firmware loader is used as the GuC. However, the actual + * loading to HW is deferred until GEM initialization is done. + * + * Note that HuC firmware loading must be done before GuC loading. + */ + +#define SKL_HUC_FW_MAJOR 01 +#define SKL_HUC_FW_MINOR 07 +#define SKL_BLD_NUM 1398 + +#define HUC_FW_PATH(platform, major, minor, bld_num) \ + "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \ + __stringify(minor) "_" __stringify(bld_num) ".bin" + +#define I915_SKL_HUC_UCODE HUC_FW_PATH(skl, SKL_HUC_FW_MAJOR, \ + SKL_HUC_FW_MINOR, SKL_BLD_NUM) +MODULE_FIRMWARE(I915_SKL_HUC_UCODE); + +/** + * huc_ucode_xfer() - DMA's the firmware + * @dev_priv: the drm_i915_private device + * + * Transfer the firmware image to RAM for execution by the microcontroller. + * + * Return: 0 on success, non-zero on failure + */ +static int huc_ucode_xfer(struct drm_i915_private *dev_priv) +{ + struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + struct i915_vma *vma; + unsigned long offset = 0; + u32 size; + int ret; + + ret = i915_gem_object_set_to_gtt_domain(huc_fw->obj, false); + if (ret) { + DRM_DEBUG_DRIVER("set-domain failed %d\n", ret); + return ret; + } + + vma = i915_gem_object_ggtt_pin(huc_fw->obj, NULL, 0, 0, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + if (IS_ERR(vma)) { + DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma)); + return PTR_ERR(vma); + } + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* init WOPCM */ + I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); + I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE | + HUC_LOADING_AGENT_GUC); + + /* Set the source address for the uCode */ + offset = guc_ggtt_offset(vma) + huc_fw->header_offset; + I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); + I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); + + /* Hardware doesn't look at destination address for HuC. Set it to 0, + * but still program the correct address space. + */ + I915_WRITE(DMA_ADDR_1_LOW, 0); + I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); + + size = huc_fw->header_size + huc_fw->ucode_size; + I915_WRITE(DMA_COPY_SIZE, size); + + /* Start the DMA */ + I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(HUC_UKERNEL | START_DMA)); + + /* Wait for DMA to finish */ + ret = wait_for((I915_READ(DMA_CTRL) & START_DMA) == 0, 100); + + DRM_DEBUG_DRIVER("HuC DMA transfer wait over with ret %d\n", ret); + + /* Disable the bits once DMA is over */ + I915_WRITE(DMA_CTRL, _MASKED_BIT_DISABLE(HUC_UKERNEL)); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + /* + * We keep the object pages for reuse during resume. But we can unpin it + * now that DMA has completed, so it doesn't continue to take up space. + */ + i915_vma_unpin(vma); + + return ret; +} + +/** + * intel_huc_init() - initiate HuC firmware loading request + * @dev_priv: the drm_i915_private device + * + * Called early during driver load, but after GEM is initialised. The loading + * will continue only when driver explicitly specify firmware name and version. + * All other cases are considered as INTEL_UC_FIRMWARE_NONE either because HW + * is not capable or driver yet support it. And there will be no error message + * for INTEL_UC_FIRMWARE_NONE cases. + * + * The DMA-copying to HW is done later when intel_huc_load() is called. + */ +void intel_huc_init(struct drm_i915_private *dev_priv) +{ + struct intel_huc *huc = &dev_priv->huc; + struct intel_uc_fw *huc_fw = &huc->fw; + const char *fw_path = NULL; + + huc_fw->path = NULL; + huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; + huc_fw->load_status = INTEL_UC_FIRMWARE_NONE; + huc_fw->fw = INTEL_UC_FW_TYPE_HUC; + + if (!HAS_HUC_UCODE(dev_priv)) + return; + + if (IS_SKYLAKE(dev_priv)) { + fw_path = I915_SKL_HUC_UCODE; + huc_fw->major_ver_wanted = SKL_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = SKL_HUC_FW_MINOR; + } + + huc_fw->path = fw_path; + huc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; + + DRM_DEBUG_DRIVER("HuC firmware pending, path %s\n", fw_path); + + WARN(huc_fw->path == NULL, "HuC present but no fw path\n"); + + intel_uc_fw_fetch(dev_priv, huc_fw); +} + +/** + * intel_huc_load() - load HuC uCode to device + * @dev_priv: the drm_i915_private device + * + * Called from guc_setup() during driver loading and also after a GPU reset. + * Be note that HuC loading must be done before GuC loading. + * + * The firmware image should have already been fetched into memory by the + * earlier call to intel_huc_init(), so here we need only check that + * is succeeded, and then transfer the image to the h/w. + * + * Return: non-zero code on error + */ +int intel_huc_load(struct drm_i915_private *dev_priv) +{ + struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + int err; + + if (huc_fw->fetch_status == INTEL_UC_FIRMWARE_NONE) + return 0; + + DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", + huc_fw->path, + intel_uc_fw_status_repr(huc_fw->fetch_status), + intel_uc_fw_status_repr(huc_fw->load_status)); + + if (huc_fw->fetch_status == INTEL_UC_FIRMWARE_SUCCESS && + huc_fw->load_status == INTEL_UC_FIRMWARE_FAIL) + return -ENOEXEC; + + huc_fw->load_status = INTEL_UC_FIRMWARE_PENDING; + + switch (huc_fw->fetch_status) { + case INTEL_UC_FIRMWARE_FAIL: + /* something went wrong :( */ + err = -EIO; + goto fail; + + case INTEL_UC_FIRMWARE_NONE: + case INTEL_UC_FIRMWARE_PENDING: + default: + /* "can't happen" */ + WARN_ONCE(1, "HuC fw %s invalid fetch_status %s [%d]\n", + huc_fw->path, + intel_uc_fw_status_repr(huc_fw->fetch_status), + huc_fw->fetch_status); + err = -ENXIO; + goto fail; + + case INTEL_UC_FIRMWARE_SUCCESS: + break; + } + + err = huc_ucode_xfer(dev_priv); + if (err) + goto fail; + + huc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS; + + DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", + huc_fw->path, + intel_uc_fw_status_repr(huc_fw->fetch_status), + intel_uc_fw_status_repr(huc_fw->load_status)); + + return 0; + +fail: + if (huc_fw->load_status == INTEL_UC_FIRMWARE_PENDING) + huc_fw->load_status = INTEL_UC_FIRMWARE_FAIL; + + DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err); + + return err; +} + +/** + * intel_huc_fini() - clean up resources allocated for HuC + * @dev_priv: the drm_i915_private device + * + * Cleans up by releasing the huc firmware GEM obj. + */ +void intel_huc_fini(struct drm_i915_private *dev_priv) +{ + struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + + mutex_lock(&dev_priv->drm.struct_mutex); + if (huc_fw->obj) + i915_gem_object_put(huc_fw->obj); + huc_fw->obj = NULL; + mutex_unlock(&dev_priv->drm.struct_mutex); + + huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; +} + diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index bb7faa404352..4fd411b02ceb 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -176,6 +176,13 @@ struct intel_guc { struct mutex send_mutex; }; +struct intel_huc { + /* Generic uC firmware management */ + struct intel_uc_fw fw; + + /* HuC-specific additions */ +}; + /* intel_uc.c */ void intel_uc_init_early(struct drm_i915_private *dev_priv); int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len); @@ -188,6 +195,8 @@ extern void intel_guc_fini(struct drm_i915_private *dev_priv); extern const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status); extern int intel_guc_suspend(struct drm_i915_private *dev_priv); extern int intel_guc_resume(struct drm_i915_private *dev_priv); +void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, + struct intel_uc_fw *uc_fw); u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); /* i915_guc_submission.c */ @@ -213,4 +222,9 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma) return offset; } +/* intel_huc.c */ +void intel_huc_init(struct drm_i915_private *dev_priv); +void intel_huc_fini(struct drm_i915_private *dev_priv); +int intel_huc_load(struct drm_i915_private *dev_priv); + #endif -- cgit v1.2.3-55-g7522 From cd69098579d4c7862fb3330a129ddb71c0d0da16 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Wed, 18 Jan 2017 08:05:54 -0800 Subject: drm/i915/huc: Add BXT HuC Loading Support This patch adds the HuC Loading for the BXT by using the updated file construction. Version 1.7 of the HuC firmware. v2: rebased on to top drm-tip. Rename BXT_FW_MAJOR to BXT_HUC_FW_ Cc: Michal Wajdeczko Cc: Tvrtko Ursulin Signed-off-by: Anusha Srivatsa Reviewed-by: Arkadiusz Hiler Reviewed-by: Michal Wajdeczko Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484755558-1234-2-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_huc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 57471f26709b..887752488ea9 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -40,6 +40,10 @@ * Note that HuC firmware loading must be done before GuC loading. */ +#define BXT_HUC_FW_MAJOR 01 +#define BXT_HUC_FW_MINOR 07 +#define BXT_BLD_NUM 1398 + #define SKL_HUC_FW_MAJOR 01 #define SKL_HUC_FW_MINOR 07 #define SKL_BLD_NUM 1398 @@ -52,6 +56,9 @@ SKL_HUC_FW_MINOR, SKL_BLD_NUM) MODULE_FIRMWARE(I915_SKL_HUC_UCODE); +#define I915_BXT_HUC_UCODE HUC_FW_PATH(bxt, BXT_HUC_FW_MAJOR, \ + BXT_HUC_FW_MINOR, BXT_BLD_NUM) +MODULE_FIRMWARE(I915_BXT_HUC_UCODE); /** * huc_ucode_xfer() - DMA's the firmware * @dev_priv: the drm_i915_private device @@ -154,6 +161,10 @@ void intel_huc_init(struct drm_i915_private *dev_priv) fw_path = I915_SKL_HUC_UCODE; huc_fw->major_ver_wanted = SKL_HUC_FW_MAJOR; huc_fw->minor_ver_wanted = SKL_HUC_FW_MINOR; + } else if (IS_BROXTON(dev_priv)) { + fw_path = I915_BXT_HUC_UCODE; + huc_fw->major_ver_wanted = BXT_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = BXT_HUC_FW_MINOR; } huc_fw->path = fw_path; -- cgit v1.2.3-55-g7522 From f2ec71d550d3c7891c509ba831e67563044278d4 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Wed, 18 Jan 2017 08:05:55 -0800 Subject: drm/i915/HuC: Add KBL huC loading Support This patch adds the support to load HuC on KBL Version 2.0 v2: rebased on top of drm-tip. Rename KBL_FW_ to KBL_HUC_FW_ v3: rebased. Remove old checks. Cc: Michal Wajdeczko Cc: Tvrtko Ursulin Signed-off-by: Anusha Srivatsa Reviewed-by: Arkadiusz Hiler Reviewed-by: Michal Wajdeczko Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484755558-1234-3-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_huc.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 887752488ea9..8b84ba835b1f 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -48,6 +48,10 @@ #define SKL_HUC_FW_MINOR 07 #define SKL_BLD_NUM 1398 +#define KBL_HUC_FW_MAJOR 02 +#define KBL_HUC_FW_MINOR 00 +#define KBL_BLD_NUM 1810 + #define HUC_FW_PATH(platform, major, minor, bld_num) \ "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \ __stringify(minor) "_" __stringify(bld_num) ".bin" @@ -59,6 +63,11 @@ MODULE_FIRMWARE(I915_SKL_HUC_UCODE); #define I915_BXT_HUC_UCODE HUC_FW_PATH(bxt, BXT_HUC_FW_MAJOR, \ BXT_HUC_FW_MINOR, BXT_BLD_NUM) MODULE_FIRMWARE(I915_BXT_HUC_UCODE); + +#define I915_KBL_HUC_UCODE HUC_FW_PATH(kbl, KBL_HUC_FW_MAJOR, \ + KBL_HUC_FW_MINOR, KBL_BLD_NUM) +MODULE_FIRMWARE(I915_KBL_HUC_UCODE); + /** * huc_ucode_xfer() - DMA's the firmware * @dev_priv: the drm_i915_private device @@ -165,6 +174,10 @@ void intel_huc_init(struct drm_i915_private *dev_priv) fw_path = I915_BXT_HUC_UCODE; huc_fw->major_ver_wanted = BXT_HUC_FW_MAJOR; huc_fw->minor_ver_wanted = BXT_HUC_FW_MINOR; + } else if (IS_KABYLAKE(dev_priv)) { + fw_path = I915_KBL_HUC_UCODE; + huc_fw->major_ver_wanted = KBL_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = KBL_HUC_FW_MINOR; } huc_fw->path = fw_path; -- cgit v1.2.3-55-g7522 From 0509ead114937436368b8862156f33b9773e99c4 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Wed, 18 Jan 2017 08:05:56 -0800 Subject: drm/i915/huc: Add debugfs for HuC loading status check Add debugfs entry for HuC loading status check. v2: rebased on top of drm-tip. Cc: Michal wajdeczko Tested-by: Xiang Haihao Signed-off-by: Anusha Srivatsa Signed-off-by: Alex Dai Signed-off-by: Peter Antoine Reviewed-by: Jeff McGee Reviewed-by: Michal Wajdeczko Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484755558-1234-4-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 129aab71fadf..fa69d72fdcb9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2352,6 +2352,36 @@ static int i915_llc(struct seq_file *m, void *data) return 0; } +static int i915_huc_load_status_info(struct seq_file *m, void *data) +{ + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + + if (!HAS_HUC_UCODE(dev_priv)) + return 0; + + seq_puts(m, "HuC firmware status:\n"); + seq_printf(m, "\tpath: %s\n", huc_fw->path); + seq_printf(m, "\tfetch: %s\n", + intel_uc_fw_status_repr(huc_fw->fetch_status)); + seq_printf(m, "\tload: %s\n", + intel_uc_fw_status_repr(huc_fw->load_status)); + seq_printf(m, "\tversion wanted: %d.%d\n", + huc_fw->major_ver_wanted, huc_fw->minor_ver_wanted); + seq_printf(m, "\tversion found: %d.%d\n", + huc_fw->major_ver_found, huc_fw->minor_ver_found); + seq_printf(m, "\theader: offset is %d; size = %d\n", + huc_fw->header_offset, huc_fw->header_size); + seq_printf(m, "\tuCode: offset is %d; size = %d\n", + huc_fw->ucode_offset, huc_fw->ucode_size); + seq_printf(m, "\tRSA: offset is %d; size = %d\n", + huc_fw->rsa_offset, huc_fw->rsa_size); + + seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); + + return 0; +} + static int i915_guc_load_status_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -4609,6 +4639,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_guc_info", i915_guc_info, 0}, {"i915_guc_load_status", i915_guc_load_status_info, 0}, {"i915_guc_log_dump", i915_guc_log_dump, 0}, + {"i915_huc_load_status", i915_huc_load_status_info, 0}, {"i915_frequency_info", i915_frequency_info, 0}, {"i915_hangcheck_info", i915_hangcheck_info, 0}, {"i915_drpc_info", i915_drpc_info, 0}, -- cgit v1.2.3-55-g7522 From dac84a388528ab002b517213be47f53dc0fd978a Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Wed, 18 Jan 2017 08:05:57 -0800 Subject: drm/i915/huc: Support HuC authentication The HuC authentication is done by host2guc call. The HuC RSA keys are sent to GuC for authentication. v2: rebased on top of drm-tip. Changed name format and upped version 1.7. v3: changed wait_for_atomic to wait_for v4: rebased. Rename intel_huc_auh() to intel_guc_auth_huc() and place the prototype in intel_guc.h,correct the comments. v5: rebased. Moved intel_guc_auth_huc from i915_guc_submission.c to intel_uc.c.Update dev to dev_priv in intel_guc_auth_huc(). Renamed HOST2GUC_ACTION_AUTHENTICATE_HUC TO INTEL_GUC_ACTION_ AUTHENTICATE_HUC v6: rebased. Add newline on DRM_ERRORs that already dont have one. v7: rebased. Replace wait_for with intel_wait_for_register() since the latter employs sleep optimisations for quick responses- as pointed out by Chris Wilson. v8: rebased. Cleanup the intel_guc_auth_huc() by removing checks already performed in earlier functions. Make comments more descriptive. v9: rebased. Changed the bias for pinning the HuC object. Move intel_guc_auth_huc() to intel_huc.c. Change DRM_DEBUGs to DRM_ERRORs in intel_guc_auth_huc(). Add return status to DRM_ERRORs. v10: Remove message not required for the user.. Cc: Chris Wilson Cc: Arkadiusz Hiler Cc: Michal Wajdeczko Tested-by: Xiang Haihao Signed-off-by: Anusha Srivatsa Signed-off-by: Alex Dai Signed-off-by: Peter Antoine Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484755558-1234-5-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_guc_fwif.h | 1 + drivers/gpu/drm/i915/intel_guc_loader.c | 2 ++ drivers/gpu/drm/i915/intel_huc.c | 49 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_uc.h | 1 + 4 files changed, 53 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index ed1ab40854a4..25691f0e4c50 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -505,6 +505,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_ENTER_S_STATE = 0x501, INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003, + INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 8c96a998c867..2f1cf9aea04e 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -524,6 +524,8 @@ int intel_guc_setup(struct drm_i915_private *dev_priv) intel_uc_fw_status_repr(guc_fw->fetch_status), intel_uc_fw_status_repr(guc_fw->load_status)); + intel_guc_auth_huc(dev_priv); + if (i915.enable_guc_submission) { if (i915.guc_log_level >= 0) gen9_enable_guc_interrupts(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 8b84ba835b1f..897ef31d865e 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -284,3 +284,52 @@ void intel_huc_fini(struct drm_i915_private *dev_priv) huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; } +/** + * intel_guc_auth_huc() - authenticate ucode + * @dev_priv: the drm_i915_device + * + * Triggers a HuC fw authentication request to the GuC via intel_guc_action_ + * authenticate_huc interface. + */ +void intel_guc_auth_huc(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + struct intel_huc *huc = &dev_priv->huc; + struct i915_vma *vma; + int ret; + u32 data[2]; + + vma = i915_gem_object_ggtt_pin(huc->fw.obj, NULL, 0, 0, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + if (IS_ERR(vma)) { + DRM_ERROR("failed to pin huc fw object %d\n", + (int)PTR_ERR(vma)); + return; + } + + /* Specify auth action and where public signature is. */ + data[0] = INTEL_GUC_ACTION_AUTHENTICATE_HUC; + data[1] = i915_ggtt_offset(vma) + huc->fw.rsa_offset; + + ret = intel_guc_send(guc, data, ARRAY_SIZE(data)); + if (ret) { + DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); + goto out; + } + + /* Check authentication status, it should be done by now */ + ret = intel_wait_for_register(dev_priv, + HUC_STATUS2, + HUC_FW_VERIFIED, + HUC_FW_VERIFIED, + 50); + + if (ret) { + DRM_ERROR("HuC: Authentication failed %d\n", ret); + goto out; + } + +out: + i915_vma_unpin(vma); +} + diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 4fd411b02ceb..d74f4d3ad8dc 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -226,5 +226,6 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma) void intel_huc_init(struct drm_i915_private *dev_priv); void intel_huc_fini(struct drm_i915_private *dev_priv); int intel_huc_load(struct drm_i915_private *dev_priv); +void intel_guc_auth_huc(struct drm_i915_private *dev_priv); #endif -- cgit v1.2.3-55-g7522 From 5464cd657632502c61c64775dffef1cc7ebd4839 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Wed, 18 Jan 2017 08:05:58 -0800 Subject: drm/i915/get_params: Add HuC status to getparams This patch will allow for getparams to return the status of the HuC. As the HuC has to be validated by the GuC this patch uses the validated status to show when the HuC is loaded and ready for use. You cannot use the loaded status as with the GuC as the HuC is verified after it is loaded and is not usable until it is verified. v2: removed the forewakes as the registers are already force-woken. (T.Ursulin) v3: rebased on top of drm-tip. Removed any reference to intel_huc.h v4: rebased. Rename I915_PARAM_HAS_HUC to I915_PARAM_HUC_STATUS. Remove intel_is_huc_valid() since it is used only in one place. Put the case of I915_PARAM_HAS_HUC() in the right place. v5: rebased. Add a comment to specify that I915_READ(reg) does not read garbage value. The register HUC_STATUS2 is force woken and no rpm is needed. Signed-off-by: Anusha Srivatsa Signed-off-by: Peter Antoine Reviewed-by: Arkadiusz Hiler Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484755558-1234-6-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 7 +++++++ include/uapi/drm/i915_drm.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d1197aa68af5..4ae69ebe166e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -49,6 +49,7 @@ #include "i915_trace.h" #include "i915_vgpu.h" #include "intel_drv.h" +#include "intel_uc.h" static struct drm_driver driver; @@ -315,6 +316,12 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_MIN_EU_IN_POOL: value = INTEL_INFO(dev_priv)->sseu.min_eu_in_pool; break; + case I915_PARAM_HUC_STATUS: + /* The register is already force-woken. We dont need + * any rpm here + */ + value = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; + break; case I915_PARAM_MMAP_GTT_VERSION: /* Though we've started our numbering from 1, and so class all * earlier versions as 0, in effect their value is undefined as diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index da32c2f6c3f9..57093b455db6 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -395,6 +395,7 @@ typedef struct drm_i915_irq_wait { * priorities and the driver will attempt to execute batches in priority order. */ #define I915_PARAM_HAS_SCHEDULER 41 +#define I915_PARAM_HUC_STATUS 42 typedef struct drm_i915_getparam { __s32 param; -- cgit v1.2.3-55-g7522 From be1e341513ca23b0668b7b0f26fa6e2ffc46ba20 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 16 Jan 2017 15:21:27 +0000 Subject: drm/i915: Track pinned vma in intel_plane_state With atomic plane states we are able to track an allocation right from preparation, during use and through to the final free after being swapped out for a new plane. We can couple the VMA we pin for the framebuffer (and its rotation) to this lifetime and avoid all the clumsy lookups in between. v2: Remove residual vma on plane cleanup (Chris) v3: Add a description for the vma destruction in intel_plane_destroy_state (Maarten) References: https://bugs.freedesktop.org/show_bug.cgi?id=98829 Signed-off-by: Chris Wilson Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170116152131.18089-1-chris@chris-wilson.co.uk Acked-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 16 +--- drivers/gpu/drm/i915/intel_atomic_plane.c | 20 +++++ drivers/gpu/drm/i915/intel_display.c | 130 +++++++++++------------------- drivers/gpu/drm/i915/intel_drv.h | 9 ++- drivers/gpu/drm/i915/intel_fbc.c | 52 +++++------- drivers/gpu/drm/i915/intel_fbdev.c | 4 +- drivers/gpu/drm/i915/intel_sprite.c | 8 +- 7 files changed, 104 insertions(+), 135 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0cb448d0e651..b67f419ffba6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1069,6 +1069,8 @@ struct intel_fbc { struct work_struct underrun_work; struct intel_fbc_state_cache { + struct i915_vma *vma; + struct { unsigned int mode_flags; uint32_t hsw_bdw_pixel_rate; @@ -1082,15 +1084,14 @@ struct intel_fbc { } plane; struct { - u64 ilk_ggtt_offset; const struct drm_format_info *format; unsigned int stride; - int fence_reg; - unsigned int tiling_mode; } fb; } state_cache; struct intel_fbc_reg_params { + struct i915_vma *vma; + struct { enum pipe pipe; enum plane plane; @@ -1098,10 +1099,8 @@ struct intel_fbc { } crtc; struct { - u64 ggtt_offset; const struct drm_format_info *format; unsigned int stride; - int fence_reg; } fb; int cfb_size; @@ -3398,13 +3397,6 @@ i915_gem_object_to_ggtt(struct drm_i915_gem_object *obj, return i915_gem_obj_to_vma(obj, &to_i915(obj->base.dev)->ggtt.base, view); } -static inline unsigned long -i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o, - const struct i915_ggtt_view *view) -{ - return i915_ggtt_offset(i915_gem_object_to_ggtt(o, view)); -} - /* i915_gem_fence_reg.c */ int __must_check i915_vma_get_fence(struct i915_vma *vma); int __must_check i915_vma_put_fence(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 4612ffd555a7..41fd94e62d3c 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -85,6 +85,8 @@ intel_plane_duplicate_state(struct drm_plane *plane) __drm_atomic_helper_plane_duplicate_state(plane, state); + intel_state->vma = NULL; + return state; } @@ -100,6 +102,24 @@ void intel_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { + struct i915_vma *vma; + + vma = fetch_and_zero(&to_intel_plane_state(state)->vma); + + /* + * FIXME: Normally intel_cleanup_plane_fb handles destruction of vma. + * We currently don't clear all planes during driver unload, so we have + * to be able to unpin vma here for now. + * + * Normally this can only happen during unload when kmscon is disabled + * and userspace doesn't attempt to set a framebuffer at all. + */ + if (vma) { + mutex_lock(&plane->dev->struct_mutex); + intel_unpin_fb_vma(vma); + mutex_unlock(&plane->dev->struct_mutex); + } + drm_atomic_helper_plane_destroy_state(plane, state); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bf7ebdb0b604..bddadd97db8d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2234,24 +2234,19 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) i915_vma_pin_fence(vma); } + i915_vma_get(vma); err: intel_runtime_pm_put(dev_priv); return vma; } -void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) +void intel_unpin_fb_vma(struct i915_vma *vma) { - struct drm_i915_gem_object *obj = intel_fb_obj(fb); - struct i915_ggtt_view view; - struct i915_vma *vma; - - WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex)); - - intel_fill_fb_ggtt_view(&view, fb, rotation); - vma = i915_gem_object_to_ggtt(obj, &view); + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); i915_vma_unpin_fence(vma); i915_gem_object_unpin_from_display_plane(vma); + i915_vma_put(vma); } static int intel_fb_pitch(const struct drm_framebuffer *fb, int plane, @@ -2744,7 +2739,6 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *c; - struct intel_crtc *i; struct drm_i915_gem_object *obj; struct drm_plane *primary = intel_crtc->base.primary; struct drm_plane_state *plane_state = primary->state; @@ -2769,20 +2763,20 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * an fb with another CRTC instead */ for_each_crtc(dev, c) { - i = to_intel_crtc(c); + struct intel_plane_state *state; if (c == &intel_crtc->base) continue; - if (!i->active) + if (!to_intel_crtc(c)->active) continue; - fb = c->primary->fb; - if (!fb) + state = to_intel_plane_state(c->primary->state); + if (!state->vma) continue; - obj = intel_fb_obj(fb); - if (i915_gem_object_ggtt_offset(obj, NULL) == plane_config->base) { + if (intel_plane_ggtt_offset(state) == plane_config->base) { + fb = c->primary->fb; drm_framebuffer_reference(fb); goto valid_fb; } @@ -2803,6 +2797,19 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, return; valid_fb: + mutex_lock(&dev->struct_mutex); + intel_state->vma = + intel_pin_and_fence_fb_obj(fb, primary->state->rotation); + mutex_unlock(&dev->struct_mutex); + if (IS_ERR(intel_state->vma)) { + DRM_ERROR("failed to pin boot fb on pipe %d: %li\n", + intel_crtc->pipe, PTR_ERR(intel_state->vma)); + + intel_state->vma = NULL; + drm_framebuffer_unreference(fb); + return; + } + plane_state->src_x = 0; plane_state->src_y = 0; plane_state->src_w = fb->width << 16; @@ -3098,13 +3105,13 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]); if (INTEL_GEN(dev_priv) >= 4) { I915_WRITE(DSPSURF(plane), - intel_fb_gtt_offset(fb, rotation) + + intel_plane_ggtt_offset(plane_state) + intel_crtc->dspaddr_offset); I915_WRITE(DSPTILEOFF(plane), (y << 16) | x); I915_WRITE(DSPLINOFF(plane), linear_offset); } else { I915_WRITE(DSPADDR(plane), - intel_fb_gtt_offset(fb, rotation) + + intel_plane_ggtt_offset(plane_state) + intel_crtc->dspaddr_offset); } POSTING_READ(reg); @@ -3201,7 +3208,7 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]); I915_WRITE(DSPSURF(plane), - intel_fb_gtt_offset(fb, rotation) + + intel_plane_ggtt_offset(plane_state) + intel_crtc->dspaddr_offset); if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { I915_WRITE(DSPOFFSET(plane), (y << 16) | x); @@ -3224,23 +3231,6 @@ u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv, } } -u32 intel_fb_gtt_offset(struct drm_framebuffer *fb, - unsigned int rotation) -{ - struct drm_i915_gem_object *obj = intel_fb_obj(fb); - struct i915_ggtt_view view; - struct i915_vma *vma; - - intel_fill_fb_ggtt_view(&view, fb, rotation); - - vma = i915_gem_object_to_ggtt(obj, &view); - if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n", - view.type)) - return -1; - - return i915_ggtt_offset(vma); -} - static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id) { struct drm_device *dev = intel_crtc->base.dev; @@ -3436,7 +3426,7 @@ static void skylake_update_primary_plane(struct drm_plane *plane, } I915_WRITE(PLANE_SURF(pipe, plane_id), - intel_fb_gtt_offset(fb, rotation) + surf_addr); + intel_plane_ggtt_offset(plane_state) + surf_addr); POSTING_READ(PLANE_SURF(pipe, plane_id)); } @@ -11565,7 +11555,7 @@ static void intel_unpin_work_fn(struct work_struct *__work) flush_work(&work->mmio_work); mutex_lock(&dev->struct_mutex); - intel_unpin_fb_obj(work->old_fb, primary->state->rotation); + intel_unpin_fb_vma(work->old_vma); i915_gem_object_put(work->pending_flip_obj); mutex_unlock(&dev->struct_mutex); @@ -12275,8 +12265,10 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, goto cleanup_pending; } - work->gtt_offset = intel_fb_gtt_offset(fb, primary->state->rotation); - work->gtt_offset += intel_crtc->dspaddr_offset; + work->old_vma = to_intel_plane_state(primary->state)->vma; + to_intel_plane_state(primary->state)->vma = vma; + + work->gtt_offset = i915_ggtt_offset(vma) + intel_crtc->dspaddr_offset; work->rotation = crtc->primary->state->rotation; /* @@ -12331,7 +12323,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, cleanup_request: i915_add_request_no_flush(request); cleanup_unpin: - intel_unpin_fb_obj(fb, crtc->primary->state->rotation); + to_intel_plane_state(primary->state)->vma = work->old_vma; + intel_unpin_fb_vma(vma); cleanup_pending: atomic_dec(&intel_crtc->unpin_work_count); unlock: @@ -14832,6 +14825,8 @@ intel_prepare_plane_fb(struct drm_plane *plane, DRM_DEBUG_KMS("failed to pin object\n"); return PTR_ERR(vma); } + + to_intel_plane_state(new_state)->vma = vma; } return 0; @@ -14850,19 +14845,12 @@ void intel_cleanup_plane_fb(struct drm_plane *plane, struct drm_plane_state *old_state) { - struct drm_i915_private *dev_priv = to_i915(plane->dev); - struct intel_plane_state *old_intel_state; - struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb); - struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb); - - old_intel_state = to_intel_plane_state(old_state); - - if (!obj && !old_obj) - return; + struct i915_vma *vma; - if (old_obj && (plane->type != DRM_PLANE_TYPE_CURSOR || - !INTEL_INFO(dev_priv)->cursor_needs_physical)) - intel_unpin_fb_obj(old_state->fb, old_state->rotation); + /* Should only be called after a successful intel_prepare_plane_fb()! */ + vma = fetch_and_zero(&to_intel_plane_state(old_state)->vma); + if (vma) + intel_unpin_fb_vma(vma); } int @@ -15014,6 +15002,7 @@ intel_legacy_cursor_update(struct drm_plane *plane, struct intel_plane *intel_plane = to_intel_plane(plane); struct drm_framebuffer *old_fb; struct drm_crtc_state *crtc_state = crtc->state; + struct i915_vma *old_vma; /* * When crtc is inactive or there is a modeset pending, @@ -15085,9 +15074,12 @@ intel_legacy_cursor_update(struct drm_plane *plane, ret = PTR_ERR(vma); goto out_unlock; } + + to_intel_plane_state(new_plane_state)->vma = vma; } old_fb = old_plane_state->fb; + old_vma = to_intel_plane_state(old_plane_state)->vma; i915_gem_track_fb(intel_fb_obj(old_fb), intel_fb_obj(fb), intel_plane->frontbuffer_bit); @@ -15097,6 +15089,7 @@ intel_legacy_cursor_update(struct drm_plane *plane, *to_intel_plane_state(old_plane_state) = *to_intel_plane_state(new_plane_state); new_plane_state->fence = NULL; new_plane_state->fb = old_fb; + to_intel_plane_state(new_plane_state)->vma = old_vma; intel_plane->update_plane(plane, to_intel_crtc_state(crtc->state), @@ -15335,7 +15328,7 @@ intel_update_cursor_plane(struct drm_plane *plane, if (!obj) addr = 0; else if (!INTEL_INFO(dev_priv)->cursor_needs_physical) - addr = i915_gem_object_ggtt_offset(obj, NULL); + addr = intel_plane_ggtt_offset(state); else addr = obj->phys_handle->busaddr; @@ -17224,41 +17217,12 @@ void intel_display_resume(struct drm_device *dev) void intel_modeset_gem_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *c; - struct drm_i915_gem_object *obj; intel_init_gt_powersave(dev_priv); intel_modeset_init_hw(dev); intel_setup_overlay(dev_priv); - - /* - * Make sure any fbs we allocated at startup are properly - * pinned & fenced. When we do the allocation it's too early - * for this. - */ - for_each_crtc(dev, c) { - struct i915_vma *vma; - - obj = intel_fb_obj(c->primary->fb); - if (obj == NULL) - continue; - - mutex_lock(&dev->struct_mutex); - vma = intel_pin_and_fence_fb_obj(c->primary->fb, - c->primary->state->rotation); - mutex_unlock(&dev->struct_mutex); - if (IS_ERR(vma)) { - DRM_ERROR("failed to pin boot fb on pipe %d\n", - to_intel_crtc(c)->pipe); - drm_framebuffer_unreference(c->primary->fb); - c->primary->fb = NULL; - c->primary->crtc = c->primary->state->crtc = NULL; - update_state_fb(c->primary); - c->state->plane_mask &= ~(1 << drm_plane_index(c->primary)); - } - } } int intel_connector_register(struct drm_connector *connector) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 84258df3e8f1..0cec0013ace0 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -376,6 +376,7 @@ struct intel_atomic_state { struct intel_plane_state { struct drm_plane_state base; struct drm_rect clip; + struct i915_vma *vma; struct { u32 offset; @@ -1067,6 +1068,7 @@ struct intel_flip_work { struct work_struct mmio_work; struct drm_crtc *crtc; + struct i915_vma *old_vma; struct drm_framebuffer *old_fb; struct drm_i915_gem_object *pending_flip_obj; struct drm_pending_vblank_event *event; @@ -1302,7 +1304,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx); struct i915_vma * intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); -void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); +void intel_unpin_fb_vma(struct i915_vma *vma); struct drm_framebuffer * __intel_framebuffer_create(struct drm_device *dev, struct drm_mode_fb_cmd2 *mode_cmd, @@ -1391,7 +1393,10 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode, int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); -u32 intel_fb_gtt_offset(struct drm_framebuffer *fb, unsigned int rotation); +static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state) +{ + return i915_ggtt_offset(state->vma); +} u32 skl_plane_ctl_format(uint32_t pixel_format); u32 skl_plane_ctl_tiling(uint64_t fb_modifier); diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 98cb85c88aff..89fe5c8464df 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -173,7 +173,7 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv) if (IS_I945GM(dev_priv)) fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */ fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT; - fbc_ctl |= params->fb.fence_reg; + fbc_ctl |= params->vma->fence->id; I915_WRITE(FBC_CONTROL, fbc_ctl); } @@ -193,8 +193,8 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv) else dpfc_ctl |= DPFC_CTL_LIMIT_1X; - if (params->fb.fence_reg != I915_FENCE_REG_NONE) { - dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fb.fence_reg; + if (params->vma->fence) { + dpfc_ctl |= DPFC_CTL_FENCE_EN | params->vma->fence->id; I915_WRITE(DPFC_FENCE_YOFF, params->crtc.fence_y_offset); } else { I915_WRITE(DPFC_FENCE_YOFF, 0); @@ -251,13 +251,14 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) break; } - if (params->fb.fence_reg != I915_FENCE_REG_NONE) { + if (params->vma->fence) { dpfc_ctl |= DPFC_CTL_FENCE_EN; if (IS_GEN5(dev_priv)) - dpfc_ctl |= params->fb.fence_reg; + dpfc_ctl |= params->vma->fence->id; if (IS_GEN6(dev_priv)) { I915_WRITE(SNB_DPFC_CTL_SA, - SNB_CPU_FENCE_ENABLE | params->fb.fence_reg); + SNB_CPU_FENCE_ENABLE | + params->vma->fence->id); I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset); } @@ -269,7 +270,8 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) } I915_WRITE(ILK_DPFC_FENCE_YOFF, params->crtc.fence_y_offset); - I915_WRITE(ILK_FBC_RT_BASE, params->fb.ggtt_offset | ILK_FBC_RT_VALID); + I915_WRITE(ILK_FBC_RT_BASE, + i915_ggtt_offset(params->vma) | ILK_FBC_RT_VALID); /* enable it... */ I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); @@ -319,10 +321,11 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) break; } - if (params->fb.fence_reg != I915_FENCE_REG_NONE) { + if (params->vma->fence) { dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN; I915_WRITE(SNB_DPFC_CTL_SA, - SNB_CPU_FENCE_ENABLE | params->fb.fence_reg); + SNB_CPU_FENCE_ENABLE | + params->vma->fence->id); I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset); } else { I915_WRITE(SNB_DPFC_CTL_SA,0); @@ -727,14 +730,6 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc) return effective_w <= max_w && effective_h <= max_h; } -/* XXX replace me when we have VMA tracking for intel_plane_state */ -static int get_fence_id(struct drm_framebuffer *fb) -{ - struct i915_vma *vma = i915_gem_object_to_ggtt(intel_fb_obj(fb), NULL); - - return vma && vma->fence ? vma->fence->id : I915_FENCE_REG_NONE; -} - static void intel_fbc_update_state_cache(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) @@ -743,7 +738,8 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, struct intel_fbc *fbc = &dev_priv->fbc; struct intel_fbc_state_cache *cache = &fbc->state_cache; struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_i915_gem_object *obj; + + cache->vma = NULL; cache->crtc.mode_flags = crtc_state->base.adjusted_mode.flags; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) @@ -758,16 +754,10 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, if (!cache->plane.visible) return; - obj = intel_fb_obj(fb); - - /* FIXME: We lack the proper locking here, so only run this on the - * platforms that need. */ - if (IS_GEN(dev_priv, 5, 6)) - cache->fb.ilk_ggtt_offset = i915_gem_object_ggtt_offset(obj, NULL); cache->fb.format = fb->format; cache->fb.stride = fb->pitches[0]; - cache->fb.fence_reg = get_fence_id(fb); - cache->fb.tiling_mode = i915_gem_object_get_tiling(obj); + + cache->vma = plane_state->vma; } static bool intel_fbc_can_activate(struct intel_crtc *crtc) @@ -784,7 +774,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) return false; } - if (!cache->plane.visible) { + if (!cache->vma) { fbc->no_fbc_reason = "primary plane not visible"; return false; } @@ -807,8 +797,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) * so have no fence associated with it) due to aperture constaints * at the time of pinning. */ - if (cache->fb.tiling_mode != I915_TILING_X || - cache->fb.fence_reg == I915_FENCE_REG_NONE) { + if (!cache->vma->fence) { fbc->no_fbc_reason = "framebuffer not tiled or fenced"; return false; } @@ -888,17 +877,16 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc, * zero. */ memset(params, 0, sizeof(*params)); + params->vma = cache->vma; + params->crtc.pipe = crtc->pipe; params->crtc.plane = crtc->plane; params->crtc.fence_y_offset = get_crtc_fence_y_offset(crtc); params->fb.format = cache->fb.format; params->fb.stride = cache->fb.stride; - params->fb.fence_reg = cache->fb.fence_reg; params->cfb_size = intel_fbc_calculate_cfb_size(dev_priv, cache); - - params->fb.ggtt_offset = cache->fb.ilk_ggtt_offset; } static bool intel_fbc_reg_params_equal(struct intel_fbc_reg_params *params1, diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index bb0e9bf80e45..e0d9e72cf3d1 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -284,7 +284,7 @@ static int intelfb_create(struct drm_fb_helper *helper, out_destroy_fbi: drm_fb_helper_release_fbi(helper); out_unpin: - intel_unpin_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); + intel_unpin_fb_vma(vma); out_unlock: mutex_unlock(&dev->struct_mutex); return ret; @@ -549,7 +549,7 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) if (ifbdev->fb) { mutex_lock(&ifbdev->helper.dev->struct_mutex); - intel_unpin_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); + intel_unpin_fb_vma(ifbdev->vma); mutex_unlock(&ifbdev->helper.dev->struct_mutex); drm_framebuffer_remove(&ifbdev->fb->base); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 7031bc733d97..9ef54688872a 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -273,7 +273,7 @@ skl_update_plane(struct drm_plane *drm_plane, I915_WRITE(PLANE_CTL(pipe, plane_id), plane_ctl); I915_WRITE(PLANE_SURF(pipe, plane_id), - intel_fb_gtt_offset(fb, rotation) + surf_addr); + intel_plane_ggtt_offset(plane_state) + surf_addr); POSTING_READ(PLANE_SURF(pipe, plane_id)); } @@ -458,7 +458,7 @@ vlv_update_plane(struct drm_plane *dplane, I915_WRITE(SPSIZE(pipe, plane_id), (crtc_h << 16) | crtc_w); I915_WRITE(SPCNTR(pipe, plane_id), sprctl); I915_WRITE(SPSURF(pipe, plane_id), - intel_fb_gtt_offset(fb, rotation) + sprsurf_offset); + intel_plane_ggtt_offset(plane_state) + sprsurf_offset); POSTING_READ(SPSURF(pipe, plane_id)); } @@ -594,7 +594,7 @@ ivb_update_plane(struct drm_plane *plane, I915_WRITE(SPRSCALE(pipe), sprscale); I915_WRITE(SPRCTL(pipe), sprctl); I915_WRITE(SPRSURF(pipe), - intel_fb_gtt_offset(fb, rotation) + sprsurf_offset); + intel_plane_ggtt_offset(plane_state) + sprsurf_offset); POSTING_READ(SPRSURF(pipe)); } @@ -721,7 +721,7 @@ ilk_update_plane(struct drm_plane *plane, I915_WRITE(DVSSCALE(pipe), dvsscale); I915_WRITE(DVSCNTR(pipe), dvscntr); I915_WRITE(DVSSURF(pipe), - intel_fb_gtt_offset(fb, rotation) + dvssurf_offset); + intel_plane_ggtt_offset(plane_state) + dvssurf_offset); POSTING_READ(DVSSURF(pipe)); } -- cgit v1.2.3-55-g7522 From 718659a63054261d052cd94f2f9502111ff5173f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 16 Jan 2017 15:21:28 +0000 Subject: drm/i915: Rename some warts in the VMA API Whilst writing testcases to exercise the VMA API, some oddities came to light, such as i915_gem_obj_lookup_or_create(). Joonas suggested i915_vma_instance() as a neat replacement, so rename them, move them to i915_vma.c and add some kerneldoc as a sugary bonus. s/i915_gem_obj_to_vma/i915_vma_lookup/ s/i915_gem_obj_lookup_or_create_vma/i915_vma_instance/ Suggested-by: Joonas Lahtinen Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170116152131.18089-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 12 +--- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 45 --------------- drivers/gpu/drm/i915/i915_gem_stolen.c | 2 +- drivers/gpu/drm/i915/i915_vma.c | 90 +++++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_vma.h | 10 ++++ 7 files changed, 103 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b67f419ffba6..566adcc8a0eb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3374,16 +3374,6 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, struct dma_buf *i915_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gem_obj, int flags); -struct i915_vma * -i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view); - -struct i915_vma * -i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view); - static inline struct i915_hw_ppgtt * i915_vm_to_ppgtt(struct i915_address_space *vm) { @@ -3394,7 +3384,7 @@ static inline struct i915_vma * i915_gem_object_to_ggtt(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view) { - return i915_gem_obj_to_vma(obj, &to_i915(obj->base.dev)->ggtt.base, view); + return i915_vma_lookup(obj, &to_i915(obj->base.dev)->ggtt.base, view); } /* i915_gem_fence_reg.c */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d6ec63997ca3..b2f8ac1386a2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3679,7 +3679,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); - vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view); + vma = i915_vma_instance(obj, vm, view); if (IS_ERR(vma)) return vma; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 259fe4aa8d41..c66e90571031 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -184,7 +184,7 @@ eb_lookup_vmas(struct eb_vmas *eb, * from the (obj, vm) we don't run the risk of creating * duplicated vmas for the same vm. */ - vma = i915_gem_obj_lookup_or_create_vma(obj, vm, NULL); + vma = i915_vma_instance(obj, vm, NULL); if (unlikely(IS_ERR(vma))) { DRM_DEBUG("Failed to lookup VMA\n"); ret = PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 64f241bffc80..e808aad203d8 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3362,51 +3362,6 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) i915_ggtt_invalidate(dev_priv); } -struct i915_vma * -i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) -{ - struct rb_node *rb; - - rb = obj->vma_tree.rb_node; - while (rb) { - struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); - long cmp; - - cmp = i915_vma_compare(vma, vm, view); - if (cmp == 0) - return vma; - - if (cmp < 0) - rb = rb->rb_right; - else - rb = rb->rb_left; - } - - return NULL; -} - -struct i915_vma * -i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) -{ - struct i915_vma *vma; - - lockdep_assert_held(&obj->base.dev->struct_mutex); - GEM_BUG_ON(view && !i915_is_ggtt(vm)); - - vma = i915_gem_obj_to_vma(obj, vm, view); - if (!vma) { - vma = i915_vma_create(obj, vm, view); - GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view)); - } - - GEM_BUG_ON(i915_vma_is_closed(vma)); - return vma; -} - static struct scatterlist * rotate_pages(const dma_addr_t *in, unsigned int offset, unsigned int width, unsigned int height, diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 52dbb9bab268..61cc0fcae3d8 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -683,7 +683,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv if (ret) goto err; - vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base, NULL); + vma = i915_vma_instance(obj, &ggtt->base, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err_pages; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index fe93ed1e012f..87273b0137ec 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -146,6 +146,59 @@ __i915_vma_create(struct drm_i915_gem_object *obj, return vma; } +/** + * i915_vma_lookup - finds a matching VMA + * @obj: parent &struct drm_i915_gem_object to be mapped + * @vm: address space in which the mapping is located + * @view: additional mapping requirements + * + * i915_vma_lookup() looks up an existing VMA of the @obj in the @vm with + * the same @view characteristics. + * + * Must be called with struct_mutex held. + * + * Returns the vma if found, or NULL. + */ +struct i915_vma * +i915_vma_lookup(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + struct rb_node *rb; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + + rb = obj->vma_tree.rb_node; + while (rb) { + struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); + long cmp; + + cmp = i915_vma_compare(vma, vm, view); + if (cmp == 0) + return vma; + + if (cmp < 0) + rb = rb->rb_right; + else + rb = rb->rb_left; + } + + return NULL; +} + +/** + * i915_vma_create - creates a VMA + * @obj: parent &struct drm_i915_gem_object to be mapped + * @vm: address space in which the mapping is located + * @view: additional mapping requirements + * + * i915_vma_create() allocates a new VMA of the @obj in the @vm with + * @view characteristics. + * + * Must be called with struct_mutex held. + * + * Returns the vma if found, or an error pointer. + */ struct i915_vma * i915_vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -153,11 +206,46 @@ i915_vma_create(struct drm_i915_gem_object *obj, { lockdep_assert_held(&obj->base.dev->struct_mutex); GEM_BUG_ON(view && !i915_is_ggtt(vm)); - GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view)); + GEM_BUG_ON(i915_vma_lookup(obj, vm, view)); return __i915_vma_create(obj, vm, view); } +/** + * i915_vma_instance - return the singleton instance of the VMA + * @obj: parent &struct drm_i915_gem_object to be mapped + * @vm: address space in which the mapping is located + * @view: additional mapping requirements + * + * i915_vma_instance() looks up an existing VMA of the @obj in the @vm with + * the same @view characteristics. If a match is not found, one is created. + * Once created, the VMA is kept until either the object is freed, or the + * address space is closed. + * + * Must be called with struct_mutex held. + * + * Returns the vma, or an error pointer. + */ +struct i915_vma * +i915_vma_instance(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + struct i915_vma *vma; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + GEM_BUG_ON(view && !i915_is_ggtt(vm)); + GEM_BUG_ON(vm->closed); + + vma = i915_vma_lookup(obj, vm, view); + if (!vma) + vma = i915_vma_create(obj, vm, view); + + GEM_BUG_ON(!IS_ERR(vma) && i915_vma_is_closed(vma)); + GEM_BUG_ON(!IS_ERR(vma) && i915_vma_lookup(obj, vm, view) != vma); + return vma; +} + /** * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. * @vma: VMA to map diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 86b60fb4e954..b3c81190b4a0 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -116,6 +116,16 @@ i915_vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *view); +struct i915_vma * +i915_vma_lookup(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view); + +struct i915_vma * +i915_vma_instance(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view); + void i915_vma_unpin_and_release(struct i915_vma **p_vma); static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) -- cgit v1.2.3-55-g7522 From 4ea9527cc03e73afc6a4ac5979d1ecde43b85cf3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 16 Jan 2017 15:21:29 +0000 Subject: drm/i915: Add a check that the VMA instance we lookup matches the request Just as added paranoia against our future-selves add another check that the lookup/created VMA instance matches the request. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170116152131.18089-3-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_vma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 87273b0137ec..b4d7b51266d2 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -242,6 +242,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj, vma = i915_vma_create(obj, vm, view); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_is_closed(vma)); + GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_lookup(obj, vm, view) != vma); return vma; } -- cgit v1.2.3-55-g7522 From a01cb37affb7ac698ed260c0e31d02af8df6b785 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 16 Jan 2017 15:21:30 +0000 Subject: drm/i915: Remove i915_vma_create from VMA API With the introduce of i915_vma_instance() for obtaining the VMA singleton for a (obj, vm, view) tuple, we can remove the i915_vma_create() in favour of a single entry point. We do incur a lookup onto an empty tree, but the i915_vma_create() were being called infrequently and during initialisation, so the small overhead is negligible. v2: Drop the i915_ prefix from the now static vma_create() function Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170116152131.18089-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/i915_gem_render_state.c | 2 +- drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- drivers/gpu/drm/i915/i915_vma.c | 35 ++++------------------------ drivers/gpu/drm/i915/i915_vma.h | 5 ---- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 4 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++--- 8 files changed, 13 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 0a4728fdecdc..17f90c618208 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -269,7 +269,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, goto err_out; } - vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); if (IS_ERR(vma)) { i915_gem_object_put(obj); ret = PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 63ae7e813335..b42c81b42487 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -200,7 +200,7 @@ int i915_gem_render_state_init(struct intel_engine_cs *engine) goto err_free; } - so->vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + so->vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); if (IS_ERR(so->vma)) { ret = PTR_ERR(so->vma); goto err_obj; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index c3277858cd8c..8ced9e26f075 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -566,7 +566,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) if (IS_ERR(obj)) return ERR_CAST(obj); - vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); if (IS_ERR(vma)) goto err; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index b4d7b51266d2..cb415bfe22d7 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -69,16 +69,14 @@ i915_vma_retire(struct i915_gem_active *active, } static struct i915_vma * -__i915_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) +vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) { struct i915_vma *vma; struct rb_node *rb, **p; int i; - GEM_BUG_ON(vm->closed); - vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); if (vma == NULL) return ERR_PTR(-ENOMEM); @@ -186,31 +184,6 @@ i915_vma_lookup(struct drm_i915_gem_object *obj, return NULL; } -/** - * i915_vma_create - creates a VMA - * @obj: parent &struct drm_i915_gem_object to be mapped - * @vm: address space in which the mapping is located - * @view: additional mapping requirements - * - * i915_vma_create() allocates a new VMA of the @obj in the @vm with - * @view characteristics. - * - * Must be called with struct_mutex held. - * - * Returns the vma if found, or an error pointer. - */ -struct i915_vma * -i915_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) -{ - lockdep_assert_held(&obj->base.dev->struct_mutex); - GEM_BUG_ON(view && !i915_is_ggtt(vm)); - GEM_BUG_ON(i915_vma_lookup(obj, vm, view)); - - return __i915_vma_create(obj, vm, view); -} - /** * i915_vma_instance - return the singleton instance of the VMA * @obj: parent &struct drm_i915_gem_object to be mapped @@ -239,7 +212,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj, vma = i915_vma_lookup(obj, vm, view); if (!vma) - vma = i915_vma_create(obj, vm, view); + vma = vma_create(obj, vm, view); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_is_closed(vma)); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index b3c81190b4a0..82a56193985c 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -111,11 +111,6 @@ struct i915_vma { struct drm_i915_gem_exec_object2 *exec_entry; }; -struct i915_vma * -i915_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view); - struct i915_vma * i915_vma_lookup(struct drm_i915_gem_object *obj, struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 97bbbc3d6aa8..371acf109e34 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -264,7 +264,7 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) return PTR_ERR(obj); } - vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err_unref; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8f8dcd9a9524..432ee495dec2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1225,7 +1225,7 @@ static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size) if (IS_ERR(obj)) return PTR_ERR(obj); - vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err; @@ -2198,7 +2198,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, return PTR_ERR(ctx_obj); } - vma = i915_vma_create(ctx_obj, &ctx->i915->ggtt.base, NULL); + vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto error_deref_obj; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 49fa8006c6a2..69035e4f9b3b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1738,7 +1738,7 @@ static int init_status_page(struct intel_engine_cs *engine) if (ret) goto err; - vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err; @@ -1872,7 +1872,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) /* mark ring buffers as read-only from GPU side by default */ obj->gt_ro = 1; - vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); if (IS_ERR(vma)) goto err; @@ -2462,7 +2462,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, if (IS_ERR(obj)) goto err; - vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); if (IS_ERR(vma)) goto err_obj; -- cgit v1.2.3-55-g7522 From 481a6f7dcf43c73644e483c7b736cafd1bc305a2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 16 Jan 2017 15:21:31 +0000 Subject: drm/i915: Remove i915_gem_object_to_ggtt() With the last user of this convenience wrapper gone, we can kill the wrapper and in the process make the lookup function static. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170116152131.18089-5-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 7 ------- drivers/gpu/drm/i915/i915_vma.c | 27 ++++++--------------------- drivers/gpu/drm/i915/i915_vma.h | 5 ----- 3 files changed, 6 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 566adcc8a0eb..15078f8a577a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3380,13 +3380,6 @@ i915_vm_to_ppgtt(struct i915_address_space *vm) return container_of(vm, struct i915_hw_ppgtt, base); } -static inline struct i915_vma * -i915_gem_object_to_ggtt(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view) -{ - return i915_vma_lookup(obj, &to_i915(obj->base.dev)->ggtt.base, view); -} - /* i915_gem_fence_reg.c */ int __must_check i915_vma_get_fence(struct i915_vma *vma); int __must_check i915_vma_put_fence(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index cb415bfe22d7..635f2635b1f2 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -144,28 +144,13 @@ vma_create(struct drm_i915_gem_object *obj, return vma; } -/** - * i915_vma_lookup - finds a matching VMA - * @obj: parent &struct drm_i915_gem_object to be mapped - * @vm: address space in which the mapping is located - * @view: additional mapping requirements - * - * i915_vma_lookup() looks up an existing VMA of the @obj in the @vm with - * the same @view characteristics. - * - * Must be called with struct_mutex held. - * - * Returns the vma if found, or NULL. - */ -struct i915_vma * -i915_vma_lookup(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) +static struct i915_vma * +vma_lookup(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) { struct rb_node *rb; - lockdep_assert_held(&obj->base.dev->struct_mutex); - rb = obj->vma_tree.rb_node; while (rb) { struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); @@ -210,13 +195,13 @@ i915_vma_instance(struct drm_i915_gem_object *obj, GEM_BUG_ON(view && !i915_is_ggtt(vm)); GEM_BUG_ON(vm->closed); - vma = i915_vma_lookup(obj, vm, view); + vma = vma_lookup(obj, vm, view); if (!vma) vma = vma_create(obj, vm, view); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_is_closed(vma)); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); - GEM_BUG_ON(!IS_ERR(vma) && i915_vma_lookup(obj, vm, view) != vma); + GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma); return vma; } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 82a56193985c..e39d922cfb6f 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -111,11 +111,6 @@ struct i915_vma { struct drm_i915_gem_exec_object2 *exec_entry; }; -struct i915_vma * -i915_vma_lookup(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view); - struct i915_vma * i915_vma_instance(struct drm_i915_gem_object *obj, struct i915_address_space *vm, -- cgit v1.2.3-55-g7522 From 7abbd11f344aa7abe29befb218774a1ea26018ac Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Jan 2017 11:37:49 +0000 Subject: drm/i915: Release temporary load-detect state upon switching After we call drm_atomic_commit() on the load-detect state, we can free our local reference. Upon restore, we only apply and free the previous state. Fixes: 0853695c3ba4 ("drm: Add reference counting to drm_atomic_state") Signed-off-by: Chris Wilson Cc: Chris Wilson Cc: Daniel Vetter Cc: # v4.10-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170119113749.2517-1-chris@chris-wilson.co.uk Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_display.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bddadd97db8d..ca3e7116438c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11276,6 +11276,7 @@ found: } old->restore_state = restore_state; + drm_atomic_state_put(state); /* let the connector get through one full cycle before testing */ intel_wait_for_vblank(dev_priv, intel_crtc->pipe); -- cgit v1.2.3-55-g7522 From a8cd6da0c0d5b9dc7533f1d88098e9852dc2d5aa Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 22 Dec 2016 16:04:41 +0200 Subject: drm/i915: Remove crtc->config usage from intel_modeset_readout_hw_state() crtc->config is on its way out. Let's reduce our dependence on it a little bit by removing it from intel_modeset_readout_hw_state(). Also replace crtc->acttive checks with crtc_state->base.active checks. Cc: Maarten Lankhorst Suggested-by: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20161222140442.7204-1-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 38 +++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ca3e7116438c..f900e30e8d7a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -16985,7 +16985,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) dev_priv->active_crtcs = 0; for_each_intel_crtc(dev, crtc) { - struct intel_crtc_state *crtc_state = crtc->config; + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); __drm_atomic_helper_crtc_destroy_state(&crtc_state->base); memset(crtc_state, 0, sizeof(*crtc_state)); @@ -17004,7 +17005,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) DRM_DEBUG_KMS("[CRTC:%d:%s] hw state readout: %s\n", crtc->base.base.id, crtc->base.name, - enableddisabled(crtc->active)); + enableddisabled(crtc_state->base.active)); } for (i = 0; i < dev_priv->num_shared_dpll; i++) { @@ -17014,7 +17015,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) &pll->state.hw_state); pll->state.crtc_mask = 0; for_each_intel_crtc(dev, crtc) { - if (crtc->active && crtc->config->shared_dpll == pll) + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + + if (crtc_state->base.active && + crtc_state->shared_dpll == pll) pll->state.crtc_mask |= 1 << crtc->pipe; } pll->active_mask = pll->state.crtc_mask; @@ -17027,11 +17032,14 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) pipe = 0; if (encoder->get_hw_state(encoder, &pipe)) { + struct intel_crtc_state *crtc_state; + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + crtc_state = to_intel_crtc_state(crtc->base.state); encoder->base.crtc = &crtc->base; - crtc->config->output_types |= 1 << encoder->type; - encoder->get_config(encoder, crtc->config); + crtc_state->output_types |= 1 << encoder->type; + encoder->get_config(encoder, crtc_state); } else { encoder->base.crtc = NULL; } @@ -17072,14 +17080,16 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) } for_each_intel_crtc(dev, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); int pixclk = 0; - crtc->base.hwmode = crtc->config->base.adjusted_mode; + crtc->base.hwmode = crtc_state->base.adjusted_mode; memset(&crtc->base.mode, 0, sizeof(crtc->base.mode)); - if (crtc->base.state->active) { - intel_mode_from_pipe_config(&crtc->base.mode, crtc->config); - intel_mode_from_pipe_config(&crtc->base.state->adjusted_mode, crtc->config); + if (crtc_state->base.active) { + intel_mode_from_pipe_config(&crtc->base.mode, crtc_state); + intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, crtc_state); WARN_ON(drm_atomic_set_mode_for_crtc(crtc->base.state, &crtc->base.mode)); /* @@ -17091,17 +17101,17 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) * set a flag to indicate that a full recalculation is * needed on the next commit. */ - crtc->base.state->mode.private_flags = I915_MODE_FLAG_INHERITED; + crtc_state->base.mode.private_flags = I915_MODE_FLAG_INHERITED; if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - pixclk = ilk_pipe_pixel_rate(crtc->config); + pixclk = ilk_pipe_pixel_rate(crtc_state); else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - pixclk = crtc->config->base.adjusted_mode.crtc_clock; + pixclk = crtc_state->base.adjusted_mode.crtc_clock; else WARN_ON(dev_priv->display.modeset_calc_cdclk); /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (IS_BROADWELL(dev_priv) && crtc->config->ips_enabled) + if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) pixclk = DIV_ROUND_UP(pixclk * 100, 95); drm_calc_timestamping_constants(&crtc->base, &crtc->base.hwmode); @@ -17110,7 +17120,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) dev_priv->min_pixclk[crtc->pipe] = pixclk; - intel_pipe_config_sanity_check(dev_priv, crtc->config); + intel_pipe_config_sanity_check(dev_priv, crtc_state); } } -- cgit v1.2.3-55-g7522 From e96128235b0fbddff1228b37033077397dbd21d9 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 22 Dec 2016 16:04:42 +0200 Subject: drm/i915: Remove the double handling of 'flags from intel_mode_from_pipe_config() We're trying to copy the flags from the adjusted mode to the passed in mode twice. Once is enough. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20161222140442.7204-2-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f900e30e8d7a..0f4272f98648 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8391,7 +8391,6 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode, mode->type = DRM_MODE_TYPE_DRIVER; mode->clock = pipe_config->base.adjusted_mode.crtc_clock; - mode->flags |= pipe_config->base.adjusted_mode.flags; mode->hsync = drm_mode_hsync(mode); mode->vrefresh = drm_mode_vrefresh(mode); -- cgit v1.2.3-55-g7522 From 7e79a6836cfd1bac20e2be3fafdc3caca27c9d69 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Fri, 20 Jan 2017 09:18:24 +0530 Subject: drm/i915: Set adjustment to zero on Up/Down interrupts if freq is already max/min When we reach the user's RPS limits, stop requesting an adjustment. Even though we will clamp the requested frequency later, we rely on interrupt masking to disable further adjustments in the same direction. Even though it is unlikely (one scenario is a bug in the driver, another is careful manipulation through the uAPI) if we keep exponentially increasing the adjustment value, it will wrap and cause a negative adjustment. Signed-off-by: Chris Wilson Reviewed-by: Sagar Arun Kamble Link: http://patchwork.freedesktop.org/patch/msgid/1484884104-28134-2-git-send-email-sagar.a.kamble@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index ce5663d94839..6fefc34ef602 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1170,6 +1170,9 @@ static void gen6_pm_rps_work(struct work_struct *work) adj *= 2; else /* CHV needs even encode values */ adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1; + + if (new_delay >= dev_priv->rps.max_freq_softlimit) + adj = 0; /* * For better performance, jump directly * to RPe if we're below it. @@ -1191,6 +1194,9 @@ static void gen6_pm_rps_work(struct work_struct *work) adj *= 2; else /* CHV needs even encode values */ adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1; + + if (new_delay <= dev_priv->rps.min_freq_softlimit) + adj = 0; } else { /* unknown event */ adj = 0; } -- cgit v1.2.3-55-g7522 From 7e8d12bc733fc9deeb5d78140cda169030fa2ac5 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Fri, 20 Jan 2017 20:23:46 +0100 Subject: drm/i915/huc: Avoid attempting to authenticate non-existent fw HuC authentication is called even if HuC firmware is not present in the system, leading to NULL ptr dereference on not allocated gem_object. Let's avoid trying to authenticate HuC if its firmware is not loaded successfully. Fixes: dac84a388528 ("drm/i915/huc: Support HuC authentication") v2: Check inside the auth function, split the assert (Michał) v3: Oops, hit send before compiling, s/huc_fw/huc->fw Cc: Anusha Srivatsa Cc: Arkadiusz Hiler Cc: Chris Wilson Reviewed-by: Michal Wajdeczko Signed-off-by: Michał Winiarski Link: http://patchwork.freedesktop.org/patch/msgid/20170120192348.2049-1-michal.winiarski@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_huc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 897ef31d865e..5edd4f7c3c60 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -299,6 +299,9 @@ void intel_guc_auth_huc(struct drm_i915_private *dev_priv) int ret; u32 data[2]; + if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + return; + vma = i915_gem_object_ggtt_pin(huc->fw.obj, NULL, 0, 0, PIN_OFFSET_BIAS | GUC_WOPCM_TOP); if (IS_ERR(vma)) { -- cgit v1.2.3-55-g7522 From 3139b4a3a24c0a84fed1c1041eb254c858087c72 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Fri, 20 Jan 2017 20:23:47 +0100 Subject: drm/i915/huc: Assert that HuC vma is placed in GuC accessible range HuC firmware is mapped at GuC accessible range. Let's add an assert to verify that. Cc: Anusha Srivatsa Cc: Arkadiusz Hiler Cc: Chris Wilson Reviewed-by: Michal Wajdeczko Signed-off-by: Michał Winiarski Link: http://patchwork.freedesktop.org/patch/msgid/20170120192348.2049-2-michal.winiarski@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_huc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 5edd4f7c3c60..c144609425f6 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -312,7 +312,7 @@ void intel_guc_auth_huc(struct drm_i915_private *dev_priv) /* Specify auth action and where public signature is. */ data[0] = INTEL_GUC_ACTION_AUTHENTICATE_HUC; - data[1] = i915_ggtt_offset(vma) + huc->fw.rsa_offset; + data[1] = guc_ggtt_offset(vma) + huc->fw.rsa_offset; ret = intel_guc_send(guc, data, ARRAY_SIZE(data)); if (ret) { -- cgit v1.2.3-55-g7522 From d65415df0708700e306fcbcaece23369de4cb74e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Jan 2017 08:22:10 +0000 Subject: drm/i915: Do an unlocked wait before set-cache-level ioctl Since a change in cache level is likely to trigger an unbind, avoid waiting under the mutex by preemptively doing an unlocked wait. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170119082211.21257-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b2f8ac1386a2..4f23a8f4303e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3397,7 +3397,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_caching *args = data; struct drm_i915_gem_object *obj; enum i915_cache_level level; - int ret; + int ret = 0; switch (args->caching) { case I915_CACHING_NONE: @@ -3422,20 +3422,29 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - ret = i915_mutex_lock_interruptible(dev); + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; + + if (obj->cache_level == level) + goto out; + + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT, + to_rps_client(file)); if (ret) - return ret; + goto out; - obj = i915_gem_object_lookup(file, args->handle); - if (!obj) { - ret = -ENOENT; - goto unlock; - } + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto out; ret = i915_gem_object_set_cache_level(obj, level); - i915_gem_object_put(obj); -unlock: mutex_unlock(&dev->struct_mutex); + +out: + i915_gem_object_put(obj); return ret; } -- cgit v1.2.3-55-g7522 From befedbb7e24935e149640ddfc2b82dab42985e3c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Jan 2017 19:26:55 +0000 Subject: drm/i915: Use common LRU inactive vma bumping for unpin_from_display Now that i915_gem_object_bump_inactive_ggtt() exists, also make use of it for the LRU bumping from i915_gem_object_unpin_from_display() Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170119192659.31789-2-chris@chris-wilson.co.uk Reviewed-by: Jonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4f23a8f4303e..145dc83d302d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3556,8 +3556,7 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) vma->display_alignment = I915_GTT_MIN_ALIGNMENT; /* Bump the LRU to try and avoid premature eviction whilst flipping */ - if (!i915_vma_is_active(vma)) - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + i915_gem_object_bump_inactive_ggtt(vma->obj); i915_vma_unpin(vma); } -- cgit v1.2.3-55-g7522 From 1fcdaa7e72a7666a8367cabd77db092992357328 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Jan 2017 19:26:56 +0000 Subject: drm/i915: Reject vma creation larger than address space Disallow creation of a vma that is larger than the available address space, or triggers an overflow on fence expansion. Testcase: igt/gem_exec_reloc/gtt-32 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170119192659.31789-3-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_vma.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 635f2635b1f2..e58d8799bee2 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -77,7 +77,7 @@ vma_create(struct drm_i915_gem_object *obj, struct rb_node *rb, **p; int i; - vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); + vma = kmem_cache_zalloc(vm->i915->vmas, GFP_KERNEL); if (vma == NULL) return ERR_PTR(-ENOMEM); @@ -85,7 +85,6 @@ vma_create(struct drm_i915_gem_object *obj, for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) init_request_active(&vma->last_read[i], i915_vma_retire); init_request_active(&vma->last_fence, NULL); - list_add(&vma->vm_link, &vm->unbound_list); vma->vm = vm; vma->obj = obj; vma->size = obj->base.size; @@ -107,11 +106,20 @@ vma_create(struct drm_i915_gem_object *obj, } } + if (unlikely(vma->size > vm->total)) + goto err_vma; + if (i915_is_ggtt(vm)) { - GEM_BUG_ON(overflows_type(vma->size, u32)); + if (unlikely(overflows_type(vma->size, u32))) + goto err_vma; + vma->fence_size = i915_gem_fence_size(vm->i915, vma->size, i915_gem_object_get_tiling(obj), i915_gem_object_get_stride(obj)); + if (unlikely(vma->fence_size < vma->size || /* overflow */ + vma->fence_size > vm->total)) + goto err_vma; + GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I915_GTT_MIN_ALIGNMENT)); vma->fence_alignment = i915_gem_fence_alignment(vm->i915, vma->size, @@ -140,8 +148,13 @@ vma_create(struct drm_i915_gem_object *obj, } rb_link_node(&vma->obj_node, rb, p); rb_insert_color(&vma->obj_node, &obj->vma_tree); + list_add(&vma->vm_link, &vm->unbound_list); return vma; + +err_vma: + kmem_cache_free(vm->i915->vmas, vma); + return ERR_PTR(-E2BIG); } static struct i915_vma * -- cgit v1.2.3-55-g7522 From e0216b762a7096639db395a22bdda7d1a7213c0f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Jan 2017 19:26:57 +0000 Subject: drm/i915: Treat an error from i915_vma_instance() as unlikely When pinning into the global GTT, an error from creating the VMA is unlikely, so mark it so. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170119192659.31789-4-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 145dc83d302d..a07b62732923 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3688,7 +3688,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); vma = i915_vma_instance(obj, vm, view); - if (IS_ERR(vma)) + if (unlikely(IS_ERR(vma))) return vma; if (i915_vma_misplaced(vma, size, alignment, flags)) { -- cgit v1.2.3-55-g7522 From 44a0ec0d3bd5ebbe2233b48cc53b5c79b66277b7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Jan 2017 19:26:58 +0000 Subject: drm/i915: Assert the drm_mm_node is allocated when on the VM lists Before moving the vma between the VM active/inactive lists, assert that the node is still allocated. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170119192659.31789-5-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_stolen.c | 2 ++ drivers/gpu/drm/i915/i915_vma.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 61cc0fcae3d8..127d698e7c84 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -702,6 +702,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv goto err_pages; } + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + vma->pages = obj->mm.pages; vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e58d8799bee2..ecb495b1c5d3 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -45,6 +45,7 @@ i915_vma_retire(struct i915_gem_active *active, if (i915_vma_is_active(vma)) return; + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); list_move_tail(&vma->vm_link, &vma->vm->inactive_list); if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) WARN_ON(i915_vma_unbind(vma)); @@ -493,6 +494,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(vma->node.start < start); GEM_BUG_ON(vma->node.start + vma->node.size > end); } + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); -- cgit v1.2.3-55-g7522 From b00ddb27324eee53cbed8be7fc00dc572727bb0e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Jan 2017 19:26:59 +0000 Subject: drm/i915: Assert that created vma has a whole number of pages VMA (and their objects) are supposed to composed of whole pages. Add an assert to catch any invalid construct when we create the VMA. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170119192659.31789-6-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_vma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index ecb495b1c5d3..559966dc3d35 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -110,6 +110,8 @@ vma_create(struct drm_i915_gem_object *obj, if (unlikely(vma->size > vm->total)) goto err_vma; + GEM_BUG_ON(!IS_ALIGNED(vma->size, I915_GTT_PAGE_SIZE)); + if (i915_is_ggtt(vm)) { if (unlikely(overflows_type(vma->size, u32))) goto err_vma; -- cgit v1.2.3-55-g7522 From 6146e6da5c961735dacf9b6c0c8b5f1382193ee2 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 20 Jan 2017 13:51:23 -0800 Subject: drm/i915: reinstate call to trace_i915_vma_bind The call went away in: commit 3b16525cc4c1a43e9053cfdc414356eea24bdfad Author: Chris Wilson Date: Thu Aug 4 16:32:25 2016 +0100 drm/i915: Split insertion/binding of an object into the VM It is useful to have this trace as it pairs nicely with the vma_unbind one to track vma activity. Added inside the i915_vma_bind function (was outside before) to keep a similar placement as trace_i915_vma_unbind. v2: print bind_flags instead of flags (Chris) Cc: Chris Wilson Signed-off-by: Daniele Ceraolo Spurio Link: http://patchwork.freedesktop.org/patch/msgid/1484949083-11430-1-git-send-email-daniele.ceraolospurio@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_vma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 559966dc3d35..307b22ae7791 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -269,6 +269,7 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, return ret; } + trace_i915_vma_bind(vma, bind_flags); ret = vma->vm->bind_vma(vma, cache_level, bind_flags); if (ret) return ret; -- cgit v1.2.3-55-g7522 From add6329c728cdc705e076fc3d1663bc78bb50a66 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Jan 2017 08:27:12 +0100 Subject: drm/i915: Update DRIVER_DATE to 20170123 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 15078f8a577a..ead3b6417c54 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -78,8 +78,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170109" -#define DRIVER_TIMESTAMP 1483953121 +#define DRIVER_DATE "20170123" +#define DRIVER_TIMESTAMP 1485156432 #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ -- cgit v1.2.3-55-g7522