summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915')
-rw-r--r--drivers/gpu/drm/i915/Kconfig2
-rw-r--r--drivers/gpu/drm/i915/Kconfig.debug1
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c16
-rw-r--r--drivers/gpu/drm/i915/gvt/debugfs.c4
-rw-r--r--drivers/gpu/drm/i915/gvt/dmabuf.c19
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.c55
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.h16
-rw-r--r--drivers/gpu/drm/i915/gvt/handlers.c83
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.c24
-rw-r--r--drivers/gpu/drm/i915/gvt/reg.h5
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c50
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.h1
-rw-r--r--drivers/gpu/drm/i915/i915_gem_userptr.c2
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c1
-rw-r--r--drivers/gpu/drm/i915/i915_priolist_types.h5
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h10
-rw-r--r--drivers/gpu/drm/i915/i915_request.c96
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler.c255
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler_types.h3
-rw-r--r--drivers/gpu/drm/i915/intel_breadcrumbs.c78
-rw-r--r--drivers/gpu/drm/i915/intel_context.c1
-rw-r--r--drivers/gpu/drm/i915/intel_context_types.h3
-rw-r--r--drivers/gpu/drm/i915/intel_csr.c18
-rw-r--r--drivers/gpu/drm/i915/intel_display.c61
-rw-r--r--drivers/gpu/drm/i915/intel_drv.h1
-rw-r--r--drivers/gpu/drm/i915/intel_dsi_vbt.c11
-rw-r--r--drivers/gpu/drm/i915/intel_fbc.c4
-rw-r--r--drivers/gpu/drm/i915/intel_guc_submission.c1
-rw-r--r--drivers/gpu/drm/i915/intel_lrc.c2
-rw-r--r--drivers/gpu/drm/i915/intel_pipe_crc.c13
-rw-r--r--drivers/gpu/drm/i915/intel_sdvo.c58
-rw-r--r--drivers/gpu/drm/i915/intel_sdvo_regs.h3
-rw-r--r--drivers/gpu/drm/i915/intel_workarounds.c8
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_lrc.c12
-rw-r--r--drivers/gpu/drm/i915/selftests/lib_sw_fence.h4
35 files changed, 617 insertions, 309 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 148be8e1a090..255f224db64b 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
config DRM_I915
tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics"
depends on DRM
@@ -15,7 +16,6 @@ config DRM_I915
select IRQ_WORK
# i915 depends on ACPI_VIDEO when ACPI is enabled
# but for select to work, need to select ACPI_VIDEO's dependencies, ick
- select BACKLIGHT_LCD_SUPPORT if ACPI
select BACKLIGHT_CLASS_DEVICE if ACPI
select INPUT if ACPI
select ACPI_VIDEO if ACPI
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index ad4d71161dda..04b686d2c2d0 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
config DRM_I915_WERROR
bool "Force GCC to throw an error instead of a warning when compiling"
# As this may inadvertently break the build, only allow the user
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index ab002cfd3cab..de5347725564 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -896,12 +896,16 @@ static int cmd_reg_handler(struct parser_exec_state *s,
}
/* TODO
- * Right now only scan LRI command on KBL and in inhibit context.
- * It's good enough to support initializing mmio by lri command in
- * vgpu inhibit context on KBL.
+ * In order to let workload with inhibit context to generate
+ * correct image data into memory, vregs values will be loaded to
+ * hw via LRIs in the workload with inhibit context. But as
+ * indirect context is loaded prior to LRIs in workload, we don't
+ * want reg values specified in indirect context overwritten by
+ * LRIs in workloads. So, when scanning an indirect context, we
+ * update reg values in it into vregs, so LRIs in workload with
+ * inhibit context will restore with correct values
*/
- if ((IS_KABYLAKE(s->vgpu->gvt->dev_priv)
- || IS_COFFEELAKE(s->vgpu->gvt->dev_priv)) &&
+ if (IS_GEN(gvt->dev_priv, 9) &&
intel_gvt_mmio_is_in_ctx(gvt, offset) &&
!strncmp(cmd, "lri", 3)) {
intel_gvt_hypervisor_read_gpa(s->vgpu,
@@ -2526,7 +2530,7 @@ static const struct cmd_info cmd_info[] = {
0, 12, NULL},
{"VEB_DI_IECP", OP_VEB_DNDI_IECP_STATE, F_LEN_VAR, R_VECS, D_BDW_PLUS,
- 0, 20, NULL},
+ 0, 12, NULL},
};
static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e)
diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c
index 2ec89bcb59f1..8a9606f91e68 100644
--- a/drivers/gpu/drm/i915/gvt/debugfs.c
+++ b/drivers/gpu/drm/i915/gvt/debugfs.c
@@ -196,9 +196,9 @@ DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops,
int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu)
{
struct dentry *ent;
- char name[10] = "";
+ char name[16] = "";
- sprintf(name, "vgpu%d", vgpu->id);
+ snprintf(name, 16, "vgpu%d", vgpu->id);
vgpu->debugfs = debugfs_create_dir(name, vgpu->gvt->debugfs_root);
if (!vgpu->debugfs)
return -ENOMEM;
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index 4e1e425189ba..41c8ebc60c63 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -45,6 +45,7 @@ static int vgpu_gem_get_pages(
int i, ret;
gen8_pte_t __iomem *gtt_entries;
struct intel_vgpu_fb_info *fb_info;
+ u32 page_num;
fb_info = (struct intel_vgpu_fb_info *)obj->gvt_info;
if (WARN_ON(!fb_info))
@@ -54,14 +55,15 @@ static int vgpu_gem_get_pages(
if (unlikely(!st))
return -ENOMEM;
- ret = sg_alloc_table(st, fb_info->size, GFP_KERNEL);
+ page_num = obj->base.size >> PAGE_SHIFT;
+ ret = sg_alloc_table(st, page_num, GFP_KERNEL);
if (ret) {
kfree(st);
return ret;
}
gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
(fb_info->start >> PAGE_SHIFT);
- for_each_sg(st->sgl, sg, fb_info->size, i) {
+ for_each_sg(st->sgl, sg, page_num, i) {
sg->offset = 0;
sg->length = PAGE_SIZE;
sg_dma_address(sg) =
@@ -158,7 +160,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev,
return NULL;
drm_gem_private_object_init(dev, &obj->base,
- info->size << PAGE_SHIFT);
+ roundup(info->size, PAGE_SIZE));
i915_gem_object_init(obj, &intel_vgpu_gem_ops);
obj->read_domains = I915_GEM_DOMAIN_GTT;
@@ -206,11 +208,12 @@ static int vgpu_get_plane_info(struct drm_device *dev,
struct intel_vgpu_fb_info *info,
int plane_id)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_vgpu_primary_plane_format p;
struct intel_vgpu_cursor_plane_format c;
int ret, tile_height = 1;
+ memset(info, 0, sizeof(*info));
+
if (plane_id == DRM_PLANE_TYPE_PRIMARY) {
ret = intel_vgpu_decode_primary_plane(vgpu, &p);
if (ret)
@@ -267,8 +270,7 @@ static int vgpu_get_plane_info(struct drm_device *dev,
return -EINVAL;
}
- info->size = (info->stride * roundup(info->height, tile_height)
- + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ info->size = info->stride * roundup(info->height, tile_height);
if (info->size == 0) {
gvt_vgpu_err("fb size is zero\n");
return -EINVAL;
@@ -278,11 +280,6 @@ static int vgpu_get_plane_info(struct drm_device *dev,
gvt_vgpu_err("Not aligned fb address:0x%llx\n", info->start);
return -EFAULT;
}
- if (((info->start >> PAGE_SHIFT) + info->size) >
- ggtt_total_entries(&dev_priv->ggtt)) {
- gvt_vgpu_err("Invalid GTT offset or size\n");
- return -EFAULT;
- }
if (!intel_gvt_ggtt_validate_range(vgpu, info->start, info->size)) {
gvt_vgpu_err("invalid gma addr\n");
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index c2f7d20f6346..53115bdae12b 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -53,13 +53,19 @@ static int preallocated_oos_pages = 8192;
*/
bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
{
- if ((!vgpu_gmadr_is_valid(vgpu, addr)) || (size
- && !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) {
- gvt_vgpu_err("invalid range gmadr 0x%llx size 0x%x\n",
- addr, size);
- return false;
- }
- return true;
+ if (size == 0)
+ return vgpu_gmadr_is_valid(vgpu, addr);
+
+ if (vgpu_gmadr_is_aperture(vgpu, addr) &&
+ vgpu_gmadr_is_aperture(vgpu, addr + size - 1))
+ return true;
+ else if (vgpu_gmadr_is_hidden(vgpu, addr) &&
+ vgpu_gmadr_is_hidden(vgpu, addr + size - 1))
+ return true;
+
+ gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n",
+ addr, size);
+ return false;
}
/* translate a guest gmadr to host gmadr */
@@ -811,7 +817,7 @@ static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
/* Allocate shadow page table without guest page. */
static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
- struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type)
+ struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
{
struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
struct intel_vgpu_ppgtt_spt *spt = NULL;
@@ -861,7 +867,7 @@ err_free_spt:
/* Allocate shadow page table associated with specific gfn. */
static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
- struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type,
+ struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
unsigned long gfn, bool guest_pde_ips)
{
struct intel_vgpu_ppgtt_spt *spt;
@@ -936,13 +942,22 @@ static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
{
struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
struct intel_vgpu_ppgtt_spt *s;
- intel_gvt_gtt_type_t cur_pt_type;
+ enum intel_gvt_gtt_type cur_pt_type;
GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));
if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
- cur_pt_type = get_next_pt_type(e->type) + 1;
+ cur_pt_type = get_next_pt_type(e->type);
+
+ if (!gtt_type_is_pt(cur_pt_type) ||
+ !gtt_type_is_pt(cur_pt_type + 1)) {
+ WARN(1, "Invalid page table type, cur_pt_type is: %d\n", cur_pt_type);
+ return -EINVAL;
+ }
+
+ cur_pt_type += 1;
+
if (ops->get_pfn(e) ==
vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
return 0;
@@ -1076,6 +1091,11 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
} else {
int type = get_next_pt_type(we->type);
+ if (!gtt_type_is_pt(type)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
if (IS_ERR(spt)) {
ret = PTR_ERR(spt);
@@ -1097,6 +1117,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
err_free_spt:
ppgtt_free_spt(spt);
+ spt = NULL;
err:
gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
spt, we->val64, we->type);
@@ -1855,7 +1876,7 @@ static void vgpu_free_mm(struct intel_vgpu_mm *mm)
* Zero on success, negative error code in pointer if failed.
*/
struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
- intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
+ enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
{
struct intel_gvt *gvt = vgpu->gvt;
struct intel_vgpu_mm *mm;
@@ -2178,7 +2199,8 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
unsigned long gma, gfn;
- struct intel_gvt_gtt_entry e, m;
+ struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
+ struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
dma_addr_t dma_addr;
int ret;
struct intel_gvt_partial_pte *partial_pte, *pos, *n;
@@ -2245,7 +2267,8 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
if (!partial_update && (ops->test_present(&e))) {
gfn = ops->get_pfn(&e);
- m = e;
+ m.val64 = e.val64;
+ m.type = e.type;
/* one PTE update may be issued in multiple writes and the
* first write may not construct a valid gfn
@@ -2309,7 +2332,7 @@ int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
}
static int alloc_scratch_pages(struct intel_vgpu *vgpu,
- intel_gvt_gtt_type_t type)
+ enum intel_gvt_gtt_type type)
{
struct intel_vgpu_gtt *gtt = &vgpu->gtt;
struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
@@ -2594,7 +2617,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
* Zero on success, negative error code if failed.
*/
struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
- intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
+ enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
{
struct intel_vgpu_mm *mm;
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index 32c573aea494..42d0394f0de2 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -95,8 +95,8 @@ struct intel_gvt_gtt {
unsigned long scratch_mfn;
};
-typedef enum {
- GTT_TYPE_INVALID = -1,
+enum intel_gvt_gtt_type {
+ GTT_TYPE_INVALID = 0,
GTT_TYPE_GGTT_PTE,
@@ -124,7 +124,7 @@ typedef enum {
GTT_TYPE_PPGTT_PML4_PT,
GTT_TYPE_MAX,
-} intel_gvt_gtt_type_t;
+};
enum intel_gvt_mm_type {
INTEL_GVT_MM_GGTT,
@@ -148,7 +148,7 @@ struct intel_vgpu_mm {
union {
struct {
- intel_gvt_gtt_type_t root_entry_type;
+ enum intel_gvt_gtt_type root_entry_type;
/*
* The 4 PDPs in ring context. For 48bit addressing,
* only PDP0 is valid and point to PML4. For 32it
@@ -169,7 +169,7 @@ struct intel_vgpu_mm {
};
struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
- intel_gvt_gtt_type_t root_entry_type, u64 pdps[]);
+ enum intel_gvt_gtt_type root_entry_type, u64 pdps[]);
static inline void intel_vgpu_mm_get(struct intel_vgpu_mm *mm)
{
@@ -233,7 +233,7 @@ struct intel_vgpu_ppgtt_spt {
struct intel_vgpu *vgpu;
struct {
- intel_gvt_gtt_type_t type;
+ enum intel_gvt_gtt_type type;
bool pde_ips; /* for 64KB PTEs */
void *vaddr;
struct page *page;
@@ -241,7 +241,7 @@ struct intel_vgpu_ppgtt_spt {
} shadow_page;
struct {
- intel_gvt_gtt_type_t type;
+ enum intel_gvt_gtt_type type;
bool pde_ips; /* for 64KB PTEs */
unsigned long gfn;
unsigned long write_cnt;
@@ -267,7 +267,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
u64 pdps[]);
struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
- intel_gvt_gtt_type_t root_entry_type, u64 pdps[]);
+ enum intel_gvt_gtt_type root_entry_type, u64 pdps[]);
int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]);
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 18f01eeb2510..25f78196b964 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -464,6 +464,8 @@ static i915_reg_t force_nonpriv_white_list[] = {
_MMIO(0x2690),
_MMIO(0x2694),
_MMIO(0x2698),
+ _MMIO(0x2754),
+ _MMIO(0x28a0),
_MMIO(0x4de0),
_MMIO(0x4de4),
_MMIO(0x4dfc),
@@ -1206,7 +1208,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification)
{
- intel_gvt_gtt_type_t root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
+ enum intel_gvt_gtt_type root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
struct intel_vgpu_mm *mm;
u64 *pdps;
@@ -1252,18 +1254,15 @@ static int send_display_ready_uevent(struct intel_vgpu *vgpu, int ready)
static int pvinfo_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
void *p_data, unsigned int bytes)
{
- u32 data;
- int ret;
-
- write_vreg(vgpu, offset, p_data, bytes);
- data = vgpu_vreg(vgpu, offset);
+ u32 data = *(u32 *)p_data;
+ bool invalid_write = false;
switch (offset) {
case _vgtif_reg(display_ready):
send_display_ready_uevent(vgpu, data ? 1 : 0);
break;
case _vgtif_reg(g2v_notify):
- ret = handle_g2v_notification(vgpu, data);
+ handle_g2v_notification(vgpu, data);
break;
/* add xhot and yhot to handled list to avoid error log */
case _vgtif_reg(cursor_x_hot):
@@ -1280,13 +1279,19 @@ static int pvinfo_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
case _vgtif_reg(execlist_context_descriptor_hi):
break;
case _vgtif_reg(rsv5[0])..._vgtif_reg(rsv5[3]):
+ invalid_write = true;
enter_failsafe_mode(vgpu, GVT_FAILSAFE_INSUFFICIENT_RESOURCE);
break;
default:
+ invalid_write = true;
gvt_vgpu_err("invalid pvinfo write offset %x bytes %x data %x\n",
offset, bytes, data);
break;
}
+
+ if (!invalid_write)
+ write_vreg(vgpu, offset, p_data, bytes);
+
return 0;
}
@@ -1364,7 +1369,6 @@ static int dma_ctrl_write(struct intel_vgpu *vgpu, unsigned int offset,
static int gen9_trtte_write(struct intel_vgpu *vgpu, unsigned int offset,
void *p_data, unsigned int bytes)
{
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
u32 trtte = *(u32 *)p_data;
if ((trtte & 1) && (trtte & (1 << 1)) == 0) {
@@ -1373,11 +1377,6 @@ static int gen9_trtte_write(struct intel_vgpu *vgpu, unsigned int offset,
return -EINVAL;
}
write_vreg(vgpu, offset, p_data, bytes);
- /* TRTTE is not per-context */
-
- mmio_hw_access_pre(dev_priv);
- I915_WRITE(_MMIO(offset), vgpu_vreg(vgpu, offset));
- mmio_hw_access_post(dev_priv);
return 0;
}
@@ -1385,15 +1384,6 @@ static int gen9_trtte_write(struct intel_vgpu *vgpu, unsigned int offset,
static int gen9_trtt_chicken_write(struct intel_vgpu *vgpu, unsigned int offset,
void *p_data, unsigned int bytes)
{
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
- u32 val = *(u32 *)p_data;
-
- if (val & 1) {
- /* unblock hw logic */
- mmio_hw_access_pre(dev_priv);
- I915_WRITE(_MMIO(offset), val);
- mmio_hw_access_post(dev_priv);
- }
write_vreg(vgpu, offset, p_data, bytes);
return 0;
}
@@ -1705,8 +1695,22 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
bool enable_execlist;
int ret;
+ (*(u32 *)p_data) &= ~_MASKED_BIT_ENABLE(1);
+ if (IS_COFFEELAKE(vgpu->gvt->dev_priv))
+ (*(u32 *)p_data) &= ~_MASKED_BIT_ENABLE(2);
write_vreg(vgpu, offset, p_data, bytes);
+ if (data & _MASKED_BIT_ENABLE(1)) {
+ enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
+ return 0;
+ }
+
+ if (IS_COFFEELAKE(vgpu->gvt->dev_priv) &&
+ data & _MASKED_BIT_ENABLE(2)) {
+ enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
+ return 0;
+ }
+
/* when PPGTT mode enabled, we will check if guest has called
* pvinfo, if not, we will treat this guest as non-gvtg-aware
* guest, and stop emulating its cfg space, mmio, gtt, etc.
@@ -1788,6 +1792,21 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu,
return 0;
}
+static int csfe_chicken1_mmio_write(struct intel_vgpu *vgpu,
+ unsigned int offset, void *p_data,
+ unsigned int bytes)
+{
+ u32 data = *(u32 *)p_data;
+
+ (*(u32 *)p_data) &= ~_MASKED_BIT_ENABLE(0x18);
+ write_vreg(vgpu, offset, p_data, bytes);
+
+ if (data & _MASKED_BIT_ENABLE(0x10) || data & _MASKED_BIT_ENABLE(0x8))
+ enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
+
+ return 0;
+}
+
#define MMIO_F(reg, s, f, am, rm, d, r, w) do { \
ret = new_mmio_info(gvt, i915_mmio_reg_offset(reg), \
f, s, am, rm, d, r, w); \
@@ -1908,7 +1927,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_DFH(_MMIO(0x20dc), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(_MMIO(0x2088), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(_MMIO(0x20e4), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(FF_SLICE_CS_CHICKEN2, D_ALL,
+ F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(_MMIO(0x2470), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
@@ -3012,7 +3032,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
MMIO_D(CSR_HTP_SKL, D_SKL_PLUS);
MMIO_D(CSR_LAST_WRITE, D_SKL_PLUS);
- MMIO_D(BDW_SCRATCH1, D_SKL_PLUS);
+ MMIO_DFH(BDW_SCRATCH1, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
MMIO_D(SKL_DFSM, D_SKL_PLUS);
MMIO_D(DISPIO_CR_TX_BMU_CR0, D_SKL_PLUS);
@@ -3025,8 +3045,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
MMIO_D(RPM_CONFIG0, D_SKL_PLUS);
MMIO_D(_MMIO(0xd08), D_SKL_PLUS);
MMIO_D(RC6_LOCATION, D_SKL_PLUS);
- MMIO_DFH(GEN7_FF_SLICE_CS_CHICKEN1, D_SKL_PLUS, F_MODE_MASK,
- NULL, NULL);
+ MMIO_DFH(GEN7_FF_SLICE_CS_CHICKEN1, D_SKL_PLUS,
+ F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(GEN9_CS_DEBUG_MODE1, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS,
NULL, NULL);
@@ -3045,7 +3065,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
MMIO_D(_MMIO(0x46520), D_SKL_PLUS);
MMIO_D(_MMIO(0xc403c), D_SKL_PLUS);
- MMIO_D(_MMIO(0xb004), D_SKL_PLUS);
+ MMIO_DFH(GEN8_GARBCNTL, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
MMIO_DH(DMA_CTRL, D_SKL_PLUS, NULL, dma_ctrl_write);
MMIO_D(_MMIO(0x65900), D_SKL_PLUS);
@@ -3074,7 +3094,10 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
MMIO_D(_MMIO(_PLANE_KEYMSK_1(PIPE_C)), D_SKL_PLUS);
MMIO_D(_MMIO(0x44500), D_SKL_PLUS);
- MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+#define CSFE_CHICKEN1_REG(base) _MMIO((base) + 0xD4)
+ MMIO_RING_DFH(CSFE_CHICKEN1_REG, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS,
+ NULL, csfe_chicken1_mmio_write);
+#undef CSFE_CHICKEN1_REG
MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS,
NULL, NULL);
MMIO_DFH(GEN9_WM_CHICKEN3, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS,
@@ -3254,7 +3277,7 @@ static int init_bxt_mmio_info(struct intel_gvt *gvt)
MMIO_D(GEN8_PUSHBUS_ENABLE, D_BXT);
MMIO_D(GEN8_PUSHBUS_SHIFT, D_BXT);
MMIO_D(GEN6_GFXPAUSE, D_BXT);
- MMIO_D(GEN8_L3SQCREG1, D_BXT);
+ MMIO_DFH(GEN8_L3SQCREG1, D_BXT, F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(GEN9_CTX_PREEMPT_REG, D_BXT, F_CMD_ACCESS, NULL, NULL);
@@ -3303,7 +3326,7 @@ void intel_gvt_clean_mmio_info(struct intel_gvt *gvt)
/* Special MMIO blocks. */
static struct gvt_mmio_block mmio_blocks[] = {
{D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL},
- {D_ALL, MCHBAR_MIRROR_REG_BASE, 0x4000, NULL, NULL},
+ {D_ALL, _MMIO(MCHBAR_MIRROR_BASE_SNB), 0x40000, NULL, NULL},
{D_ALL, _MMIO(VGT_PVINFO_PAGE), VGT_PVINFO_SIZE,
pvinfo_mmio_read, pvinfo_mmio_write},
{D_ALL, LGC_PALETTE(PIPE_A, 0), 1024, NULL, NULL},
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index e7e14c842be4..90bb3df0db50 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -108,12 +108,13 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
{RCS0, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
{RCS0, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
{RCS0, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
- {RCS0, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */
- {RCS0, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */
- {RCS0, TRNULLDETCT, 0, false}, /* 0x4de8 */
- {RCS0, TRINVTILEDETCT, 0, false}, /* 0x4dec */
- {RCS0, TRVADR, 0, false}, /* 0x4df0 */
- {RCS0, TRTTE, 0, false}, /* 0x4df4 */
+ {RCS0, TRVATTL3PTRDW(0), 0, true}, /* 0x4de0 */
+ {RCS0, TRVATTL3PTRDW(1), 0, true}, /* 0x4de4 */
+ {RCS0, TRNULLDETCT, 0, true}, /* 0x4de8 */
+ {RCS0, TRINVTILEDETCT, 0, true}, /* 0x4dec */
+ {RCS0, TRVADR, 0, true}, /* 0x4df0 */
+ {RCS0, TRTTE, 0, true}, /* 0x4df4 */
+ {RCS0, _MMIO(0x4dfc), 0, true},
{BCS0, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */
{BCS0, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */
@@ -132,6 +133,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
{RCS0, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */
{RCS0, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */
+ {RCS0, _MMIO(0x20D8), 0xffff, true}, /* 0x20d8 */
{RCS0, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */
{RCS0, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */
@@ -391,10 +393,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
return;
- if (ring_id == RCS0 &&
- (IS_KABYLAKE(dev_priv) ||
- IS_BROXTON(dev_priv) ||
- IS_COFFEELAKE(dev_priv)))
+ if (ring_id == RCS0 && IS_GEN(dev_priv, 9))
return;
if (!pre && !gen9_render_mocs.initialized)
@@ -469,11 +468,10 @@ static void switch_mmio(struct intel_vgpu *pre,
continue;
/*
* No need to do save or restore of the mmio which is in context
- * state image on kabylake, it's initialized by lri command and
+ * state image on gen9, it's initialized by lri command and
* save or restore with context together.
*/
- if ((IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv)
- || IS_COFFEELAKE(dev_priv)) && mmio->in_context)
+ if (IS_GEN(dev_priv, 9) && mmio->in_context)
continue;
// save
diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h
index 3de5b643b266..5b66e14c5b7b 100644
--- a/drivers/gpu/drm/i915/gvt/reg.h
+++ b/drivers/gpu/drm/i915/gvt/reg.h
@@ -102,6 +102,8 @@
#define FORCEWAKE_ACK_MEDIA_GEN9_REG 0x0D88
#define FORCEWAKE_ACK_HSW_REG 0x130044
+#define RB_HEAD_WRAP_CNT_MAX ((1 << 11) - 1)
+#define RB_HEAD_WRAP_CNT_OFF 21
#define RB_HEAD_OFF_MASK ((1U << 21) - (1U << 2))
#define RB_TAIL_OFF_MASK ((1U << 21) - (1U << 3))
#define RB_TAIL_SIZE_MASK ((1U << 21) - (1U << 12))
@@ -126,7 +128,4 @@
#define RING_GFX_MODE(base) _MMIO((base) + 0x29c)
#define VF_GUARDBAND _MMIO(0x83a4)
-/* define the effective range of MCHBAR register on Sandybridge+ */
-#define MCHBAR_MIRROR_REG_BASE _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4000)
-
#endif
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 8998fa5ab198..0f919f0a43d4 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -298,12 +298,29 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
struct i915_request *req = workload->req;
void *shadow_ring_buffer_va;
u32 *cs;
+ int err;
- if ((IS_KABYLAKE(req->i915) || IS_BROXTON(req->i915)
- || IS_COFFEELAKE(req->i915))
- && is_inhibit_context(req->hw_context))
+ if (IS_GEN(req->i915, 9) && is_inhibit_context(req->hw_context))
intel_vgpu_restore_inhibit_context(vgpu, req);
+ /*
+ * To track whether a request has started on HW, we can emit a
+ * breadcrumb at the beginning of the request and check its
+ * timeline's HWSP to see if the breadcrumb has advanced past the
+ * start of this request. Actually, the request must have the
+ * init_breadcrumb if its timeline set has_init_bread_crumb, or the
+ * scheduler might get a wrong state of it during reset. Since the
+ * requests from gvt always set the has_init_breadcrumb flag, here
+ * need to do the emit_init_breadcrumb for all the requests.
+ */
+ if (req->engine->emit_init_breadcrumb) {
+ err = req->engine->emit_init_breadcrumb(req);
+ if (err) {
+ gvt_vgpu_err("fail to emit init breadcrumb\n");
+ return err;
+ }
+ }
+
/* allocate shadow ring buffer */
cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
if (IS_ERR(cs)) {
@@ -795,10 +812,31 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
void *src;
unsigned long context_gpa, context_page_num;
int i;
+ struct drm_i915_private *dev_priv = gvt->dev_priv;
+ u32 ring_base;
+ u32 head, tail;
+ u16 wrap_count;
gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id,
workload->ctx_desc.lrca);
+ head = workload->rb_head;
+ tail = workload->rb_tail;
+ wrap_count = workload->guest_rb_head >> RB_HEAD_WRAP_CNT_OFF;
+
+ if (tail < head) {
+ if (wrap_count == RB_HEAD_WRAP_CNT_MAX)
+ wrap_count = 0;
+ else
+ wrap_count += 1;
+ }
+
+ head = (wrap_count << RB_HEAD_WRAP_CNT_OFF) | tail;
+
+ ring_base = dev_priv->engine[workload->ring_id]->mmio_base;
+ vgpu_vreg_t(vgpu, RING_TAIL(ring_base)) = tail;
+ vgpu_vreg_t(vgpu, RING_HEAD(ring_base)) = head;
+
context_page_num = rq->engine->context_size;
context_page_num = context_page_num >> PAGE_SHIFT;
@@ -1343,7 +1381,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload)
struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
struct intel_vgpu_mm *mm;
struct intel_vgpu *vgpu = workload->vgpu;
- intel_gvt_gtt_type_t root_entry_type;
+ enum intel_gvt_gtt_type root_entry_type;
u64 pdps[GVT_RING_CTX_NR_PDPS];
switch (desc->addressing_mode) {
@@ -1398,6 +1436,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
u64 ring_context_gpa;
u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx;
+ u32 guest_head;
int ret;
ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
@@ -1413,6 +1452,8 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
RING_CTX_OFF(ring_tail.val), &tail, 4);
+ guest_head = head;
+
head &= RB_HEAD_OFF_MASK;
tail &= RB_TAIL_OFF_MASK;
@@ -1445,6 +1486,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
workload->ctx_desc = *desc;
workload->ring_context_gpa = ring_context_gpa;
workload->rb_head = head;
+ workload->guest_rb_head = guest_head;
workload->rb_tail = tail;
workload->rb_start = start;
workload->rb_ctl = ctl;
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index 90c6756f5453..c50d14a9ce85 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -100,6 +100,7 @@ struct intel_vgpu_workload {
struct execlist_ctx_descriptor_format ctx_desc;
struct execlist_ring_context *ring_context;
unsigned long rb_head, rb_tail, rb_ctl, rb_start, rb_len;
+ unsigned long guest_rb_head;
bool restore_inhibit;
struct intel_vgpu_elsp_dwords elsp_dwords;
bool emulate_schedule_in;
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 215bf3fef10c..8079ea3af103 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -122,7 +122,7 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
while (it) {
struct drm_i915_gem_object *obj;
- if (!range->blockable) {
+ if (!mmu_notifier_range_blockable(range)) {
ret = -EAGAIN;
break;
}
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 39a4804091d7..dc4ce694c06a 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3005,6 +3005,7 @@ static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
return gen8_is_valid_mux_addr(dev_priv, addr) ||
+ addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) ||
(addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) &&
addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI));
}
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index cc44ebd3b553..49709de69875 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -20,15 +20,14 @@ enum {
I915_PRIORITY_INVALID = INT_MIN
};
-#define I915_USER_PRIORITY_SHIFT 3
+#define I915_USER_PRIORITY_SHIFT 2
#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
#define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
#define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)
#define I915_PRIORITY_WAIT ((u8)BIT(0))
-#define I915_PRIORITY_NEWCLIENT ((u8)BIT(1))
-#define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(2))
+#define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(1))
#define __NO_PREEMPTION (I915_PRIORITY_WAIT)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b74824f0b5b1..13d6bd4e17b2 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -35,7 +35,7 @@
* macros. Do **not** mass change existing definitions just to update the style.
*
* Layout
- * ''''''
+ * ~~~~~~
*
* Keep helper macros near the top. For example, _PIPE() and friends.
*
@@ -79,7 +79,7 @@
* style. Use lower case in hexadecimal values.
*
* Naming
- * ''''''
+ * ~~~~~~
*
* Try to name registers according to the specs. If the register name changes in
* the specs from platform to another, stick to the original name.
@@ -97,7 +97,7 @@
* suffix to the name. For example, ``_SKL`` or ``_GEN8``.
*
* Examples
- * ''''''''
+ * ~~~~~~~~
*
* (Note that the values in the example are indented using spaces instead of
* TABs to avoid misalignment in generated documentation. Use TABs in the
@@ -1062,6 +1062,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define NOA_DATA _MMIO(0x986C)
#define NOA_WRITE _MMIO(0x9888)
+#define GEN10_NOA_WRITE_HIGH _MMIO(0x9884)
#define _GEN7_PIPEA_DE_LOAD_SL 0x70068
#define _GEN7_PIPEB_DE_LOAD_SL 0x71068
@@ -7620,6 +7621,9 @@ enum {
#define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION (1 << 8)
#define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE (1 << 0)
+#define GEN8_L3CNTLREG _MMIO(0x7034)
+ #define GEN8_ERRDETBCTRL (1 << 9)
+
#define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304)
#define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC (1 << 11)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index b836721d3b13..c88e538b2ef4 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -425,6 +425,26 @@ void __i915_request_submit(struct i915_request *request)
if (i915_gem_context_is_banned(request->gem_context))
i915_request_skip(request, -EIO);
+ /*
+ * Are we using semaphores when the gpu is already saturated?
+ *
+ * Using semaphores incurs a cost in having the GPU poll a
+ * memory location, busywaiting for it to change. The continual
+ * memory reads can have a noticeable impact on the rest of the
+ * system with the extra bus traffic, stalling the cpu as it too
+ * tries to access memory across the bus (perf stat -e bus-cycles).
+ *
+ * If we installed a semaphore on this request and we only submit
+ * the request after the signaler completed, that indicates the
+ * system is overloaded and using semaphores at this time only
+ * increases the amount of work we are doing. If so, we disable
+ * further use of semaphores until we are idle again, whence we
+ * optimistically try again.
+ */
+ if (request->sched.semaphores &&
+ i915_sw_fence_signaled(&request->semaphore))
+ request->hw_context->saturated |= request->sched.semaphores;
+
/* We may be recursing from the signal callback of another i915 fence */
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
@@ -432,6 +452,7 @@ void __i915_request_submit(struct i915_request *request)
set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
+ !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
!i915_request_enable_breadcrumb(request))
intel_engine_queue_breadcrumbs(engine);
@@ -481,15 +502,6 @@ void __i915_request_unsubmit(struct i915_request *request)
/* We may be recursing from the signal callback of another i915 fence */
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
- /*
- * As we do not allow WAIT to preempt inflight requests,
- * once we have executed a request, along with triggering
- * any execution callbacks, we must preserve its ordering
- * within the non-preemptible FIFO.
- */
- BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
- request->sched.attr.priority |= __NO_PREEMPTION;
-
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
i915_request_cancel_breadcrumb(request);
@@ -561,18 +573,7 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
switch (state) {
case FENCE_COMPLETE:
- /*
- * We only check a small portion of our dependencies
- * and so cannot guarantee that there remains no
- * semaphore chain across all. Instead of opting
- * for the full NOSEMAPHORE boost, we go for the
- * smaller (but still preempting) boost of
- * NEWCLIENT. This will be enough to boost over
- * a busywaiting request (as that cannot be
- * NEWCLIENT) without accidentally boosting
- * a busywait over real work elsewhere.
- */
- i915_schedule_bump_priority(request, I915_PRIORITY_NEWCLIENT);
+ i915_schedule_bump_priority(request, I915_PRIORITY_NOSEMAPHORE);
break;
case FENCE_FREE:
@@ -799,6 +800,39 @@ err_unreserve:
}
static int
+i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
+{
+ if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+ return 0;
+
+ signal = list_prev_entry(signal, ring_link);
+ if (i915_timeline_sync_is_later(rq->timeline, &signal->fence))
+ return 0;
+
+ return i915_sw_fence_await_dma_fence(&rq->submit,
+ &signal->fence, 0,
+ I915_FENCE_GFP);
+}
+
+static intel_engine_mask_t
+already_busywaiting(struct i915_request *rq)
+{
+ /*
+ * Polling a semaphore causes bus traffic, delaying other users of
+ * both the GPU and CPU. We want to limit the impact on others,
+ * while taking advantage of early submission to reduce GPU
+ * latency. Therefore we restrict ourselves to not using more
+ * than one semaphore from each source, and not using a semaphore
+ * if we have detected the engine is saturated (i.e. would not be
+ * submitted early and cause bus traffic reading an already passed
+ * semaphore).
+ *
+ * See the are-we-too-late? check in __i915_request_submit().
+ */
+ return rq->sched.semaphores | rq->hw_context->saturated;
+}
+
+static int
emit_semaphore_wait(struct i915_request *to,
struct i915_request *from,
gfp_t gfp)
@@ -811,14 +845,12 @@ emit_semaphore_wait(struct i915_request *to,
GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
/* Just emit the first semaphore we see as request space is limited. */
- if (to->sched.semaphores & from->engine->mask)
+ if (already_busywaiting(to) & from->engine->mask)
return i915_sw_fence_await_dma_fence(&to->submit,
&from->fence, 0,
I915_FENCE_GFP);
- err = i915_sw_fence_await_dma_fence(&to->semaphore,
- &from->fence, 0,
- I915_FENCE_GFP);
+ err = i915_request_await_start(to, from);
if (err < 0)
return err;
@@ -887,8 +919,18 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
&from->fence, 0,
I915_FENCE_GFP);
}
+ if (ret < 0)
+ return ret;
- return ret < 0 ? ret : 0;
+ if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) {
+ ret = i915_sw_fence_await_dma_fence(&to->semaphore,
+ &from->fence, 0,
+ I915_FENCE_GFP);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
}
int
@@ -1179,7 +1221,7 @@ void i915_request_add(struct i915_request *request)
* the bulk clients. (FQ_CODEL)
*/
if (list_empty(&request->sched.signalers_list))
- attr.priority |= I915_PRIORITY_NEWCLIENT;
+ attr.priority |= I915_PRIORITY_WAIT;
engine->schedule(request, &attr);
}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 39bc4f54e272..108f52e1bf35 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -35,109 +35,6 @@ static inline bool node_signaled(const struct i915_sched_node *node)
return i915_request_completed(node_to_request(node));
}
-void i915_sched_node_init(struct i915_sched_node *node)
-{
- INIT_LIST_HEAD(&node->signalers_list);
- INIT_LIST_HEAD(&node->waiters_list);
- INIT_LIST_HEAD(&node->link);
- node->attr.priority = I915_PRIORITY_INVALID;
- node->semaphores = 0;
- node->flags = 0;
-}
-
-static struct i915_dependency *
-i915_dependency_alloc(void)
-{
- return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
-}
-
-static void
-i915_dependency_free(struct i915_dependency *dep)
-{
- kmem_cache_free(global.slab_dependencies, dep);
-}
-
-bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
- struct i915_sched_node *signal,
- struct i915_dependency *dep,
- unsigned long flags)
-{
- bool ret = false;
-
- spin_lock_irq(&schedule_lock);
-
- if (!node_signaled(signal)) {
- INIT_LIST_HEAD(&dep->dfs_link);
- list_add(&dep->wait_link, &signal->waiters_list);
- list_add(&dep->signal_link, &node->signalers_list);
- dep->signaler = signal;
- dep->flags = flags;
-
- /* Keep track of whether anyone on this chain has a semaphore */
- if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
- !node_started(signal))
- node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
-
- ret = true;
- }
-
- spin_unlock_irq(&schedule_lock);
-
- return ret;
-}
-
-int i915_sched_node_add_dependency(struct i915_sched_node *node,
- struct i915_sched_node *signal)
-{
- struct i915_dependency *dep;
-
- dep = i915_dependency_alloc();
- if (!dep)
- return -ENOMEM;
-
- if (!__i915_sched_node_add_dependency(node, signal, dep,
- I915_DEPENDENCY_ALLOC))
- i915_dependency_free(dep);
-
- return 0;
-}
-
-void i915_sched_node_fini(struct i915_sched_node *node)
-{
- struct i915_dependency *dep, *tmp;
-
- GEM_BUG_ON(!list_empty(&node->link));
-
- spin_lock_irq(&schedule_lock);
-
- /*
- * Everyone we depended upon (the fences we wait to be signaled)
- * should retire before us and remove themselves from our list.
- * However, retirement is run independently on each timeline and
- * so we may be called out-of-order.
- */
- list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
- GEM_BUG_ON(!node_signaled(dep->signaler));
- GEM_BUG_ON(!list_empty(&dep->dfs_link));
-
- list_del(&dep->wait_link);
- if (dep->flags & I915_DEPENDENCY_ALLOC)
- i915_dependency_free(dep);
- }
-
- /* Remove ourselves from everyone who depends upon us */
- list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
- GEM_BUG_ON(dep->signaler != node);
- GEM_BUG_ON(!list_empty(&dep->dfs_link));
-
- list_del(&dep->signal_link);
- if (dep->flags & I915_DEPENDENCY_ALLOC)
- i915_dependency_free(dep);
- }
-
- spin_unlock_irq(&schedule_lock);
-}
-
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
return rb_entry(rb, struct i915_priolist, node);
@@ -239,6 +136,11 @@ out:
return &p->requests[idx];
}
+void __i915_priolist_free(struct i915_priolist *p)
+{
+ kmem_cache_free(global.slab_priorities, p);
+}
+
struct sched_cache {
struct list_head *priolist;
};
@@ -273,7 +175,7 @@ static bool inflight(const struct i915_request *rq,
return active->hw_context == rq->hw_context;
}
-static void __i915_schedule(struct i915_request *rq,
+static void __i915_schedule(struct i915_sched_node *node,
const struct i915_sched_attr *attr)
{
struct intel_engine_cs *engine;
@@ -287,13 +189,13 @@ static void __i915_schedule(struct i915_request *rq,
lockdep_assert_held(&schedule_lock);
GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
- if (i915_request_completed(rq))
+ if (node_signaled(node))
return;
- if (prio <= READ_ONCE(rq->sched.attr.priority))
+ if (prio <= READ_ONCE(node->attr.priority))
return;
- stack.signaler = &rq->sched;
+ stack.signaler = node;
list_add(&stack.dfs_link, &dfs);
/*
@@ -344,9 +246,9 @@ static void __i915_schedule(struct i915_request *rq,
* execlists_submit_request()), we can set our own priority and skip
* acquiring the engine locks.
*/
- if (rq->sched.attr.priority == I915_PRIORITY_INVALID) {
- GEM_BUG_ON(!list_empty(&rq->sched.link));
- rq->sched.attr = *attr;
+ if (node->attr.priority == I915_PRIORITY_INVALID) {
+ GEM_BUG_ON(!list_empty(&node->link));
+ node->attr = *attr;
if (stack.dfs_link.next == stack.dfs_link.prev)
return;
@@ -355,15 +257,14 @@ static void __i915_schedule(struct i915_request *rq,
}
memset(&cache, 0, sizeof(cache));
- engine = rq->engine;
+ engine = node_to_request(node)->engine;
spin_lock(&engine->timeline.lock);
/* Fifo and depth-first replacement ensure our deps execute before us */
list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
- struct i915_sched_node *node = dep->signaler;
-
INIT_LIST_HEAD(&dep->dfs_link);
+ node = dep->signaler;
engine = sched_lock_engine(node, engine, &cache);
lockdep_assert_held(&engine->timeline.lock);
@@ -413,13 +314,20 @@ static void __i915_schedule(struct i915_request *rq,
void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
{
spin_lock_irq(&schedule_lock);
- __i915_schedule(rq, attr);
+ __i915_schedule(&rq->sched, attr);
spin_unlock_irq(&schedule_lock);
}
+static void __bump_priority(struct i915_sched_node *node, unsigned int bump)
+{
+ struct i915_sched_attr attr = node->attr;
+
+ attr.priority |= bump;
+ __i915_schedule(node, &attr);
+}
+
void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
{
- struct i915_sched_attr attr;
unsigned long flags;
GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
@@ -428,17 +336,122 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
return;
spin_lock_irqsave(&schedule_lock, flags);
+ __bump_priority(&rq->sched, bump);
+ spin_unlock_irqrestore(&schedule_lock, flags);
+}
- attr = rq->sched.attr;
- attr.priority |= bump;
- __i915_schedule(rq, &attr);
+void i915_sched_node_init(struct i915_sched_node *node)
+{
+ INIT_LIST_HEAD(&node->signalers_list);
+ INIT_LIST_HEAD(&node->waiters_list);
+ INIT_LIST_HEAD(&node->link);
+ node->attr.priority = I915_PRIORITY_INVALID;
+ node->semaphores = 0;
+ node->flags = 0;
+}
- spin_unlock_irqrestore(&schedule_lock, flags);
+static struct i915_dependency *
+i915_dependency_alloc(void)
+{
+ return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
}
-void __i915_priolist_free(struct i915_priolist *p)
+static void
+i915_dependency_free(struct i915_dependency *dep)
{
- kmem_cache_free(global.slab_priorities, p);
+ kmem_cache_free(global.slab_dependencies, dep);
+}
+
+bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
+ struct i915_sched_node *signal,
+ struct i915_dependency *dep,
+ unsigned long flags)
+{
+ bool ret = false;
+
+ spin_lock_irq(&schedule_lock);
+
+ if (!node_signaled(signal)) {
+ INIT_LIST_HEAD(&dep->dfs_link);
+ list_add(&dep->wait_link, &signal->waiters_list);
+ list_add(&dep->signal_link, &node->signalers_list);
+ dep->signaler = signal;
+ dep->flags = flags;
+
+ /* Keep track of whether anyone on this chain has a semaphore */
+ if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
+ !node_started(signal))
+ node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
+
+ /*
+ * As we do not allow WAIT to preempt inflight requests,
+ * once we have executed a request, along with triggering
+ * any execution callbacks, we must preserve its ordering
+ * within the non-preemptible FIFO.
+ */
+ BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
+ if (flags & I915_DEPENDENCY_EXTERNAL)
+ __bump_priority(signal, __NO_PREEMPTION);
+
+ ret = true;
+ }
+
+ spin_unlock_irq(&schedule_lock);
+
+ return ret;
+}
+
+int i915_sched_node_add_dependency(struct i915_sched_node *node,
+ struct i915_sched_node *signal)
+{
+ struct i915_dependency *dep;
+
+ dep = i915_dependency_alloc();
+ if (!dep)
+ return -ENOMEM;
+
+ if (!__i915_sched_node_add_dependency(node, signal, dep,
+ I915_DEPENDENCY_EXTERNAL |
+ I915_DEPENDENCY_ALLOC))
+ i915_dependency_free(dep);
+
+ return 0;
+}
+
+void i915_sched_node_fini(struct i915_sched_node *node)
+{
+ struct i915_dependency *dep, *tmp;
+
+ GEM_BUG_ON(!list_empty(&node->link));
+
+ spin_lock_irq(&schedule_lock);
+
+ /*
+ * Everyone we depended upon (the fences we wait to be signaled)
+ * should retire before us and remove themselves from our list.
+ * However, retirement is run independently on each timeline and
+ * so we may be called out-of-order.
+ */
+ list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
+ GEM_BUG_ON(!node_signaled(dep->signaler));
+ GEM_BUG_ON(!list_empty(&dep->dfs_link));
+
+ list_del(&dep->wait_link);
+ if (dep->flags & I915_DEPENDENCY_ALLOC)
+ i915_dependency_free(dep);
+ }
+
+ /* Remove ourselves from everyone who depends upon us */
+ list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
+ GEM_BUG_ON(dep->signaler != node);
+ GEM_BUG_ON(!list_empty(&dep->dfs_link));
+
+ list_del(&dep->signal_link);
+ if (dep->flags & I915_DEPENDENCY_ALLOC)
+ i915_dependency_free(dep);
+ }
+
+ spin_unlock_irq(&schedule_lock);
}
static void i915_global_scheduler_shrink(void)
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index f1af3916a808..4f2b2eb7c3e5 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -66,7 +66,8 @@ struct i915_dependency {
struct list_head wait_link;
struct list_head dfs_link;
unsigned long flags;
-#define I915_DEPENDENCY_ALLOC BIT(0)
+#define I915_DEPENDENCY_ALLOC BIT(0)
+#define I915_DEPENDENCY_EXTERNAL BIT(1)
};
#endif /* _I915_SCHEDULER_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 3cbffd400b1b..832cb6b1e9bd 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -23,6 +23,7 @@
*/
#include <linux/kthread.h>
+#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>
#include "i915_drv.h"
@@ -80,9 +81,39 @@ static inline bool __request_completed(const struct i915_request *rq)
return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
}
+static bool
+__dma_fence_signal(struct dma_fence *fence)
+{
+ return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
+}
+
+static void
+__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
+{
+ fence->timestamp = timestamp;
+ set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
+ trace_dma_fence_signaled(fence);
+}
+
+static void
+__dma_fence_signal__notify(struct dma_fence *fence)
+{
+ struct dma_fence_cb *cur, *tmp;
+
+ lockdep_assert_held(fence->lock);
+ lockdep_assert_irqs_disabled();
+
+ list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
+ INIT_LIST_HEAD(&cur->node);
+ cur->func(fence, cur);
+ }
+ INIT_LIST_HEAD(&fence->cb_list);
+}
+
void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ const ktime_t timestamp = ktime_get();
struct intel_context *ce, *cn;
struct list_head *pos, *next;
LIST_HEAD(signal);
@@ -104,6 +135,10 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
&rq->fence.flags));
+ clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+
+ if (!__dma_fence_signal(&rq->fence))
+ continue;
/*
* Queue for execution after dropping the signaling
@@ -111,14 +146,6 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
* more signalers to the same context or engine.
*/
i915_request_get(rq);
-
- /*
- * We may race with direct invocation of
- * dma_fence_signal(), e.g. i915_request_retire(),
- * so we need to acquire our reference to the request
- * before we cancel the breadcrumb.
- */
- clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
list_add_tail(&rq->signal_link, &signal);
}
@@ -141,7 +168,12 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
struct i915_request *rq =
list_entry(pos, typeof(*rq), signal_link);
- dma_fence_signal(&rq->fence);
+ __dma_fence_signal__timestamp(&rq->fence, timestamp);
+
+ spin_lock(&rq->lock);
+ __dma_fence_signal__notify(&rq->fence);
+ spin_unlock(&rq->lock);
+
i915_request_put(rq);
}
}
@@ -243,19 +275,17 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
- struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
-
- GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+ lockdep_assert_held(&rq->lock);
+ lockdep_assert_irqs_disabled();
- if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
- return true;
-
- spin_lock(&b->irq_lock);
- if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) &&
- !__request_completed(rq)) {
+ if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
+ struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
struct intel_context *ce = rq->hw_context;
struct list_head *pos;
+ spin_lock(&b->irq_lock);
+ GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+
__intel_breadcrumbs_arm_irq(b);
/*
@@ -284,8 +314,8 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
list_move_tail(&ce->signal_link, &b->signalers);
set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+ spin_unlock(&b->irq_lock);
}
- spin_unlock(&b->irq_lock);
return !__request_completed(rq);
}
@@ -294,9 +324,15 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
- if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
- return;
+ lockdep_assert_held(&rq->lock);
+ lockdep_assert_irqs_disabled();
+ /*
+ * We must wait for b->irq_lock so that we know the interrupt handler
+ * has released its reference to the intel_context and has completed
+ * the DMA_FENCE_FLAG_SIGNALED_BIT/I915_FENCE_FLAG_SIGNAL dance (if
+ * required).
+ */
spin_lock(&b->irq_lock);
if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
struct intel_context *ce = rq->hw_context;
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
index 8931e0fee873..924cc556223a 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -230,6 +230,7 @@ intel_context_init(struct intel_context *ce,
ce->gem_context = ctx;
ce->engine = engine;
ce->ops = engine->cops;
+ ce->saturated = 0;
INIT_LIST_HEAD(&ce->signal_link);
INIT_LIST_HEAD(&ce->signals);
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 68b4ca1611e0..339c7437fe82 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include "i915_active_types.h"
+#include "intel_engine_types.h"
struct i915_gem_context;
struct i915_vma;
@@ -58,6 +59,8 @@ struct intel_context {
atomic_t pin_count;
struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
+ intel_engine_mask_t saturated; /* submitting semaphores too late? */
+
/**
* active_tracker: Active tracker for the external rq activity
* on this intel_context object.
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index f43c2a2563a5..96618af47088 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -303,10 +303,17 @@ static u32 *parse_csr_fw(struct drm_i915_private *dev_priv,
u32 dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes;
u32 i;
u32 *dmc_payload;
+ size_t fsize;
if (!fw)
return NULL;
+ fsize = sizeof(struct intel_css_header) +
+ sizeof(struct intel_package_header) +
+ sizeof(struct intel_dmc_header);
+ if (fsize > fw->size)
+ goto error_truncated;
+
/* Extract CSS Header information*/
css_header = (struct intel_css_header *)fw->data;
if (sizeof(struct intel_css_header) !=
@@ -366,6 +373,9 @@ static u32 *parse_csr_fw(struct drm_i915_private *dev_priv,
/* Convert dmc_offset into number of bytes. By default it is in dwords*/
dmc_offset *= 4;
readcount += dmc_offset;
+ fsize += dmc_offset;
+ if (fsize > fw->size)
+ goto error_truncated;
/* Extract dmc_header information. */
dmc_header = (struct intel_dmc_header *)&fw->data[readcount];
@@ -397,6 +407,10 @@ static u32 *parse_csr_fw(struct drm_i915_private *dev_priv,
/* fw_size is in dwords, so multiplied by 4 to convert into bytes. */
nbytes = dmc_header->fw_size * 4;
+ fsize += nbytes;
+ if (fsize > fw->size)
+ goto error_truncated;
+
if (nbytes > csr->max_fw_size) {
DRM_ERROR("DMC FW too big (%u bytes)\n", nbytes);
return NULL;
@@ -410,6 +424,10 @@ static u32 *parse_csr_fw(struct drm_i915_private *dev_priv,
}
return memcpy(dmc_payload, &fw->data[readcount], nbytes);
+
+error_truncated:
+ DRM_ERROR("Truncated DMC firmware, rejecting.\n");
+ return NULL;
}
static void intel_csr_runtime_pm_get(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3bd40a4a6739..75105a2c59ea 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2432,10 +2432,14 @@ static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier)
* main surface.
*/
static const struct drm_format_info ccs_formats[] = {
- { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, },
- { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, },
- { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, },
- { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, },
+ { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2,
+ .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, },
+ { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2,
+ .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, },
+ { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2,
+ .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, .has_alpha = true, },
+ { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2,
+ .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, .has_alpha = true, },
};
static const struct drm_format_info *
@@ -11942,7 +11946,7 @@ encoder_retry:
return 0;
}
-static bool intel_fuzzy_clock_check(int clock1, int clock2)
+bool intel_fuzzy_clock_check(int clock1, int clock2)
{
int diff;
@@ -12001,9 +12005,6 @@ intel_compare_link_m_n(const struct intel_link_m_n *m_n,
m2_n2->gmch_m, m2_n2->gmch_n, !adjust) &&
intel_compare_m_n(m_n->link_m, m_n->link_n,
m2_n2->link_m, m2_n2->link_n, !adjust)) {
- if (adjust)
- *m2_n2 = *m_n;
-
return true;
}
@@ -12082,6 +12083,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
struct intel_crtc_state *pipe_config,
bool adjust)
{
+ struct intel_crtc *crtc = to_intel_crtc(current_config->base.crtc);
bool ret = true;
bool fixup_inherited = adjust &&
(current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) &&
@@ -12303,6 +12305,14 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
PIPE_CONF_CHECK_X(gmch_pfit.pgm_ratios);
PIPE_CONF_CHECK_X(gmch_pfit.lvds_border_bits);
+ /*
+ * Changing the EDP transcoder input mux
+ * (A_ONOFF vs. A_ON) requires a full modeset.
+ */
+ if (IS_HASWELL(dev_priv) && crtc->pipe == PIPE_A &&
+ current_config->cpu_transcoder == TRANSCODER_EDP)
+ PIPE_CONF_CHECK_BOOL(pch_pfit.enabled);
+
if (!adjust) {
PIPE_CONF_CHECK_I(pipe_src_w);
PIPE_CONF_CHECK_I(pipe_src_h);
@@ -13136,6 +13146,33 @@ static int calc_watermark_data(struct intel_atomic_state *state)
return 0;
}
+static void intel_crtc_check_fastset(struct intel_crtc_state *old_crtc_state,
+ struct intel_crtc_state *new_crtc_state)
+{
+ struct drm_i915_private *dev_priv =
+ to_i915(new_crtc_state->base.crtc->dev);
+
+ if (!intel_pipe_config_compare(dev_priv, old_crtc_state,
+ new_crtc_state, true))
+ return;
+
+ new_crtc_state->base.mode_changed = false;
+ new_crtc_state->update_pipe = true;
+
+ /*
+ * If we're not doing the full modeset we want to
+ * keep the current M/N values as they may be
+ * sufficiently different to the computed values
+ * to cause problems.
+ *
+ * FIXME: should really copy more fuzzy state here
+ */
+ new_crtc_state->fdi_m_n = old_crtc_state->fdi_m_n;
+ new_crtc_state->dp_m_n = old_crtc_state->dp_m_n;
+ new_crtc_state->dp_m2_n2 = old_crtc_state->dp_m2_n2;
+ new_crtc_state->has_drrs = old_crtc_state->has_drrs;
+}
+
/**
* intel_atomic_check - validate state object
* @dev: drm device
@@ -13184,12 +13221,8 @@ static int intel_atomic_check(struct drm_device *dev,
return ret;
}
- if (intel_pipe_config_compare(dev_priv,
- to_intel_crtc_state(old_crtc_state),
- pipe_config, true)) {
- crtc_state->mode_changed = false;
- pipe_config->update_pipe = true;
- }
+ intel_crtc_check_fastset(to_intel_crtc_state(old_crtc_state),
+ pipe_config);
if (needs_modeset(crtc_state))
any_ms = true;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a38b9cff5cd0..e85cd377a652 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1742,6 +1742,7 @@ int vlv_force_pll_on(struct drm_i915_private *dev_priv, enum pipe pipe,
const struct dpll *dpll);
void vlv_force_pll_off(struct drm_i915_private *dev_priv, enum pipe pipe);
int lpt_get_iclkip(struct drm_i915_private *dev_priv);
+bool intel_fuzzy_clock_check(int clock1, int clock2);
/* modesetting asserts */
void assert_panel_unlocked(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c
index 3074448446bc..4b8e48db1843 100644
--- a/drivers/gpu/drm/i915/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c
@@ -853,6 +853,17 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id)
if (mipi_config->target_burst_mode_freq) {
u32 bitrate = intel_dsi_bitrate(intel_dsi);
+ /*
+ * Sometimes the VBT contains a slightly lower clock,
+ * then the bitrate we have calculated, in this case
+ * just replace it with the calculated bitrate.
+ */
+ if (mipi_config->target_burst_mode_freq < bitrate &&
+ intel_fuzzy_clock_check(
+ mipi_config->target_burst_mode_freq,
+ bitrate))
+ mipi_config->target_burst_mode_freq = bitrate;
+
if (mipi_config->target_burst_mode_freq < bitrate) {
DRM_ERROR("Burst mode freq is less than computed\n");
return false;
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index c805a0966395..5679f2fffb7c 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -1280,6 +1280,10 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv)
if (!HAS_FBC(dev_priv))
return 0;
+ /* https://bugs.freedesktop.org/show_bug.cgi?id=108085 */
+ if (IS_GEMINILAKE(dev_priv))
+ return 0;
+
if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9)
return 1;
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 37f60cb8e9e1..46cd0e70aecb 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -23,7 +23,6 @@
*/
#include <linux/circ_buf.h>
-#include <trace/events/dma_fence.h>
#include "intel_guc_submission.h"
#include "intel_lrc_reg.h"
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4e0a351bfbca..11e5a86610bf 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -164,7 +164,7 @@
#define WA_TAIL_DWORDS 2
#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
-#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
+#define ACTIVE_PRIORITY (I915_PRIORITY_NOSEMAPHORE)
static int execlists_context_deferred_alloc(struct intel_context *ce,
struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c
index e94b5b1bc1b7..e7c7be4911c1 100644
--- a/drivers/gpu/drm/i915/intel_pipe_crc.c
+++ b/drivers/gpu/drm/i915/intel_pipe_crc.c
@@ -311,10 +311,17 @@ retry:
pipe_config->base.mode_changed = pipe_config->has_psr;
pipe_config->crc_enabled = enable;
- if (IS_HASWELL(dev_priv) && crtc->pipe == PIPE_A) {
+ if (IS_HASWELL(dev_priv) &&
+ pipe_config->base.active && crtc->pipe == PIPE_A &&
+ pipe_config->cpu_transcoder == TRANSCODER_EDP) {
+ bool old_need_power_well = pipe_config->pch_pfit.enabled ||
+ pipe_config->pch_pfit.force_thru;
+ bool new_need_power_well = pipe_config->pch_pfit.enabled ||
+ enable;
+
pipe_config->pch_pfit.force_thru = enable;
- if (pipe_config->cpu_transcoder == TRANSCODER_EDP &&
- pipe_config->pch_pfit.enabled != enable)
+
+ if (old_need_power_well != new_need_power_well)
pipe_config->base.connectors_changed = true;
}
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 0e3d91d9ef13..9ecfba0a54a1 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -916,6 +916,13 @@ static bool intel_sdvo_set_colorimetry(struct intel_sdvo *intel_sdvo,
return intel_sdvo_set_value(intel_sdvo, SDVO_CMD_SET_COLORIMETRY, &mode, 1);
}
+static bool intel_sdvo_set_audio_state(struct intel_sdvo *intel_sdvo,
+ u8 audio_state)
+{
+ return intel_sdvo_set_value(intel_sdvo, SDVO_CMD_SET_AUDIO_STAT,
+ &audio_state, 1);
+}
+
#if 0
static void intel_sdvo_dump_hdmi_buf(struct intel_sdvo *intel_sdvo)
{
@@ -1487,11 +1494,6 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder,
else
sdvox |= SDVO_PIPE_SEL(crtc->pipe);
- if (crtc_state->has_audio) {
- WARN_ON_ONCE(INTEL_GEN(dev_priv) < 4);
- sdvox |= SDVO_AUDIO_ENABLE;
- }
-
if (INTEL_GEN(dev_priv) >= 4) {
/* done in crtc_mode_set as the dpll_md reg must be written early */
} else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
@@ -1635,8 +1637,13 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder,
if (sdvox & HDMI_COLOR_RANGE_16_235)
pipe_config->limited_color_range = true;
- if (sdvox & SDVO_AUDIO_ENABLE)
- pipe_config->has_audio = true;
+ if (intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_AUDIO_STAT,
+ &val, 1)) {
+ u8 mask = SDVO_AUDIO_ELD_VALID | SDVO_AUDIO_PRESENCE_DETECT;
+
+ if ((val & mask) == mask)
+ pipe_config->has_audio = true;
+ }
if (intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_ENCODE,
&val, 1)) {
@@ -1647,6 +1654,32 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder,
intel_sdvo_get_avi_infoframe(intel_sdvo, pipe_config);
}
+static void intel_sdvo_disable_audio(struct intel_sdvo *intel_sdvo)
+{
+ intel_sdvo_set_audio_state(intel_sdvo, 0);
+}
+
+static void intel_sdvo_enable_audio(struct intel_sdvo *intel_sdvo,
+ const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
+{
+ const struct drm_display_mode *adjusted_mode =
+ &crtc_state->base.adjusted_mode;
+ struct drm_connector *connector = conn_state->connector;
+ u8 *eld = connector->eld;
+
+ eld[6] = drm_av_sync_delay(connector, adjusted_mode) / 2;
+
+ intel_sdvo_set_audio_state(intel_sdvo, 0);
+
+ intel_sdvo_write_infoframe(intel_sdvo, SDVO_HBUF_INDEX_ELD,
+ SDVO_HBUF_TX_DISABLED,
+ eld, drm_eld_size(eld));
+
+ intel_sdvo_set_audio_state(intel_sdvo, SDVO_AUDIO_ELD_VALID |
+ SDVO_AUDIO_PRESENCE_DETECT);
+}
+
static void intel_disable_sdvo(struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *conn_state)
@@ -1656,6 +1689,9 @@ static void intel_disable_sdvo(struct intel_encoder *encoder,
struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
u32 temp;
+ if (old_crtc_state->has_audio)
+ intel_sdvo_disable_audio(intel_sdvo);
+
intel_sdvo_set_active_outputs(intel_sdvo, 0);
if (0)
intel_sdvo_set_encoder_power_state(intel_sdvo,
@@ -1741,6 +1777,9 @@ static void intel_enable_sdvo(struct intel_encoder *encoder,
intel_sdvo_set_encoder_power_state(intel_sdvo,
DRM_MODE_DPMS_ON);
intel_sdvo_set_active_outputs(intel_sdvo, intel_sdvo->attached_output);
+
+ if (pipe_config->has_audio)
+ intel_sdvo_enable_audio(intel_sdvo, pipe_config, conn_state);
}
static enum drm_mode_status
@@ -2603,7 +2642,6 @@ static bool
intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device)
{
struct drm_encoder *encoder = &intel_sdvo->base.base;
- struct drm_i915_private *dev_priv = to_i915(encoder->dev);
struct drm_connector *connector;
struct intel_encoder *intel_encoder = to_intel_encoder(encoder);
struct intel_connector *intel_connector;
@@ -2640,9 +2678,7 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device)
encoder->encoder_type = DRM_MODE_ENCODER_TMDS;
connector->connector_type = DRM_MODE_CONNECTOR_DVID;
- /* gen3 doesn't do the hdmi bits in the SDVO register */
- if (INTEL_GEN(dev_priv) >= 4 &&
- intel_sdvo_is_hdmi_connector(intel_sdvo, device)) {
+ if (intel_sdvo_is_hdmi_connector(intel_sdvo, device)) {
connector->connector_type = DRM_MODE_CONNECTOR_HDMIA;
intel_sdvo_connector->is_hdmi = true;
}
diff --git a/drivers/gpu/drm/i915/intel_sdvo_regs.h b/drivers/gpu/drm/i915/intel_sdvo_regs.h
index db0ed499268a..e9ba3b047f93 100644
--- a/drivers/gpu/drm/i915/intel_sdvo_regs.h
+++ b/drivers/gpu/drm/i915/intel_sdvo_regs.h
@@ -707,6 +707,9 @@ struct intel_sdvo_enhancements_arg {
#define SDVO_CMD_GET_AUDIO_ENCRYPT_PREFER 0x90
#define SDVO_CMD_SET_AUDIO_STAT 0x91
#define SDVO_CMD_GET_AUDIO_STAT 0x92
+ #define SDVO_AUDIO_ELD_VALID (1 << 0)
+ #define SDVO_AUDIO_PRESENCE_DETECT (1 << 1)
+ #define SDVO_AUDIO_CP_READY (1 << 2)
#define SDVO_CMD_SET_HBUF_INDEX 0x93
#define SDVO_HBUF_INDEX_ELD 0
#define SDVO_HBUF_INDEX_AVI_IF 1
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index 9682dd575152..841b8e515f4d 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -37,7 +37,7 @@
* costly and simplifies things. We can revisit this in the future.
*
* Layout
- * ''''''
+ * ~~~~~~
*
* Keep things in this file ordered by WA type, as per the above (context, GT,
* display, register whitelist, batchbuffer). Then, inside each type, keep the
@@ -518,6 +518,12 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
struct drm_i915_private *i915 = engine->i915;
struct i915_wa_list *wal = &engine->ctx_wa_list;
+ /* WaDisableBankHangMode:icl */
+ wa_write(wal,
+ GEN8_L3CNTLREG,
+ intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
+ GEN8_ERRDETBCTRL);
+
/* Wa_1604370585:icl (pre-prod)
* Formerly known as WaPushConstantDereferenceHoldDisable
*/
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index fbee030db940..e8b0b5dbcb2c 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -99,12 +99,14 @@ static int live_busywait_preempt(void *arg)
ctx_hi = kernel_context(i915);
if (!ctx_hi)
goto err_unlock;
- ctx_hi->sched.priority = INT_MAX;
+ ctx_hi->sched.priority =
+ I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
ctx_lo = kernel_context(i915);
if (!ctx_lo)
goto err_ctx_hi;
- ctx_lo->sched.priority = INT_MIN;
+ ctx_lo->sched.priority =
+ I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(obj)) {
@@ -954,12 +956,14 @@ static int live_preempt_hang(void *arg)
ctx_hi = kernel_context(i915);
if (!ctx_hi)
goto err_spin_lo;
- ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
+ ctx_hi->sched.priority =
+ I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
ctx_lo = kernel_context(i915);
if (!ctx_lo)
goto err_ctx_hi;
- ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
+ ctx_lo->sched.priority =
+ I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
for_each_engine(engine, i915, id) {
struct i915_request *rq;
diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h
index 1f9927e10f3a..e54d6bc23dc3 100644
--- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h
+++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h
@@ -1,10 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* lib_sw_fence.h - library routines for testing N:M synchronisation points
*
* Copyright (C) 2017 Intel Corporation
- *
- * This file is released under the GPLv2.
- *
*/
#ifndef _LIB_SW_FENCE_H_