5 files changed, 30 insertions, 38 deletions
diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c
index 7b1e2a549a71..54d96518a131 100644
--- a/drivers/gpu/drm/v3d/v3d_bo.c
+++ b/drivers/gpu/drm/v3d/v3d_bo.c
@@ -227,37 +227,19 @@ v3d_set_mmap_vma_flags(struct vm_area_struct *vma)
 	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
 }
 
-int v3d_gem_fault(struct vm_fault *vmf)
+vm_fault_t v3d_gem_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct drm_gem_object *obj = vma->vm_private_data;
 	struct v3d_bo *bo = to_v3d_bo(obj);
-	unsigned long pfn;
+	pfn_t pfn;
 	pgoff_t pgoff;
-	int ret;
 
 	/* We don't use vmf->pgoff since that has the fake offset: */
 	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
-	pfn = page_to_pfn(bo->pages[pgoff]);
-
-	ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
-
-	switch (ret) {
-	case -EAGAIN:
-	case 0:
-	case -ERESTARTSYS:
-	case -EINTR:
-	case -EBUSY:
-		/*
-		 * EBUSY is ok: this just means that another thread
-		 * already did the job.
-		 */
-		return VM_FAULT_NOPAGE;
-	case -ENOMEM:
-		return VM_FAULT_OOM;
-	default:
-		return VM_FAULT_SIGBUS;
-	}
+	pfn = __pfn_to_pfn_t(page_to_pfn(bo->pages[pgoff]), PFN_DEV);
+
+	return vmf_insert_mixed(vma, vmf->address, pfn);
 }
 
 int v3d_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index f32ac8c98f37..e6fed696ad86 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -2,6 +2,7 @@
 /* Copyright (C) 2015-2018 Broadcom */
 
 #include <linux/reservation.h>
+#include <linux/mm_types.h>
 #include <drm/drmP.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_gem.h>
@@ -183,6 +184,8 @@ struct v3d_job {
 
 	/* GPU virtual addresses of the start/end of the CL job. */
 	u32 start, end;
+
+	u32 timedout_ctca, timedout_ctra;
 };
 
 struct v3d_exec_info {
@@ -252,7 +255,7 @@ int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv);
 int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_priv);
-int v3d_gem_fault(struct vm_fault *vmf);
+vm_fault_t v3d_gem_fault(struct vm_fault *vmf);
 int v3d_mmap(struct file *filp, struct vm_area_struct *vma);
 struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj);
 int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c
index bfe31a89668b..50bfcf9a8a1a 100644
--- a/drivers/gpu/drm/v3d/v3d_fence.c
+++ b/drivers/gpu/drm/v3d/v3d_fence.c
@@ -35,19 +35,7 @@ static const char *v3d_fence_get_timeline_name(struct dma_fence *fence)
 		return "v3d-render";
 }
 
-static bool v3d_fence_enable_signaling(struct dma_fence *fence)
-{
-	return true;
-}
-
 const struct dma_fence_ops v3d_fence_ops = {
 	.get_driver_name = v3d_fence_get_driver_name,
 	.get_timeline_name = v3d_fence_get_timeline_name,
-	.enable_signaling = v3d_fence_enable_signaling,
-	/* Each of our fences gets signaled as complete by the IRQ
-	 * handler, so we rely on the core's tracking of signaling.
-	 */
-	.signaled = NULL,
-	.wait = dma_fence_default_wait,
-	.release = dma_fence_free,
 };
diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h
index fc13282dfc2f..854046565989 100644
--- a/drivers/gpu/drm/v3d/v3d_regs.h
+++ b/drivers/gpu/drm/v3d/v3d_regs.h
@@ -222,6 +222,7 @@
 #define V3D_CLE_CTNCA(n) (V3D_CLE_CT0CA + 4 * n)
 #define V3D_CLE_CT0RA                                  0x00118
 #define V3D_CLE_CT1RA                                  0x0011c
+#define V3D_CLE_CTNRA(n) (V3D_CLE_CT0RA + 4 * n)
 #define V3D_CLE_CT0LC                                  0x00120
 #define V3D_CLE_CT1LC                                  0x00124
 #define V3D_CLE_CT0PC                                  0x00128
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 808bc901f567..a5501581d96b 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -14,8 +14,8 @@
  * to the HW only when it has completed the last one, instead of
  * filling up the CT[01]Q FIFOs with jobs.  Similarly, we use
  * v3d_job_dependency() to manage the dependency between bin and
- * render, instead of having the clients submit jobs with using the
- * HW's semaphores to interlock between them.
+ * render, instead of having the clients submit jobs using the HW's
+ * semaphores to interlock between them.
  */
 
 #include <linux/kthread.h>
@@ -153,7 +153,25 @@ v3d_job_timedout(struct drm_sched_job *sched_job)
 	struct v3d_job *job = to_v3d_job(sched_job);
 	struct v3d_exec_info *exec = job->exec;
 	struct v3d_dev *v3d = exec->v3d;
+	enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
 	enum v3d_queue q;
+	u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
+	u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
+
+	/* If the current address or return address have changed, then
+	 * the GPU has probably made progress and we should delay the
+	 * reset.  This could fail if the GPU got in an infinite loop
+	 * in the CL, but that is pretty unlikely outside of an i-g-t
+	 * testcase.
+	 */
+	if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
+		job->timedout_ctca = ctca;
+		job->timedout_ctra = ctra;
+
+		schedule_delayed_work(&job->base.work_tdr,
+				      job->base.sched->timeout);
+		return;
+	}
 
 	mutex_lock(&v3d->reset_lock);