1 files changed, 82 insertions, 70 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index e73d152659a2..16fcb56c232b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -107,14 +107,6 @@ struct amdgpu_pte_update_params {
 	 * DMA addresses to use for mapping, used during VM update by CPU
 	 */
 	dma_addr_t *pages_addr;
-
-	/**
-	 * @kptr:
-	 *
-	 * Kernel pointer of PD/PT BO that needs to be updated,
-	 * used during VM update by CPU
-	 */
-	void *kptr;
 };
 
 /**
@@ -623,6 +615,28 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 	list_add(&entry->tv.head, validated);
 }
 
+void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
+{
+	struct amdgpu_bo *abo;
+	struct amdgpu_vm_bo_base *bo_base;
+
+	if (!amdgpu_bo_is_amdgpu_bo(bo))
+		return;
+
+	if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT)
+		return;
+
+	abo = ttm_to_amdgpu_bo(bo);
+	if (!abo->parent)
+		return;
+	for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) {
+		struct amdgpu_vm *vm = bo_base->vm;
+
+		if (abo->tbo.resv == vm->root.base.bo->tbo.resv)
+			vm->bulk_moveable = false;
+	}
+
+}
 /**
  * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
  *
@@ -638,12 +652,14 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 	struct ttm_bo_global *glob = adev->mman.bdev.glob;
 	struct amdgpu_vm_bo_base *bo_base;
 
+#if 0
 	if (vm->bulk_moveable) {
 		spin_lock(&glob->lru_lock);
 		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
 		spin_unlock(&glob->lru_lock);
 		return;
 	}
+#endif
 
 	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
 
@@ -799,15 +815,22 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 		addr += ats_entries * 8;
 	}
 
-	if (entries)
+	if (entries) {
+		uint64_t value = 0;
+
+		/* Workaround for fault priority problem on GMC9 */
+		if (level == AMDGPU_VM_PTB && adev->asic_type >= CHIP_VEGA10)
+			value = AMDGPU_PTE_EXECUTABLE;
+
 		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
-				      entries, 0, 0);
+				      entries, 0, value);
+	}
 
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 
 	WARN_ON(job->ibs[0].length_dw > 64);
 	r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
-			     AMDGPU_FENCE_OWNER_UNDEFINED, false);
+			     AMDGPU_FENCE_OWNER_KFD, false);
 	if (r)
 		goto error_free;
 
@@ -847,9 +870,6 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	bp->size = amdgpu_vm_bo_size(adev, level);
 	bp->byte_align = AMDGPU_GPU_PAGE_SIZE;
 	bp->domain = AMDGPU_GEM_DOMAIN_VRAM;
-	if (bp->size <= PAGE_SIZE && adev->asic_type >= CHIP_VEGA10 &&
-	    adev->flags & AMD_IS_APU)
-		bp->domain |= AMDGPU_GEM_DOMAIN_GTT;
 	bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain);
 	bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
 		AMDGPU_GEM_CREATE_CPU_GTT_USWC;
@@ -929,10 +949,6 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 		if (r)
 			return r;
 
-		r = amdgpu_vm_clear_bo(adev, vm, pt, cursor.level, ats);
-		if (r)
-			goto error_free_pt;
-
 		if (vm->use_cpu_for_update) {
 			r = amdgpu_bo_kmap(pt, NULL);
 			if (r)
@@ -945,6 +961,10 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
 
 		amdgpu_vm_bo_base_init(&entry->base, vm, pt);
+
+		r = amdgpu_vm_clear_bo(adev, vm, pt, cursor.level, ats);
+		if (r)
+			goto error_free_pt;
 	}
 
 	return 0;
@@ -1314,31 +1334,6 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
 	}
 }
 
-
-/**
- * amdgpu_vm_wait_pd - Wait for PT BOs to be free.
- *
- * @adev: amdgpu_device pointer
- * @vm: related vm
- * @owner: fence owner
- *
- * Returns:
- * 0 on success, errno otherwise.
- */
-static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			     void *owner)
-{
-	struct amdgpu_sync sync;
-	int r;
-
-	amdgpu_sync_create(&sync);
-	amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
-	r = amdgpu_sync_wait(&sync, true);
-	amdgpu_sync_free(&sync);
-
-	return r;
-}
-
 /**
  * amdgpu_vm_update_func - helper to call update function
  *
@@ -1433,7 +1428,8 @@ restart:
 	params.adev = adev;
 
 	if (vm->use_cpu_for_update) {
-		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
+		r = amdgpu_bo_sync_wait(vm->root.base.bo,
+					AMDGPU_FENCE_OWNER_VM, true);
 		if (unlikely(r))
 			return r;
 
@@ -1506,20 +1502,27 @@ error:
 }
 
 /**
- * amdgpu_vm_update_huge - figure out parameters for PTE updates
+ * amdgpu_vm_update_flags - figure out flags for PTE updates
  *
  * Make sure to set the right flags for the PTEs at the desired level.
  */
-static void amdgpu_vm_update_huge(struct amdgpu_pte_update_params *params,
-				  struct amdgpu_bo *bo, unsigned level,
-				  uint64_t pe, uint64_t addr,
-				  unsigned count, uint32_t incr,
-				  uint64_t flags)
+static void amdgpu_vm_update_flags(struct amdgpu_pte_update_params *params,
+				   struct amdgpu_bo *bo, unsigned level,
+				   uint64_t pe, uint64_t addr,
+				   unsigned count, uint32_t incr,
+				   uint64_t flags)
 
 {
 	if (level != AMDGPU_VM_PTB) {
 		flags |= AMDGPU_PDE_PTE;
 		amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
+
+	} else if (params->adev->asic_type >= CHIP_VEGA10 &&
+		   !(flags & AMDGPU_PTE_VALID) &&
+		   !(flags & AMDGPU_PTE_PRT)) {
+
+		/* Workaround for fault priority problem on GMC9 */
+		flags |= AMDGPU_PTE_EXECUTABLE;
 	}
 
 	amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags);
@@ -1676,9 +1679,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 			uint64_t upd_end = min(entry_end, frag_end);
 			unsigned nptes = (upd_end - frag_start) >> shift;
 
-			amdgpu_vm_update_huge(params, pt, cursor.level,
-					      pe_start, dst, nptes, incr,
-					      flags | AMDGPU_PTE_FRAG(frag));
+			amdgpu_vm_update_flags(params, pt, cursor.level,
+					       pe_start, dst, nptes, incr,
+					       flags | AMDGPU_PTE_FRAG(frag));
 
 			pe_start += nptes * 8;
 			dst += (uint64_t)nptes * AMDGPU_GPU_PAGE_SIZE << shift;
@@ -1747,22 +1750,29 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	params.adev = adev;
 	params.vm = vm;
 
-	/* sync to everything on unmapping */
+	/* sync to everything except eviction fences on unmapping */
 	if (!(flags & AMDGPU_PTE_VALID))
-		owner = AMDGPU_FENCE_OWNER_UNDEFINED;
+		owner = AMDGPU_FENCE_OWNER_KFD;
 
 	if (vm->use_cpu_for_update) {
 		/* params.src is used as flag to indicate system Memory */
 		if (pages_addr)
 			params.src = ~0;
 
-		/* Wait for PT BOs to be free. PTs share the same resv. object
+		/* Wait for PT BOs to be idle. PTs share the same resv. object
 		 * as the root PD BO
 		 */
-		r = amdgpu_vm_wait_pd(adev, vm, owner);
+		r = amdgpu_bo_sync_wait(vm->root.base.bo, owner, true);
 		if (unlikely(r))
 			return r;
 
+		/* Wait for any BO move to be completed */
+		if (exclusive) {
+			r = dma_fence_wait(exclusive, true);
+			if (unlikely(r))
+				return r;
+		}
+
 		params.func = amdgpu_vm_cpu_set_ptes;
 		params.pages_addr = pages_addr;
 		return amdgpu_vm_update_ptes(&params, start, last + 1,
@@ -1776,13 +1786,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/*
 	 * reserve space for two commands every (1 << BLOCK_SIZE)
 	 *  entries or 2k dwords (whatever is smaller)
-         *
-         * The second command is for the shadow pagetables.
 	 */
+	ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
+
+	/* The second command is for the shadow pagetables. */
 	if (vm->root.base.bo->shadow)
-		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
-	else
-		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
+		ncmds *= 2;
 
 	/* padding, etc. */
 	ndw = 64;
@@ -1801,10 +1810,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		ndw += ncmds * 10;
 
 		/* extra commands for begin/end fragments */
+		ncmds = 2 * adev->vm_manager.fragment_size;
 		if (vm->root.base.bo->shadow)
-		        ndw += 2 * 10 * adev->vm_manager.fragment_size * 2;
-		else
-		        ndw += 2 * 10 * adev->vm_manager.fragment_size;
+			ncmds *= 2;
+
+		ndw += 10 * ncmds;
 
 		params.func = amdgpu_vm_do_set_ptes;
 	}
@@ -3006,7 +3016,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	}
 	DRM_DEBUG_DRIVER("VM update mode is %s\n",
 			 vm->use_cpu_for_update ? "CPU" : "SDMA");
-	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
+	WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
 		  "CPU update of VM recommended only for large BAR system\n");
 	vm->last_update = NULL;
 
@@ -3025,13 +3035,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (r)
 		goto error_unreserve;
 
+	amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
+
 	r = amdgpu_vm_clear_bo(adev, vm, root,
 			       adev->vm_manager.root_level,
 			       vm->pte_support_ats);
 	if (r)
 		goto error_unreserve;
 
-	amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
 	amdgpu_bo_unreserve(vm->root.base.bo);
 
 	if (pasid) {
@@ -3136,7 +3147,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
 	vm->pte_support_ats = pte_support_ats;
 	DRM_DEBUG_DRIVER("VM update mode is %s\n",
 			 vm->use_cpu_for_update ? "CPU" : "SDMA");
-	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
+	WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
 		  "CPU update of VM recommended only for large BAR system\n");
 
 	if (vm->pasid) {
@@ -3366,14 +3377,15 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
 			 struct amdgpu_task_info *task_info)
 {
 	struct amdgpu_vm *vm;
+	unsigned long flags;
 
-	spin_lock(&adev->vm_manager.pasid_lock);
+	spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
 
 	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
 	if (vm)
 		*task_info = vm->task_info;
 
-	spin_unlock(&adev->vm_manager.pasid_lock);
+	spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
 }
 
 /**