From 7420f482ea5163bf6dae39a5c7628d5397cd6307 Mon Sep 17 00:00:00 2001
From: Oded Gabbay
Date: Thu, 15 Mar 2018 10:08:35 +0200
Subject: drm/amdkfd: add missing include of mm.h

This patch fixes kernel build in ARCH=frv

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index d7509b706b26..ad7292fc454c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -26,6 +26,7 @@
 #define AMDGPU_AMDKFD_H_INCLUDED
 
 #include <linux/types.h>
+#include <linux/mm.h>
 #include <linux/mmu_context.h>
 #include <kgd_kfd_interface.h>
 #include <drm/ttm/ttm_execbuf_util.h>
-- 
cgit v1.2.3-55-g7522


From 48a44387189ce575cbfe48b5467884b6576cdaf2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Thu, 15 Mar 2018 17:49:40 +0100
Subject: drm/amdkfd: fix uninitialized variable use

When CONFIG_ACPI is disabled, we never initialize the acpi_table
structure in kfd_create_crat_image_virtual:

drivers/gpu/drm/amd/amdkfd/kfd_crat.c: In function 'kfd_create_crat_image_virtual':
drivers/gpu/drm/amd/amdkfd/kfd_crat.c:888:40: error: 'acpi_table' may be used uninitialized in this function [-Werror=maybe-uninitialized]

The undefined behavior also happens for any other acpi_get_table()
failure, but then the compiler can't warn about it.

This adds an error check that prevents the structure from
being used in error, avoiding both the undefined behavior and
the warning about it.

Fixes: 520b8fb755cc ("drm/amdkfd: Add topology support for CPUs")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 7493f47e7fe1..d85112224f1d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -882,7 +882,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
 	crat_table->length = sizeof(struct crat_header);
 
 	status = acpi_get_table("DSDT", 0, &acpi_table);
-	if (status == AE_NOT_FOUND)
+	if (status != AE_OK)
 		pr_warn("DSDT table not found for OEM information\n");
 	else {
 		crat_table->oem_revision = acpi_table->revision;
-- 
cgit v1.2.3-55-g7522


From 5b21d3e5fd2110df5a57a508188308351e698a5a Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Thu, 15 Mar 2018 17:27:40 -0400
Subject: drm/amdgpu: Move KFD-specific fields into struct amdgpu_vm

Remove struct amdkfd_vm and move the fields into struct amdgpu_vm.
This will allow turning a VM created by a DRM render node into a
KFD VM.

v2: Removed vm_context field

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       | 21 ------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 88 +++++++++++-------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h           |  9 +++
 3 files changed, 50 insertions(+), 68 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index ad7292fc454c..6ba3dd689875 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -93,27 +93,6 @@ struct amdkfd_process_info {
 	struct amdgpu_amdkfd_fence *eviction_fence;
 };
 
-/* struct amdkfd_vm -
- * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs
- * belonging to a KFD process. All the VMs belonging to the same process point
- * to the same amdkfd_process_info.
- */
-struct amdkfd_vm {
-	/* Keep base as the first parameter for pointer compatibility between
-	 * amdkfd_vm and amdgpu_vm.
-	 */
-	struct amdgpu_vm base;
-
-	/* List node in amdkfd_process_info.vm_list_head*/
-	struct list_head vm_list_node;
-
-	struct amdgpu_device *adev;
-	/* Points to the KFD process VM info*/
-	struct amdkfd_process_info *process_info;
-
-	uint64_t pd_phys_addr;
-};
-
 int amdgpu_amdkfd_init(void);
 void amdgpu_amdkfd_fini(void);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a12a1654e124..64c33acb6cd8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -333,9 +333,9 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
  * again. Page directories are only updated after updating page
  * tables.
  */
-static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm)
+static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
 {
-	struct amdgpu_bo *pd = vm->base.root.base.bo;
+	struct amdgpu_bo *pd = vm->root.base.bo;
 	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
 	struct amdgpu_vm_parser param;
 	uint64_t addr, flags = AMDGPU_PTE_VALID;
@@ -344,7 +344,7 @@ static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm)
 	param.domain = AMDGPU_GEM_DOMAIN_VRAM;
 	param.wait = false;
 
-	ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate,
+	ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
 					&param);
 	if (ret) {
 		pr_err("amdgpu: failed to validate PT BOs\n");
@@ -357,11 +357,11 @@ static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm)
 		return ret;
 	}
 
-	addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo);
+	addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
 	amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
 	vm->pd_phys_addr = addr;
 
-	if (vm->base.use_cpu_for_update) {
+	if (vm->use_cpu_for_update) {
 		ret = amdgpu_bo_kmap(pd, NULL);
 		if (ret) {
 			pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
@@ -415,14 +415,12 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
  * 4a.  Validate new page tables and directories
  */
 static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
-		struct amdgpu_vm *avm, bool is_aql,
+		struct amdgpu_vm *vm, bool is_aql,
 		struct kfd_bo_va_list **p_bo_va_entry)
 {
 	int ret;
 	struct kfd_bo_va_list *bo_va_entry;
-	struct amdkfd_vm *kvm = container_of(avm,
-					     struct amdkfd_vm, base);
-	struct amdgpu_bo *pd = avm->root.base.bo;
+	struct amdgpu_bo *pd = vm->root.base.bo;
 	struct amdgpu_bo *bo = mem->bo;
 	uint64_t va = mem->va;
 	struct list_head *list_bo_va = &mem->bo_va_list;
@@ -441,10 +439,10 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
 		return -ENOMEM;
 
 	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
-			va + bo_size, avm);
+			va + bo_size, vm);
 
 	/* Add BO to VM internal data structures*/
-	bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo);
+	bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
 	if (!bo_va_entry->bo_va) {
 		ret = -EINVAL;
 		pr_err("Failed to add BO object to VM. ret == %d\n",
@@ -467,28 +465,28 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
 	 * fence, so remove it temporarily.
 	 */
 	amdgpu_amdkfd_remove_eviction_fence(pd,
-					kvm->process_info->eviction_fence,
+					vm->process_info->eviction_fence,
 					NULL, NULL);
 
-	ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo));
+	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
 	if (ret) {
 		pr_err("Failed to allocate pts, err=%d\n", ret);
 		goto err_alloc_pts;
 	}
 
-	ret = vm_validate_pt_pd_bos(kvm);
+	ret = vm_validate_pt_pd_bos(vm);
 	if (ret) {
 		pr_err("validate_pt_pd_bos() failed\n");
 		goto err_alloc_pts;
 	}
 
 	/* Add the eviction fence back */
-	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
+	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
 
 	return 0;
 
 err_alloc_pts:
-	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
+	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
 	amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
 	list_del(&bo_va_entry->bo_list);
 err_vmadd:
@@ -703,7 +701,6 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
 {
 	struct amdgpu_bo_va *bo_va = entry->bo_va;
 	struct amdgpu_vm *vm = bo_va->base.vm;
-	struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base);
 	struct amdgpu_bo *pd = vm->root.base.bo;
 
 	/* Remove eviction fence from PD (and thereby from PTs too as
@@ -713,14 +710,14 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
 	 * trigger the eviction fence.
 	 */
 	amdgpu_amdkfd_remove_eviction_fence(pd,
-					    kvm->process_info->eviction_fence,
+					    vm->process_info->eviction_fence,
 					    NULL, NULL);
 	amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
 
 	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
 
 	/* Add the eviction fence back */
-	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
+	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
 
 	sync_vm_fence(adev, sync, bo_va->last_pt_update);
 
@@ -780,7 +777,7 @@ update_gpuvm_pte_failed:
 
 static int process_validate_vms(struct amdkfd_process_info *process_info)
 {
-	struct amdkfd_vm *peer_vm;
+	struct amdgpu_vm *peer_vm;
 	int ret;
 
 	list_for_each_entry(peer_vm, &process_info->vm_list_head,
@@ -796,12 +793,12 @@ static int process_validate_vms(struct amdkfd_process_info *process_info)
 static int process_update_pds(struct amdkfd_process_info *process_info,
 			      struct amdgpu_sync *sync)
 {
-	struct amdkfd_vm *peer_vm;
+	struct amdgpu_vm *peer_vm;
 	int ret;
 
 	list_for_each_entry(peer_vm, &process_info->vm_list_head,
 			    vm_list_node) {
-		ret = vm_update_pds(&peer_vm->base, sync);
+		ret = vm_update_pds(peer_vm, sync);
 		if (ret)
 			return ret;
 	}
@@ -814,7 +811,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
 					  struct dma_fence **ef)
 {
 	int ret;
-	struct amdkfd_vm *new_vm;
+	struct amdgpu_vm *new_vm;
 	struct amdkfd_process_info *info;
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
@@ -823,12 +820,11 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
 		return -ENOMEM;
 
 	/* Initialize the VM context, allocate the page directory and zero it */
-	ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE, 0);
+	ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
 	if (ret) {
 		pr_err("Failed init vm ret %d\n", ret);
 		goto vm_init_fail;
 	}
-	new_vm->adev = adev;
 
 	if (!*process_info) {
 		info = kzalloc(sizeof(*info), GFP_KERNEL);
@@ -871,7 +867,7 @@ create_evict_fence_fail:
 	mutex_destroy(&info->lock);
 	kfree(info);
 alloc_process_info_fail:
-	amdgpu_vm_fini(adev, &new_vm->base);
+	amdgpu_vm_fini(adev, new_vm);
 vm_init_fail:
 	kfree(new_vm);
 	return ret;
@@ -881,8 +877,7 @@ vm_init_fail:
 void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm;
-	struct amdgpu_vm *avm = &kfd_vm->base;
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
 	struct amdgpu_bo *pd;
 	struct amdkfd_process_info *process_info;
 
@@ -896,11 +891,11 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
 	amdgpu_bo_fence(pd, NULL, false);
 	amdgpu_bo_unreserve(pd);
 
-	process_info = kfd_vm->process_info;
+	process_info = avm->process_info;
 
 	mutex_lock(&process_info->lock);
 	process_info->n_vms--;
-	list_del(&kfd_vm->vm_list_node);
+	list_del(&avm->vm_list_node);
 	mutex_unlock(&process_info->lock);
 
 	/* Release per-process resources */
@@ -919,7 +914,7 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
 
 uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
 {
-	struct amdkfd_vm *avm = (struct amdkfd_vm *)vm;
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
 
 	return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
 }
@@ -930,7 +925,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		uint64_t *offset, uint32_t flags)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
 	struct amdgpu_bo *bo;
 	int byte_align;
 	u32 alloc_domain;
@@ -1010,8 +1005,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	(*mem)->va = va;
 	(*mem)->domain = alloc_domain;
 	(*mem)->mapped_to_gpu_memory = 0;
-	(*mem)->process_info = kfd_vm->process_info;
-	add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info);
+	(*mem)->process_info = avm->process_info;
+	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
 
 	if (offset)
 		*offset = amdgpu_bo_mmap_offset(bo);
@@ -1092,7 +1087,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
 	int ret;
 	struct amdgpu_bo *bo;
 	uint32_t domain;
@@ -1128,19 +1123,19 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	if (unlikely(ret))
 		goto out;
 
-	if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) {
-		ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false,
+	if (check_if_add_bo_to_vm(avm, mem)) {
+		ret = add_bo_to_vm(adev, mem, avm, false,
 				&bo_va_entry);
 		if (ret)
 			goto add_bo_to_vm_failed;
 		if (mem->aql_queue) {
-			ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm,
+			ret = add_bo_to_vm(adev, mem, avm,
 					true, &bo_va_entry_aql);
 			if (ret)
 				goto add_bo_to_vm_failed_aql;
 		}
 	} else {
-		ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm);
+		ret = vm_validate_pt_pd_bos(avm);
 		if (unlikely(ret))
 			goto add_bo_to_vm_failed;
 	}
@@ -1184,7 +1179,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 
 	if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count)
 		amdgpu_bo_fence(bo,
-				&kfd_vm->process_info->eviction_fence->base,
+				&avm->process_info->eviction_fence->base,
 				true);
 	ret = unreserve_bo_and_vms(&ctx, false, false);
 
@@ -1209,7 +1204,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct amdkfd_process_info *process_info =
-		((struct amdkfd_vm *)vm)->process_info;
+		((struct amdgpu_vm *)vm)->process_info;
 	unsigned long bo_size = mem->bo->tbo.mem.size;
 	struct kfd_bo_va_list *entry;
 	struct bo_vm_reservation_context ctx;
@@ -1226,7 +1221,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 		goto unreserve_out;
 	}
 
-	ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm);
+	ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm);
 	if (unlikely(ret))
 		goto unreserve_out;
 
@@ -1368,7 +1363,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 {
 	struct amdgpu_bo_list_entry *pd_bo_list;
 	struct amdkfd_process_info *process_info = info;
-	struct amdkfd_vm *peer_vm;
+	struct amdgpu_vm *peer_vm;
 	struct kgd_mem *mem;
 	struct bo_vm_reservation_context ctx;
 	struct amdgpu_amdkfd_fence *new_fence;
@@ -1390,8 +1385,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	mutex_lock(&process_info->lock);
 	list_for_each_entry(peer_vm, &process_info->vm_list_head,
 			vm_list_node)
-		amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list,
-				    &pd_bo_list[i++]);
+		amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);
 
 	/* Reserve all BOs and page tables/directory. Add all BOs from
 	 * kfd_bo_list to ctx.list
@@ -1422,7 +1416,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	/* FIXME: I think this isn't needed */
 	list_for_each_entry(peer_vm, &process_info->vm_list_head,
 			    vm_list_node) {
-		struct amdgpu_bo *bo = peer_vm->base.root.base.bo;
+		struct amdgpu_bo *bo = peer_vm->root.base.bo;
 
 		ttm_bo_wait(&bo->tbo, false, false);
 	}
@@ -1491,7 +1485,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	/* Attach eviction fence to PD / PT BOs */
 	list_for_each_entry(peer_vm, &process_info->vm_list_head,
 			    vm_list_node) {
-		struct amdgpu_bo *bo = peer_vm->base.root.base.bo;
+		struct amdgpu_bo *bo = peer_vm->root.base.bo;
 
 		amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index e9841518343e..4dfdefb7de1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -207,6 +207,15 @@ struct amdgpu_vm {
 
 	/* Limit non-retry fault storms */
 	unsigned int		fault_credit;
+
+	/* Points to the KFD process VM info */
+	struct amdkfd_process_info *process_info;
+
+	/* List node in amdkfd_process_info.vm_list_head */
+	struct list_head	vm_list_node;
+
+	/* Valid while the PD is reserved or fenced */
+	uint64_t		pd_phys_addr;
 };
 
 struct amdgpu_vm_manager {
-- 
cgit v1.2.3-55-g7522


From 3486625bbfb629621f8a2402e2b24e7ab0a86ef4 Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Thu, 15 Mar 2018 17:27:41 -0400
Subject: drm/amdgpu: Fix initial validation of PD BO for KFD VMs

Make sure the PD BO is valid and attach the eviction fence during VM
creation. This ensures that the pd_phys_address is actually valid
and an eviction that would invalidate it triggers a KFD process
eviction like it should.

v2: Use uninterruptible waiting in initial PD validation

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 64c33acb6cd8..cafe8a571636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -812,7 +812,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
 {
 	int ret;
 	struct amdgpu_vm *new_vm;
-	struct amdkfd_process_info *info;
+	struct amdkfd_process_info *info = NULL;
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
 	new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
@@ -851,6 +851,23 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
 
 	new_vm->process_info = *process_info;
 
+	/* Validate page directory and attach eviction fence */
+	ret = amdgpu_bo_reserve(new_vm->root.base.bo, true);
+	if (ret)
+		goto reserve_pd_fail;
+	ret = vm_validate_pt_pd_bos(new_vm);
+	if (ret) {
+		pr_err("validate_pt_pd_bos() failed\n");
+		goto validate_pd_fail;
+	}
+	ret = ttm_bo_wait(&new_vm->root.base.bo->tbo, false, false);
+	if (ret)
+		goto wait_pd_fail;
+	amdgpu_bo_fence(new_vm->root.base.bo,
+			&new_vm->process_info->eviction_fence->base, true);
+	amdgpu_bo_unreserve(new_vm->root.base.bo);
+
+	/* Update process info */
 	mutex_lock(&new_vm->process_info->lock);
 	list_add_tail(&new_vm->vm_list_node,
 			&(new_vm->process_info->vm_list_head));
@@ -863,6 +880,10 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
 
 	return ret;
 
+wait_pd_fail:
+validate_pd_fail:
+	amdgpu_bo_unreserve(new_vm->root.base.bo);
+reserve_pd_fail:
 create_evict_fence_fail:
 	mutex_destroy(&info->lock);
 	kfree(info);
-- 
cgit v1.2.3-55-g7522


From b236fa1d339670cc997b68c31be57855bbabc126 Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Thu, 15 Mar 2018 17:27:42 -0400
Subject: drm/amdgpu: Add helper to turn an existing VM into a compute VM

v2: Removed updating and checking of vm->vm_context
v3: Enable amdgpu_vm_clear_bo in amdgpu_vm_make_compute

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 67 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 +
 2 files changed, 68 insertions(+)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 24474294c92a..ea39ccf288ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2461,6 +2461,73 @@ error_free_sched_entity:
 	return r;
 }
 
+/**
+ * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
+ *
+ * This only works on GFX VMs that don't have any BOs added and no
+ * page tables allocated yet.
+ *
+ * Changes the following VM parameters:
+ * - use_cpu_for_update
+ * - pte_supports_ats
+ * - pasid (old PASID is released, because compute manages its own PASIDs)
+ *
+ * Reinitializes the page directory to reflect the changed ATS
+ * setting. May leave behind an unused shadow BO for the page
+ * directory when switching from SDMA updates to CPU updates.
+ *
+ * Returns 0 for success, -errno for errors.
+ */
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+	bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
+	int r;
+
+	r = amdgpu_bo_reserve(vm->root.base.bo, true);
+	if (r)
+		return r;
+
+	/* Sanity checks */
+	if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
+		r = -EINVAL;
+		goto error;
+	}
+
+	/* Check if PD needs to be reinitialized and do it before
+	 * changing any other state, in case it fails.
+	 */
+	if (pte_support_ats != vm->pte_support_ats) {
+		r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+			       adev->vm_manager.root_level,
+			       pte_support_ats);
+		if (r)
+			goto error;
+	}
+
+	/* Update VM state */
+	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+				    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+	vm->pte_support_ats = pte_support_ats;
+	DRM_DEBUG_DRIVER("VM update mode is %s\n",
+			 vm->use_cpu_for_update ? "CPU" : "SDMA");
+	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
+		  "CPU update of VM recommended only for large BAR system\n");
+
+	if (vm->pasid) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
+		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+
+		vm->pasid = 0;
+	}
+
+error:
+	amdgpu_bo_unreserve(vm->root.base.bo);
+	return r;
+}
+
 /**
  * amdgpu_vm_free_levels - free PD/PT levels
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 4dfdefb7de1f..30f080364c97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -260,6 +260,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev);
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		   int vm_context, unsigned int pasid);
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
 				  unsigned int pasid);
-- 
cgit v1.2.3-55-g7522


From ede0dd86f45adf2b7083bb161f6bc81da5fe2bad Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Thu, 15 Mar 2018 17:27:43 -0400
Subject: drm/amdgpu: Add kfd2kgd interface to acquire an existing VM

This allows acquiring an existing VM from a render node FD to use it
for a compute process.

Such VMs get destroyed when the original file descriptor is released.
Added a callback from amdgpu_vm_fini to handle KFD VM destruction
correctly in this case.

v2:
* Removed vm->vm_context check in amdgpu_amdkfd_gpuvm_destroy_cb,
  check vm->process_info earlier instead

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h        |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 166 +++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c            |   3 +
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h   |   2 +
 6 files changed, 124 insertions(+), 55 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 6ba3dd689875..c2c2bea731e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -145,6 +145,12 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
 					  void **process_info,
 					  struct dma_fence **ef);
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
+					   struct file *filp,
+					   void **vm, void **process_info,
+					   struct dma_fence **ef);
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+				    struct amdgpu_vm *vm);
 void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
 uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 7485c376b90e..ea54e53172b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -205,6 +205,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_cu_info = get_cu_info,
 	.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
 	.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+	.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
 	.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
 	.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 7be453494423..89264c9a5e9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -165,6 +165,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_cu_info = get_cu_info,
 	.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
 	.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+	.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
 	.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
 	.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index cafe8a571636..1d6e1479da38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -806,32 +806,16 @@ static int process_update_pds(struct amdkfd_process_info *process_info,
 	return 0;
 }
 
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
-					  void **process_info,
-					  struct dma_fence **ef)
+static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
+		       struct dma_fence **ef)
 {
-	int ret;
-	struct amdgpu_vm *new_vm;
 	struct amdkfd_process_info *info = NULL;
-	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
-	new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
-	if (!new_vm)
-		return -ENOMEM;
-
-	/* Initialize the VM context, allocate the page directory and zero it */
-	ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
-	if (ret) {
-		pr_err("Failed init vm ret %d\n", ret);
-		goto vm_init_fail;
-	}
+	int ret;
 
 	if (!*process_info) {
 		info = kzalloc(sizeof(*info), GFP_KERNEL);
-		if (!info) {
-			ret = -ENOMEM;
-			goto alloc_process_info_fail;
-		}
+		if (!info)
+			return -ENOMEM;
 
 		mutex_init(&info->lock);
 		INIT_LIST_HEAD(&info->vm_list_head);
@@ -842,6 +826,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
 						   current->mm);
 		if (!info->eviction_fence) {
 			pr_err("Failed to create eviction fence\n");
+			ret = -ENOMEM;
 			goto create_evict_fence_fail;
 		}
 
@@ -849,77 +834,137 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
 		*ef = dma_fence_get(&info->eviction_fence->base);
 	}
 
-	new_vm->process_info = *process_info;
+	vm->process_info = *process_info;
 
 	/* Validate page directory and attach eviction fence */
-	ret = amdgpu_bo_reserve(new_vm->root.base.bo, true);
+	ret = amdgpu_bo_reserve(vm->root.base.bo, true);
 	if (ret)
 		goto reserve_pd_fail;
-	ret = vm_validate_pt_pd_bos(new_vm);
+	ret = vm_validate_pt_pd_bos(vm);
 	if (ret) {
 		pr_err("validate_pt_pd_bos() failed\n");
 		goto validate_pd_fail;
 	}
-	ret = ttm_bo_wait(&new_vm->root.base.bo->tbo, false, false);
+	ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false);
 	if (ret)
 		goto wait_pd_fail;
-	amdgpu_bo_fence(new_vm->root.base.bo,
-			&new_vm->process_info->eviction_fence->base, true);
-	amdgpu_bo_unreserve(new_vm->root.base.bo);
+	amdgpu_bo_fence(vm->root.base.bo,
+			&vm->process_info->eviction_fence->base, true);
+	amdgpu_bo_unreserve(vm->root.base.bo);
 
 	/* Update process info */
-	mutex_lock(&new_vm->process_info->lock);
-	list_add_tail(&new_vm->vm_list_node,
-			&(new_vm->process_info->vm_list_head));
-	new_vm->process_info->n_vms++;
-	mutex_unlock(&new_vm->process_info->lock);
-
-	*vm = (void *) new_vm;
-
-	pr_debug("Created process vm %p\n", *vm);
+	mutex_lock(&vm->process_info->lock);
+	list_add_tail(&vm->vm_list_node,
+			&(vm->process_info->vm_list_head));
+	vm->process_info->n_vms++;
+	mutex_unlock(&vm->process_info->lock);
 
-	return ret;
+	return 0;
 
 wait_pd_fail:
 validate_pd_fail:
-	amdgpu_bo_unreserve(new_vm->root.base.bo);
+	amdgpu_bo_unreserve(vm->root.base.bo);
 reserve_pd_fail:
+	vm->process_info = NULL;
+	if (info) {
+		/* Two fence references: one in info and one in *ef */
+		dma_fence_put(&info->eviction_fence->base);
+		dma_fence_put(*ef);
+		*ef = NULL;
+		*process_info = NULL;
 create_evict_fence_fail:
-	mutex_destroy(&info->lock);
-	kfree(info);
-alloc_process_info_fail:
+		mutex_destroy(&info->lock);
+		kfree(info);
+	}
+	return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
+					  void **process_info,
+					  struct dma_fence **ef)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_vm *new_vm;
+	int ret;
+
+	new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
+	if (!new_vm)
+		return -ENOMEM;
+
+	/* Initialize AMDGPU part of the VM */
+	ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
+	if (ret) {
+		pr_err("Failed init vm ret %d\n", ret);
+		goto amdgpu_vm_init_fail;
+	}
+
+	/* Initialize KFD part of the VM and process info */
+	ret = init_kfd_vm(new_vm, process_info, ef);
+	if (ret)
+		goto init_kfd_vm_fail;
+
+	*vm = (void *) new_vm;
+
+	return 0;
+
+init_kfd_vm_fail:
 	amdgpu_vm_fini(adev, new_vm);
-vm_init_fail:
+amdgpu_vm_init_fail:
 	kfree(new_vm);
 	return ret;
-
 }
 
-void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
+					   struct file *filp,
+					   void **vm, void **process_info,
+					   struct dma_fence **ef)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
-	struct amdgpu_bo *pd;
-	struct amdkfd_process_info *process_info;
+	struct drm_file *drm_priv = filp->private_data;
+	struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
+	struct amdgpu_vm *avm = &drv_priv->vm;
+	int ret;
 
-	if (WARN_ON(!kgd || !vm))
+	/* Already a compute VM? */
+	if (avm->process_info)
+		return -EINVAL;
+
+	/* Convert VM into a compute VM */
+	ret = amdgpu_vm_make_compute(adev, avm);
+	if (ret)
+		return ret;
+
+	/* Initialize KFD part of the VM and process info */
+	ret = init_kfd_vm(avm, process_info, ef);
+	if (ret)
+		return ret;
+
+	*vm = (void *)avm;
+
+	return 0;
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+				    struct amdgpu_vm *vm)
+{
+	struct amdkfd_process_info *process_info = vm->process_info;
+	struct amdgpu_bo *pd = vm->root.base.bo;
+
+	if (!process_info)
 		return;
 
-	pr_debug("Destroying process vm %p\n", vm);
 	/* Release eviction fence from PD */
-	pd = avm->root.base.bo;
 	amdgpu_bo_reserve(pd, false);
 	amdgpu_bo_fence(pd, NULL, false);
 	amdgpu_bo_unreserve(pd);
 
-	process_info = avm->process_info;
-
+	/* Update process info */
 	mutex_lock(&process_info->lock);
 	process_info->n_vms--;
-	list_del(&avm->vm_list_node);
+	list_del(&vm->vm_list_node);
 	mutex_unlock(&process_info->lock);
 
-	/* Release per-process resources */
+	/* Release per-process resources when last compute VM is destroyed */
 	if (!process_info->n_vms) {
 		WARN_ON(!list_empty(&process_info->kfd_bo_list));
 
@@ -927,6 +972,17 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
 		mutex_destroy(&process_info->lock);
 		kfree(process_info);
 	}
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+	if (WARN_ON(!kgd || !vm))
+		return;
+
+	pr_debug("Destroying process vm %p\n", vm);
 
 	/* Release the VM context */
 	amdgpu_vm_fini(adev, avm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ea39ccf288ed..cbf1421c1963 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -32,6 +32,7 @@
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 
 /*
  * GPUVM
@@ -2575,6 +2576,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	u64 fault;
 	int i, r;
 
+	amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
+
 	/* Clear pending page faults from IH when the VM is destroyed */
 	while (kfifo_get(&vm->faults, &fault))
 		amdgpu_ih_clear_fault(adev, fault);
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 1e5c22ceb256..b1f35c8be2cf 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -336,6 +336,8 @@ struct kfd2kgd_calls {
 
 	int (*create_process_vm)(struct kgd_dev *kgd, void **vm,
 			void **process_info, struct dma_fence **ef);
+	int (*acquire_process_vm)(struct kgd_dev *kgd, struct file *filp,
+			void **vm, void **process_info, struct dma_fence **ef);
 	void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm);
 	uint32_t (*get_process_page_dir)(void *vm);
 	void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
-- 
cgit v1.2.3-55-g7522


From b84394e206a748ce34beeed462f237474f3c6c00 Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Thu, 15 Mar 2018 17:27:44 -0400
Subject: drm/amdkfd: Create KFD VMs on demand

Instead of creating all VMs on process creation, create them when
a process is bound to a device. This will later allow registering
an existing VM from a DRM render node FD at runtime, before the
process is bound to the device. This way the render node VM can be
used for KFD instead of creating our own redundant VM.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  3 ++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 60 ++++++++++++++++++++++++++------
 2 files changed, 53 insertions(+), 10 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index cac7aa258162..014d608226d5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -536,6 +536,7 @@ struct kfd_process_device {
 	uint64_t scratch_limit;
 
 	/* VM context for GPUVM allocations */
+	struct file *drm_file;
 	void *vm;
 
 	/* Flag used to tell the pdd has dequeued from the dqm.
@@ -661,6 +662,8 @@ void kfd_unref_process(struct kfd_process *p);
 void kfd_suspend_all_processes(void);
 int kfd_resume_all_processes(void);
 
+int kfd_process_device_init_vm(struct kfd_process_device *pdd,
+			       struct file *drm_file);
 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
 						struct kfd_process *p);
 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 18b2b86ad503..6618aaa6b84f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -30,6 +30,7 @@
 #include <linux/notifier.h>
 #include <linux/compat.h>
 #include <linux/mman.h>
+#include <linux/file.h>
 
 struct mm_struct;
 
@@ -158,7 +159,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 		pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
 				pdd->dev->id, p->pasid);
 
-		if (pdd->vm)
+		if (pdd->drm_file)
+			fput(pdd->drm_file);
+		else if (pdd->vm)
 			pdd->dev->kfd2kgd->destroy_process_vm(
 				pdd->dev->kgd, pdd->vm);
 
@@ -418,18 +421,51 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 	pdd->already_dequeued = false;
 	list_add(&pdd->per_device_list, &p->per_device_data);
 
-	/* Create the GPUVM context for this specific device */
-	if (dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm,
-					    &p->kgd_process_info, &p->ef)) {
+	return pdd;
+}
+
+/**
+ * kfd_process_device_init_vm - Initialize a VM for a process-device
+ *
+ * @pdd: The process-device
+ * @drm_file: Optional pointer to a DRM file descriptor
+ *
+ * If @drm_file is specified, it will be used to acquire the VM from
+ * that file descriptor. If successful, the @pdd takes ownership of
+ * the file descriptor.
+ *
+ * If @drm_file is NULL, a new VM is created.
+ *
+ * Returns 0 on success, -errno on failure.
+ */
+int kfd_process_device_init_vm(struct kfd_process_device *pdd,
+			       struct file *drm_file)
+{
+	struct kfd_process *p;
+	struct kfd_dev *dev;
+	int ret;
+
+	if (pdd->vm)
+		return drm_file ? -EBUSY : 0;
+
+	p = pdd->process;
+	dev = pdd->dev;
+
+	if (drm_file)
+		ret = dev->kfd2kgd->acquire_process_vm(
+			dev->kgd, drm_file,
+			&pdd->vm, &p->kgd_process_info, &p->ef);
+	else
+		ret = dev->kfd2kgd->create_process_vm(
+			dev->kgd, &pdd->vm, &p->kgd_process_info, &p->ef);
+	if (ret) {
 		pr_err("Failed to create process VM object\n");
-		goto err_create_pdd;
+		return ret;
 	}
-	return pdd;
 
-err_create_pdd:
-	list_del(&pdd->per_device_list);
-	kfree(pdd);
-	return NULL;
+	pdd->drm_file = drm_file;
+
+	return 0;
 }
 
 /*
@@ -455,6 +491,10 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
 	if (err)
 		return ERR_PTR(err);
 
+	err = kfd_process_device_init_vm(pdd, NULL);
+	if (err)
+		return ERR_PTR(err);
+
 	return pdd;
 }
 
-- 
cgit v1.2.3-55-g7522


From 7c9b717196b1afd368436c258994ed6054717c7a Mon Sep 17 00:00:00 2001
From: Oak Zeng
Date: Thu, 15 Mar 2018 17:27:45 -0400
Subject: drm/amdkfd: Populate DRM render device minor

Populate DRM render device minor in kfd topology

Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 4 ++++
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 250615535563..ac28abc94e57 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -441,6 +441,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			dev->node_props.device_id);
 	sysfs_show_32bit_prop(buffer, "location_id",
 			dev->node_props.location_id);
+	sysfs_show_32bit_prop(buffer, "drm_render_minor",
+			dev->node_props.drm_render_minor);
 
 	if (dev->gpu) {
 		log_max_watch_addr =
@@ -1214,6 +1216,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 		dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd);
 	dev->node_props.max_engine_clk_ccompute =
 		cpufreq_quick_get_max(0) / 1000;
+	dev->node_props.drm_render_minor =
+		gpu->shared_resources.drm_render_minor;
 
 	kfd_fill_mem_clk_max_info(dev);
 	kfd_fill_iolink_non_crat_info(dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index c0be2be6dca5..eb54cfcaf039 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -71,6 +71,7 @@ struct kfd_node_properties {
 	uint32_t location_id;
 	uint32_t max_engine_clk_fcompute;
 	uint32_t max_engine_clk_ccompute;
+	int32_t  drm_render_minor;
 	uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
 };
 
-- 
cgit v1.2.3-55-g7522


From c7bcbfa4f8d1e0e1078adfe959d4b65542bccf66 Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Thu, 15 Mar 2018 17:27:46 -0400
Subject: drm/amdkfd: Remove limit on number of GPUs

Currently the number of GPUs is limited by aperture placement options
available on GFX7 and GFX8 hardware. This limitation is not necessary.
Scratch and LDS represent per-work-item and per-work-group storage
respectively. Different work-items and work-groups use the same virtual
address to access their own data. Work running on different GPUs is by
definition in different work-groups (different dispatches, in fact).
That means the same virtual addresses can be used for these apertures
on different GPUs.

Add a new AMDKFD_IOC_GET_PROCESS_APERTURES_NEW ioctl that removes the
artificial limitation on the number of GPUs that can be supported. The
new ioctl allows user mode to query the number of GPUs to allocate
enough memory for all GPUs to be reported.

This deprecates AMDKFD_IOC_GET_PROCESS_APERTURES.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c     | 94 ++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 22 +++----
 include/uapi/linux/kfd_ioctl.h               | 27 +++++++-
 3 files changed, 128 insertions(+), 15 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6fe24964540b..7d4009418ec3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -825,6 +825,97 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
 	return 0;
 }
 
+static int kfd_ioctl_get_process_apertures_new(struct file *filp,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_process_apertures_new_args *args = data;
+	struct kfd_process_device_apertures *pa;
+	struct kfd_process_device *pdd;
+	uint32_t nodes = 0;
+	int ret;
+
+	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+
+	if (args->num_of_nodes == 0) {
+		/* Return number of nodes, so that user space can alloacate
+		 * sufficient memory
+		 */
+		mutex_lock(&p->mutex);
+
+		if (!kfd_has_process_device_data(p))
+			goto out_unlock;
+
+		/* Run over all pdd of the process */
+		pdd = kfd_get_first_process_device_data(p);
+		do {
+			args->num_of_nodes++;
+			pdd = kfd_get_next_process_device_data(p, pdd);
+		} while (pdd);
+
+		goto out_unlock;
+	}
+
+	/* Fill in process-aperture information for all available
+	 * nodes, but not more than args->num_of_nodes as that is
+	 * the amount of memory allocated by user
+	 */
+	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
+				args->num_of_nodes), GFP_KERNEL);
+	if (!pa)
+		return -ENOMEM;
+
+	mutex_lock(&p->mutex);
+
+	if (!kfd_has_process_device_data(p)) {
+		args->num_of_nodes = 0;
+		kfree(pa);
+		goto out_unlock;
+	}
+
+	/* Run over all pdd of the process */
+	pdd = kfd_get_first_process_device_data(p);
+	do {
+		pa[nodes].gpu_id = pdd->dev->id;
+		pa[nodes].lds_base = pdd->lds_base;
+		pa[nodes].lds_limit = pdd->lds_limit;
+		pa[nodes].gpuvm_base = pdd->gpuvm_base;
+		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
+		pa[nodes].scratch_base = pdd->scratch_base;
+		pa[nodes].scratch_limit = pdd->scratch_limit;
+
+		dev_dbg(kfd_device,
+			"gpu id %u\n", pdd->dev->id);
+		dev_dbg(kfd_device,
+			"lds_base %llX\n", pdd->lds_base);
+		dev_dbg(kfd_device,
+			"lds_limit %llX\n", pdd->lds_limit);
+		dev_dbg(kfd_device,
+			"gpuvm_base %llX\n", pdd->gpuvm_base);
+		dev_dbg(kfd_device,
+			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
+		dev_dbg(kfd_device,
+			"scratch_base %llX\n", pdd->scratch_base);
+		dev_dbg(kfd_device,
+			"scratch_limit %llX\n", pdd->scratch_limit);
+		nodes++;
+
+		pdd = kfd_get_next_process_device_data(p, pdd);
+	} while (pdd && (nodes < args->num_of_nodes));
+	mutex_unlock(&p->mutex);
+
+	args->num_of_nodes = nodes;
+	ret = copy_to_user(
+			(void __user *)args->kfd_process_device_apertures_ptr,
+			pa,
+			(nodes * sizeof(struct kfd_process_device_apertures)));
+	kfree(pa);
+	return ret ? -EFAULT : 0;
+
+out_unlock:
+	mutex_unlock(&p->mutex);
+	return 0;
+}
+
 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
 					void *data)
 {
@@ -1017,6 +1108,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
 			kfd_ioctl_set_trap_handler, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
+			kfd_ioctl_get_process_apertures_new, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 7377513050e6..a06b0100af96 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -282,14 +282,14 @@
 	(((uint64_t)(base) & \
 		0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL)
 
-#define MAKE_SCRATCH_APP_BASE(gpu_num) \
-	(((uint64_t)(gpu_num) << 61) + 0x100000000L)
+#define MAKE_SCRATCH_APP_BASE() \
+	(((uint64_t)(0x1UL) << 61) + 0x100000000L)
 
 #define MAKE_SCRATCH_APP_LIMIT(base) \
 	(((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
 
-#define MAKE_LDS_APP_BASE(gpu_num) \
-	(((uint64_t)(gpu_num) << 61) + 0x0)
+#define MAKE_LDS_APP_BASE() \
+	(((uint64_t)(0x1UL) << 61) + 0x0)
 #define MAKE_LDS_APP_LIMIT(base) \
 	(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
 
@@ -314,7 +314,7 @@ int kfd_init_apertures(struct kfd_process *process)
 			return -1;
 		}
 		/*
-		 * For 64 bit process aperture will be statically reserved in
+		 * For 64 bit process apertures will be statically reserved in
 		 * the x86_64 non canonical process address space
 		 * amdkfd doesn't currently support apertures for 32 bit process
 		 */
@@ -323,12 +323,11 @@ int kfd_init_apertures(struct kfd_process *process)
 			pdd->gpuvm_base = pdd->gpuvm_limit = 0;
 			pdd->scratch_base = pdd->scratch_limit = 0;
 		} else {
-			/*
-			 * node id couldn't be 0 - the three MSB bits of
-			 * aperture shoudn't be 0
+			/* Same LDS and scratch apertures can be used
+			 * on all GPUs. This allows using more dGPUs
+			 * than placement options for apertures.
 			 */
-			pdd->lds_base = MAKE_LDS_APP_BASE(id + 1);
-
+			pdd->lds_base = MAKE_LDS_APP_BASE();
 			pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
 
 			pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
@@ -336,8 +335,7 @@ int kfd_init_apertures(struct kfd_process *process)
 			pdd->gpuvm_limit =
 					MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base);
 
-			pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1);
-
+			pdd->scratch_base = MAKE_SCRATCH_APP_BASE();
 			pdd->scratch_limit =
 				MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
 		}
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 111d73ba2d96..52014370e2e5 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -107,8 +107,6 @@ struct kfd_ioctl_get_clock_counters_args {
 	__u32 pad;
 };
 
-#define NUM_OF_SUPPORTED_GPUS 7
-
 struct kfd_process_device_apertures {
 	__u64 lds_base;		/* from KFD */
 	__u64 lds_limit;		/* from KFD */
@@ -120,6 +118,12 @@ struct kfd_process_device_apertures {
 	__u32 pad;
 };
 
+/*
+ * AMDKFD_IOC_GET_PROCESS_APERTURES is deprecated. Use
+ * AMDKFD_IOC_GET_PROCESS_APERTURES_NEW instead, which supports an
+ * unlimited number of GPUs.
+ */
+#define NUM_OF_SUPPORTED_GPUS 7
 struct kfd_ioctl_get_process_apertures_args {
 	struct kfd_process_device_apertures
 			process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
@@ -129,6 +133,19 @@ struct kfd_ioctl_get_process_apertures_args {
 	__u32 pad;
 };
 
+struct kfd_ioctl_get_process_apertures_new_args {
+	/* User allocated. Pointer to struct kfd_process_device_apertures
+	 * filled in by Kernel
+	 */
+	__u64 kfd_process_device_apertures_ptr;
+	/* to KFD - indicates amount of memory present in
+	 *  kfd_process_device_apertures_ptr
+	 * from KFD - Number of entries filled by KFD.
+	 */
+	__u32 num_of_nodes;
+	__u32 pad;
+};
+
 #define MAX_ALLOWED_NUM_POINTS    100
 #define MAX_ALLOWED_AW_BUFF_SIZE 4096
 #define MAX_ALLOWED_WAC_BUFF_SIZE  128
@@ -332,7 +349,11 @@ struct kfd_ioctl_set_trap_handler_args {
 #define AMDKFD_IOC_SET_TRAP_HANDLER		\
 		AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)
 
+#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW	\
+		AMDKFD_IOWR(0x14,		\
+			struct kfd_ioctl_get_process_apertures_new_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x14
+#define AMDKFD_COMMAND_END		0x15
 
 #endif
-- 
cgit v1.2.3-55-g7522


From d01994c24cb28b6f200138d98cbfc17b6bd967c5 Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Thu, 15 Mar 2018 17:27:47 -0400
Subject: drm/amdkfd: Aperture setup for dGPUs

Set up the GPUVM aperture for SVM (shared virtual memory) that allows
sharing a part of virtual address space between GPUs and CPUs.

Report the size of the GPUVM aperture that is supported by KGD accurately.

The low part of the GPUVM aperture is reserved for kernel use. This is
for kernel-allocated buffers that are only accessed on the GPU:
- CWSR trap handler
- IB for submitting commands in user-mode context from kernel mode

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 37 ++++++++++++++++++++++------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h        |  4 +++
 2 files changed, 33 insertions(+), 8 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index a06b0100af96..66852de410c8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -278,9 +278,8 @@
 #define MAKE_GPUVM_APP_BASE(gpu_num) \
 	(((uint64_t)(gpu_num) << 61) + 0x1000000000000L)
 
-#define MAKE_GPUVM_APP_LIMIT(base) \
-	(((uint64_t)(base) & \
-		0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL)
+#define MAKE_GPUVM_APP_LIMIT(base, size) \
+	(((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1)
 
 #define MAKE_SCRATCH_APP_BASE() \
 	(((uint64_t)(0x1UL) << 61) + 0x100000000L)
@@ -293,6 +292,14 @@
 #define MAKE_LDS_APP_LIMIT(base) \
 	(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
 
+/* User mode manages most of the SVM aperture address space. The low
+ * 16MB are reserved for kernel use (CWSR trap handler and kernel IB
+ * for now).
+ */
+#define SVM_USER_BASE 0x1000000ull
+#define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE)
+#define SVM_IB_BASE   (SVM_CWSR_BASE - PAGE_SIZE)
+
 int kfd_init_apertures(struct kfd_process *process)
 {
 	uint8_t id  = 0;
@@ -330,14 +337,28 @@ int kfd_init_apertures(struct kfd_process *process)
 			pdd->lds_base = MAKE_LDS_APP_BASE();
 			pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
 
-			pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
-
-			pdd->gpuvm_limit =
-					MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base);
-
 			pdd->scratch_base = MAKE_SCRATCH_APP_BASE();
 			pdd->scratch_limit =
 				MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+
+			if (dev->device_info->needs_iommu_device) {
+				/* APUs: GPUVM aperture in
+				 * non-canonical address space
+				 */
+				pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
+				pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(
+					pdd->gpuvm_base,
+					dev->shared_resources.gpuvm_size);
+			} else {
+				/* dGPUs: SVM aperture starting at 0
+				 * with small reserved space for kernel
+				 */
+				pdd->gpuvm_base = SVM_USER_BASE;
+				pdd->gpuvm_limit =
+					dev->shared_resources.gpuvm_size - 1;
+				pdd->qpd.cwsr_base = SVM_CWSR_BASE;
+				pdd->qpd.ib_base = SVM_IB_BASE;
+			}
 		}
 
 		dev_dbg(kfd_device, "node id %u\n", id);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 014d608226d5..0d5d924b31ef 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -488,8 +488,12 @@ struct qcm_process_device {
 
 	/* CWSR memory */
 	void *cwsr_kaddr;
+	uint64_t cwsr_base;
 	uint64_t tba_addr;
 	uint64_t tma_addr;
+
+	/* IB memory */
+	uint64_t ib_base;
 };
 
 /* KFD Memory Eviction */
-- 
cgit v1.2.3-55-g7522


From 52b29d73340da6cbb10ba42b0a28e7fb795afe9c Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Thu, 15 Mar 2018 17:27:48 -0400
Subject: drm/amdkfd: Add per-process IDR for buffer handles

Also used for cleaning up on process termination.

v2: Refactored cleanup on process termination

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    | 11 +++++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 73 ++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0d5d924b31ef..b2b5ef8c4536 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -543,6 +543,9 @@ struct kfd_process_device {
 	struct file *drm_file;
 	void *vm;
 
+	/* GPUVM allocations storage */
+	struct idr alloc_idr;
+
 	/* Flag used to tell the pdd has dequeued from the dqm.
 	 * This is used to prevent dev->dqm->ops.process_termination() from
 	 * being called twice when it is already called in IOMMU callback
@@ -678,6 +681,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 int kfd_reserved_mem_mmap(struct kfd_process *process,
 			  struct vm_area_struct *vma);
 
+/* KFD process API for creating and translating handles */
+int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
+					void *mem);
+void *kfd_process_device_translate_handle(struct kfd_process_device *p,
+					int handle);
+void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
+					int handle);
+
 /* Process device data iterator */
 struct kfd_process_device *kfd_get_first_process_device_data(
 							struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 6618aaa6b84f..a2ae0236a3ea 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -150,6 +150,40 @@ void kfd_unref_process(struct kfd_process *p)
 	kref_put(&p->ref, kfd_process_ref_release);
 }
 
+static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
+{
+	struct kfd_process *p = pdd->process;
+	void *mem;
+	int id;
+
+	/*
+	 * Remove all handles from idr and release appropriate
+	 * local memory object
+	 */
+	idr_for_each_entry(&pdd->alloc_idr, mem, id) {
+		struct kfd_process_device *peer_pdd;
+
+		list_for_each_entry(peer_pdd, &p->per_device_data,
+				    per_device_list) {
+			if (!peer_pdd->vm)
+				continue;
+			peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu(
+				peer_pdd->dev->kgd, mem, peer_pdd->vm);
+		}
+
+		pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd, mem);
+		kfd_process_device_remove_obj_handle(pdd, id);
+	}
+}
+
+static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
+{
+	struct kfd_process_device *pdd;
+
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+		kfd_process_device_free_bos(pdd);
+}
+
 static void kfd_process_destroy_pdds(struct kfd_process *p)
 {
 	struct kfd_process_device *pdd, *temp;
@@ -171,6 +205,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
 				get_order(KFD_CWSR_TBA_TMA_SIZE));
 
+		idr_destroy(&pdd->alloc_idr);
+
 		kfree(pdd);
 	}
 }
@@ -187,6 +223,8 @@ static void kfd_process_wq_release(struct work_struct *work)
 
 	kfd_iommu_unbind_process(p);
 
+	kfd_process_free_outstanding_kfd_bos(p);
+
 	kfd_process_destroy_pdds(p);
 	dma_fence_put(p->ef);
 
@@ -371,6 +409,7 @@ static struct kfd_process *create_process(const struct task_struct *thread,
 	return process;
 
 err_init_cwsr:
+	kfd_process_free_outstanding_kfd_bos(process);
 	kfd_process_destroy_pdds(process);
 err_init_apertures:
 	pqm_uninit(&process->pqm);
@@ -421,6 +460,9 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 	pdd->already_dequeued = false;
 	list_add(&pdd->per_device_list, &p->per_device_data);
 
+	/* Init idr used for memory handle translation */
+	idr_init(&pdd->alloc_idr);
+
 	return pdd;
 }
 
@@ -520,6 +562,37 @@ bool kfd_has_process_device_data(struct kfd_process *p)
 	return !(list_empty(&p->per_device_data));
 }
 
+/* Create specific handle mapped to mem from process local memory idr
+ * Assumes that the process lock is held.
+ */
+int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
+					void *mem)
+{
+	return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
+}
+
+/* Translate specific handle from process local memory idr
+ * Assumes that the process lock is held.
+ */
+void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
+					int handle)
+{
+	if (handle < 0)
+		return NULL;
+
+	return idr_find(&pdd->alloc_idr, handle);
+}
+
+/* Remove specific handle from process local memory idr
+ * Assumes that the process lock is held.
+ */
+void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
+					int handle)
+{
+	if (handle >= 0)
+		idr_remove(&pdd->alloc_idr, handle);
+}
+
 /* This increments the process->ref counter. */
 struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
 {
-- 
cgit v1.2.3-55-g7522


From f35751b87034f0c2d11e60cdfb0179c4f1a7e296 Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Thu, 15 Mar 2018 17:27:49 -0400
Subject: drm/amdkfd: Allocate CWSR trap handler memory for dGPUs

Add helpers for allocating GPUVM memory in kernel mode and use them
to allocate memory for the CWSR trap handler.

v2: Use dev instead of pdd->dev in kfd_process_free_gpuvm
v3:
* Cleaned up and simplified kfd_process_alloc_gpuvm
* Moved allocation for dGPU to kfd_process_device_init_vm

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 137 ++++++++++++++++++++++++++++---
 1 file changed, 127 insertions(+), 10 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index a2ae0236a3ea..aeb339db8a90 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -54,7 +54,6 @@ static struct kfd_process *find_process(const struct task_struct *thread);
 static void kfd_process_ref_release(struct kref *ref);
 static struct kfd_process *create_process(const struct task_struct *thread,
 					struct file *filep);
-static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
 
 static void evict_process_worker(struct work_struct *work);
 static void restore_process_worker(struct work_struct *work);
@@ -74,6 +73,82 @@ void kfd_process_destroy_wq(void)
 	}
 }
 
+static void kfd_process_free_gpuvm(struct kgd_mem *mem,
+			struct kfd_process_device *pdd)
+{
+	struct kfd_dev *dev = pdd->dev;
+
+	dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd, mem, pdd->vm);
+	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, mem);
+}
+
+/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
+ *	This function should be only called right after the process
+ *	is created and when kfd_processes_mutex is still being held
+ *	to avoid concurrency. Because of that exclusiveness, we do
+ *	not need to take p->mutex.
+ */
+static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
+				   uint64_t gpu_va, uint32_t size,
+				   uint32_t flags, void **kptr)
+{
+	struct kfd_dev *kdev = pdd->dev;
+	struct kgd_mem *mem = NULL;
+	int handle;
+	int err;
+
+	err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
+						 pdd->vm, &mem, NULL, flags);
+	if (err)
+		goto err_alloc_mem;
+
+	err = kdev->kfd2kgd->map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
+	if (err)
+		goto err_map_mem;
+
+	err = kdev->kfd2kgd->sync_memory(kdev->kgd, mem, true);
+	if (err) {
+		pr_debug("Sync memory failed, wait interrupted by user signal\n");
+		goto sync_memory_failed;
+	}
+
+	/* Create an obj handle so kfd_process_device_remove_obj_handle
+	 * will take care of the bo removal when the process finishes.
+	 * We do not need to take p->mutex, because the process is just
+	 * created and the ioctls have not had the chance to run.
+	 */
+	handle = kfd_process_device_create_obj_handle(pdd, mem);
+
+	if (handle < 0) {
+		err = handle;
+		goto free_gpuvm;
+	}
+
+	if (kptr) {
+		err = kdev->kfd2kgd->map_gtt_bo_to_kernel(kdev->kgd,
+				(struct kgd_mem *)mem, kptr, NULL);
+		if (err) {
+			pr_debug("Map GTT BO to kernel failed\n");
+			goto free_obj_handle;
+		}
+	}
+
+	return err;
+
+free_obj_handle:
+	kfd_process_device_remove_obj_handle(pdd, handle);
+free_gpuvm:
+sync_memory_failed:
+	kfd_process_free_gpuvm(mem, pdd);
+	return err;
+
+err_map_mem:
+	kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem);
+err_alloc_mem:
+	*kptr = NULL;
+	return err;
+}
+
 struct kfd_process *kfd_create_process(struct file *filep)
 {
 	struct kfd_process *process;
@@ -201,7 +276,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 
 		list_del(&pdd->per_device_list);
 
-		if (pdd->qpd.cwsr_kaddr)
+		if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
 			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
 				get_order(KFD_CWSR_TBA_TMA_SIZE));
 
@@ -312,18 +387,18 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
 	.release = kfd_process_notifier_release,
 };
 
-static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
+static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
 {
 	unsigned long  offset;
-	struct kfd_process_device *pdd = NULL;
-	struct kfd_dev *dev = NULL;
-	struct qcm_process_device *qpd = NULL;
+	struct kfd_process_device *pdd;
 
 	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
-		dev = pdd->dev;
-		qpd = &pdd->qpd;
-		if (!dev->cwsr_enabled || qpd->cwsr_kaddr)
+		struct kfd_dev *dev = pdd->dev;
+		struct qcm_process_device *qpd = &pdd->qpd;
+
+		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
 			continue;
+
 		offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
 		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
 			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
@@ -348,6 +423,36 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
 	return 0;
 }
 
+static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
+{
+	struct kfd_dev *dev = pdd->dev;
+	struct qcm_process_device *qpd = &pdd->qpd;
+	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
+		ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE;
+	void *kaddr;
+	int ret;
+
+	if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
+		return 0;
+
+	/* cwsr_base is only set for dGPU */
+	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
+				      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
+	if (ret)
+		return ret;
+
+	qpd->cwsr_kaddr = kaddr;
+	qpd->tba_addr = qpd->cwsr_base;
+
+	memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
+
+	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
+	pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
+		 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
+
+	return 0;
+}
+
 static struct kfd_process *create_process(const struct task_struct *thread,
 					struct file *filep)
 {
@@ -402,7 +507,7 @@ static struct kfd_process *create_process(const struct task_struct *thread,
 	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
 	process->last_restore_timestamp = get_jiffies_64();
 
-	err = kfd_process_init_cwsr(process, filep);
+	err = kfd_process_init_cwsr_apu(process, filep);
 	if (err)
 		goto err_init_cwsr;
 
@@ -505,9 +610,21 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 		return ret;
 	}
 
+	ret = kfd_process_device_init_cwsr_dgpu(pdd);
+	if (ret)
+		goto err_init_cwsr;
+
 	pdd->drm_file = drm_file;
 
 	return 0;
+
+err_init_cwsr:
+	kfd_process_device_free_bos(pdd);
+	if (!drm_file)
+		dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);
+	pdd->vm = NULL;
+
+	return ret;
 }
 
 /*
-- 
cgit v1.2.3-55-g7522


From 552764b680a65d6069ad651b356d5465082939d0 Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Thu, 15 Mar 2018 17:27:50 -0400
Subject: drm/amdkfd: Add TC flush on VMID deallocation for Hawaii

On GFX7 the CP does not perform a TC flush when queues are unmapped.
To avoid TC eviction from accessing an invalid VMID, flush it
explicitly before releasing a VMID.

v2: Fix unnecessary list_for_each_entry_safe
v3: Moved allocation to kfd_process_device_init_vm

Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 22 ++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c    | 37 ++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  3 ++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c           | 34 ++++++++++++++++++++
 4 files changed, 95 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b3b6dab71638..c18e048f23c6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -142,12 +142,31 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 	return 0;
 }
 
+static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
+				struct qcm_process_device *qpd)
+{
+	uint32_t len;
+
+	if (!qpd->ib_kaddr)
+		return -ENOMEM;
+
+	len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
+
+	return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
+				qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
+}
+
 static void deallocate_vmid(struct device_queue_manager *dqm,
 				struct qcm_process_device *qpd,
 				struct queue *q)
 {
 	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
 
+	/* On GFX v7, CP doesn't flush TC at dequeue */
+	if (q->device->device_info->asic_family == CHIP_HAWAII)
+		if (flush_texture_cache_nocpsch(q->device, qpd))
+			pr_err("Failed to flush TC\n");
+
 	kfd_flush_tlb(qpd_to_pdd(qpd));
 
 	/* Release the vmid mapping */
@@ -792,11 +811,12 @@ static void uninitialize(struct device_queue_manager *dqm)
 static int start_nocpsch(struct device_queue_manager *dqm)
 {
 	init_interrupts(dqm);
-	return 0;
+	return pm_init(&dqm->packets, dqm);
 }
 
 static int stop_nocpsch(struct device_queue_manager *dqm)
 {
+	pm_uninit(&dqm->packets);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 0ecbd1f9b606..7614375489a4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -356,6 +356,43 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 	return retval;
 }
 
+/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
+ *     of this packet
+ *     @gpu_addr - GPU address of the packet. It's a virtual address.
+ *     @buffer - buffer to fill up with the packet. It's a CPU kernel pointer
+ *     Return - length of the packet
+ */
+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
+{
+	struct pm4_mec_release_mem *packet;
+
+	WARN_ON(!buffer);
+
+	packet = (struct pm4_mec_release_mem *)buffer;
+	memset(buffer, 0, sizeof(*packet));
+
+	packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
+						 sizeof(*packet));
+
+	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+	packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
+	packet->bitfields2.tcl1_action_ena = 1;
+	packet->bitfields2.tc_action_ena = 1;
+	packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+	packet->bitfields2.atc = 0;
+
+	packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
+	packet->bitfields3.int_sel =
+		int_sel___release_mem__send_interrupt_after_write_confirm;
+
+	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+	packet->address_hi = upper_32_bits(gpu_addr);
+
+	packet->data_lo = 0;
+
+	return sizeof(*packet) / sizeof(unsigned int);
+}
+
 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
 {
 	pm->dqm = dqm;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index b2b5ef8c4536..aaed005ce1f5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -494,6 +494,7 @@ struct qcm_process_device {
 
 	/* IB memory */
 	uint64_t ib_base;
+	void *ib_kaddr;
 };
 
 /* KFD Memory Eviction */
@@ -834,6 +835,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 
 void pm_release_ib(struct packet_manager *pm);
 
+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
+
 uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
 
 /* Events */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index aeb339db8a90..45ef2d03a975 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -149,6 +149,36 @@ err_alloc_mem:
 	return err;
 }
 
+/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
+ *	process for IB usage The memory reserved is for KFD to submit
+ *	IB to AMDGPU from kernel.  If the memory is reserved
+ *	successfully, ib_kaddr will have the CPU/kernel
+ *	address. Check ib_kaddr before accessing the memory.
+ */
+static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
+{
+	struct qcm_process_device *qpd = &pdd->qpd;
+	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
+			 ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
+			 ALLOC_MEM_FLAGS_WRITABLE |
+			 ALLOC_MEM_FLAGS_EXECUTABLE;
+	void *kaddr;
+	int ret;
+
+	if (qpd->ib_kaddr || !qpd->ib_base)
+		return 0;
+
+	/* ib_base is only set for dGPU */
+	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
+				      &kaddr);
+	if (ret)
+		return ret;
+
+	qpd->ib_kaddr = kaddr;
+
+	return 0;
+}
+
 struct kfd_process *kfd_create_process(struct file *filep)
 {
 	struct kfd_process *process;
@@ -610,6 +640,9 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 		return ret;
 	}
 
+	ret = kfd_process_device_reserve_ib_mem(pdd);
+	if (ret)
+		goto err_reserve_ib_mem;
 	ret = kfd_process_device_init_cwsr_dgpu(pdd);
 	if (ret)
 		goto err_init_cwsr;
@@ -619,6 +652,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 	return 0;
 
 err_init_cwsr:
+err_reserve_ib_mem:
 	kfd_process_device_free_bos(pdd);
 	if (!drm_file)
 		dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);
-- 
cgit v1.2.3-55-g7522


From 5ec7e02854b3b9b55936c3b44b8acfb85e333f49 Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Thu, 15 Mar 2018 17:27:51 -0400
Subject: drm/amdkfd: Add ioctls for GPUVM memory management

v2:
* Fix error handling after kfd_bind_process_to_device in
  kfd_ioctl_map_memory_to_gpu
v3:
* Add ioctl to acquire VM from a DRM FD
v4:
* Return number of successful map/unmap operations in failure cases
* Facilitate partial retry after failed map/unmap
* Added comments with parameter descriptions to new APIs
* Defined AMDKFD_IOC_FREE_MEMORY_OF_GPU write-only

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c        | 377 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h           |   8 +
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h |   2 +
 include/uapi/linux/kfd_ioctl.h                  |  97 +++++-
 4 files changed, 483 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 7d4009418ec3..a563ff2ca7dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/err.h>
 #include <linux/fs.h>
+#include <linux/file.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
@@ -1046,6 +1047,366 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
 	return 0;
 }
 
+static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
+				void *data)
+{
+	struct kfd_ioctl_acquire_vm_args *args = data;
+	struct kfd_process_device *pdd;
+	struct kfd_dev *dev;
+	struct file *drm_file;
+	int ret;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
+
+	drm_file = fget(args->drm_fd);
+	if (!drm_file)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	if (pdd->drm_file) {
+		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
+		goto err_unlock;
+	}
+
+	ret = kfd_process_device_init_vm(pdd, drm_file);
+	if (ret)
+		goto err_unlock;
+	/* On success, the PDD keeps the drm_file reference */
+	mutex_unlock(&p->mutex);
+
+	return 0;
+
+err_unlock:
+	mutex_unlock(&p->mutex);
+	fput(drm_file);
+	return ret;
+}
+
+bool kfd_dev_is_large_bar(struct kfd_dev *dev)
+{
+	struct kfd_local_mem_info mem_info;
+
+	if (dev->device_info->needs_iommu_device)
+		return false;
+
+	dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
+	if (mem_info.local_mem_size_private == 0 &&
+			mem_info.local_mem_size_public > 0)
+		return true;
+	return false;
+}
+
+static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
+	struct kfd_process_device *pdd;
+	void *mem;
+	struct kfd_dev *dev;
+	int idr_handle;
+	long err;
+	uint64_t offset = args->mmap_offset;
+	uint32_t flags = args->flags;
+
+	if (args->size == 0)
+		return -EINVAL;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
+
+	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
+		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
+		!kfd_dev_is_large_bar(dev)) {
+		pr_err("Alloc host visible vram on small bar is not allowed\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto err_unlock;
+	}
+
+	err = dev->kfd2kgd->alloc_memory_of_gpu(
+		dev->kgd, args->va_addr, args->size,
+		pdd->vm, (struct kgd_mem **) &mem, &offset,
+		flags);
+
+	if (err)
+		goto err_unlock;
+
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
+	if (idr_handle < 0) {
+		err = -EFAULT;
+		goto err_free;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+	args->mmap_offset = offset;
+
+	return 0;
+
+err_free:
+	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+err_unlock:
+	mutex_unlock(&p->mutex);
+	return err;
+}
+
+static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
+	struct kfd_process_device *pdd;
+	void *mem;
+	struct kfd_dev *dev;
+	int ret;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	mem = kfd_process_device_translate_handle(
+		pdd, GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+
+	/* If freeing the buffer failed, leave the handle in place for
+	 * clean-up during process tear-down.
+	 */
+	if (!ret)
+		kfd_process_device_remove_obj_handle(
+			pdd, GET_IDR_HANDLE(args->handle));
+
+err_unlock:
+	mutex_unlock(&p->mutex);
+	return ret;
+}
+
+static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
+	struct kfd_process_device *pdd, *peer_pdd;
+	void *mem;
+	struct kfd_dev *dev, *peer;
+	long err = 0;
+	int i;
+	uint32_t *devices_arr = NULL;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	if (!args->n_devices) {
+		pr_debug("Device IDs array empty\n");
+		return -EINVAL;
+	}
+	if (args->n_success > args->n_devices) {
+		pr_debug("n_success exceeds n_devices\n");
+		return -EINVAL;
+	}
+
+	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
+			      GFP_KERNEL);
+	if (!devices_arr)
+		return -ENOMEM;
+
+	err = copy_from_user(devices_arr,
+			     (void __user *)args->device_ids_array_ptr,
+			     args->n_devices * sizeof(*devices_arr));
+	if (err != 0) {
+		err = -EFAULT;
+		goto copy_from_user_failed;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_failed;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		err = -ENOMEM;
+		goto get_mem_obj_from_handle_failed;
+	}
+
+	for (i = args->n_success; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (!peer) {
+			pr_debug("Getting device by id failed for 0x%x\n",
+				 devices_arr[i]);
+			err = -EINVAL;
+			goto get_mem_obj_from_handle_failed;
+		}
+
+		peer_pdd = kfd_bind_process_to_device(peer, p);
+		if (IS_ERR(peer_pdd)) {
+			err = PTR_ERR(peer_pdd);
+			goto get_mem_obj_from_handle_failed;
+		}
+		err = peer->kfd2kgd->map_memory_to_gpu(
+			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+		if (err) {
+			pr_err("Failed to map to gpu %d/%d\n",
+			       i, args->n_devices);
+			goto map_memory_to_gpu_failed;
+		}
+		args->n_success = i+1;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
+	if (err) {
+		pr_debug("Sync memory failed, wait interrupted by user signal\n");
+		goto sync_memory_failed;
+	}
+
+	/* Flush TLBs after waiting for the page table updates to complete */
+	for (i = 0; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (WARN_ON_ONCE(!peer))
+			continue;
+		peer_pdd = kfd_get_process_device_data(peer, p);
+		if (WARN_ON_ONCE(!peer_pdd))
+			continue;
+		kfd_flush_tlb(peer_pdd);
+	}
+
+	kfree(devices_arr);
+
+	return err;
+
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+map_memory_to_gpu_failed:
+	mutex_unlock(&p->mutex);
+copy_from_user_failed:
+sync_memory_failed:
+	kfree(devices_arr);
+
+	return err;
+}
+
+static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
+	struct kfd_process_device *pdd, *peer_pdd;
+	void *mem;
+	struct kfd_dev *dev, *peer;
+	long err = 0;
+	uint32_t *devices_arr = NULL, i;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	if (!args->n_devices) {
+		pr_debug("Device IDs array empty\n");
+		return -EINVAL;
+	}
+	if (args->n_success > args->n_devices) {
+		pr_debug("n_success exceeds n_devices\n");
+		return -EINVAL;
+	}
+
+	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
+			      GFP_KERNEL);
+	if (!devices_arr)
+		return -ENOMEM;
+
+	err = copy_from_user(devices_arr,
+			     (void __user *)args->device_ids_array_ptr,
+			     args->n_devices * sizeof(*devices_arr));
+	if (err != 0) {
+		err = -EFAULT;
+		goto copy_from_user_failed;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_failed;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		err = -ENOMEM;
+		goto get_mem_obj_from_handle_failed;
+	}
+
+	for (i = args->n_success; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (!peer) {
+			err = -EINVAL;
+			goto get_mem_obj_from_handle_failed;
+		}
+
+		peer_pdd = kfd_get_process_device_data(peer, p);
+		if (!peer_pdd) {
+			err = -ENODEV;
+			goto get_mem_obj_from_handle_failed;
+		}
+		err = dev->kfd2kgd->unmap_memory_to_gpu(
+			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+		if (err) {
+			pr_err("Failed to unmap from gpu %d/%d\n",
+			       i, args->n_devices);
+			goto unmap_memory_from_gpu_failed;
+		}
+		args->n_success = i+1;
+	}
+	kfree(devices_arr);
+
+	mutex_unlock(&p->mutex);
+
+	return 0;
+
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+unmap_memory_from_gpu_failed:
+	mutex_unlock(&p->mutex);
+copy_from_user_failed:
+	kfree(devices_arr);
+	return err;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
 			    .cmd_drv = 0, .name = #ioctl}
@@ -1111,6 +1472,22 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
 			kfd_ioctl_get_process_apertures_new, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
+			kfd_ioctl_acquire_vm, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
+			kfd_ioctl_alloc_memory_of_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
+			kfd_ioctl_free_memory_of_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
+			kfd_ioctl_map_memory_to_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
+			kfd_ioctl_unmap_memory_from_gpu, 0),
+
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index aaed005ce1f5..1542807373d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -509,6 +509,14 @@ struct qcm_process_device {
 int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
 					       struct dma_fence *fence);
 
+/* 8 byte handle containing GPU ID in the most significant 4 bytes and
+ * idr_handle in the least significant 4 bytes
+ */
+#define MAKE_HANDLE(gpu_id, idr_handle) \
+	(((uint64_t)(gpu_id) << 32) + idr_handle)
+#define GET_GPU_ID(handle) (handle >> 32)
+#define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF)
+
 enum kfd_pdd_bound {
 	PDD_UNBOUND = 0,
 	PDD_BOUND,
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index b1f35c8be2cf..237289a72bb7 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -130,6 +130,7 @@ struct tile_config {
 
 /*
  * Allocation flag domains
+ * NOTE: This must match the corresponding definitions in kfd_ioctl.h.
  */
 #define ALLOC_MEM_FLAGS_VRAM		(1 << 0)
 #define ALLOC_MEM_FLAGS_GTT		(1 << 1)
@@ -138,6 +139,7 @@ struct tile_config {
 
 /*
  * Allocation flags attributes/access options.
+ * NOTE: This must match the corresponding definitions in kfd_ioctl.h.
  */
 #define ALLOC_MEM_FLAGS_WRITABLE	(1 << 31)
 #define ALLOC_MEM_FLAGS_EXECUTABLE	(1 << 30)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 52014370e2e5..b4f5073dbac2 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -286,6 +286,86 @@ struct kfd_ioctl_set_trap_handler_args {
 	__u32 pad;
 };
 
+struct kfd_ioctl_acquire_vm_args {
+	__u32 drm_fd;	/* to KFD */
+	__u32 gpu_id;	/* to KFD */
+};
+
+/* Allocation flags: memory types */
+#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM		(1 << 0)
+#define KFD_IOC_ALLOC_MEM_FLAGS_GTT		(1 << 1)
+#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR		(1 << 2)
+#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL	(1 << 3)
+/* Allocation flags: attributes/access options */
+#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE	(1 << 31)
+#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE	(1 << 30)
+#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC		(1 << 29)
+#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE	(1 << 28)
+#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM	(1 << 27)
+#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT	(1 << 26)
+
+/* Allocate memory for later SVM (shared virtual memory) mapping.
+ *
+ * @va_addr:     virtual address of the memory to be allocated
+ *               all later mappings on all GPUs will use this address
+ * @size:        size in bytes
+ * @handle:      buffer handle returned to user mode, used to refer to
+ *               this allocation for mapping, unmapping and freeing
+ * @mmap_offset: for CPU-mapping the allocation by mmapping a render node
+ *               for userptrs this is overloaded to specify the CPU address
+ * @gpu_id:      device identifier
+ * @flags:       memory type and attributes. See KFD_IOC_ALLOC_MEM_FLAGS above
+ */
+struct kfd_ioctl_alloc_memory_of_gpu_args {
+	__u64 va_addr;		/* to KFD */
+	__u64 size;		/* to KFD */
+	__u64 handle;		/* from KFD */
+	__u64 mmap_offset;	/* to KFD (userptr), from KFD (mmap offset) */
+	__u32 gpu_id;		/* to KFD */
+	__u32 flags;
+};
+
+/* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu
+ *
+ * @handle: memory handle returned by alloc
+ */
+struct kfd_ioctl_free_memory_of_gpu_args {
+	__u64 handle;		/* to KFD */
+};
+
+/* Map memory to one or more GPUs
+ *
+ * @handle:                memory handle returned by alloc
+ * @device_ids_array_ptr:  array of gpu_ids (__u32 per device)
+ * @n_devices:             number of devices in the array
+ * @n_success:             number of devices mapped successfully
+ *
+ * @n_success returns information to the caller how many devices from
+ * the start of the array have mapped the buffer successfully. It can
+ * be passed into a subsequent retry call to skip those devices. For
+ * the first call the caller should initialize it to 0.
+ *
+ * If the ioctl completes with return code 0 (success), n_success ==
+ * n_devices.
+ */
+struct kfd_ioctl_map_memory_to_gpu_args {
+	__u64 handle;			/* to KFD */
+	__u64 device_ids_array_ptr;	/* to KFD */
+	__u32 n_devices;		/* to KFD */
+	__u32 n_success;		/* to/from KFD */
+};
+
+/* Unmap memory from one or more GPUs
+ *
+ * same arguments as for mapping
+ */
+struct kfd_ioctl_unmap_memory_from_gpu_args {
+	__u64 handle;			/* to KFD */
+	__u64 device_ids_array_ptr;	/* to KFD */
+	__u32 n_devices;		/* to KFD */
+	__u32 n_success;		/* to/from KFD */
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -353,7 +433,22 @@ struct kfd_ioctl_set_trap_handler_args {
 		AMDKFD_IOWR(0x14,		\
 			struct kfd_ioctl_get_process_apertures_new_args)
 
+#define AMDKFD_IOC_ACQUIRE_VM			\
+		AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args)
+
+#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU		\
+		AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args)
+
+#define AMDKFD_IOC_FREE_MEMORY_OF_GPU		\
+		AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args)
+
+#define AMDKFD_IOC_MAP_MEMORY_TO_GPU		\
+		AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args)
+
+#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU	\
+		AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x15
+#define AMDKFD_COMMAND_END		0x1A
 
 #endif
-- 
cgit v1.2.3-55-g7522


From 0fc8011f89feb8b2c3008583b777d097e1974660 Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Thu, 15 Mar 2018 17:27:52 -0400
Subject: drm/amdkfd: Kmap event page for dGPUs

The events page must be accessible in user mode by the GPU and CPU
as well as in kernel mode by the CPU. On dGPUs user mode virtual
addresses are managed by the Thunk's GPU memory allocation code.
Therefore we can't allocate the memory in kernel mode like we do
on APUs. But KFD still needs to map the memory for kernel access.
To facilitate this, the Thunk provides the buffer handle of the
events page to KFD when creating the first event.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 56 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_events.c  | 31 ++++++++++++++++--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  2 ++
 3 files changed, 87 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index a563ff2ca7dd..ec0b2c0284ec 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -923,6 +923,58 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
 	struct kfd_ioctl_create_event_args *args = data;
 	int err;
 
+	/* For dGPUs the event page is allocated in user mode. The
+	 * handle is passed to KFD with the first call to this IOCTL
+	 * through the event_page_offset field.
+	 */
+	if (args->event_page_offset) {
+		struct kfd_dev *kfd;
+		struct kfd_process_device *pdd;
+		void *mem, *kern_addr;
+		uint64_t size;
+
+		if (p->signal_page) {
+			pr_err("Event page is already set\n");
+			return -EINVAL;
+		}
+
+		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
+		if (!kfd) {
+			pr_err("Getting device by id failed in %s\n", __func__);
+			return -EINVAL;
+		}
+
+		mutex_lock(&p->mutex);
+		pdd = kfd_bind_process_to_device(kfd, p);
+		if (IS_ERR(pdd)) {
+			err = PTR_ERR(pdd);
+			goto out_unlock;
+		}
+
+		mem = kfd_process_device_translate_handle(pdd,
+				GET_IDR_HANDLE(args->event_page_offset));
+		if (!mem) {
+			pr_err("Can't find BO, offset is 0x%llx\n",
+			       args->event_page_offset);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		mutex_unlock(&p->mutex);
+
+		err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
+						mem, &kern_addr, &size);
+		if (err) {
+			pr_err("Failed to map event page to kernel\n");
+			return err;
+		}
+
+		err = kfd_event_page_set(p, kern_addr, size);
+		if (err) {
+			pr_err("Failed to set event page\n");
+			return err;
+		}
+	}
+
 	err = kfd_event_create(filp, p, args->event_type,
 				args->auto_reset != 0, args->node_id,
 				&args->event_id, &args->event_trigger_data,
@@ -930,6 +982,10 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
 				&args->event_slot_index);
 
 	return err;
+
+out_unlock:
+	mutex_unlock(&p->mutex);
+	return err;
 }
 
 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 6fb9c0d46d63..4890a90f1e44 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -52,6 +52,7 @@ struct kfd_event_waiter {
 struct kfd_signal_page {
 	uint64_t *kernel_address;
 	uint64_t __user *user_address;
+	bool need_to_free_pages;
 };
 
 
@@ -79,6 +80,7 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
 	       KFD_SIGNAL_EVENT_LIMIT * 8);
 
 	page->kernel_address = backing_store;
+	page->need_to_free_pages = true;
 	pr_debug("Allocated new event signal page at %p, for process %p\n",
 			page, p);
 
@@ -269,8 +271,9 @@ static void shutdown_signal_page(struct kfd_process *p)
 	struct kfd_signal_page *page = p->signal_page;
 
 	if (page) {
-		free_pages((unsigned long)page->kernel_address,
-				get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
+		if (page->need_to_free_pages)
+			free_pages((unsigned long)page->kernel_address,
+				   get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
 		kfree(page);
 	}
 }
@@ -292,6 +295,30 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
 	return ev->type == KFD_EVENT_TYPE_SIGNAL;
 }
 
+int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
+		       uint64_t size)
+{
+	struct kfd_signal_page *page;
+
+	if (p->signal_page)
+		return -EBUSY;
+
+	page = kzalloc(sizeof(*page), GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	/* Initialize all events to unsignaled */
+	memset(kernel_address, (uint8_t) UNSIGNALED_EVENT_SLOT,
+	       KFD_SIGNAL_EVENT_LIMIT * 8);
+
+	page->kernel_address = kernel_address;
+
+	p->signal_page = page;
+	p->signal_mapped_size = size;
+
+	return 0;
+}
+
 int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 		     uint32_t event_type, bool auto_reset, uint32_t node_id,
 		     uint32_t *event_id, uint32_t *event_trigger_data,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 1542807373d7..aa9386356587 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -866,6 +866,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
 void kfd_signal_hw_exception_event(unsigned int pasid);
 int kfd_set_event(struct kfd_process *p, uint32_t event_id);
 int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
+int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
+		       uint64_t size);
 int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 		     uint32_t event_type, bool auto_reset, uint32_t node_id,
 		     uint32_t *event_id, uint32_t *event_trigger_data,
-- 
cgit v1.2.3-55-g7522


From 374200b154ae48e8f011fb74dab21f80459f9e47 Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Thu, 15 Mar 2018 17:27:53 -0400
Subject: drm/amdkfd: Add module option for testing large-BAR functionality

Simulate large-BAR system by exporting only visible memory. This
limits the amount of available VRAM to the size of the BAR, but
enables CPU access to VRAM.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 5 +++++
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c    | 3 +++
 drivers/gpu/drm/amd/amdkfd/kfd_module.c  | 5 +++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    | 6 ++++++
 4 files changed, 19 insertions(+)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index ec0b2c0284ec..cd679cf1fd30 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1151,6 +1151,11 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
 {
 	struct kfd_local_mem_info mem_info;
 
+	if (debug_largebar) {
+		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
+		return true;
+	}
+
 	if (dev->device_info->needs_iommu_device)
 		return false;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index d85112224f1d..4f126ef6139b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1117,6 +1117,9 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
 			sub_type_hdr->length);
 
+	if (debug_largebar)
+		local_mem_info.local_mem_size_private = 0;
+
 	if (local_mem_info.local_mem_size_private == 0)
 		ret = kfd_fill_gpu_memory_affinity(&avail_size,
 				kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 65574c6a10b3..b0acb0603883 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -71,6 +71,11 @@ module_param(send_sigterm, int, 0444);
 MODULE_PARM_DESC(send_sigterm,
 	"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
 
+int debug_largebar;
+module_param(debug_largebar, int, 0444);
+MODULE_PARM_DESC(debug_largebar,
+	"Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
+
 int ignore_crat;
 module_param(ignore_crat, int, 0444);
 MODULE_PARM_DESC(ignore_crat,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index aa9386356587..db27f9f13696 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -104,6 +104,12 @@ extern int cwsr_enable;
  */
 extern int send_sigterm;
 
+/*
+ * This kernel module is used to simulate large bar machine on non-large bar
+ * enabled machines.
+ */
+extern int debug_largebar;
+
 /*
  * Ignore CRAT table during KFD initialization, can be used to work around
  * broken CRAT tables on some AMD systems
-- 
cgit v1.2.3-55-g7522


From 810955ba712fc5c517b5e999fd69bfd20251effb Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Fri, 23 Mar 2018 15:30:35 -0400
Subject: drm/amdgpu: Fix acquiring VM on large-BAR systems

On large-BAR systems the VM page tables for compute are accessed by
the CPU. Always allow CPU access to the page directory so that it can
be used later by the CPU when a VM is converted to a compute VM.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index cbf1421c1963..da55a78d7380 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2406,8 +2406,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (vm->use_cpu_for_update)
 		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 	else
-		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-				AMDGPU_GEM_CREATE_SHADOW);
+		flags |= AMDGPU_GEM_CREATE_SHADOW;
 
 	size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
 	r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
-- 
cgit v1.2.3-55-g7522


From 1679ae8f8f4148766423066aeb3dbb0a985a373a Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Fri, 23 Mar 2018 15:30:36 -0400
Subject: drm/amdkfd: Use ordered workqueue to restore processes

Restoring multiple processes concurrently can lead to live-locks
where each process prevents the other from validating all its BOs.

v2: fix duplicate check of same variable

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_module.c  |  6 +++++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 30 ++++++++++++++++++++++++++----
 3 files changed, 32 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index b0acb0603883..e0c07d24d251 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -133,7 +133,9 @@ static int __init kfd_module_init(void)
 	if (err < 0)
 		goto err_topology;
 
-	kfd_process_create_wq();
+	err = kfd_process_create_wq();
+	if (err < 0)
+		goto err_create_wq;
 
 	kfd_debugfs_init();
 
@@ -143,6 +145,8 @@ static int __init kfd_module_init(void)
 
 	return 0;
 
+err_create_wq:
+	kfd_topology_shutdown();
 err_topology:
 	kfd_chardev_exit();
 err_ioctl:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index db27f9f13696..96a9cc0f02c9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -674,7 +674,7 @@ struct amdkfd_ioctl_desc {
 	const char *name;
 };
 
-void kfd_process_create_wq(void);
+int kfd_process_create_wq(void);
 void kfd_process_destroy_wq(void);
 struct kfd_process *kfd_create_process(struct file *filep);
 struct kfd_process *kfd_get_process(const struct task_struct *);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 45ef2d03a975..1711ad0642f7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -48,8 +48,17 @@ static DEFINE_MUTEX(kfd_processes_mutex);
 
 DEFINE_SRCU(kfd_processes_srcu);
 
+/* For process termination handling */
 static struct workqueue_struct *kfd_process_wq;
 
+/* Ordered, single-threaded workqueue for restoring evicted
+ * processes. Restoring multiple processes concurrently under memory
+ * pressure can lead to processes blocking each other from validating
+ * their BOs and result in a live-lock situation where processes
+ * remain evicted indefinitely.
+ */
+static struct workqueue_struct *kfd_restore_wq;
+
 static struct kfd_process *find_process(const struct task_struct *thread);
 static void kfd_process_ref_release(struct kref *ref);
 static struct kfd_process *create_process(const struct task_struct *thread,
@@ -59,10 +68,19 @@ static void evict_process_worker(struct work_struct *work);
 static void restore_process_worker(struct work_struct *work);
 
 
-void kfd_process_create_wq(void)
+int kfd_process_create_wq(void)
 {
 	if (!kfd_process_wq)
 		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
+	if (!kfd_restore_wq)
+		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
+
+	if (!kfd_process_wq || !kfd_restore_wq) {
+		kfd_process_destroy_wq();
+		return -ENOMEM;
+	}
+
+	return 0;
 }
 
 void kfd_process_destroy_wq(void)
@@ -71,6 +89,10 @@ void kfd_process_destroy_wq(void)
 		destroy_workqueue(kfd_process_wq);
 		kfd_process_wq = NULL;
 	}
+	if (kfd_restore_wq) {
+		destroy_workqueue(kfd_restore_wq);
+		kfd_restore_wq = NULL;
+	}
 }
 
 static void kfd_process_free_gpuvm(struct kgd_mem *mem,
@@ -869,7 +891,7 @@ static void evict_process_worker(struct work_struct *work)
 		dma_fence_signal(p->ef);
 		dma_fence_put(p->ef);
 		p->ef = NULL;
-		schedule_delayed_work(&p->restore_work,
+		queue_delayed_work(kfd_restore_wq, &p->restore_work,
 				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
 
 		pr_debug("Finished evicting pasid %d\n", p->pasid);
@@ -918,7 +940,7 @@ static void restore_process_worker(struct work_struct *work)
 	if (ret) {
 		pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
 			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
-		ret = schedule_delayed_work(&p->restore_work,
+		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
 				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
 		WARN(!ret, "reschedule restore work failed\n");
 		return;
@@ -957,7 +979,7 @@ int kfd_resume_all_processes(void)
 	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
 
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
-		if (!schedule_delayed_work(&p->restore_work, 0)) {
+		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
 			pr_err("Restore process %d failed during resume\n",
 			       p->pasid);
 			ret = -EFAULT;
-- 
cgit v1.2.3-55-g7522