diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 21 |
1 files changed, 20 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 44fc2038770e..6b59eab39fbe 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -61,6 +61,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, static void deallocate_sdma_queue(struct device_queue_manager *dqm, unsigned int sdma_queue_id); +static void kfd_process_hw_exception(struct work_struct *work); + static inline enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) { @@ -1010,6 +1012,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm) dqm->active_runlist = false; dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; + INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); + return 0; } @@ -1042,6 +1046,8 @@ static int start_cpsch(struct device_queue_manager *dqm) init_interrupts(dqm); dqm_lock(dqm); + /* clear hang status when driver try to start the hw scheduler */ + dqm->is_hws_hang = false; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); dqm_unlock(dqm); @@ -1255,6 +1261,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, { int retval = 0; + if (dqm->is_hws_hang) + return -EIO; if (!dqm->active_runlist) return retval; @@ -1293,9 +1301,13 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, { int retval; + if (dqm->is_hws_hang) + return -EIO; retval = unmap_queues_cpsch(dqm, filter, filter_param); if (retval) { pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); + dqm->is_hws_hang = true; + schedule_work(&dqm->hw_exception_work); return retval; } @@ -1543,7 +1555,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, } retval = execute_queues_cpsch(dqm, filter, 0); - if (retval || qpd->reset_wavefronts) { + if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); qpd->reset_wavefronts = false; @@ -1701,6 +1713,13 @@ int kfd_process_vm_fault(struct device_queue_manager *dqm, return ret; } +static void kfd_process_hw_exception(struct work_struct *work) +{ + struct device_queue_manager *dqm = container_of(work, + struct device_queue_manager, hw_exception_work); + dqm->dev->kfd2kgd->gpu_recover(dqm->dev->kgd); +} + #if defined(CONFIG_DEBUG_FS) static void seq_reg_dump(struct seq_file *m, |