author      Oded Gabbay          2019-02-15 23:39:21 +0100
committer   Greg Kroah-Hartman   2019-02-18 09:46:45 +0100
commit      eff6f4a0e70b7bcf4674f471a768860a74e638a6
tree        e39e2436c9022aa993d935e37507dece50a5e8e4
parent      habanalabs: add device reset support
habanalabs: add command submission module
This patch adds the main flow for the user to submit work to the device.

Each work is described by a command submission object (CS). The CS contains
3 arrays of command buffers: One for execution, and two for context-switch
(store and restore).

For each CB, the user specifies on which queue to put that CB. In case of an
internal queue, the entry doesn't contain a pointer to the CB but the address
in the on-chip memory that the CB resides at.

The driver parses some of the CBs to enforce security restrictions.

The user receives a sequence number that represents the CS object. The user
can then query the driver regarding the status of the CS, using that sequence
number. In case the CS doesn't finish before the timeout expires, the driver
will perform a soft-reset of the device.

Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
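As a rough illustration of the user flow described above, the sketch below shows how a user-space client might build a CS from a single command buffer and then wait on the returned sequence number. It is not part of this patch; the uapi names it uses (struct hl_cs_chunk, union hl_cs_args, union hl_wait_cs_args, HL_IOCTL_CS, HL_IOCTL_WAIT_CS) are assumed from the uapi header introduced elsewhere in this series, and the exact field names should be checked against include/uapi/misc/habanalabs.h.

	/* Illustrative sketch only: the uapi field names are assumptions */
	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <misc/habanalabs.h>	/* uapi header added in this series */

	static int submit_cb_and_wait(int fd, __u64 cb_handle, __u32 queue_index,
					__u32 cb_size)
	{
		struct hl_cs_chunk chunk;
		union hl_cs_args cs_args;
		union hl_wait_cs_args wait_args;
		int rc;

		memset(&chunk, 0, sizeof(chunk));
		chunk.cb_handle = cb_handle;	/* CB previously allocated by the driver */
		chunk.queue_index = queue_index;
		chunk.cb_size = cb_size;

		memset(&cs_args, 0, sizeof(cs_args));
		cs_args.in.chunks_execute = (__u64) (uintptr_t) &chunk;
		cs_args.in.num_chunks_execute = 1;

		rc = ioctl(fd, HL_IOCTL_CS, &cs_args);
		if (rc)
			return rc;

		/* The driver hands back a sequence number identifying this CS */
		memset(&wait_args, 0, sizeof(wait_args));
		wait_args.in.seq = cs_args.out.seq;
		wait_args.in.timeout_us = 1000000;

		return ioctl(fd, HL_IOCTL_WAIT_CS, &wait_args);
	}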
Diffstat (limited to 'drivers/misc/habanalabs/hw_queue.c')
-rw-r--r--   drivers/misc/habanalabs/hw_queue.c | 232
 1 file changed, 232 insertions(+), 0 deletions(-)
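One detail worth keeping in mind before reading the diff: the producer index (pi) and consumer index (ci) used by the hunks below deliberately wrap at twice the queue length (note the masks of the form (q->int_queue_len << 1) - 1), so that a completely full queue can be distinguished from an empty one. The following is a standalone sketch of that accounting, not driver code, with illustrative names and a made-up queue length, mirroring the queue_free_slots() logic visible in the first hunk.

	/* Standalone illustration of the 2x-wrap pi/ci accounting; not part of the patch */
	#include <stdio.h>
	#include <stdlib.h>

	#define QUEUE_LEN 16	/* illustrative power-of-2 queue length */

	/* pi and ci run over [0, 2 * QUEUE_LEN), as in queue_free_slots() */
	static int free_slots(int pi, int ci)
	{
		int delta = pi - ci;

		/* delta == QUEUE_LEN means full, delta == 0 means empty */
		if (delta >= 0)
			return QUEUE_LEN - delta;
		else
			return abs(delta) - QUEUE_LEN;
	}

	int main(void)
	{
		int pi = 0, ci = 0, i;

		/* Produce QUEUE_LEN entries: the queue is now full, 0 free slots */
		for (i = 0; i < QUEUE_LEN; i++)
			pi = (pi + 1) & ((QUEUE_LEN << 1) - 1);
		printf("free after fill: %d\n", free_slots(pi, ci));

		/* Consume half of them: QUEUE_LEN / 2 slots are free again */
		for (i = 0; i < QUEUE_LEN / 2; i++)
			ci = (ci + 1) & ((QUEUE_LEN << 1) - 1);
		printf("free after partial drain: %d\n", free_slots(pi, ci));

		return 0;
	}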
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index 2ec43f36cdb8..68dfda59a875 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -34,6 +34,29 @@ static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
return (abs(delta) - queue_len);
}
+void hl_int_hw_queue_update_ci(struct hl_cs *cs)
+{
+ struct hl_device *hdev = cs->ctx->hdev;
+ struct hl_hw_queue *q;
+ int i;
+
+ hdev->asic_funcs->hw_queues_lock(hdev);
+
+ if (hdev->disabled)
+ goto out;
+
+ q = &hdev->kernel_queues[0];
+ for (i = 0 ; i < HL_MAX_QUEUES ; i++, q++) {
+ if (q->queue_type == QUEUE_TYPE_INT) {
+ q->ci += cs->jobs_in_queue_cnt[i];
+ q->ci &= ((q->int_queue_len << 1) - 1);
+ }
+ }
+
+out:
+ hdev->asic_funcs->hw_queues_unlock(hdev);
+}
+
/*
* ext_queue_submit_bd - Submit a buffer descriptor to an external queue
*
@@ -120,6 +143,37 @@ static int ext_queue_sanity_checks(struct hl_device *hdev,
}
/*
+ * int_queue_sanity_checks - perform some sanity checks on an internal queue
+ *
+ * @hdev : pointer to hl_device structure
+ * @q : pointer to hl_hw_queue structure
+ * @num_of_entries : how many entries to check for space
+ *
+ * H/W queues spinlock should be taken before calling this function
+ *
+ * Perform the following:
+ * - Make sure we have enough space in the h/w queue
+ *
+ */
+static int int_queue_sanity_checks(struct hl_device *hdev,
+ struct hl_hw_queue *q,
+ int num_of_entries)
+{
+ int free_slots_cnt;
+
+ /* Check we have enough space in the queue */
+ free_slots_cnt = queue_free_slots(q, q->int_queue_len);
+
+ if (free_slots_cnt < num_of_entries) {
+ dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
+ q->hw_queue_id, num_of_entries);
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+/*
* hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
*
* @hdev: pointer to hl_device structure
@@ -166,6 +220,184 @@ out:
}
/*
+ * ext_hw_queue_schedule_job - submit a JOB to an external queue
+ *
+ * @job: pointer to the job that needs to be submitted to the queue
+ *
+ * This function must be called when the scheduler mutex is taken
+ *
+ */
+static void ext_hw_queue_schedule_job(struct hl_cs_job *job)
+{
+ struct hl_device *hdev = job->cs->ctx->hdev;
+ struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
+ struct hl_cq_entry cq_pkt;
+ struct hl_cq *cq;
+ u64 cq_addr;
+ struct hl_cb *cb;
+ u32 ctl;
+ u32 len;
+ u64 ptr;
+
+ /*
+ * Update the JOB ID inside the BD CTL so the device would know what
+ * to write in the completion queue
+ */
+ ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK);
+
+ cb = job->patched_cb;
+ len = job->job_cb_size;
+ ptr = cb->bus_address;
+
+ cq_pkt.data = (q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
+ & CQ_ENTRY_SHADOW_INDEX_MASK;
+ cq_pkt.data |= 1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT;
+ cq_pkt.data |= 1 << CQ_ENTRY_READY_SHIFT;
+
+ /*
+ * No need to protect pi_offset because scheduling to the
+ * H/W queues is done under the scheduler mutex
+ *
+ * No need to check if CQ is full because it was already
+ * checked in hl_queue_sanity_checks
+ */
+ cq = &hdev->completion_queue[q->hw_queue_id];
+ cq_addr = cq->bus_address +
+ hdev->asic_prop.host_phys_base_address;
+ cq_addr += cq->pi * sizeof(struct hl_cq_entry);
+
+ hdev->asic_funcs->add_end_of_cb_packets(cb->kernel_address, len,
+ cq_addr, cq_pkt.data, q->hw_queue_id);
+
+ q->shadow_queue[hl_pi_2_offset(q->pi)] = job;
+
+ cq->pi = hl_cq_inc_ptr(cq->pi);
+
+ ext_queue_submit_bd(hdev, q, ctl, len, ptr);
+}
+
+/*
+ * int_hw_queue_schedule_job - submit a JOB to an internal queue
+ *
+ * @job: pointer to the job that needs to be submitted to the queue
+ *
+ * This function must be called when the scheduler mutex is taken
+ *
+ */
+static void int_hw_queue_schedule_job(struct hl_cs_job *job)
+{
+ struct hl_device *hdev = job->cs->ctx->hdev;
+ struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
+ struct hl_bd bd;
+ u64 *pi, *pbd = (u64 *) &bd;
+
+ bd.ctl = 0;
+ bd.len = job->job_cb_size;
+ bd.ptr = (u64) (uintptr_t) job->user_cb;
+
+ pi = (u64 *) (uintptr_t) (q->kernel_address +
+ ((q->pi & (q->int_queue_len - 1)) * sizeof(bd)));
+
+ pi[0] = pbd[0];
+ pi[1] = pbd[1];
+
+ q->pi++;
+ q->pi &= ((q->int_queue_len << 1) - 1);
+
+ /* Flush PQ entry write. Relevant only for specific ASICs */
+ hdev->asic_funcs->flush_pq_write(hdev, pi, pbd[0]);
+
+ hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
+}
+
+/*
+ * hl_hw_queue_schedule_cs - schedule a command submission
+ *
+ * @cs : pointer to the CS
+ *
+ */
+int hl_hw_queue_schedule_cs(struct hl_cs *cs)
+{
+ struct hl_device *hdev = cs->ctx->hdev;
+ struct hl_cs_job *job, *tmp;
+ struct hl_hw_queue *q;
+ int rc = 0, i, cq_cnt;
+
+ hdev->asic_funcs->hw_queues_lock(hdev);
+
+ if (hl_device_disabled_or_in_reset(hdev)) {
+ dev_err(hdev->dev,
+ "device is disabled or in reset, CS rejected!\n");
+ rc = -EPERM;
+ goto out;
+ }
+
+ q = &hdev->kernel_queues[0];
+ /* This loop assumes all external queues are consecutive */
+ for (i = 0, cq_cnt = 0 ; i < HL_MAX_QUEUES ; i++, q++) {
+ if (q->queue_type == QUEUE_TYPE_EXT) {
+ if (cs->jobs_in_queue_cnt[i]) {
+ rc = ext_queue_sanity_checks(hdev, q,
+ cs->jobs_in_queue_cnt[i], true);
+ if (rc)
+ goto unroll_cq_resv;
+ cq_cnt++;
+ }
+ } else if (q->queue_type == QUEUE_TYPE_INT) {
+ if (cs->jobs_in_queue_cnt[i]) {
+ rc = int_queue_sanity_checks(hdev, q,
+ cs->jobs_in_queue_cnt[i]);
+ if (rc)
+ goto unroll_cq_resv;
+ }
+ }
+ }
+
+ spin_lock(&hdev->hw_queues_mirror_lock);
+ list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list);
+
+ /* Queue TDR if the CS is the first entry and if timeout is wanted */
+ if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
+ (list_first_entry(&hdev->hw_queues_mirror_list,
+ struct hl_cs, mirror_node) == cs)) {
+ cs->tdr_active = true;
+ schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
+ spin_unlock(&hdev->hw_queues_mirror_lock);
+ } else {
+ spin_unlock(&hdev->hw_queues_mirror_lock);
+ }
+
+ list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) {
+ if (job->ext_queue)
+ ext_hw_queue_schedule_job(job);
+ else
+ int_hw_queue_schedule_job(job);
+ }
+
+ cs->submitted = true;
+
+ goto out;
+
+unroll_cq_resv:
+ /* This loop assumes all external queues are consecutive */
+ q = &hdev->kernel_queues[0];
+ for (i = 0 ; (i < HL_MAX_QUEUES) && (cq_cnt > 0) ; i++, q++) {
+ if ((q->queue_type == QUEUE_TYPE_EXT) &&
+ (cs->jobs_in_queue_cnt[i])) {
+ atomic_t *free_slots =
+ &hdev->completion_queue[i].free_slots_cnt;
+ atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
+ cq_cnt--;
+ }
+ }
+
+out:
+ hdev->asic_funcs->hw_queues_unlock(hdev);
+
+ return rc;
+}
+
+/*
* hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
*
* @hdev: pointer to hl_device structure