Yichen Wang <yichen.w...@bytedance.com> writes: > From: Hao Xiang <hao.xi...@linux.dev> > > * Create a dedicated thread for DSA task completion. > * DSA completion thread runs a loop and poll for completed tasks. > * Start and stop DSA completion thread during DSA device start stop. > > User space application can directly submit task to Intel DSA > accelerator by writing to DSA's device memory (mapped in user space). > Once a task is submitted, the device starts processing it and write > the completion status back to the task. A user space application can > poll the task's completion status to check for completion. This change > uses a dedicated thread to perform DSA task completion checking. > > Signed-off-by: Hao Xiang <hao.xi...@linux.dev> > Signed-off-by: Yichen Wang <yichen.w...@bytedance.com> > --- > include/qemu/dsa.h | 1 + > util/dsa.c | 274 ++++++++++++++++++++++++++++++++++++++++++++- > 2 files changed, 274 insertions(+), 1 deletion(-) > > diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h > index 04ee8924ab..d24567f0be 100644 > --- a/include/qemu/dsa.h > +++ b/include/qemu/dsa.h > @@ -69,6 +69,7 @@ typedef struct QemuDsaBatchTask { > QemuDsaTaskType task_type; > QemuDsaTaskStatus status; > int batch_size; > + bool *results; > QSIMPLEQ_ENTRY(QemuDsaBatchTask) entry; > } QemuDsaBatchTask; > > diff --git a/util/dsa.c b/util/dsa.c > index b55fa599f0..c3ca71df86 100644 > --- a/util/dsa.c > +++ b/util/dsa.c > @@ -33,9 +33,20 @@ > #define DSA_WQ_PORTAL_SIZE 4096 > #define DSA_WQ_DEPTH 128 > #define MAX_DSA_DEVICES 16 > +#define DSA_COMPLETION_THREAD "qemu_dsa_completion" > + > +typedef struct { > + bool stopping; > + bool running; > + QemuThread thread; > + int thread_id; > + QemuSemaphore sem_init_done; > + QemuDsaDeviceGroup *group; > +} QemuDsaCompletionThread; > > uint32_t max_retry_count; > static QemuDsaDeviceGroup dsa_group; > +static QemuDsaCompletionThread completion_thread; > > > /** > @@ -403,6 +414,265 @@ submit_batch_wi_async(QemuDsaBatchTask 
*batch_task) > return dsa_task_enqueue(device_group, batch_task); > } > > +/** > + * @brief Poll for the DSA work item completion. > + * > + * @param completion A pointer to the DSA work item completion record. > + * @param opcode The DSA opcode. > + * > + * @return Zero if successful, non-zero otherwise. > + */ > +static int > +poll_completion(struct dsa_completion_record *completion, > + enum dsa_opcode opcode) > +{ > + uint8_t status; > + uint64_t retry = 0; > + > + while (true) { > + /* The DSA operation completes successfully or fails. */ > + status = completion->status; > + if (status == DSA_COMP_SUCCESS || > + status == DSA_COMP_PAGE_FAULT_NOBOF || > + status == DSA_COMP_BATCH_PAGE_FAULT || > + status == DSA_COMP_BATCH_FAIL) { > + break; > + } else if (status != DSA_COMP_NONE) { > + error_report("DSA opcode %d failed with status = %d.", > + opcode, status); > + return 1; > + } > + retry++; > + if (retry > max_retry_count) { > + error_report("DSA wait for completion retry %lu times.", retry); > + return 1; > + } > + _mm_pause(); > + } > + > + return 0; > +} > + > +/** > + * @brief Complete a single DSA task in the batch task. > + * > + * @param task A pointer to the batch task structure. > + * > + * @return Zero if successful, otherwise non-zero. > + */ > +static int > +poll_task_completion(QemuDsaBatchTask *task) > +{ > + assert(task->task_type == QEMU_DSA_TASK); > + > + struct dsa_completion_record *completion = &task->completions[0]; > + uint8_t status; > + int ret; > + > + ret = poll_completion(completion, task->descriptors[0].opcode); > + if (ret != 0) { > + goto exit; > + } > + > + status = completion->status; > + if (status == DSA_COMP_SUCCESS) { > + task->results[0] = (completion->result == 0); > + goto exit; > + } > + > + assert(status == DSA_COMP_PAGE_FAULT_NOBOF); > + > +exit: > + return ret; > +} > + > +/** > + * @brief Poll a batch task status until it completes. If DSA task doesn't > + * complete properly, use CPU to complete the task. 
> + * > + * @param batch_task A pointer to the DSA batch task. > + * > + * @return Zero if successful, otherwise non-zero. > + */ > +static int > +poll_batch_task_completion(QemuDsaBatchTask *batch_task) > +{ > + struct dsa_completion_record *batch_completion = > + &batch_task->batch_completion; > + struct dsa_completion_record *completion; > + uint8_t batch_status; > + uint8_t status; > + bool *results = batch_task->results; > + uint32_t count = batch_task->batch_descriptor.desc_count; > + int ret; > + > + ret = poll_completion(batch_completion, > + batch_task->batch_descriptor.opcode); > + if (ret != 0) { > + goto exit; > + } > + > + batch_status = batch_completion->status; > + > + if (batch_status == DSA_COMP_SUCCESS) { > + if (batch_completion->bytes_completed == count) { > + /* > + * Let's skip checking for each descriptor's completion status > + * if the batch descriptor says all succeeded. > + */ > + for (int i = 0; i < count; i++) { > + assert(batch_task->completions[i].status == > DSA_COMP_SUCCESS); > + results[i] = (batch_task->completions[i].result == 0); > + } > + goto exit; > + } > + } else { > + assert(batch_status == DSA_COMP_BATCH_FAIL || > + batch_status == DSA_COMP_BATCH_PAGE_FAULT); > + } > + > + for (int i = 0; i < count; i++) { > + > + completion = &batch_task->completions[i]; > + status = completion->status; > + > + if (status == DSA_COMP_SUCCESS) { > + results[i] = (completion->result == 0); > + continue; > + } > + > + assert(status == DSA_COMP_PAGE_FAULT_NOBOF); > + > + if (status != DSA_COMP_PAGE_FAULT_NOBOF) { > + error_report("Unexpected DSA completion status = %u.", status);
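This error path is unreachable: the assert immediately above already guarantees status == DSA_COMP_PAGE_FAULT_NOBOF, so the following "status != DSA_COMP_PAGE_FAULT_NOBOF" check can never be true. Keep either the assert or the explicit check, not both. With that fixed: Reviewed-by: Fabiano Rosas <faro...@suse.de>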