From f67432a2019caf05b57a146bf45c1024a5cb608e Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 29 Aug 2018 21:57:26 -0400 Subject: jobs: change start callback to run callback Presently we codify the entry point for a job as the "start" callback, but a more apt name would be "run" to clarify the idea that when this function returns we consider the job to have "finished," except for any cleanup which occurs in separate callbacks later. As part of this clarification, change the signature to include an error object and a return code. The error ptr is not yet used, and the return code, while captured, will be overwritten by actions in the job_completed function. Signed-off-by: John Snow Reviewed-by: Max Reitz Message-id: 20180830015734.19765-2-jsnow@redhat.com Reviewed-by: Jeff Cody Signed-off-by: Max Reitz --- include/qemu/job.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/qemu/job.h b/include/qemu/job.h index 18c9223e31..9cf463d228 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -169,7 +169,7 @@ struct JobDriver { JobType job_type; /** Mandatory: Entrypoint for the Coroutine. */ - CoroutineEntry *start; + int coroutine_fn (*run)(Job *job, Error **errp); /** * If the callback is not NULL, it will be invoked when the job transitions -- cgit v1.2.3-55-g7522 From 3d1f8b07a4c241f81949eff507d9f3a8fd73b87b Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 29 Aug 2018 21:57:27 -0400 Subject: jobs: canonize Error object Jobs presently use both an Error object in the case of the create job, and char strings in the case of generic errors elsewhere. Unify the two paths as just j->err, and remove the extra argument from job_completed. The integer error code for job_completed is kept for now, to be removed shortly in a separate patch.
Signed-off-by: John Snow Message-id: 20180830015734.19765-3-jsnow@redhat.com [mreitz: Dropped a superfluous g_strdup()] Reviewed-by: Eric Blake Signed-off-by: Max Reitz --- block/backup.c | 2 +- block/commit.c | 2 +- block/create.c | 5 ++--- block/mirror.c | 2 +- block/stream.c | 2 +- include/qemu/job.h | 14 ++++++++------ job-qmp.c | 5 +++-- job.c | 18 ++++++------------ tests/test-bdrv-drain.c | 2 +- tests/test-blockjob-txn.c | 2 +- tests/test-blockjob.c | 2 +- 11 files changed, 26 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/block/backup.c b/block/backup.c index 5d47781840..1e965d54e5 100644 --- a/block/backup.c +++ b/block/backup.c @@ -388,7 +388,7 @@ static void backup_complete(Job *job, void *opaque) { BackupCompleteData *data = opaque; - job_completed(job, data->ret, NULL); + job_completed(job, data->ret); g_free(data); } diff --git a/block/commit.c b/block/commit.c index a0ea86ff64..4a17bb73ec 100644 --- a/block/commit.c +++ b/block/commit.c @@ -117,7 +117,7 @@ static void commit_complete(Job *job, void *opaque) * bdrv_set_backing_hd() to fail. 
*/ block_job_remove_all_bdrv(bjob); - job_completed(job, ret, NULL); + job_completed(job, ret); g_free(data); /* If bdrv_drop_intermediate() didn't already do that, remove the commit diff --git a/block/create.c b/block/create.c index 04733c3618..26a385c6c7 100644 --- a/block/create.c +++ b/block/create.c @@ -35,14 +35,13 @@ typedef struct BlockdevCreateJob { BlockDriver *drv; BlockdevCreateOptions *opts; int ret; - Error *err; } BlockdevCreateJob; static void blockdev_create_complete(Job *job, void *opaque) { BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common); - job_completed(job, s->ret, s->err); + job_completed(job, s->ret); } static int coroutine_fn blockdev_create_run(Job *job, Error **errp) @@ -50,7 +49,7 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp) BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common); job_progress_set_remaining(&s->common, 1); - s->ret = s->drv->bdrv_co_create(s->opts, &s->err); + s->ret = s->drv->bdrv_co_create(s->opts, errp); job_progress_update(&s->common, 1); qapi_free_BlockdevCreateOptions(s->opts); diff --git a/block/mirror.c b/block/mirror.c index 691763db41..be5dc6b7b0 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -710,7 +710,7 @@ static void mirror_exit(Job *job, void *opaque) blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort); bs_opaque->job = NULL; - job_completed(job, data->ret, NULL); + job_completed(job, data->ret); g_free(data); bdrv_drained_end(src); diff --git a/block/stream.c b/block/stream.c index b4b987df7e..26a775386b 100644 --- a/block/stream.c +++ b/block/stream.c @@ -93,7 +93,7 @@ out: } g_free(s->backing_file_str); - job_completed(job, data->ret, NULL); + job_completed(job, data->ret); g_free(data); } diff --git a/include/qemu/job.h b/include/qemu/job.h index 9cf463d228..e0e99870a1 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -124,12 +124,16 @@ typedef struct Job { /** Estimated progress_current value at the completion of the job */ 
int64_t progress_total; - /** Error string for a failed job (NULL if, and only if, job->ret == 0) */ - char *error; - /** ret code passed to job_completed. */ int ret; + /** + * Error object for a failed job. + * If job->ret is nonzero and an error object was not set, it will be set + * to strerror(-job->ret) during job_completed. + */ + Error *err; + /** The completion function that will be called when the job completes. */ BlockCompletionFunc *cb; @@ -484,15 +488,13 @@ void job_transition_to_ready(Job *job); /** * @job: The job being completed. * @ret: The status code. - * @error: The error message for a failing job (only with @ret < 0). If @ret is - * negative, but NULL is given for @error, strerror() is used. * * Marks @job as completed. If @ret is non-zero, the job transaction it is part * of is aborted. If @ret is zero, the job moves into the WAITING state. If it * is the last job to complete in its transaction, all jobs in the transaction * move from WAITING to PENDING. */ -void job_completed(Job *job, int ret, Error *error); +void job_completed(Job *job, int ret); /** Asynchronously complete the specified @job. */ void job_complete(Job *job, Error **errp); diff --git a/job-qmp.c b/job-qmp.c index 410775df61..a969b2bbf0 100644 --- a/job-qmp.c +++ b/job-qmp.c @@ -146,8 +146,9 @@ static JobInfo *job_query_single(Job *job, Error **errp) .status = job->status, .current_progress = job->progress_current, .total_progress = job->progress_total, - .has_error = !!job->error, - .error = g_strdup(job->error), + .has_error = !!job->err, + .error = job->err ? 
\ + g_strdup(error_get_pretty(job->err)) : NULL, }; return info; diff --git a/job.c b/job.c index 76988f6678..7b3721d2c7 100644 --- a/job.c +++ b/job.c @@ -369,7 +369,7 @@ void job_unref(Job *job) QLIST_REMOVE(job, job_list); - g_free(job->error); + error_free(job->err); g_free(job->id); g_free(job); } @@ -546,7 +546,7 @@ static void coroutine_fn job_co_entry(void *opaque) assert(job && job->driver && job->driver->run); job_pause_point(job); - job->ret = job->driver->run(job, NULL); + job->ret = job->driver->run(job, &job->err); } @@ -666,8 +666,8 @@ static void job_update_rc(Job *job) job->ret = -ECANCELED; } if (job->ret) { - if (!job->error) { - job->error = g_strdup(strerror(-job->ret)); + if (!job->err) { + error_setg(&job->err, "%s", strerror(-job->ret)); } job_state_transition(job, JOB_STATUS_ABORTING); } @@ -865,17 +865,11 @@ static void job_completed_txn_success(Job *job) } } -void job_completed(Job *job, int ret, Error *error) +void job_completed(Job *job, int ret) { assert(job && job->txn && !job_is_completed(job)); job->ret = ret; - if (error) { - assert(job->ret < 0); - job->error = g_strdup(error_get_pretty(error)); - error_free(error); - } - job_update_rc(job); trace_job_completed(job, ret, job->ret); if (job->ret) { @@ -893,7 +887,7 @@ void job_cancel(Job *job, bool force) } job_cancel_async(job, force); if (!job_started(job)) { - job_completed(job, -ECANCELED, NULL); + job_completed(job, -ECANCELED); } else if (job->deferred_to_main_loop) { job_completed_txn_abort(job); } else { diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index 39ee723c8e..6bda4451c1 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -754,7 +754,7 @@ typedef struct TestBlockJob { static void test_job_completed(Job *job, void *opaque) { - job_completed(job, 0, NULL); + job_completed(job, 0); } static int coroutine_fn test_job_run(Job *job, Error **errp) diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c index 
3194924071..82cedee78b 100644 --- a/tests/test-blockjob-txn.c +++ b/tests/test-blockjob-txn.c @@ -34,7 +34,7 @@ static void test_block_job_complete(Job *job, void *opaque) rc = -ECANCELED; } - job_completed(job, rc, NULL); + job_completed(job, rc); bdrv_unref(bs); } diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index b0462bfdec..408a226939 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -167,7 +167,7 @@ static void cancel_job_completed(Job *job, void *opaque) { CancelJob *s = opaque; s->completed = true; - job_completed(job, 0, NULL); + job_completed(job, 0); } static void cancel_job_complete(Job *job, Error **errp) -- cgit v1.2.3-55-g7522 From 00359a71d45a414ee47d8e423104dc0afd24ec65 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 29 Aug 2018 21:57:28 -0400 Subject: jobs: add exit shim All jobs do the same thing when they leave their running loop: - Store the return code in a structure - wait to receive this structure in the main thread - signal job completion via job_completed Few jobs do anything beyond exactly this. Consolidate this exit logic for a net reduction in SLOC. More seriously, when we utilize job_defer_to_main_loop_bh to call a function that calls job_completed, job_finalize_single will run in a context where it has recursively taken the aio_context lock, which can cause hangs if it puts down a reference that causes a flush. You can observe this in practice by looking at mirror_exit's careful placement of job_completed and bdrv_unref calls. If we centralize job exiting, we can signal job completion from outside of the aio_context, which should allow for job cleanup code to run with only one lock, which makes cleanup callbacks less tricky to write. 
Signed-off-by: John Snow Reviewed-by: Max Reitz Message-id: 20180830015734.19765-4-jsnow@redhat.com Reviewed-by: Jeff Cody Signed-off-by: Max Reitz --- include/qemu/job.h | 11 +++++++++++ job.c | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'include') diff --git a/include/qemu/job.h b/include/qemu/job.h index e0e99870a1..1144d671a1 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -208,6 +208,17 @@ struct JobDriver { */ void (*drain)(Job *job); + /** + * If the callback is not NULL, exit will be invoked from the main thread + * when the job's coroutine has finished, but before transactional + * convergence; before @prepare or @abort. + * + * FIXME TODO: This callback is only temporary to transition remaining jobs + * to prepare/commit/abort/clean callbacks and will be removed before 3.1. + * is released. + */ + void (*exit)(Job *job); + /** * If the callback is not NULL, prepare will be invoked when all the jobs * belonging to the same transaction complete; or upon this job's completion diff --git a/job.c b/job.c index 7b3721d2c7..5df7791ad8 100644 --- a/job.c +++ b/job.c @@ -535,6 +535,18 @@ void job_drain(Job *job) } } +static void job_exit(void *opaque) +{ + Job *job = (Job *)opaque; + AioContext *aio_context = job->aio_context; + + if (job->driver->exit) { + aio_context_acquire(aio_context); + job->driver->exit(job); + aio_context_release(aio_context); + } + job_completed(job, job->ret); +} /** * All jobs must allow a pause point before entering their job proper. 
This @@ -547,6 +559,12 @@ static void coroutine_fn job_co_entry(void *opaque) assert(job && job->driver && job->driver->run); job_pause_point(job); job->ret = job->driver->run(job, &job->err); + if (!job->deferred_to_main_loop) { + job->deferred_to_main_loop = true; + aio_bh_schedule_oneshot(qemu_get_aio_context(), + job_exit, + job); + } } -- cgit v1.2.3-55-g7522 From 404ff28d6ae59fc1c24d631710d4063fc68aed03 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 29 Aug 2018 21:57:33 -0400 Subject: jobs: remove ret argument to job_completed; privatize it Jobs are now expected to return their retcode on the stack, from the .run callback, so we can remove that argument. job_cancel does not need to set -ECANCELED because job_completed will update the return code itself if the job was canceled. While we're here, make job_completed static to job.c and remove it from job.h; move the documentation of return code to the .run() callback and to the job->ret property, accordingly. Signed-off-by: John Snow Message-id: 20180830015734.19765-9-jsnow@redhat.com Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- include/qemu/job.h | 28 +++++++++++++++------------- job.c | 11 ++++++----- trace-events | 2 +- 3 files changed, 22 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/qemu/job.h b/include/qemu/job.h index 1144d671a1..23395c17fa 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -124,7 +124,11 @@ typedef struct Job { /** Estimated progress_current value at the completion of the job */ int64_t progress_total; - /** ret code passed to job_completed. */ + /** + * Return code from @run and/or @prepare callback(s). + * Not final until the job has reached the CONCLUDED status. + * 0 on success, -errno on failure. + */ int ret; /** @@ -172,7 +176,16 @@ struct JobDriver { /** Enum describing the operation */ JobType job_type; - /** Mandatory: Entrypoint for the Coroutine. */ + /** + * Mandatory: Entrypoint for the Coroutine. 
+ * + * This callback will be invoked when moving from CREATED to RUNNING. + * + * If this callback returns nonzero, the job transaction it is part of is + * aborted. If it returns zero, the job moves into the WAITING state. If it + * is the last job to complete in its transaction, all jobs in the + * transaction move from WAITING to PENDING. + */ int coroutine_fn (*run)(Job *job, Error **errp); /** @@ -496,17 +509,6 @@ void job_early_fail(Job *job); /** Moves the @job from RUNNING to READY */ void job_transition_to_ready(Job *job); -/** - * @job: The job being completed. - * @ret: The status code. - * - * Marks @job as completed. If @ret is non-zero, the job transaction it is part - * of is aborted. If @ret is zero, the job moves into the WAITING state. If it - * is the last job to complete in its transaction, all jobs in the transaction - * move from WAITING to PENDING. - */ -void job_completed(Job *job, int ret); - /** Asynchronously complete the specified @job. */ void job_complete(Job *job, Error **errp); diff --git a/job.c b/job.c index 5df7791ad8..37d828f964 100644 --- a/job.c +++ b/job.c @@ -535,6 +535,8 @@ void job_drain(Job *job) } } +static void job_completed(Job *job); + static void job_exit(void *opaque) { Job *job = (Job *)opaque; @@ -545,7 +547,7 @@ static void job_exit(void *opaque) job->driver->exit(job); aio_context_release(aio_context); } - job_completed(job, job->ret); + job_completed(job); } /** @@ -883,13 +885,12 @@ static void job_completed_txn_success(Job *job) } } -void job_completed(Job *job, int ret) +static void job_completed(Job *job) { assert(job && job->txn && !job_is_completed(job)); - job->ret = ret; job_update_rc(job); - trace_job_completed(job, ret, job->ret); + trace_job_completed(job, job->ret); if (job->ret) { job_completed_txn_abort(job); } else { @@ -905,7 +906,7 @@ void job_cancel(Job *job, bool force) } job_cancel_async(job, force); if (!job_started(job)) { - job_completed(job, -ECANCELED); + job_completed(job); } else if 
(job->deferred_to_main_loop) { job_completed_txn_abort(job); } else { diff --git a/trace-events b/trace-events index c445f54773..4fd2cb4b97 100644 --- a/trace-events +++ b/trace-events @@ -107,7 +107,7 @@ gdbstub_err_checksum_incorrect(uint8_t expected, uint8_t got) "got command packe # job.c job_state_transition(void *job, int ret, const char *legal, const char *s0, const char *s1) "job %p (ret: %d) attempting %s transition (%s-->%s)" job_apply_verb(void *job, const char *state, const char *verb, const char *legal) "job %p in state %s; applying verb %s (%s)" -job_completed(void *job, int ret, int jret) "job %p ret %d corrected ret %d" +job_completed(void *job, int ret) "job %p ret %d" # job-qmp.c qmp_job_cancel(void *job) "job %p" -- cgit v1.2.3-55-g7522 From e21a1c9831fc80ae3f3c1affdfa43350035d8588 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 29 Aug 2018 21:57:34 -0400 Subject: jobs: remove job_defer_to_main_loop Now that the job infrastructure is handling the job_completed call for all implemented jobs, we can remove the interface that allowed jobs to schedule their own completion. Signed-off-by: John Snow Reviewed-by: Max Reitz Message-id: 20180830015734.19765-10-jsnow@redhat.com Signed-off-by: Max Reitz --- include/qemu/job.h | 17 ----------------- job.c | 40 ++-------------------------------------- 2 files changed, 2 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/include/qemu/job.h b/include/qemu/job.h index 23395c17fa..e0cff702b7 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -568,23 +568,6 @@ void job_finalize(Job *job, Error **errp); */ void job_dismiss(Job **job, Error **errp); -typedef void JobDeferToMainLoopFn(Job *job, void *opaque); - -/** - * @job: The job - * @fn: The function to run in the main loop - * @opaque: The opaque value that is passed to @fn - * - * This function must be called by the main job coroutine just before it - * returns. @fn is executed in the main loop with the job AioContext acquired. 
- * - * Block jobs must call bdrv_unref(), bdrv_close(), and anything that uses - * bdrv_drain_all() in the main loop. - * - * The @job AioContext is held while @fn executes. - */ -void job_defer_to_main_loop(Job *job, JobDeferToMainLoopFn *fn, void *opaque); - /** * Synchronously finishes the given @job. If @finish is given, it is called to * trigger completion or cancellation of the job. diff --git a/job.c b/job.c index 37d828f964..b960e72710 100644 --- a/job.c +++ b/job.c @@ -561,12 +561,8 @@ static void coroutine_fn job_co_entry(void *opaque) assert(job && job->driver && job->driver->run); job_pause_point(job); job->ret = job->driver->run(job, &job->err); - if (!job->deferred_to_main_loop) { - job->deferred_to_main_loop = true; - aio_bh_schedule_oneshot(qemu_get_aio_context(), - job_exit, - job); - } + job->deferred_to_main_loop = true; + aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job); } @@ -969,38 +965,6 @@ void job_complete(Job *job, Error **errp) job->driver->complete(job, errp); } - -typedef struct { - Job *job; - JobDeferToMainLoopFn *fn; - void *opaque; -} JobDeferToMainLoopData; - -static void job_defer_to_main_loop_bh(void *opaque) -{ - JobDeferToMainLoopData *data = opaque; - Job *job = data->job; - AioContext *aio_context = job->aio_context; - - aio_context_acquire(aio_context); - data->fn(data->job, data->opaque); - aio_context_release(aio_context); - - g_free(data); -} - -void job_defer_to_main_loop(Job *job, JobDeferToMainLoopFn *fn, void *opaque) -{ - JobDeferToMainLoopData *data = g_malloc(sizeof(*data)); - data->job = job; - data->fn = fn; - data->opaque = opaque; - job->deferred_to_main_loop = true; - - aio_bh_schedule_oneshot(qemu_get_aio_context(), - job_defer_to_main_loop_bh, data); -} - int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp) { Error *local_err = NULL; -- cgit v1.2.3-55-g7522