diff options
author | Max Reitz | 2018-09-25 16:12:41 +0200 |
---|---|---|
committer | Max Reitz | 2018-09-25 16:12:44 +0200 |
commit | 9c76ff9c16be890e70fce30754b096ff9950d1ee (patch) | |
tree | 358214b68c3d62d3943b80c22d744d0e32764475 /job.c | |
parent | blockdev: document transactional shortcomings (diff) | |
parent | test-bdrv-drain: Test draining job source child and parent (diff) | |
download | qemu-9c76ff9c16be890e70fce30754b096ff9950d1ee.tar.gz qemu-9c76ff9c16be890e70fce30754b096ff9950d1ee.tar.xz qemu-9c76ff9c16be890e70fce30754b096ff9950d1ee.zip |
Merge remote-tracking branch 'kevin/tags/for-upstream' into block
Block layer patches:
- Fix some jobs/drain/aio_poll related hangs
- commit: Add top-node/base-node options
- linux-aio: Fix locking for qemu_laio_process_completions()
- Fix use after free error in bdrv_open_inherit
# gpg: Signature made Tue Sep 25 15:54:01 2018 CEST
# gpg: using RSA key 7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6
* kevin/tags/for-upstream: (26 commits)
test-bdrv-drain: Test draining job source child and parent
block: Use a single global AioWait
test-bdrv-drain: Fix outdated comments
test-bdrv-drain: AIO_WAIT_WHILE() in job .commit/.abort
job: Avoid deadlocks in job_completed_txn_abort()
test-bdrv-drain: Test nested poll in bdrv_drain_poll_top_level()
block: Remove aio_poll() in bdrv_drain_poll variants
blockjob: Lie better in child_job_drained_poll()
block-backend: Decrease in_flight only after callback
block-backend: Fix potential double blk_delete()
block-backend: Add .drained_poll callback
block: Add missing locking in bdrv_co_drain_bh_cb()
test-bdrv-drain: Test AIO_WAIT_WHILE() in completion callback
job: Use AIO_WAIT_WHILE() in job_finish_sync()
test-blockjob: Acquire AioContext around job_cancel_sync()
test-bdrv-drain: Drain with block jobs in an I/O thread
aio-wait: Increase num_waiters even in home thread
blockjob: Wake up BDS when job becomes idle
job: Fix missing locking due to mismerge
job: Fix nested aio_poll() hanging in job_txn_apply
...
Signed-off-by: Max Reitz <mreitz@redhat.com>
Diffstat (limited to 'job.c')
-rw-r--r-- | job.c | 67 |
1 files changed, 41 insertions, 26 deletions
```diff
@@ -29,6 +29,7 @@
 #include "qemu/job.h"
 #include "qemu/id.h"
 #include "qemu/main-loop.h"
+#include "block/aio-wait.h"
 #include "trace-root.h"
 #include "qapi/qapi-events-job.h"
 
@@ -136,21 +137,13 @@ static void job_txn_del_job(Job *job)
     }
 }
 
-static int job_txn_apply(JobTxn *txn, int fn(Job *), bool lock)
+static int job_txn_apply(JobTxn *txn, int fn(Job *))
 {
-    AioContext *ctx;
     Job *job, *next;
     int rc = 0;
 
     QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) {
-        if (lock) {
-            ctx = job->aio_context;
-            aio_context_acquire(ctx);
-        }
         rc = fn(job);
-        if (lock) {
-            aio_context_release(ctx);
-        }
         if (rc) {
             break;
         }
@@ -410,6 +403,11 @@ static void job_event_ready(Job *job)
     notifier_list_notify(&job->on_ready, job);
 }
 
+static void job_event_idle(Job *job)
+{
+    notifier_list_notify(&job->on_idle, job);
+}
+
 void job_enter_cond(Job *job, bool(*fn)(Job *job))
 {
     if (!job_started(job)) {
@@ -455,6 +453,7 @@ static void coroutine_fn job_do_yield(Job *job, uint64_t ns)
         timer_mod(&job->sleep_timer, ns);
     }
     job->busy = false;
+    job_event_idle(job);
     job_unlock();
     qemu_coroutine_yield();
 
@@ -719,6 +718,7 @@ static void job_cancel_async(Job *job, bool force)
 
 static void job_completed_txn_abort(Job *job)
 {
+    AioContext *outer_ctx = job->aio_context;
     AioContext *ctx;
     JobTxn *txn = job->txn;
     Job *other_job;
@@ -732,23 +732,26 @@ static void job_completed_txn_abort(Job *job)
     txn->aborting = true;
     job_txn_ref(txn);
 
-    /* We are the first failed job. Cancel other jobs. */
-    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
-        ctx = other_job->aio_context;
-        aio_context_acquire(ctx);
-    }
+    /* We can only hold the single job's AioContext lock while calling
+     * job_finalize_single() because the finalization callbacks can involve
+     * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. */
+    aio_context_release(outer_ctx);
 
     /* Other jobs are effectively cancelled by us, set the status for
      * them; this job, however, may or may not be cancelled, depending
      * on the caller, so leave it. */
     QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
         if (other_job != job) {
+            ctx = other_job->aio_context;
+            aio_context_acquire(ctx);
             job_cancel_async(other_job, false);
+            aio_context_release(ctx);
         }
     }
     while (!QLIST_EMPTY(&txn->jobs)) {
         other_job = QLIST_FIRST(&txn->jobs);
         ctx = other_job->aio_context;
+        aio_context_acquire(ctx);
         if (!job_is_completed(other_job)) {
             assert(job_is_cancelled(other_job));
             job_finish_sync(other_job, NULL, NULL);
@@ -757,6 +760,8 @@ static void job_completed_txn_abort(Job *job)
         aio_context_release(ctx);
     }
 
+    aio_context_acquire(outer_ctx);
+
     job_txn_unref(txn);
 }
 
@@ -780,11 +785,11 @@ static void job_do_finalize(Job *job)
     assert(job && job->txn);
 
     /* prepare the transaction to complete */
-    rc = job_txn_apply(job->txn, job_prepare, true);
+    rc = job_txn_apply(job->txn, job_prepare);
     if (rc) {
         job_completed_txn_abort(job);
     } else {
-        job_txn_apply(job->txn, job_finalize_single, true);
+        job_txn_apply(job->txn, job_finalize_single);
     }
 }
 
@@ -830,10 +835,10 @@ static void job_completed_txn_success(Job *job)
         assert(other_job->ret == 0);
     }
 
-    job_txn_apply(txn, job_transition_to_pending, false);
+    job_txn_apply(txn, job_transition_to_pending);
 
     /* If no jobs need manual finalization, automatically do so */
-    if (job_txn_apply(txn, job_needs_finalize, false) == 0) {
+    if (job_txn_apply(txn, job_needs_finalize) == 0) {
         job_do_finalize(job);
     }
 }
@@ -855,7 +860,20 @@ static void job_completed(Job *job)
 static void job_exit(void *opaque)
 {
     Job *job = (Job *)opaque;
+    AioContext *ctx = job->aio_context;
+
+    aio_context_acquire(ctx);
+
+    /* This is a lie, we're not quiescent, but still doing the completion
+     * callbacks. However, completion callbacks tend to involve operations that
+     * drain block nodes, and if .drained_poll still returned true, we would
+     * deadlock. */
+    job->busy = false;
+    job_event_idle(job);
+
     job_completed(job);
+
+    aio_context_release(ctx);
 }
 
 /**
@@ -870,6 +888,7 @@ static void coroutine_fn job_co_entry(void *opaque)
     job_pause_point(job);
     job->ret = job->driver->run(job, &job->err);
     job->deferred_to_main_loop = true;
+    job->busy = true;
     aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
 }
 
@@ -971,14 +990,10 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
         job_unref(job);
         return -EBUSY;
     }
-    /* job_drain calls job_enter, and it should be enough to induce progress
-     * until the job completes or moves to the main thread. */
-    while (!job->deferred_to_main_loop && !job_is_completed(job)) {
-        job_drain(job);
-    }
-    while (!job_is_completed(job)) {
-        aio_poll(qemu_get_aio_context(), true);
-    }
+
+    AIO_WAIT_WHILE(job->aio_context,
+                   (job_drain(job), !job_is_completed(job)));
+
     ret = (job_is_cancelled(job) && job->ret == 0) ? -ECANCELED : job->ret;
     job_unref(job);
     return ret;
```