author     Max Reitz <mreitz@redhat.com>    2018-09-25 16:12:41 +0200
committer  Max Reitz <mreitz@redhat.com>    2018-09-25 16:12:44 +0200
commit     9c76ff9c16be890e70fce30754b096ff9950d1ee
tree       358214b68c3d62d3943b80c22d744d0e32764475
parent     blockdev: document transactional shortcomings
parent     test-bdrv-drain: Test draining job source child and parent
Merge remote-tracking branch 'kevin/tags/for-upstream' into block
Block layer patches:

- Fix some jobs/drain/aio_poll related hangs
- commit: Add top-node/base-node options
- linux-aio: Fix locking for qemu_laio_process_completions()
- Fix use after free error in bdrv_open_inherit

# gpg: Signature made Tue Sep 25 15:54:01 2018 CEST
# gpg:                using RSA key 7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6

* kevin/tags/for-upstream: (26 commits)
  test-bdrv-drain: Test draining job source child and parent
  block: Use a single global AioWait
  test-bdrv-drain: Fix outdated comments
  test-bdrv-drain: AIO_WAIT_WHILE() in job .commit/.abort
  job: Avoid deadlocks in job_completed_txn_abort()
  test-bdrv-drain: Test nested poll in bdrv_drain_poll_top_level()
  block: Remove aio_poll() in bdrv_drain_poll variants
  blockjob: Lie better in child_job_drained_poll()
  block-backend: Decrease in_flight only after callback
  block-backend: Fix potential double blk_delete()
  block-backend: Add .drained_poll callback
  block: Add missing locking in bdrv_co_drain_bh_cb()
  test-bdrv-drain: Test AIO_WAIT_WHILE() in completion callback
  job: Use AIO_WAIT_WHILE() in job_finish_sync()
  test-blockjob: Acquire AioContext around job_cancel_sync()
  test-bdrv-drain: Drain with block jobs in an I/O thread
  aio-wait: Increase num_waiters even in home thread
  blockjob: Wake up BDS when job becomes idle
  job: Fix missing locking due to mismerge
  job: Fix nested aio_poll() hanging in job_txn_apply
  ...

Signed-off-by: Max Reitz <mreitz@redhat.com>
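Most of the job.c changes below revolve around one pattern: rather than open-coding aio_poll() loops, a waiter takes the AioContext lock once and lets AIO_WAIT_WHILE() drop that lock around each poll iteration so the waited-for work can make progress. A minimal sketch of the pattern, assuming an illustrative in_flight counter that is not part of this series:

    /* Sketch only: wait for a hypothetical in-flight request counter to
     * drain.  AIO_WAIT_WHILE() must be entered with ctx's lock held
     * exactly once; it releases that lock around each poll so handlers
     * running in ctx can complete and turn the condition false. */
    static void wait_for_requests(AioContext *ctx, int *in_flight)
    {
        aio_context_acquire(ctx);
        AIO_WAIT_WHILE(ctx, atomic_read(in_flight) > 0);
        aio_context_release(ctx);
    }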
Diffstat (limited to 'job.c')
-rw-r--r--  job.c | 67
1 file changed, 41 insertions(+), 26 deletions(-)
diff --git a/job.c b/job.c
index 192f168423..c65e01bbfa 100644
--- a/job.c
+++ b/job.c
@@ -29,6 +29,7 @@
#include "qemu/job.h"
#include "qemu/id.h"
#include "qemu/main-loop.h"
+#include "block/aio-wait.h"
#include "trace-root.h"
#include "qapi/qapi-events-job.h"
@@ -136,21 +137,13 @@ static void job_txn_del_job(Job *job)
     }
 }
 
-static int job_txn_apply(JobTxn *txn, int fn(Job *), bool lock)
+static int job_txn_apply(JobTxn *txn, int fn(Job *))
 {
-    AioContext *ctx;
     Job *job, *next;
     int rc = 0;
 
     QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) {
-        if (lock) {
-            ctx = job->aio_context;
-            aio_context_acquire(ctx);
-        }
         rc = fn(job);
-        if (lock) {
-            aio_context_release(ctx);
-        }
         if (rc) {
             break;
         }
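With the lock parameter gone, job_txn_apply() no longer acquires and releases each job's AioContext itself; fn(job) now runs under whatever lock the caller already holds. A hedged sketch of the resulting caller-side contract (the wrapper function is illustrative, not from this patch):

    /* Illustrative caller: take the one relevant context lock up front;
     * job_txn_apply() then invokes the callback for every job in the
     * transaction under that single lock. */
    static int apply_prepare_locked(Job *job)
    {
        int rc;

        aio_context_acquire(job->aio_context);
        rc = job_txn_apply(job->txn, job_prepare);
        aio_context_release(job->aio_context);
        return rc;
    }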
@@ -410,6 +403,11 @@ static void job_event_ready(Job *job)
     notifier_list_notify(&job->on_ready, job);
 }
 
+static void job_event_idle(Job *job)
+{
+    notifier_list_notify(&job->on_idle, job);
+}
+
 void job_enter_cond(Job *job, bool(*fn)(Job *job))
 {
     if (!job_started(job)) {
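The new on_idle notifier list fires from job_do_yield() whenever the job stops being busy. Consumers subscribe through the generic Notifier machinery; roughly like this (the handler and variable names are illustrative, though kicking AIO_WAIT_WHILE() waiters is what the block layer uses this hook for in this series):

    /* Wake anyone blocked in AIO_WAIT_WHILE() so the wait condition is
     * re-evaluated now that the job has gone idle. */
    static void job_on_idle(Notifier *notifier, void *opaque)
    {
        aio_wait_kick();
    }

    static Notifier idle_notifier = { .notify = job_on_idle };

    /* ... during setup: */
    notifier_list_add(&job->on_idle, &idle_notifier);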
@@ -455,6 +453,7 @@ static void coroutine_fn job_do_yield(Job *job, uint64_t ns)
         timer_mod(&job->sleep_timer, ns);
     }
     job->busy = false;
+    job_event_idle(job);
     job_unlock();
     qemu_coroutine_yield();
 
@@ -719,6 +718,7 @@ static void job_cancel_async(Job *job, bool force)
 
 static void job_completed_txn_abort(Job *job)
 {
+    AioContext *outer_ctx = job->aio_context;
     AioContext *ctx;
     JobTxn *txn = job->txn;
     Job *other_job;
@@ -732,23 +732,26 @@ static void job_completed_txn_abort(Job *job)
     txn->aborting = true;
     job_txn_ref(txn);
 
-    /* We are the first failed job. Cancel other jobs. */
-    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
-        ctx = other_job->aio_context;
-        aio_context_acquire(ctx);
-    }
+    /* We can only hold the single job's AioContext lock while calling
+     * job_finalize_single() because the finalization callbacks can involve
+     * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. */
+    aio_context_release(outer_ctx);
 
     /* Other jobs are effectively cancelled by us, set the status for
      * them; this job, however, may or may not be cancelled, depending
      * on the caller, so leave it. */
     QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
         if (other_job != job) {
+            ctx = other_job->aio_context;
+            aio_context_acquire(ctx);
             job_cancel_async(other_job, false);
+            aio_context_release(ctx);
         }
     }
     while (!QLIST_EMPTY(&txn->jobs)) {
         other_job = QLIST_FIRST(&txn->jobs);
         ctx = other_job->aio_context;
+        aio_context_acquire(ctx);
         if (!job_is_completed(other_job)) {
             assert(job_is_cancelled(other_job));
             job_finish_sync(other_job, NULL, NULL);
@@ -757,6 +760,8 @@ static void job_completed_txn_abort(Job *job)
         aio_context_release(ctx);
     }
 
+    aio_context_acquire(outer_ctx);
+
     job_txn_unref(txn);
 }
 
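The comment in this hunk is the heart of the fix: AIO_WAIT_WHILE() can only drop the one lock it is told about, so holding any other AioContext lock across it risks a deadlock. A reduced illustration of the hazard (work_pending() is hypothetical, not a QEMU function):

    /* Illustrative only.  With ctx_a still held, AIO_WAIT_WHILE() can
     * release ctx_b around its polling, but never ctx_a.  If finishing
     * the waited-for work needs ctx_a (say, a completion callback that
     * runs there), the condition never becomes false and this hangs. */
    static void deadlock_prone(AioContext *ctx_a, AioContext *ctx_b)
    {
        aio_context_acquire(ctx_a);
        aio_context_acquire(ctx_b);
        AIO_WAIT_WHILE(ctx_b, work_pending(ctx_b));
        aio_context_release(ctx_b);
        aio_context_release(ctx_a);
    }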
@@ -780,11 +785,11 @@ static void job_do_finalize(Job *job)
     assert(job && job->txn);
 
     /* prepare the transaction to complete */
-    rc = job_txn_apply(job->txn, job_prepare, true);
+    rc = job_txn_apply(job->txn, job_prepare);
     if (rc) {
         job_completed_txn_abort(job);
     } else {
-        job_txn_apply(job->txn, job_finalize_single, true);
+        job_txn_apply(job->txn, job_finalize_single);
     }
 }
 
@@ -830,10 +835,10 @@ static void job_completed_txn_success(Job *job)
         assert(other_job->ret == 0);
     }
 
-    job_txn_apply(txn, job_transition_to_pending, false);
+    job_txn_apply(txn, job_transition_to_pending);
 
     /* If no jobs need manual finalization, automatically do so */
-    if (job_txn_apply(txn, job_needs_finalize, false) == 0) {
+    if (job_txn_apply(txn, job_needs_finalize) == 0) {
         job_do_finalize(job);
     }
 }
@@ -855,7 +860,20 @@ static void job_completed(Job *job)
 static void job_exit(void *opaque)
 {
     Job *job = (Job *)opaque;
+    AioContext *ctx = job->aio_context;
+
+    aio_context_acquire(ctx);
+
+    /* This is a lie, we're not quiescent, but still doing the completion
+     * callbacks. However, completion callbacks tend to involve operations that
+     * drain block nodes, and if .drained_poll still returned true, we would
+     * deadlock. */
+    job->busy = false;
+    job_event_idle(job);
+
     job_completed(job);
+
+    aio_context_release(ctx);
 }
 
 /**
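The "lie" works because drain decides quiescence by polling the job through the child's .drained_poll callback. A simplified sketch along the lines of blockjob's child_job_drained_poll() from this same series (details such as the per-driver override are trimmed, so treat this as a sketch):

    static bool child_job_drained_poll(BdrvChild *c)
    {
        BlockJob *bjob = c->opaque;
        Job *job = &bjob->job;

        /* An idle or completed job has no in-flight requests of its own;
         * reporting it busy here would make the drain loop spin forever
         * once the job has deferred its completion to the main loop. */
        if (!job->busy || job_is_completed(job)) {
            return false;
        }
        return true;
    }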
@@ -870,6 +888,7 @@ static void coroutine_fn job_co_entry(void *opaque)
     job_pause_point(job);
     job->ret = job->driver->run(job, &job->err);
     job->deferred_to_main_loop = true;
+    job->busy = true;
     aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
 }
 
@@ -971,14 +990,10 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
         job_unref(job);
         return -EBUSY;
     }
-    /* job_drain calls job_enter, and it should be enough to induce progress
-     * until the job completes or moves to the main thread. */
-    while (!job->deferred_to_main_loop && !job_is_completed(job)) {
-        job_drain(job);
-    }
-    while (!job_is_completed(job)) {
-        aio_poll(qemu_get_aio_context(), true);
-    }
+
+    AIO_WAIT_WHILE(job->aio_context,
+                   (job_drain(job), !job_is_completed(job)));
+
     ret = (job_is_cancelled(job) && job->ret == 0) ? -ECANCELED : job->ret;
     job_unref(job);
     return ret;
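The condition handed to AIO_WAIT_WHILE() uses the C comma operator so that each evaluation first kicks the job and only then re-tests completion. Written long-hand, the expression is equivalent to calling a helper like this (purely illustrative):

    /* Each poll iteration: nudge the job so it can make progress, then
     * keep waiting only while it has not completed yet. */
    static bool job_still_running(Job *job)
    {
        job_drain(job);                 /* kick */
        return !job_is_completed(job);  /* re-test */
    }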