1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
|
/*
* QEMU backup
*
* Copyright (C) 2013 Proxmox Server Solutions
* Copyright (c) 2019 Virtuozzo International GmbH.
*
* Authors:
* Dietmar Maurer (dietmar@proxmox.com)
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include "trace.h"
#include "block/block.h"
#include "block/block_int.h"
#include "block/blockjob_int.h"
#include "block/block_backup.h"
#include "block/block-copy.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/cutils.h"
#include "sysemu/block-backend.h"
#include "qemu/bitmap.h"
#include "qemu/error-report.h"
#include "block/backup-top.h"
#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
typedef struct BackupBlockJob {
BlockJob common;
BlockDriverState *backup_top;
BlockDriverState *source_bs;
BdrvDirtyBitmap *sync_bitmap;
MirrorSyncMode sync_mode;
BitmapSyncMode bitmap_mode;
BlockdevOnError on_source_error;
BlockdevOnError on_target_error;
uint64_t len;
int64_t cluster_size;
BackupPerf perf;
BlockCopyState *bcs;
bool wait;
BlockCopyCallState *bg_bcs_call;
} BackupBlockJob;
static const BlockJobDriver backup_job_driver;
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
{
BdrvDirtyBitmap *bm;
bool sync = (((ret == 0) || (job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS)) \
&& (job->bitmap_mode != BITMAP_SYNC_MODE_NEVER));
if (sync) {
/*
* We succeeded, or we always intended to sync the bitmap.
* Delete this bitmap and install the child.
*/
bm = bdrv_dirty_bitmap_abdicate(job->sync_bitmap, NULL);
} else {
/*
* We failed, or we never intended to sync the bitmap anyway.
* Merge the successor back into the parent, keeping all data.
*/
bm = bdrv_reclaim_dirty_bitmap(job->sync_bitmap, NULL);
}
assert(bm);
if (ret < 0 && job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS) {
/* If we failed and synced, merge in the bits we didn't copy: */
bdrv_dirty_bitmap_merge_internal(bm, block_copy_dirty_bitmap(job->bcs),
NULL, true);
}
}
static void backup_commit(Job *job)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
if (s->sync_bitmap) {
backup_cleanup_sync_bitmap(s, 0);
}
}
static void backup_abort(Job *job)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
if (s->sync_bitmap) {
backup_cleanup_sync_bitmap(s, -1);
}
}
static void backup_clean(Job *job)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
bdrv_backup_top_drop(s->backup_top);
}
void backup_do_checkpoint(BlockJob *job, Error **errp)
{
BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
assert(block_job_driver(job) == &backup_job_driver);
if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) {
error_setg(errp, "The backup job only supports block checkpoint in"
" sync=none mode");
return;
}
bdrv_set_dirty_bitmap(block_copy_dirty_bitmap(backup_job->bcs), 0,
backup_job->len);
}
static BlockErrorAction backup_error_action(BackupBlockJob *job,
bool read, int error)
{
if (read) {
return block_job_error_action(&job->common, job->on_source_error,
true, error);
} else {
return block_job_error_action(&job->common, job->on_target_error,
false, error);
}
}
static void coroutine_fn backup_block_copy_callback(void *opaque)
{
BackupBlockJob *s = opaque;
if (s->wait) {
s->wait = false;
aio_co_wake(s->common.job.co);
} else {
job_enter(&s->common.job);
}
}
static int coroutine_fn backup_loop(BackupBlockJob *job)
{
BlockCopyCallState *s = NULL;
int ret = 0;
bool error_is_read;
BlockErrorAction act;
while (true) { /* retry loop */
job->bg_bcs_call = s = block_copy_async(job->bcs, 0,
QEMU_ALIGN_UP(job->len, job->cluster_size),
job->perf.max_workers, job->perf.max_chunk,
backup_block_copy_callback, job);
while (!block_copy_call_finished(s) &&
!job_is_cancelled(&job->common.job))
{
job_yield(&job->common.job);
}
if (!block_copy_call_finished(s)) {
assert(job_is_cancelled(&job->common.job));
/*
* Note that we can't use job_yield() here, as it doesn't work for
* cancelled job.
*/
block_copy_call_cancel(s);
job->wait = true;
qemu_coroutine_yield();
assert(block_copy_call_finished(s));
ret = 0;
goto out;
}
if (job_is_cancelled(&job->common.job) ||
block_copy_call_succeeded(s))
{
ret = 0;
goto out;
}
if (block_copy_call_cancelled(s)) {
/*
* Job is not cancelled but only block-copy call. This is possible
* after job pause. Now the pause is finished, start new block-copy
* iteration.
*/
block_copy_call_free(s);
continue;
}
/* The only remaining case is failed block-copy call. */
assert(block_copy_call_failed(s));
ret = block_copy_call_status(s, &error_is_read);
act = backup_error_action(job, error_is_read, -ret);
switch (act) {
case BLOCK_ERROR_ACTION_REPORT:
goto out;
case BLOCK_ERROR_ACTION_STOP:
/*
* Go to pause prior to starting new block-copy call on the next
* iteration.
*/
job_pause_point(&job->common.job);
break;
case BLOCK_ERROR_ACTION_IGNORE:
/* Proceed to new block-copy call to retry. */
break;
default:
abort();
}
block_copy_call_free(s);
}
out:
block_copy_call_free(s);
job->bg_bcs_call = NULL;
return ret;
}
static void backup_init_bcs_bitmap(BackupBlockJob *job)
{
bool ret;
uint64_t estimate;
BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs);
if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap,
NULL, true);
assert(ret);
} else {
if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
/*
* We can't hog the coroutine to initialize this thoroughly.
* Set a flag and resume work when we are able to yield safely.
*/
block_copy_set_skip_unallocated(job->bcs, true);
}
bdrv_set_dirty_bitmap(bcs_bitmap, 0, job->len);
}
estimate = bdrv_get_dirty_count(bcs_bitmap);
job_progress_set_remaining(&job->common.job, estimate);
}
static int coroutine_fn backup_run(Job *job, Error **errp)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
int ret;
backup_init_bcs_bitmap(s);
if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
int64_t offset = 0;
int64_t count;
for (offset = 0; offset < s->len; ) {
if (job_is_cancelled(job)) {
return -ECANCELED;
}
job_pause_point(job);
if (job_is_cancelled(job)) {
return -ECANCELED;
}
ret = block_copy_reset_unallocated(s->bcs, offset, &count);
if (ret < 0) {
return ret;
}
offset += count;
}
block_copy_set_skip_unallocated(s->bcs, false);
}
if (s->sync_mode == MIRROR_SYNC_MODE_NONE) {
/*
* All bits are set in bcs bitmap to allow any cluster to be copied.
* This does not actually require them to be copied.
*/
while (!job_is_cancelled(job)) {
/*
* Yield until the job is cancelled. We just let our before_write
* notify callback service CoW requests.
*/
job_yield(job);
}
} else {
return backup_loop(s);
}
return 0;
}
static void coroutine_fn backup_pause(Job *job)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
if (s->bg_bcs_call && !block_copy_call_finished(s->bg_bcs_call)) {
block_copy_call_cancel(s->bg_bcs_call);
s->wait = true;
qemu_coroutine_yield();
}
}
static void coroutine_fn backup_set_speed(BlockJob *job, int64_t speed)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common);
/*
* block_job_set_speed() is called first from block_job_create(), when we
* don't yet have s->bcs.
*/
if (s->bcs) {
block_copy_set_speed(s->bcs, speed);
if (s->bg_bcs_call) {
block_copy_kick(s->bg_bcs_call);
}
}
}
static const BlockJobDriver backup_job_driver = {
.job_driver = {
.instance_size = sizeof(BackupBlockJob),
.job_type = JOB_TYPE_BACKUP,
.free = block_job_free,
.user_resume = block_job_user_resume,
.run = backup_run,
.commit = backup_commit,
.abort = backup_abort,
.clean = backup_clean,
.pause = backup_pause,
},
.set_speed = backup_set_speed,
};
static int64_t backup_calculate_cluster_size(BlockDriverState *target,
Error **errp)
{
int ret;
BlockDriverInfo bdi;
bool target_does_cow = bdrv_backing_chain_next(target);
/*
* If there is no backing file on the target, we cannot rely on COW if our
* backup cluster size is smaller than the target cluster size. Even for
* targets with a backing file, try to avoid COW if possible.
*/
ret = bdrv_get_info(target, &bdi);
if (ret == -ENOTSUP && !target_does_cow) {
/* Cluster size is not defined */
warn_report("The target block device doesn't provide "
"information about the block size and it doesn't have a "
"backing file. The default block size of %u bytes is "
"used. If the actual block size of the target exceeds "
"this default, the backup may be unusable",
BACKUP_CLUSTER_SIZE_DEFAULT);
return BACKUP_CLUSTER_SIZE_DEFAULT;
} else if (ret < 0 && !target_does_cow) {
error_setg_errno(errp, -ret,
"Couldn't determine the cluster size of the target image, "
"which has no backing file");
error_append_hint(errp,
"Aborting, since this may create an unusable destination image\n");
return ret;
} else if (ret < 0 && target_does_cow) {
/* Not fatal; just trudge on ahead. */
return BACKUP_CLUSTER_SIZE_DEFAULT;
}
return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
}
BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, int64_t speed,
MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
BitmapSyncMode bitmap_mode,
bool compress,
const char *filter_node_name,
BackupPerf *perf,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
int creation_flags,
BlockCompletionFunc *cb, void *opaque,
JobTxn *txn, Error **errp)
{
int64_t len, target_len;
BackupBlockJob *job = NULL;
int64_t cluster_size;
BdrvRequestFlags write_flags;
BlockDriverState *backup_top = NULL;
BlockCopyState *bcs = NULL;
assert(bs);
assert(target);
/* QMP interface protects us from these cases */
assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL);
assert(sync_bitmap || sync_mode != MIRROR_SYNC_MODE_BITMAP);
if (bs == target) {
error_setg(errp, "Source and target cannot be the same");
return NULL;
}
if (!bdrv_is_inserted(bs)) {
error_setg(errp, "Device is not inserted: %s",
bdrv_get_device_name(bs));
return NULL;
}
if (!bdrv_is_inserted(target)) {
error_setg(errp, "Device is not inserted: %s",
bdrv_get_device_name(target));
return NULL;
}
if (compress && !bdrv_supports_compressed_writes(target)) {
error_setg(errp, "Compression is not supported for this drive %s",
bdrv_get_device_name(target));
return NULL;
}
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
return NULL;
}
if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
return NULL;
}
cluster_size = backup_calculate_cluster_size(target, errp);
if (cluster_size < 0) {
goto error;
}
if (perf->max_workers < 1) {
error_setg(errp, "max-workers must be greater than zero");
return NULL;
}
if (perf->max_chunk < 0) {
error_setg(errp, "max-chunk must be zero (which means no limit) or "
"positive");
return NULL;
}
if (perf->max_chunk && perf->max_chunk < cluster_size) {
error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup "
"cluster size (%" PRIi64 ")", perf->max_chunk, cluster_size);
return NULL;
}
if (sync_bitmap) {
/* If we need to write to this bitmap, check that we can: */
if (bitmap_mode != BITMAP_SYNC_MODE_NEVER &&
bdrv_dirty_bitmap_check(sync_bitmap, BDRV_BITMAP_DEFAULT, errp)) {
return NULL;
}
/* Create a new bitmap, and freeze/disable this one. */
if (bdrv_dirty_bitmap_create_successor(sync_bitmap, errp) < 0) {
return NULL;
}
}
len = bdrv_getlength(bs);
if (len < 0) {
error_setg_errno(errp, -len, "Unable to get length for '%s'",
bdrv_get_device_or_node_name(bs));
goto error;
}
target_len = bdrv_getlength(target);
if (target_len < 0) {
error_setg_errno(errp, -target_len, "Unable to get length for '%s'",
bdrv_get_device_or_node_name(bs));
goto error;
}
if (target_len != len) {
error_setg(errp, "Source and target image have different sizes");
goto error;
}
/*
* If source is in backing chain of target assume that target is going to be
* used for "image fleecing", i.e. it should represent a kind of snapshot of
* source at backup-start point in time. And target is going to be read by
* somebody (for example, used as NBD export) during backup job.
*
* In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid
* intersection of backup writes and third party reads from target,
* otherwise reading from target we may occasionally read already updated by
* guest data.
*
* For more information see commit f8d59dfb40bb and test
* tests/qemu-iotests/222
*/
write_flags = (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) |
(compress ? BDRV_REQ_WRITE_COMPRESSED : 0),
backup_top = bdrv_backup_top_append(bs, target, filter_node_name,
cluster_size, perf,
write_flags, &bcs, errp);
if (!backup_top) {
goto error;
}
/* job->len is fixed, so we can't allow resize */
job = block_job_create(job_id, &backup_job_driver, txn, backup_top,
0, BLK_PERM_ALL,
speed, creation_flags, cb, opaque, errp);
if (!job) {
goto error;
}
job->backup_top = backup_top;
job->source_bs = bs;
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
job->sync_mode = sync_mode;
job->sync_bitmap = sync_bitmap;
job->bitmap_mode = bitmap_mode;
job->bcs = bcs;
job->cluster_size = cluster_size;
job->len = len;
job->perf = *perf;
block_copy_set_progress_meter(bcs, &job->common.job.progress);
block_copy_set_speed(bcs, speed);
/* Required permissions are already taken by backup-top target */
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);
return &job->common;
error:
if (sync_bitmap) {
bdrv_reclaim_dirty_bitmap(sync_bitmap, NULL);
}
if (backup_top) {
bdrv_backup_top_drop(backup_top);
}
return NULL;
}
|