From 97ec9117c346239fc5b0f6d1973111e8ca370087 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Fri, 8 Sep 2017 17:44:57 +0800 Subject: file-posix: Clear out first sector in hdev_create People get surprised when, after "qemu-img create -f raw /dev/sdX", they still see qcow2 with "qemu-img info", if previously the bdev had a qcow2 header. While this is natural because raw doesn't need to write any magic bytes during creation, hdev_create is free to clear out the first sector to make sure the stale qcow2 header doesn't cause such confusion. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/file-posix.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'block') diff --git a/block/file-posix.c b/block/file-posix.c index ab12a2b591..36ee89e940 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2730,6 +2730,16 @@ static int hdev_create(const char *filename, QemuOpts *opts, ret = -ENOSPC; } + if (!ret && total_size) { + uint8_t buf[BDRV_SECTOR_SIZE] = { 0 }; + int64_t zero_size = MIN(BDRV_SECTOR_SIZE, total_size); + if (lseek(fd, 0, SEEK_SET) == -1) { + ret = -errno; + } else { + ret = qemu_write_full(fd, buf, zero_size); + ret = ret == zero_size ? 0 : -errno; + } + } qemu_close(fd); return ret; } -- cgit v1.2.3-55-g7522 From 43a5dc02fd6070827d5c4ff652b885219fa8cbe1 Mon Sep 17 00:00:00 2001 From: Manos Pitsidianakis Date: Mon, 18 Sep 2017 23:25:29 +0300 Subject: block/throttle-groups.c: allocate RestartData on the heap RestartData is the opaque data of the throttle_group_restart_queue_entry coroutine. By being stack allocated, it isn't available anymore if aio_co_enter schedules the coroutine with a bottom half and runs after throttle_group_restart_queue returns. 
Cc: qemu-stable@nongnu.org Signed-off-by: Manos Pitsidianakis Reviewed-by: Eric Blake Reviewed-by: Alberto Garcia Signed-off-by: Kevin Wolf --- block/throttle-groups.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/throttle-groups.c b/block/throttle-groups.c index 6ba992c8d7..b291a88481 100644 --- a/block/throttle-groups.c +++ b/block/throttle-groups.c @@ -403,17 +403,19 @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque) schedule_next_request(tgm, is_write); qemu_mutex_unlock(&tg->lock); } + + g_free(data); } static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write) { Coroutine *co; - RestartData rd = { - .tgm = tgm, - .is_write = is_write - }; + RestartData *rd = g_new0(RestartData, 1); + + rd->tgm = tgm; + rd->is_write = is_write; - co = qemu_coroutine_create(throttle_group_restart_queue_entry, &rd); + co = qemu_coroutine_create(throttle_group_restart_queue_entry, rd); aio_co_enter(tgm->aio_context, co); } -- cgit v1.2.3-55-g7522 From 7a6ab45e19b615b9285b9cfa2bbc1fee012bc8d7 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 13 Sep 2017 12:21:28 +0200 Subject: block: Clean up some bad code in the vvfat driver Remove the unnecessary home-grown redefinition of the assert() macro here, and remove the unusable debug code at the end of the checkpoint() function. The code there uses assert() with side-effects (assignment to the "mapping" variable), which should be avoided. Looking more closely, it also appears to be usable only for one particular directory layout (with a file named USB.H in it) and thus is of no use for the rest of the world.
Signed-off-by: Thomas Huth Reviewed-by: John Snow Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/vvfat.c | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) (limited to 'block') diff --git a/block/vvfat.c b/block/vvfat.c index cbabb36f62..1d6e7087a8 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -57,15 +57,6 @@ static void checkpoint(void); -#ifdef __MINGW32__ -void nonono(const char* file, int line, const char* msg) { - fprintf(stderr, "Nonono! %s:%d %s\n", file, line, msg); - exit(-5); -} -#undef assert -#define assert(a) do {if (!(a)) nonono(__FILE__, __LINE__, #a);}while(0) -#endif - #else #define DLOG(a) @@ -3270,24 +3261,11 @@ static void bdrv_vvfat_init(void) block_init(bdrv_vvfat_init); #ifdef DEBUG -static void checkpoint(void) { +static void checkpoint(void) +{ assert(((mapping_t*)array_get(&(vvv->mapping), 0))->end == 2); check1(vvv); check2(vvv); assert(!vvv->current_mapping || vvv->current_fd || (vvv->current_mapping->mode & MODE_DIRECTORY)); -#if 0 - if (((direntry_t*)vvv->directory.pointer)[1].attributes != 0xf) - fprintf(stderr, "Nonono!\n"); - mapping_t* mapping; - direntry_t* direntry; - assert(vvv->mapping.size >= vvv->mapping.item_size * vvv->mapping.next); - assert(vvv->directory.size >= vvv->directory.item_size * vvv->directory.next); - if (vvv->mapping.next<47) - return; - assert((mapping = array_get(&(vvv->mapping), 47))); - assert(mapping->dir_index < vvv->directory.next); - direntry = array_get(&(vvv->directory), mapping->dir_index); - assert(!memcmp(direntry->name, "USB H ", 11) || direntry->name[0]==0); -#endif } #endif -- cgit v1.2.3-55-g7522 From e0995dc3da0894d0a8260bddaa200a4cd7809ba4 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 14 Sep 2017 12:47:11 +0200 Subject: block: Add reopen_queue to bdrv_child_perm() In the context of bdrv_reopen(), we'll have to look at the state of the graph as it will be after the reopen. This interface addition is in preparation for the change. 
Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block.c | 19 ++++++++++++------- block/commit.c | 1 + block/mirror.c | 1 + block/replication.c | 1 + block/vvfat.c | 1 + include/block/block_int.h | 7 +++++++ 6 files changed, 23 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block.c b/block.c index 6dd47e414e..c7724c85e3 100644 --- a/block.c +++ b/block.c @@ -1537,16 +1537,17 @@ static void bdrv_child_abort_perm_update(BdrvChild *c); static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, - BdrvChild *c, - const BdrvChildRole *role, + BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t parent_perm, uint64_t parent_shared, uint64_t *nperm, uint64_t *nshared) { if (bs->drv && bs->drv->bdrv_child_perm) { - bs->drv->bdrv_child_perm(bs, c, role, + bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, parent_perm, parent_shared, nperm, nshared); } + /* TODO Take force_share from reopen_queue */ if (child_bs && child_bs->force_share) { *nshared = BLK_PERM_ALL; } @@ -1596,7 +1597,7 @@ static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms, /* Check all children */ QLIST_FOREACH(c, &bs->children, next) { uint64_t cur_perm, cur_shared; - bdrv_child_perm(bs, c->bs, c, c->role, + bdrv_child_perm(bs, c->bs, c, c->role, NULL, cumulative_perms, cumulative_shared_perms, &cur_perm, &cur_shared); ret = bdrv_child_check_perm(c, cur_perm, cur_shared, ignore_children, @@ -1658,7 +1659,7 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, /* Update all children */ QLIST_FOREACH(c, &bs->children, next) { uint64_t cur_perm, cur_shared; - bdrv_child_perm(bs, c->bs, c, c->role, + bdrv_child_perm(bs, c->bs, c, c->role, NULL, cumulative_perms, cumulative_shared_perms, &cur_perm, &cur_shared); bdrv_child_set_perm(c, cur_perm, cur_shared); @@ -1827,6 +1828,7 @@ int bdrv_child_try_set_perm(BdrvChild 
*c, uint64_t perm, uint64_t shared, void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { @@ -1844,6 +1846,7 @@ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { @@ -1853,9 +1856,11 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, if (!backing) { /* Apart from the modifications below, the same permissions are * forwarded and left alone as for filters */ - bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared); + bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared, + &perm, &shared); /* Format drivers may touch metadata even if the guest doesn't write */ + /* TODO Take flags from reopen_queue */ if (bdrv_is_writable(bs)) { perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; } @@ -1999,7 +2004,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, assert(parent_bs->drv); assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs)); - bdrv_child_perm(parent_bs, child_bs, NULL, child_role, + bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, perm, shared_perm, &perm, &shared_perm); child = bdrv_root_attach_child(child_bs, child_name, child_role, diff --git a/block/commit.c b/block/commit.c index 898d91f653..8f0e83578a 100644 --- a/block/commit.c +++ b/block/commit.c @@ -257,6 +257,7 @@ static void bdrv_commit_top_close(BlockDriverState *bs) static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { diff --git a/block/mirror.c b/block/mirror.c index 6531652d73..6f5cb9f26c 100644 --- a/block/mirror.c +++ 
b/block/mirror.c @@ -1084,6 +1084,7 @@ static void bdrv_mirror_top_close(BlockDriverState *bs) static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { diff --git a/block/replication.c b/block/replication.c index bf4462c8e7..3a4e6822e4 100644 --- a/block/replication.c +++ b/block/replication.c @@ -157,6 +157,7 @@ static void replication_close(BlockDriverState *bs) static void replication_child_perm(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { diff --git a/block/vvfat.c b/block/vvfat.c index 1d6e7087a8..a0f2335894 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3202,6 +3202,7 @@ err: static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { diff --git a/include/block/block_int.h b/include/block/block_int.h index ba4c383393..99abe2ce74 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -411,9 +411,14 @@ struct BlockDriver { * * If @c is NULL, return the permissions for attaching a new child for the * given @role. + * + * If @reopen_queue is non-NULL, don't return the currently needed + * permissions, but those that will be needed after applying the + * @reopen_queue. 
*/ void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t parent_perm, uint64_t parent_shared, uint64_t *nperm, uint64_t *nshared); @@ -983,6 +988,7 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, * all children */ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared); @@ -992,6 +998,7 @@ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, * CONSISTENT_READ and doesn't share WRITE. */ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared); -- cgit v1.2.3-55-g7522 From f71c08ea8e60f035485a512fd2af8908567592f0 Mon Sep 17 00:00:00 2001 From: Pavel Butsykin Date: Mon, 18 Sep 2017 15:42:28 +0300 Subject: qcow2: add qcow2_cache_discard Whenever l2/refcount table clusters are discarded from the file we can automatically drop unnecessary content of the cache tables. This reduces the chance of evicting useful cache data and eliminates inconsistencies between the data in the cache and the data in the file.
Signed-off-by: Pavel Butsykin Reviewed-by: Max Reitz Reviewed-by: John Snow Message-id: 20170918124230.8152-3-pbutsykin@virtuozzo.com Signed-off-by: Max Reitz --- block/qcow2-cache.c | 26 ++++++++++++++++++++++++++ block/qcow2-refcount.c | 20 ++++++++++++++++++-- block/qcow2.h | 3 +++ 3 files changed, 47 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 1d25147392..75746a7f43 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -411,3 +411,29 @@ void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c, assert(c->entries[i].offset != 0); c->entries[i].dirty = true; } + +void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c, + uint64_t offset) +{ + int i; + + for (i = 0; i < c->size; i++) { + if (c->entries[i].offset == offset) { + return qcow2_cache_get_table_addr(bs, c, i); + } + } + return NULL; +} + +void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table) +{ + int i = qcow2_cache_get_table_idx(bs, c, table); + + assert(c->entries[i].ref == 0); + + c->entries[i].offset = 0; + c->entries[i].lru_counter = 0; + c->entries[i].dirty = false; + + qcow2_cache_table_release(bs, c, i, 1); +} diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 168fc32e7b..8c17c0e3aa 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -861,8 +861,24 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, } s->set_refcount(refcount_block, block_index, refcount); - if (refcount == 0 && s->discard_passthrough[type]) { - update_refcount_discard(bs, cluster_offset, s->cluster_size); + if (refcount == 0) { + void *table; + + table = qcow2_cache_is_table_offset(bs, s->refcount_block_cache, + offset); + if (table != NULL) { + qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); + qcow2_cache_discard(bs, s->refcount_block_cache, table); + } + + table = qcow2_cache_is_table_offset(bs, s->l2_table_cache, offset); + if (table 
!= NULL) { + qcow2_cache_discard(bs, s->l2_table_cache, table); + } + + if (s->discard_passthrough[type]) { + update_refcount_discard(bs, cluster_offset, s->cluster_size); + } } } diff --git a/block/qcow2.h b/block/qcow2.h index 96a8d43c17..52c374e9ed 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -649,6 +649,9 @@ int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, void **table); void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table); +void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c, + uint64_t offset); +void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table); /* qcow2-bitmap.c functions */ int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res, -- cgit v1.2.3-55-g7522 From 46b732cdf3737ea8f9654f6ccd67ff52ddeddc20 Mon Sep 17 00:00:00 2001 From: Pavel Butsykin Date: Mon, 18 Sep 2017 15:42:29 +0300 Subject: qcow2: add shrink image support This patch adds shrinking of the image file for qcow2. As a result, this allows us to reduce the virtual image size and free up space on the disk without copying the image. The image can be fragmented, and shrinking is done by punching holes in the image file.
Signed-off-by: Pavel Butsykin Reviewed-by: Max Reitz Reviewed-by: John Snow Message-id: 20170918124230.8152-4-pbutsykin@virtuozzo.com Signed-off-by: Max Reitz --- block/qcow2-cluster.c | 50 +++++++++++++++++++++ block/qcow2-refcount.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++++ block/qcow2.c | 43 ++++++++++++++---- block/qcow2.h | 14 ++++++ qapi/block-core.json | 8 +++- 5 files changed, 225 insertions(+), 10 deletions(-) (limited to 'block') diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 0d4824993c..d2518d1893 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -32,6 +32,56 @@ #include "qemu/bswap.h" #include "trace.h" +int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t exact_size) +{ + BDRVQcow2State *s = bs->opaque; + int new_l1_size, i, ret; + + if (exact_size >= s->l1_size) { + return 0; + } + + new_l1_size = exact_size; + +#ifdef DEBUG_ALLOC2 + fprintf(stderr, "shrink l1_table from %d to %d\n", s->l1_size, new_l1_size); +#endif + + BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE); + ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset + + new_l1_size * sizeof(uint64_t), + (s->l1_size - new_l1_size) * sizeof(uint64_t), 0); + if (ret < 0) { + goto fail; + } + + ret = bdrv_flush(bs->file->bs); + if (ret < 0) { + goto fail; + } + + BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS); + for (i = s->l1_size - 1; i > new_l1_size - 1; i--) { + if ((s->l1_table[i] & L1E_OFFSET_MASK) == 0) { + continue; + } + qcow2_free_clusters(bs, s->l1_table[i] & L1E_OFFSET_MASK, + s->cluster_size, QCOW2_DISCARD_ALWAYS); + s->l1_table[i] = 0; + } + return 0; + +fail: + /* + * If the write in the l1_table failed the image may contain a partially + * overwritten l1_table. In this case it would be better to clear the + * l1_table in memory to avoid possible image corruption. 
+ */ + memset(s->l1_table + new_l1_size, 0, + (s->l1_size - new_l1_size) * sizeof(uint64_t)); + return ret; +} + int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size) { diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 8c17c0e3aa..88d5a3f1ad 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -29,6 +29,7 @@ #include "block/qcow2.h" #include "qemu/range.h" #include "qemu/bswap.h" +#include "qemu/cutils.h" static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size); static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, @@ -3061,3 +3062,122 @@ done: qemu_vfree(new_refblock); return ret; } + +static int qcow2_discard_refcount_block(BlockDriverState *bs, + uint64_t discard_block_offs) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t refblock_offs = get_refblock_offset(s, discard_block_offs); + uint64_t cluster_index = discard_block_offs >> s->cluster_bits; + uint32_t block_index = cluster_index & (s->refcount_block_size - 1); + void *refblock; + int ret; + + assert(discard_block_offs != 0); + + ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs, + &refblock); + if (ret < 0) { + return ret; + } + + if (s->get_refcount(refblock, block_index) != 1) { + qcow2_signal_corruption(bs, true, -1, -1, "Invalid refcount:" + " refblock offset %#" PRIx64 + ", reftable index %u" + ", block offset %#" PRIx64 + ", refcount %#" PRIx64, + refblock_offs, + offset_to_reftable_index(s, discard_block_offs), + discard_block_offs, + s->get_refcount(refblock, block_index)); + qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + return -EINVAL; + } + s->set_refcount(refblock, block_index, 0); + + qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, refblock); + + qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + + if (cluster_index < s->free_cluster_index) { + s->free_cluster_index = cluster_index; + } + + refblock = qcow2_cache_is_table_offset(bs, s->refcount_block_cache, 
+ discard_block_offs); + if (refblock) { + /* discard refblock from the cache if refblock is cached */ + qcow2_cache_discard(bs, s->refcount_block_cache, refblock); + } + update_refcount_discard(bs, discard_block_offs, s->cluster_size); + + return 0; +} + +int qcow2_shrink_reftable(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t *reftable_tmp = + g_malloc(s->refcount_table_size * sizeof(uint64_t)); + int i, ret; + + for (i = 0; i < s->refcount_table_size; i++) { + int64_t refblock_offs = s->refcount_table[i] & REFT_OFFSET_MASK; + void *refblock; + bool unused_block; + + if (refblock_offs == 0) { + reftable_tmp[i] = 0; + continue; + } + ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs, + &refblock); + if (ret < 0) { + goto out; + } + + /* the refblock has own reference */ + if (i == offset_to_reftable_index(s, refblock_offs)) { + uint64_t block_index = (refblock_offs >> s->cluster_bits) & + (s->refcount_block_size - 1); + uint64_t refcount = s->get_refcount(refblock, block_index); + + s->set_refcount(refblock, block_index, 0); + + unused_block = buffer_is_zero(refblock, s->cluster_size); + + s->set_refcount(refblock, block_index, refcount); + } else { + unused_block = buffer_is_zero(refblock, s->cluster_size); + } + qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + + reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]); + } + + ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset, reftable_tmp, + s->refcount_table_size * sizeof(uint64_t)); + /* + * If the write in the reftable failed the image may contain a partially + * overwritten reftable. In this case it would be better to clear the + * reftable in memory to avoid possible image corruption. 
+ */ + for (i = 0; i < s->refcount_table_size; i++) { + if (s->refcount_table[i] && !reftable_tmp[i]) { + if (ret == 0) { + ret = qcow2_discard_refcount_block(bs, s->refcount_table[i] & + REFT_OFFSET_MASK); + } + s->refcount_table[i] = 0; + } + } + + if (!s->cache_discards) { + qcow2_process_discards(bs, ret); + } + +out: + g_free(reftable_tmp); + return ret; +} diff --git a/block/qcow2.c b/block/qcow2.c index d33fb3ecdd..970006fc1d 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3104,18 +3104,43 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, } old_length = bs->total_sectors * 512; + new_l1_size = size_to_l1(s, offset); - /* shrinking is currently not supported */ if (offset < old_length) { - error_setg(errp, "qcow2 doesn't support shrinking images yet"); - return -ENOTSUP; - } + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, + "Preallocation can't be used for shrinking an image"); + return -EINVAL; + } - new_l1_size = size_to_l1(s, offset); - ret = qcow2_grow_l1_table(bs, new_l1_size, true); - if (ret < 0) { - error_setg_errno(errp, -ret, "Failed to grow the L1 table"); - return ret; + ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size), + old_length - ROUND_UP(offset, + s->cluster_size), + QCOW2_DISCARD_ALWAYS, true); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to discard cropped clusters"); + return ret; + } + + ret = qcow2_shrink_l1_table(bs, new_l1_size); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to reduce the number of L2 tables"); + return ret; + } + + ret = qcow2_shrink_reftable(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to discard unused refblocks"); + return ret; + } + } else { + ret = qcow2_grow_l1_table(bs, new_l1_size, true); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to grow the L1 table"); + return ret; + } } switch (prealloc) { diff --git a/block/qcow2.h b/block/qcow2.h index 52c374e9ed..5a289a81e2 100644 --- a/block/qcow2.h +++ b/block/qcow2.h 
@@ -521,6 +521,18 @@ static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2) return r1 > r2 ? r1 - r2 : r2 - r1; } +static inline +uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset) +{ + return offset >> (s->refcount_block_bits + s->cluster_bits); +} + +static inline uint64_t get_refblock_offset(BDRVQcow2State *s, uint64_t offset) +{ + uint32_t index = offset_to_reftable_index(s, offset); + return s->refcount_table[index] & REFT_OFFSET_MASK; +} + /* qcow2.c functions */ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, int64_t sector_num, int nb_sectors); @@ -584,10 +596,12 @@ int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res, int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, BlockDriverAmendStatusCB *status_cb, void *cb_opaque, Error **errp); +int qcow2_shrink_reftable(BlockDriverState *bs); /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size); +int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size); int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index); int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, diff --git a/qapi/block-core.json b/qapi/block-core.json index c69a395804..750bb0c77c 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2533,6 +2533,11 @@ # # Trigger events supported by blkdebug. # +# @l1_shrink_write_table: write zeros to the l1 table to shrink image. +# (since 2.11) +# +# @l1_shrink_free_l2_clusters: discard the l2 tables. 
(since 2.11) +# # Since: 2.9 ## { 'enum': 'BlkdebugEvent', 'prefix': 'BLKDBG', @@ -2549,7 +2554,8 @@ 'cluster_alloc_bytes', 'cluster_free', 'flush_to_os', 'flush_to_disk', 'pwritev_rmw_head', 'pwritev_rmw_after_head', 'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev', - 'pwritev_zero', 'pwritev_done', 'empty_image_prepare' ] } + 'pwritev_zero', 'pwritev_done', 'empty_image_prepare', + 'l1_shrink_write_table', 'l1_shrink_free_l2_clusters' ] } ## # @BlkdebugInjectErrorOptions: -- cgit v1.2.3-55-g7522 From 5330f32b71b1868bdb3b444733063cb5adc4e8e6 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 22 Sep 2017 17:43:53 +0300 Subject: block/qcow2-bitmap: fix use of uninitialized pointer Without initialization to zero, the dirty_bitmap field may be non-zero for a bitmap which should not be stored, and qcow2_store_persistent_dirty_bitmaps will erroneously call store_bitmap for it, which leads to a SIGSEGV in bdrv_dirty_bitmap_name. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-id: 20170922144353.4220-1-vsementsov@virtuozzo.com Cc: qemu-stable@nongnu.org Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Signed-off-by: Max Reitz --- block/qcow2-bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index e8d3bdbd6e..14f41d0427 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -602,7 +602,7 @@ static Qcow2BitmapList *bitmap_list_load(BlockDriverState *bs, uint64_t offset, goto fail; } - bm = g_new(Qcow2Bitmap, 1); + bm = g_new0(Qcow2Bitmap, 1); bm->table.offset = e->bitmap_table_offset; bm->table.size = e->bitmap_table_size; bm->flags = e->flags; -- cgit v1.2.3-55-g7522