-rw-r--r--   kernel/cmake/kernel.cmake              |   2
-rw-r--r--   kernel/xloop_file_fmt_qcow_cluster.c   | 281
-rw-r--r--   kernel/xloop_file_fmt_qcow_cluster.h   |   7
-rw-r--r--   kernel/xloop_file_fmt_qcow_main.c      | 469
-rw-r--r--   kernel/xloop_file_fmt_qcow_main.h      | 269
5 files changed, 835 insertions(+), 193 deletions(-)
diff --git a/kernel/cmake/kernel.cmake b/kernel/cmake/kernel.cmake
index 385cb40..3ac7633 100644
--- a/kernel/cmake/kernel.cmake
+++ b/kernel/cmake/kernel.cmake
@@ -22,7 +22,7 @@ macro(add_kernel_module MODULE_NAME KERNEL_DIR MODULE_MACRO MODULE_SOURCE_FILES
 	endif()
 	# define build command
 	set(MODULE_BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} ${MODULE_MACRO}
-		-C /${KERNEL_DIR}/build
+		-C ${KERNEL_DIR}/build
 		M=${CMAKE_CURRENT_BINARY_DIR}/${MODULE_NAME} modules
 		EXTRA_CFLAGS=${KERNEL_C_FLAGS}
 		KBUILD_EXTRA_SYMBOLS=${MODULE_EXTRA_SYMBOLS})
diff --git a/kernel/xloop_file_fmt_qcow_cluster.c b/kernel/xloop_file_fmt_qcow_cluster.c
index deef22b..8394c76 100644
--- a/kernel/xloop_file_fmt_qcow_cluster.c
+++ b/kernel/xloop_file_fmt_qcow_cluster.c
@@ -21,6 +21,14 @@
 #include "xloop_file_fmt_qcow_cluster.h"
 
 /*
+ * __xloop_file_fmt_qcow_cluster_l2_load
+ *
+ * @xlo_fmt: QCOW file format
+ * @offset: A guest offset, used to calculate what slice of the L2
+ *          table to load.
+ * @l2_offset: Offset to the L2 table in the image file.
+ * @l2_slice: Location to store the pointer to the L2 slice.
+ *
  * Loads a L2 slice into memory (L2 slices are the parts of L2 tables
  * that are loaded by the qcow2 cache). If the slice is in the cache,
  * the cache is used; otherwise the L2 slice is loaded from the image
@@ -31,7 +39,7 @@ static int __xloop_file_fmt_qcow_cluster_l2_load(struct xloop_file_fmt *xlo_fmt,
 {
 	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
 
-	int start_of_slice = sizeof(u64) * (
+	int start_of_slice = xloop_file_fmt_qcow_l2_entry_size(qcow_data) * (
 		xloop_file_fmt_qcow_offset_to_l2_index(qcow_data, offset) -
 		xloop_file_fmt_qcow_offset_to_l2_slice_index(qcow_data, offset)
 	);
@@ -42,93 +50,159 @@ static int __xloop_file_fmt_qcow_cluster_l2_load(struct xloop_file_fmt *xlo_fmt,
 }
 
 /*
- * Checks how many clusters in a given L2 slice are contiguous in the image
- * file. As soon as one of the flags in the bitmask stop_flags changes compared
- * to the first cluster, the search is stopped and the cluster is not counted
- * as contiguous. (This allows it, for example, to stop at the first compressed
- * cluster which may require a different handling)
+ * For a given L2 entry, count the number of contiguous subclusters of
+ * the same type starting from @sc_from. Compressed clusters are
+ * treated as if they were divided into subclusters of size
+ * qcow_data->subcluster_size.
+ *
+ * Return the number of contiguous subclusters and set @type to the
+ * subcluster type.
+ *
+ * If the L2 entry is invalid return -errno and set @type to
+ * QCOW_SUBCLUSTER_INVALID.
 */
-static int __xloop_file_fmt_qcow_cluster_count_contiguous(
-	struct xloop_file_fmt *xlo_fmt, int nb_clusters, int cluster_size,
-	u64 *l2_slice, u64 stop_flags)
+static int __xloop_file_fmt_qcow_get_subcluster_range_type(
+	struct xloop_file_fmt *xlo_fmt, u64 l2_entry, u64 l2_bitmap,
+	unsigned int sc_from, enum xloop_file_fmt_qcow_subcluster_type *type)
 {
-	int i;
-	enum xloop_file_fmt_qcow_cluster_type first_cluster_type;
-	u64 mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
-	u64 first_entry = be64_to_cpu(l2_slice[0]);
-	u64 offset = first_entry & mask;
-
-	first_cluster_type = xloop_file_fmt_qcow_get_cluster_type(xlo_fmt,
-		first_entry);
-	if (first_cluster_type == QCOW_CLUSTER_UNALLOCATED) {
+	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+	u32 val;
+
+	*type = xloop_file_fmt_qcow_get_subcluster_type(xlo_fmt, l2_entry,
+		l2_bitmap, sc_from);
+
+	if (*type == QCOW_SUBCLUSTER_INVALID) {
+		return -EINVAL;
+	} else if (!xloop_file_fmt_qcow_has_subclusters(qcow_data) ||
+		*type == QCOW_SUBCLUSTER_COMPRESSED) {
+		return qcow_data->subclusters_per_cluster - sc_from;
+	}
+
+	switch (*type) {
+	case QCOW_SUBCLUSTER_NORMAL:
+		val = l2_bitmap | QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from);
+		return __builtin_ctz(~val) - sc_from;
+
+	case QCOW_SUBCLUSTER_ZERO_PLAIN:
+	case QCOW_SUBCLUSTER_ZERO_ALLOC:
+		val = (l2_bitmap | QCOW_OFLAG_SUB_ZERO_RANGE(0, sc_from)) >> 32;
+		return __builtin_ctz(~val) - sc_from;
+
+	case QCOW_SUBCLUSTER_UNALLOCATED_PLAIN:
+	case QCOW_SUBCLUSTER_UNALLOCATED_ALLOC:
+		val = ((l2_bitmap >> 32) | l2_bitmap)
+			& ~QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from);
+		return __builtin_ctz(val) - sc_from;
+
+	default:
+		/* not reachable */
+		ASSERT(false);
+		*type = QCOW_SUBCLUSTER_INVALID;
 		return 0;
-	}
-
-	/* must be allocated */
-	ASSERT(first_cluster_type == QCOW_CLUSTER_NORMAL ||
-		first_cluster_type == QCOW_CLUSTER_ZERO_ALLOC);
-
-	for (i = 0; i < nb_clusters; i++) {
-		u64 l2_entry = be64_to_cpu(l2_slice[i]) & mask;
-		if (offset + (u64) i * cluster_size != l2_entry) {
-			break;
-		}
-	}
-
-	return i;
+	}
 }
 
 /*
- * Checks how many consecutive unallocated clusters in a given L2
- * slice have the same cluster type.
+ * Return the number of contiguous subclusters of the exact same type
+ * in a given L2 slice, starting from cluster @l2_index, subcluster
+ * @sc_index. Allocated subclusters are required to be contiguous in
+ * the image file.
+ * At most @nb_clusters are checked (note that this means clusters,
+ * not subclusters).
+ * Compressed clusters are always processed one by one but for the
+ * purpose of this count they are treated as if they were divided into
+ * subclusters of size qcow_data->subcluster_size.
+ * On failure return -errno and update @l2_index to point to the
+ * invalid entry.
 */
-static int __xloop_file_fmt_qcow_cluster_count_contiguous_unallocated(
-	struct xloop_file_fmt *xlo_fmt, int nb_clusters, u64 *l2_slice,
-	enum xloop_file_fmt_qcow_cluster_type wanted_type)
+static int __xloop_file_fmt_qcow_count_contiguous_subclusters(
+	struct xloop_file_fmt *xlo_fmt, int nb_clusters, unsigned int sc_index,
+	u64 *l2_slice, unsigned int *l2_index)
 {
-	int i;
-
-	ASSERT(wanted_type == QCOW_CLUSTER_ZERO_PLAIN ||
-		wanted_type == QCOW_CLUSTER_UNALLOCATED);
-
-	for (i = 0; i < nb_clusters; i++) {
-		u64 entry = be64_to_cpu(l2_slice[i]);
-		enum xloop_file_fmt_qcow_cluster_type type =
-			xloop_file_fmt_qcow_get_cluster_type(xlo_fmt, entry);
-
-		if (type != wanted_type) {
-			break;
-		}
-	}
-
-	return i;
+	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+	int i, count = 0;
+	bool check_offset = false;
+	u64 expected_offset = 0;
+	enum xloop_file_fmt_qcow_subcluster_type expected_type =
+		QCOW_SUBCLUSTER_NORMAL;
+	enum xloop_file_fmt_qcow_subcluster_type type;
+
+	ASSERT(*l2_index + nb_clusters <= qcow_data->l2_slice_size);
+
+	for (i = 0; i < nb_clusters; i++) {
+		unsigned int first_sc = (i == 0) ? sc_index : 0;
+		u64 l2_entry = xloop_file_fmt_qcow_get_l2_entry(qcow_data, l2_slice,
+			*l2_index + i);
+		u64 l2_bitmap = xloop_file_fmt_qcow_get_l2_bitmap(qcow_data, l2_slice,
+			*l2_index + i);
+		int ret = __xloop_file_fmt_qcow_get_subcluster_range_type(xlo_fmt,
+			l2_entry, l2_bitmap, first_sc, &type);
+		if (ret < 0) {
+			*l2_index += i; /* Point to the invalid entry */
+			return -EIO;
+		}
+
+		if (i == 0) {
+			if (type == QCOW_SUBCLUSTER_COMPRESSED) {
+				/* Compressed clusters are always processed one by one */
+				return ret;
+			}
+			expected_type = type;
+			expected_offset = l2_entry & QCOW_L2E_OFFSET_MASK;
+			check_offset = (type == QCOW_SUBCLUSTER_NORMAL ||
+				type == QCOW_SUBCLUSTER_ZERO_ALLOC ||
+				type == QCOW_SUBCLUSTER_UNALLOCATED_ALLOC);
+		} else if (type != expected_type) {
+			break;
+		} else if (check_offset) {
+			expected_offset += qcow_data->cluster_size;
+			if (expected_offset != (l2_entry & QCOW_L2E_OFFSET_MASK)) {
+				break;
+			}
+		}
+
+		count += ret;
+		/* Stop if there are type changes before the end of the cluster */
+		if (first_sc + ret < qcow_data->subclusters_per_cluster) {
+			break;
+		}
+	}
+
+	return count;
 }
 
 /*
- * For a given offset of the virtual disk, find the cluster type and offset in
- * the qcow2 file. The offset is stored in *cluster_offset.
+ * xloop_file_fmt_qcow_get_host_offset
+ *
+ * For a given offset of the virtual disk find the equivalent host
+ * offset in the qcow2 file and store it in *host_offset. Neither
+ * offset needs to be aligned to a cluster boundary.
+ *
+ * If the cluster is unallocated then *host_offset will be 0.
+ * If the cluster is compressed then *host_offset will contain the
+ * complete compressed cluster descriptor.
 *
 * On entry, *bytes is the maximum number of contiguous bytes starting at
 * offset that we are interested in.
 *
 * On exit, *bytes is the number of bytes starting at offset that have the same
- * cluster type and (if applicable) are stored contiguously in the image file.
- * Compressed clusters are always returned one by one.
+ * subcluster type and (if applicable) are stored contiguously in the image
+ * file. The subcluster type is stored in *subcluster_type.
+ * Compressed clusters are always processed one by one.
 *
- * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
- * cases.
+ * Returns 0 on success, -errno in error cases.
 */
-int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
-	u64 offset, unsigned int *bytes, u64 *cluster_offset)
+int xloop_file_fmt_qcow_get_host_offset(struct xloop_file_fmt *xlo_fmt,
+	u64 offset, unsigned int *bytes, u64 *host_offset,
+	enum xloop_file_fmt_qcow_subcluster_type *subcluster_type)
 {
 	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
-	unsigned int l2_index;
-	u64 l1_index, l2_offset, *l2_slice;
-	int c;
+	unsigned int l2_index, sc_index;
+	u64 l1_index, l2_offset, *l2_slice, l2_entry, l2_bitmap;
+	int sc;
 	unsigned int offset_in_cluster;
 	u64 bytes_available, bytes_needed, nb_clusters;
-	enum xloop_file_fmt_qcow_cluster_type type;
+	enum xloop_file_fmt_qcow_subcluster_type type;
 	int ret;
+	u64 host_cluster_offset;
 
 	offset_in_cluster = xloop_file_fmt_qcow_offset_into_cluster(qcow_data,
 		offset);
@@ -146,18 +220,18 @@ int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
 		bytes_needed = bytes_available;
 	}
 
-	*cluster_offset = 0;
+	*host_offset = 0;
 
 	/* seek to the l2 offset in the l1 table */
 	l1_index = xloop_file_fmt_qcow_offset_to_l1_index(qcow_data, offset);
 	if (l1_index >= qcow_data->l1_size) {
-		type = QCOW_CLUSTER_UNALLOCATED;
+		type = QCOW_SUBCLUSTER_UNALLOCATED_PLAIN;
 		goto out;
 	}
 
-	l2_offset = qcow_data->l1_table[l1_index] & L1E_OFFSET_MASK;
+	l2_offset = qcow_data->l1_table[l1_index] & QCOW_L1E_OFFSET_MASK;
 	if (!l2_offset) {
-		type = QCOW_CLUSTER_UNALLOCATED;
+		type = QCOW_SUBCLUSTER_UNALLOCATED_PLAIN;
 		goto out;
 	}
 
@@ -177,7 +251,11 @@ int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
 	/* find the cluster offset for the given disk offset */
 	l2_index = xloop_file_fmt_qcow_offset_to_l2_slice_index(qcow_data,
 		offset);
-	*cluster_offset = be64_to_cpu(l2_slice[l2_index]);
+	sc_index = xloop_file_fmt_qcow_offset_to_sc_index(qcow_data, offset);
+	l2_entry = xloop_file_fmt_qcow_get_l2_entry(qcow_data, l2_slice,
+		l2_index);
+	l2_bitmap = xloop_file_fmt_qcow_get_l2_bitmap(qcow_data, l2_slice,
+		l2_index);
 
 	nb_clusters = xloop_file_fmt_qcow_size_to_clusters(qcow_data,
 		bytes_needed);
@@ -186,10 +264,11 @@ int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
 	 * assertion is always true */
 	ASSERT(nb_clusters <= INT_MAX);
 
-	type = xloop_file_fmt_qcow_get_cluster_type(xlo_fmt, *cluster_offset);
+	type = xloop_file_fmt_qcow_get_subcluster_type(xlo_fmt, l2_entry,
+		l2_bitmap, sc_index);
 	if (qcow_data->qcow_version < 3 && (
-		type == QCOW_CLUSTER_ZERO_PLAIN ||
-		type == QCOW_CLUSTER_ZERO_ALLOC)) {
+		type == QCOW_SUBCLUSTER_ZERO_PLAIN ||
+		type == QCOW_SUBCLUSTER_ZERO_ALLOC)) {
 		dev_err_ratelimited(xloop_file_fmt_to_dev(xlo_fmt), "zero cluster "
 			"entry found in pre-v3 image (L2 offset: %llx, L2 index: %x)\n",
 			l2_offset, l2_index);
@@ -197,45 +276,39 @@ int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
 		goto fail;
 	}
 
 	switch (type) {
-	case QCOW_CLUSTER_COMPRESSED:
-		if (xloop_file_fmt_qcow_has_data_file(xlo_fmt)) {
+	case QCOW_SUBCLUSTER_INVALID:
+		break; /* This is handled by count_contiguous_subclusters() below */
+	case QCOW_SUBCLUSTER_COMPRESSED:
+		if (xloop_file_fmt_qcow_has_data_file(qcow_data)) {
 			dev_err_ratelimited(xloop_file_fmt_to_dev(xlo_fmt), "compressed "
 				"cluster entry found in image with external data file "
 				"(L2 offset: %llx, L2 index: %x)\n", l2_offset, l2_index);
 			ret = -EIO;
 			goto fail;
 		}
-		/* Compressed clusters can only be processed one by one */
-		c = 1;
-		*cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
+		*host_offset = l2_entry &
+			QCOW_L2E_COMPRESSED_OFFSET_SIZE_MASK;
 		break;
-	case QCOW_CLUSTER_ZERO_PLAIN:
-	case QCOW_CLUSTER_UNALLOCATED:
-		/* how many empty clusters ? */
-		c = __xloop_file_fmt_qcow_cluster_count_contiguous_unallocated(
-			xlo_fmt, nb_clusters, &l2_slice[l2_index], type);
-		*cluster_offset = 0;
+	case QCOW_SUBCLUSTER_ZERO_PLAIN:
+	case QCOW_SUBCLUSTER_UNALLOCATED_PLAIN:
 		break;
-	case QCOW_CLUSTER_ZERO_ALLOC:
-	case QCOW_CLUSTER_NORMAL:
-		/* how many allocated clusters ? */
-		c = __xloop_file_fmt_qcow_cluster_count_contiguous(xlo_fmt,
-			nb_clusters, qcow_data->cluster_size,
-			&l2_slice[l2_index], QCOW_OFLAG_ZERO);
-		*cluster_offset &= L2E_OFFSET_MASK;
+	case QCOW_SUBCLUSTER_ZERO_ALLOC:
+	case QCOW_SUBCLUSTER_NORMAL:
+	case QCOW_SUBCLUSTER_UNALLOCATED_ALLOC:
+		host_cluster_offset = l2_entry & QCOW_L2E_OFFSET_MASK;
+		*host_offset = host_cluster_offset + offset_in_cluster;
 		if (xloop_file_fmt_qcow_offset_into_cluster(qcow_data,
-			*cluster_offset)) {
+			host_cluster_offset)) {
 			dev_err_ratelimited(xloop_file_fmt_to_dev(xlo_fmt), "cluster "
 				"allocation offset %llx unaligned (L2 offset: %llx, "
-				"L2 index: %x)\n", *cluster_offset, l2_offset, l2_index);
+				"L2 index: %x)\n", host_cluster_offset, l2_offset, l2_index);
 			ret = -EIO;
 			goto fail;
 		}
-		if (xloop_file_fmt_qcow_has_data_file(xlo_fmt) &&
-			*cluster_offset != offset - offset_in_cluster) {
+		if (xloop_file_fmt_qcow_has_data_file(qcow_data) &&
+			*host_offset != offset) {
 			dev_err_ratelimited(xloop_file_fmt_to_dev(xlo_fmt), "external "
 				"data file host cluster offset %llx does not match guest "
-				"cluster offset: %llx, L2 index: %x)\n", *cluster_offset,
+				"cluster offset: %llx, L2 index: %x)\n", host_cluster_offset,
 				offset - offset_in_cluster, l2_index);
 			ret = -EIO;
 			goto fail;
@@ -245,9 +318,19 @@ int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
 	default:
 		BUG();
 	}
 
+	sc = __xloop_file_fmt_qcow_count_contiguous_subclusters(xlo_fmt,
+		nb_clusters, sc_index, l2_slice, &l2_index);
+
+	if (sc < 0) {
+		dev_err_ratelimited(xloop_file_fmt_to_dev(xlo_fmt), "invalid cluster "
+			"entry found (L2 offset: %#llx, L2 index: %#x)", l2_offset,
+			l2_index);
+		ret = -EIO;
+		goto fail;
+	}
 	xloop_file_fmt_qcow_cache_put(xlo_fmt, (void **) &l2_slice);
 
-	bytes_available = (s64) c * qcow_data->cluster_size;
+	bytes_available = ((s64) sc + sc_index) << qcow_data->subcluster_bits;
 
 out:
 	if (bytes_available > bytes_needed) {
@@ -260,7 +343,9 @@ out:
 	ASSERT(bytes_available - offset_in_cluster <= UINT_MAX);
 	*bytes = bytes_available - offset_in_cluster;
 
-	return type;
+	*subcluster_type = type;
+
+	return 0;
 
 fail:
 	xloop_file_fmt_qcow_cache_put(xlo_fmt, (void **) &l2_slice);
diff --git a/kernel/xloop_file_fmt_qcow_cluster.h b/kernel/xloop_file_fmt_qcow_cluster.h
index ff3e0a1..a3716f5 100644
--- a/kernel/xloop_file_fmt_qcow_cluster.h
+++ b/kernel/xloop_file_fmt_qcow_cluster.h
@@ -15,9 +15,8 @@
 
 #include "xloop_file_fmt.h"
 
-extern int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
-	u64 offset,
-	unsigned int *bytes,
-	u64 *cluster_offset);
+extern int xloop_file_fmt_qcow_get_host_offset(struct xloop_file_fmt *xlo_fmt,
+	u64 offset, unsigned int *bytes, u64 *host_offset,
+	enum xloop_file_fmt_qcow_subcluster_type *subcluster_type);
 
 #endif
diff --git a/kernel/xloop_file_fmt_qcow_main.c b/kernel/xloop_file_fmt_qcow_main.c
index 55d2f32..fbc49f5 100644
--- a/kernel/xloop_file_fmt_qcow_main.c
+++ b/kernel/xloop_file_fmt_qcow_main.c
@@ -23,12 +23,23 @@
 #include <linux/string.h>
 #include <linux/vmalloc.h>
 #include <linux/zlib.h>
+#ifdef CONFIG_ZSTD_DECOMPRESS
+#include <linux/zstd.h>
+#endif
 
 #include "xloop_file_fmt.h"
 #include "xloop_file_fmt_qcow_main.h"
 #include "xloop_file_fmt_qcow_cache.h"
 #include "xloop_file_fmt_qcow_cluster.h"
 
+#ifdef CONFIG_ZSTD_DECOMPRESS
+#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27
+#define ZSTD_MAXWINDOWSIZE ((U32_C(1) << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1)
+#endif
+
+typedef ssize_t (*qcow_file_fmt_decompress_fn)(struct xloop_file_fmt *xlo_fmt,
+	void *dest, size_t dest_size, const void *src, size_t src_size);
+
 static int __qcow_file_fmt_header_read(struct xloop_file_fmt *xlo_fmt,
 	struct file *file, struct xloop_file_fmt_qcow_header *header)
 {
@@ -135,32 +146,70 @@ static int __qcow_file_fmt_compression_init(struct xloop_file_fmt *xlo_fmt)
 {
 	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
 	int ret = 0;
+#ifdef CONFIG_ZSTD_DECOMPRESS
+	size_t workspace_size;
+#endif
 
-	qcow_data->strm = kzalloc(sizeof(*qcow_data->strm), GFP_KERNEL);
-	if (!qcow_data->strm) {
+	/* create workspace for ZLIB decompression stream */
+	qcow_data->zlib_dstrm = kzalloc(sizeof(*qcow_data->zlib_dstrm), GFP_KERNEL);
+	if (!qcow_data->zlib_dstrm) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	qcow_data->strm->workspace = vzalloc(zlib_inflate_workspacesize());
-	if (!qcow_data->strm->workspace) {
+	qcow_data->zlib_dstrm->workspace = vzalloc(zlib_inflate_workspacesize());
+	if (!qcow_data->zlib_dstrm->workspace) {
+		ret = -ENOMEM;
+		goto out_free_zlib_dstrm;
+	}
+
+	/* set up ZLIB decompression stream */
+	ret = zlib_inflateInit2(qcow_data->zlib_dstrm, -12);
+	if (ret != Z_OK) {
+		ret = -EIO;
+		goto out_free_zlib_dworkspace;
+	}
+
+#ifdef CONFIG_ZSTD_DECOMPRESS
+	/* create workspace for ZSTD decompression stream */
+	workspace_size = ZSTD_DStreamWorkspaceBound(ZSTD_MAXWINDOWSIZE);
+	qcow_data->zstd_dworkspace = vzalloc(workspace_size);
+	if (!qcow_data->zstd_dworkspace) {
 		ret = -ENOMEM;
-		goto out_free_strm;
+		goto out_free_zlib_dworkspace;
+	}
+
+	/* set up ZSTD decompression stream */
+	qcow_data->zstd_dstrm = ZSTD_initDStream(ZSTD_MAXWINDOWSIZE,
+		qcow_data->zstd_dworkspace, workspace_size);
+	if (!qcow_data->zstd_dstrm) {
+		ret = -EINVAL;
+		goto out_free_zstd_dworkspace;
 	}
+#endif
 
+	/* create cache for last compressed QCOW cluster */
 	qcow_data->cmp_last_coffset = ULLONG_MAX;
 	qcow_data->cmp_out_buf = vmalloc(qcow_data->cluster_size);
 	if (!qcow_data->cmp_out_buf) {
 		ret = -ENOMEM;
-		goto out_free_workspace;
+#ifdef CONFIG_ZSTD_DECOMPRESS
+		goto out_free_zstd_dworkspace;
+#else
+		goto out_free_zlib_dworkspace;
+#endif
 	}
 
 	return ret;
 
-out_free_workspace:
-	vfree(qcow_data->strm->workspace);
-out_free_strm:
-	kfree(qcow_data->strm);
+#ifdef CONFIG_ZSTD_DECOMPRESS
+out_free_zstd_dworkspace:
+	vfree(qcow_data->zstd_dworkspace);
+#endif
+out_free_zlib_dworkspace:
+	vfree(qcow_data->zlib_dstrm->workspace);
+out_free_zlib_dstrm:
+	kfree(qcow_data->zlib_dstrm);
 out:
 	return ret;
 }
@@ -169,8 +218,17 @@ static void __qcow_file_fmt_compression_exit(struct xloop_file_fmt *xlo_fmt)
 {
 	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
 
-	vfree(qcow_data->strm->workspace);
-	kfree(qcow_data->strm);
+	/* ZLIB specific cleanup */
+	zlib_inflateEnd(qcow_data->zlib_dstrm);
+	vfree(qcow_data->zlib_dstrm->workspace);
+	kfree(qcow_data->zlib_dstrm);
+
+	/* ZSTD specific cleanup */
+#ifdef CONFIG_ZSTD_DECOMPRESS
+	vfree(qcow_data->zstd_dworkspace);
+#endif
+
+	/* last compressed QCOW cluster cleanup */
 	vfree(qcow_data->cmp_out_buf);
 }
 
@@ -227,6 +285,13 @@ static void __qcow_file_fmt_header_to_buf(struct xloop_file_fmt *xlo_fmt,
 			header->header_length);
 	}
 
+	if (header->header_length > offsetof(struct xloop_file_fmt_qcow_header,
+		compression_type)) {
+		len += sprintf(header_buf + len,
+			"compression_type: %d\n",
+			header->compression_type);
+	}
+
 	ASSERT(len < QCOW_HEADER_BUF_LEN);
 }
 
@@ -253,10 +318,12 @@ static ssize_t __qcow_file_fmt_dbgfs_ofs_read(struct file *file,
 	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
 	unsigned int cur_bytes = 1;
 	u64 offset = 0;
-	u64 cluster_offset = 0;
+	u64 coffset = 0;
+	u64 host_offset = 0;
 	s64 offset_in_cluster = 0;
+	enum xloop_file_fmt_qcow_subcluster_type type;
 	ssize_t len = 0;
-	int ret = 0;
+	int ret = 0, csize = 0, nb_csectors = 0;
 
 	/* read the share debugfs offset */
 	ret = mutex_lock_interruptible(&qcow_data->dbgfs_qcow_offset_mutex);
@@ -267,8 +334,8 @@ static ssize_t __qcow_file_fmt_dbgfs_ofs_read(struct file *file,
 	mutex_unlock(&qcow_data->dbgfs_qcow_offset_mutex);
 
 	/* calculate and print the cluster offset */
-	ret = xloop_file_fmt_qcow_cluster_get_offset(xlo_fmt,
-		offset, &cur_bytes, &cluster_offset);
+	ret = xloop_file_fmt_qcow_get_host_offset(xlo_fmt,
+		offset, &cur_bytes, &host_offset, &type);
 	if (ret < 0)
 		return -EINVAL;
 
@@ -276,8 +343,26 @@ static ssize_t __qcow_file_fmt_dbgfs_ofs_read(struct file *file,
 		offset);
 
 	len = sprintf(qcow_data->dbgfs_file_qcow_cluster_buf,
-		"offset: %lld\ncluster_offset: %lld\noffset_in_cluster: %lld\n",
-		offset, cluster_offset, offset_in_cluster);
+		"cluster type: %s\n"
+		"cluster offset host: %lld\n"
+		"cluster offset guest: %lld\n"
+		"cluster offset in-cluster: %lld\n",
+		xloop_file_fmt_qcow_get_subcluster_name(type),
+		host_offset, offset, offset_in_cluster);
+
+	if (type == QCOW_SUBCLUSTER_COMPRESSED) {
+		coffset = host_offset & qcow_data->cluster_offset_mask;
+		nb_csectors = ((host_offset >> qcow_data->csize_shift) &
+			qcow_data->csize_mask) + 1;
+		csize = nb_csectors * QCOW_COMPRESSED_SECTOR_SIZE -
+			(coffset & ~QCOW_COMPRESSED_SECTOR_MASK);
+
+		len += sprintf(qcow_data->dbgfs_file_qcow_cluster_buf + len,
+			"cluster compressed offset: %lld\n"
+			"cluster compressed sectors: %d\n"
+			"cluster compressed size: %d\n",
+			coffset, nb_csectors, csize);
+	}
 
 	ASSERT(len < QCOW_CLUSTER_BUF_LEN);
 
@@ -385,6 +470,44 @@ static void __qcow_file_fmt_dbgfs_exit(struct xloop_file_fmt *xlo_fmt)
 }
 #endif
 
+static int __qcow_file_fmt_validate_compression_type(
+	struct xloop_file_fmt *xlo_fmt)
+{
+	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+
+	switch (qcow_data->compression_type) {
+	case QCOW_COMPRESSION_TYPE_ZLIB:
+#ifdef CONFIG_ZSTD_DECOMPRESS
+	case QCOW_COMPRESSION_TYPE_ZSTD:
+#endif
+		break;
+	default:
+		dev_err(xloop_file_fmt_to_dev(xlo_fmt), "unknown compression type: %u",
+			qcow_data->compression_type);
+		return -ENOTSUPP;
+	}
+
+	/*
+	 * if the compression type differs from QCOW_COMPRESSION_TYPE_ZLIB
+	 * the incompatible feature flag must be set
+	 */
+	if (qcow_data->compression_type == QCOW_COMPRESSION_TYPE_ZLIB) {
+		if (qcow_data->incompatible_features & QCOW_INCOMPAT_COMPRESSION) {
+			dev_err(xloop_file_fmt_to_dev(xlo_fmt), "compression type "
+				"incompatible feature bit must not be set\n");
+			return -EINVAL;
+		}
+	} else {
+		if (!(qcow_data->incompatible_features & QCOW_INCOMPAT_COMPRESSION)) {
+			dev_err(xloop_file_fmt_to_dev(xlo_fmt), "compression type "
+				"incompatible feature bit must be set\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
 {
 	struct xloop_file_fmt_qcow_data *qcow_data;
@@ -393,6 +516,10 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
 	u64 l1_vm_state_index;
 	u64 l2_cache_size;
 	u64 l2_cache_entry_size;
+	u64 virtual_disk_size;
+	u64 max_l2_entries;
+	u64 max_l2_cache;
+	u64 l2_cache_max_setting;
 	ssize_t len;
 	unsigned int i;
 	int ret = 0;
@@ -428,8 +555,6 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
 
 	qcow_data->cluster_bits = header.cluster_bits;
 	qcow_data->cluster_size = 1 << qcow_data->cluster_bits;
-	qcow_data->cluster_sectors = 1 <<
-		(qcow_data->cluster_bits - SECTOR_SHIFT);
 
 	if (header.header_length > qcow_data->cluster_size) {
 		dev_err(xloop_file_fmt_to_dev(xlo_fmt), "QCOW header exceeds cluster "
@@ -457,6 +582,25 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
 	qcow_data->compatible_features = header.compatible_features;
 	qcow_data->autoclear_features = header.autoclear_features;
 
+	/*
+	 * Handle compression type
+	 * Older qcow2 images don't contain the compression type header.
+	 * Distinguish them by the header length and use
+	 * the only valid (default) compression type in that case
+	 */
+	if (header.header_length > offsetof(struct xloop_file_fmt_qcow_header,
+		compression_type)) {
+		qcow_data->compression_type = header.compression_type;
+	} else {
+		qcow_data->compression_type = QCOW_COMPRESSION_TYPE_ZLIB;
+	}
+
+	ret = __qcow_file_fmt_validate_compression_type(xlo_fmt);
+	if (ret) {
+		goto free_qcow_data;
+	}
+
+	/* check for incompatible features */
 	if (qcow_data->incompatible_features & QCOW_INCOMPAT_DIRTY) {
 		dev_err(xloop_file_fmt_to_dev(xlo_fmt), "image contains inconsistent "
 			"refcounts\n");
@@ -472,12 +616,31 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
 	}
 
 	if (qcow_data->incompatible_features & QCOW_INCOMPAT_DATA_FILE) {
-		dev_err(xloop_file_fmt_to_dev(xlo_fmt), "clusters in the external "
-			"data file are not refcounted\n");
-		ret = -EACCES;
+		dev_err(xloop_file_fmt_to_dev(xlo_fmt), "data-file is required for "
			"this image\n");
+		ret = -EINVAL;
 		goto free_qcow_data;
 	}
 
+	qcow_data->subclusters_per_cluster =
+		xloop_file_fmt_qcow_has_subclusters(qcow_data) ?
+		QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER : 1;
+	qcow_data->subcluster_size =
+		qcow_data->cluster_size / qcow_data->subclusters_per_cluster;
+	/*
+	 * check if subcluster_size is non-zero to avoid unknown results of
+	 * __builtin_ctz
+	 */
+	ASSERT(qcow_data->subcluster_size != 0);
+	qcow_data->subcluster_bits = __builtin_ctz(qcow_data->subcluster_size);
+
+	if (qcow_data->subcluster_size < (1 << QCOW_MIN_CLUSTER_BITS)) {
+		dev_err(xloop_file_fmt_to_dev(xlo_fmt), "unsupported subcluster "
+			"size: %d\n", qcow_data->subcluster_size);
+		ret = -EINVAL;
+		goto free_qcow_data;
+	}
+
 	/* Check support for various header values */
 	if (header.refcount_order > 6) {
 		dev_err(xloop_file_fmt_to_dev(xlo_fmt), "reference count entry width "
@@ -498,8 +661,13 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
 		goto free_qcow_data;
 	}
 
-	/* L2 is always one cluster */
-	qcow_data->l2_bits = qcow_data->cluster_bits - 3;
+	/*
+	 * check if xloop_file_fmt_qcow_l2_entry_size(qcow_data) is non-zero to
+	 * avoid unknown results of __builtin_ctz
+	 */
+	ASSERT(xloop_file_fmt_qcow_l2_entry_size(qcow_data) != 0);
+	qcow_data->l2_bits = qcow_data->cluster_bits -
+		__builtin_ctz(xloop_file_fmt_qcow_l2_entry_size(qcow_data));
 	qcow_data->l2_size = 1 << qcow_data->l2_bits;
 	/* 2^(qcow_data->refcount_order - 3) is the refcount width in bytes */
 	qcow_data->refcount_block_bits = qcow_data->cluster_bits -
@@ -544,7 +712,7 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
 	/* read the level 1 table */
 	ret = __qcow_file_fmt_validate_table(xlo_fmt, header.l1_table_offset,
-		header.l1_size, sizeof(u64), QCOW_MAX_L1_SIZE,
+		header.l1_size, QCOW_L1E_SIZE, QCOW_MAX_L1_SIZE,
 		"Active L1 table");
 	if (ret < 0) {
 		goto free_qcow_data;
@@ -571,7 +739,7 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
 	if (qcow_data->l1_size > 0) {
 		qcow_data->l1_table = vzalloc(round_up(qcow_data->l1_size *
-			sizeof(u64), 512));
+			QCOW_L1E_SIZE, 512));
 		if (qcow_data->l1_table == NULL) {
 			dev_err(xloop_file_fmt_to_dev(xlo_fmt), "could not allocate "
 				"L1 table\n");
@@ -579,7 +747,7 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
 			goto free_qcow_data;
 		}
 		len = kernel_read(xlo->xlo_backing_file, qcow_data->l1_table,
-			qcow_data->l1_size * sizeof(u64),
+			qcow_data->l1_size * QCOW_L1E_SIZE,
 			&qcow_data->l1_table_offset);
 		if (len < 0) {
 			dev_err(xloop_file_fmt_to_dev(xlo_fmt), "could not read "
@@ -604,13 +772,21 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
 		goto free_l1_table;
 	}
 
-	/* create cache for L2 */
-	l2_cache_size = qcow_data->size / (qcow_data->cluster_size / 8);
-	l2_cache_entry_size = min(qcow_data->cluster_size, (int)4096);
+	virtual_disk_size = qcow_data->size;
+	max_l2_entries = DIV_ROUND_UP(virtual_disk_size, qcow_data->cluster_size);
+	max_l2_cache = round_up(
+		max_l2_entries * xloop_file_fmt_qcow_l2_entry_size(qcow_data),
+		qcow_data->cluster_size);
 
-	/* limit the L2 size to maximum QCOW_DEFAULT_L2_CACHE_MAX_SIZE */
-	l2_cache_size = min(l2_cache_size, (u64)QCOW_DEFAULT_L2_CACHE_MAX_SIZE);
+	/* define the maximum L2 cache size */
+	l2_cache_max_setting = QCOW_DEFAULT_L2_CACHE_MAX_SIZE;
+
+	/* limit the L2 cache size to maximum l2_cache_max_setting */
+	l2_cache_size = min(max_l2_cache, l2_cache_max_setting);
+
+	/* determine the size of a cache entry */
+	l2_cache_entry_size = min(qcow_data->cluster_size, (int)PAGE_SIZE);
 
 	/* calculate the number of cache tables */
 	l2_cache_size /= l2_cache_entry_size;
@@ -624,7 +800,8 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
 		goto free_l1_table;
 	}
 
-	qcow_data->l2_slice_size = l2_cache_entry_size / sizeof(u64);
+	qcow_data->l2_slice_size =
+		l2_cache_entry_size / xloop_file_fmt_qcow_l2_entry_size(qcow_data);
 
 	qcow_data->l2_table_cache = xloop_file_fmt_qcow_cache_create(xlo_fmt,
 		l2_cache_size, l2_cache_entry_size);
@@ -681,39 +858,195 @@ static void qcow_file_fmt_exit(struct xloop_file_fmt *xlo_fmt)
 	}
 }
 
-static ssize_t __qcow_file_fmt_buffer_decompress(struct xloop_file_fmt *xlo_fmt,
+/*
+ * __qcow_file_fmt_zlib_decompress()
+ *
+ * Decompress some data (not more than @src_size bytes) to produce exactly
+ * @dest_size bytes using the zlib compression method
+ *
+ * @xlo_fmt - QCOW file format
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: 0 on success
+ *          a negative error code on failure
+ */
+static ssize_t __qcow_file_fmt_zlib_decompress(struct xloop_file_fmt *xlo_fmt,
 	void *dest, size_t dest_size, const void *src, size_t src_size)
 {
 	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
-	int ret = 0;
+	u8 zerostuff = 0;
+	ssize_t ret = 0;
 
-	qcow_data->strm->avail_in = src_size;
-	qcow_data->strm->next_in = (void *) src;
-	qcow_data->strm->avail_out = dest_size;
-	qcow_data->strm->next_out = dest;
-
-	ret = zlib_inflateInit2(qcow_data->strm, -12);
+	ret = zlib_inflateReset(qcow_data->zlib_dstrm);
 	if (ret != Z_OK) {
-		return -1;
+		ret = -EINVAL;
+		goto out;
 	}
 
-	ret = zlib_inflate(qcow_data->strm, Z_FINISH);
-	if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR)
-		|| qcow_data->strm->avail_out != 0) {
-		/* We approve Z_BUF_ERROR because we need @dest buffer to be
-		 * filled, but @src buffer may be processed partly (because in
-		 * qcow2 we know size of compressed data with precision of one
-		 * sector) */
-		ret = -1;
-	} else {
-		ret = 0;
+	qcow_data->zlib_dstrm->avail_in = src_size;
+	qcow_data->zlib_dstrm->next_in = (void *)src;
+	qcow_data->zlib_dstrm->avail_out = dest_size;
+	qcow_data->zlib_dstrm->next_out = dest;
+
+	ret = zlib_inflate(qcow_data->zlib_dstrm, Z_SYNC_FLUSH);
+	/*
+	 * Work around a bug in zlib, which sometimes wants to taste an extra
+	 * byte when being used in the (undocumented) raw deflate mode.
+	 * (From USAGI).
+	 */
+	if (ret == Z_OK && !qcow_data->zlib_dstrm->avail_in &&
+		qcow_data->zlib_dstrm->avail_out) {
+		qcow_data->zlib_dstrm->next_in = &zerostuff;
+		qcow_data->zlib_dstrm->avail_in = 1;
+		ret = zlib_inflate(qcow_data->zlib_dstrm, Z_FINISH);
+	}
+	if (ret != Z_STREAM_END) {
+		ret = -EIO;
+		goto out;
 	}
+	ret = 0;
+
+out:
 	return ret;
 }
 
+#ifdef CONFIG_ZSTD_DECOMPRESS
+/*
+ * __qcow_file_fmt_zstd_decompress()
+ *
+ * Decompress some data (not more than @src_size bytes) to produce exactly
+ * @dest_size bytes using the zstd compression method
+ *
+ * @xlo_fmt - QCOW file format
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: 0 on success
+ *          -EIO on any error
+ */
+static ssize_t __qcow_file_fmt_zstd_decompress(struct xloop_file_fmt *xlo_fmt,
+	void *dest,
+	size_t dest_size,
+	const void *src,
+	size_t src_size)
+{
+	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+	size_t zstd_ret = 0;
+	ssize_t ret = 0;
+
+	ZSTD_outBuffer output = {
+		.dst = dest,
+		.size = dest_size,
+		.pos = 0
+	};
+
+	ZSTD_inBuffer input = {
+		.src = src,
+		.size = src_size,
+		.pos = 0
+	};
+
+	zstd_ret = ZSTD_resetDStream(qcow_data->zstd_dstrm);
+
+	if (ZSTD_isError(zstd_ret)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	/*
+	 * The compressed stream from the input buffer may consist of more
+	 * than one zstd frame. So we iterate until we get a fully
+	 * uncompressed cluster.
+	 * From zstd docs related to ZSTD_decompressStream:
+	 * "return : 0 when a frame is completely decoded and fully flushed"
+	 * We suppose that this means: each time ZSTD_decompressStream reads
+	 * only ONE full frame and returns 0 if and only if that frame
+	 * is completely decoded and flushed. Only after returning 0,
+	 * ZSTD_decompressStream reads another ONE full frame.
+	 */
+	while (output.pos < output.size) {
+		size_t last_in_pos = input.pos;
+		size_t last_out_pos = output.pos;
+		zstd_ret = ZSTD_decompressStream(qcow_data->zstd_dstrm, &output, &input);
+
+		if (ZSTD_isError(zstd_ret)) {
+			ret = -EIO;
+			break;
+		}
+
+		/*
+		 * The ZSTD manual is vague about what to do if it reads
+		 * the buffer partially, and we don't want to get stuck
+		 * in an infinite loop where ZSTD_decompressStream
+		 * returns > 0 waiting for another input chunk. So, we add
+		 * a check which ensures that the loop makes some progress
+		 * on each step.
+		 */
+		if (last_in_pos >= input.pos &&
+			last_out_pos >= output.pos) {
+			ret = -EIO;
+			break;
+		}
+	}
+	/*
+	 * Make sure that we have the frame fully flushed here;
+	 * if not, we somehow managed to get an uncompressed cluster
+	 * greater than the cluster size, possibly because the cluster
+	 * is damaged.
+	 */
+	if (zstd_ret > 0) {
+		ret = -EIO;
+	}
+
+out:
+	ASSERT(ret == 0 || ret == -EIO);
+	return ret;
+}
+#endif
+
+/*
+ * __qcow_file_fmt_buffer_decompress()
+ *
+ * Decompress @src_size bytes of data using the compression
+ * method defined by the image compression type
+ *
+ * @xlo_fmt - QCOW file format
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: 0 on success
+ *          a negative error code on failure
+ */
+static ssize_t __qcow_file_fmt_buffer_decompress(struct xloop_file_fmt *xlo_fmt,
+	void *dest,
+	size_t dest_size,
+	const void *src,
+	size_t src_size)
+{
+	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+	qcow_file_fmt_decompress_fn decompress_fn;
+
+	switch (qcow_data->compression_type) {
+	case QCOW_COMPRESSION_TYPE_ZLIB:
+		decompress_fn = __qcow_file_fmt_zlib_decompress;
+		break;
+
+#ifdef CONFIG_ZSTD_DECOMPRESS
+	case QCOW_COMPRESSION_TYPE_ZSTD:
+		decompress_fn = __qcow_file_fmt_zstd_decompress;
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
+
+	return decompress_fn(xlo_fmt, dest, dest_size, src, src_size);
+}
+
+
 static int __qcow_file_fmt_read_compressed(struct xloop_file_fmt *xlo_fmt,
 	struct bio_vec *bvec,
 	u64 file_cluster_offset,
@@ -783,8 +1116,9 @@ static int __qcow_file_fmt_read_bvec(struct xloop_file_fmt *xlo_fmt,
 	int ret;
 	unsigned int cur_bytes; /* number of bytes in current iteration */
 	u64 bytes;
-	u64 cluster_offset = 0;
+	u64 host_offset = 0;
 	u64 bytes_done = 0;
+	enum xloop_file_fmt_qcow_subcluster_type type;
 	void *data;
 	unsigned long irq_flags;
 	ssize_t len;
@@ -797,8 +1131,8 @@ static int __qcow_file_fmt_read_bvec(struct xloop_file_fmt *xlo_fmt,
 		/* prepare next request */
 		cur_bytes = bytes;
 
-		ret = xloop_file_fmt_qcow_cluster_get_offset(xlo_fmt, *ppos,
-			&cur_bytes, &cluster_offset);
+		ret = xloop_file_fmt_qcow_get_host_offset(xlo_fmt, *ppos,
+			&cur_bytes, &host_offset, &type);
 		if (ret < 0) {
 			goto fail;
 		}
@@ -806,32 +1140,28 @@ static int __qcow_file_fmt_read_bvec(struct xloop_file_fmt *xlo_fmt,
 		offset_in_cluster = xloop_file_fmt_qcow_offset_into_cluster(
 			qcow_data, *ppos);
 
-		switch (ret) {
-		case QCOW_CLUSTER_UNALLOCATED:
-		case QCOW_CLUSTER_ZERO_PLAIN:
-		case QCOW_CLUSTER_ZERO_ALLOC:
+		switch (type) {
+		case QCOW_SUBCLUSTER_ZERO_PLAIN:
+		case QCOW_SUBCLUSTER_ZERO_ALLOC:
+		case QCOW_SUBCLUSTER_UNALLOCATED_PLAIN:
+		case QCOW_SUBCLUSTER_UNALLOCATED_ALLOC:
 			data = bvec_kmap_irq(bvec, &irq_flags) + bytes_done;
 			memset(data, 0, cur_bytes);
 			flush_dcache_page(bvec->bv_page);
 			bvec_kunmap_irq(data, &irq_flags);
 			break;
 
-		case QCOW_CLUSTER_COMPRESSED:
+		case QCOW_SUBCLUSTER_COMPRESSED:
 			ret = __qcow_file_fmt_read_compressed(xlo_fmt, bvec,
-				cluster_offset, *ppos, cur_bytes, bytes_done);
+				host_offset, *ppos, cur_bytes, bytes_done);
 			if (ret < 0) {
 				goto fail;
 			}
 			break;
 
-		case QCOW_CLUSTER_NORMAL:
-			if ((cluster_offset & 511) != 0) {
-				ret = -EIO;
-				goto fail;
-			}
-
-			pos_read = cluster_offset + offset_in_cluster;
+		case QCOW_SUBCLUSTER_NORMAL:
+			pos_read = host_offset;
 
 			data = bvec_kmap_irq(bvec, &irq_flags) + bytes_done;
 			len = kernel_read(xlo->xlo_backing_file, data, cur_bytes,
@@ -842,6 +1172,7 @@ static int __qcow_file_fmt_read_bvec(struct xloop_file_fmt *xlo_fmt,
 			if (len < 0)
 				return len;
 
+			ASSERT(len == cur_bytes);
 			break;
 
 		default:
diff --git a/kernel/xloop_file_fmt_qcow_main.h b/kernel/xloop_file_fmt_qcow_main.h
index e6031be..023c679 100644
--- a/kernel/xloop_file_fmt_qcow_main.h
+++ b/kernel/xloop_file_fmt_qcow_main.h
@@ -19,6 +19,9 @@
 #include <linux/mutex.h>
 #include <linux/types.h>
 #include <linux/zlib.h>
+#ifdef CONFIG_ZSTD_DECOMPRESS
+#include <linux/zstd.h>
+#endif
 
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
@@ -80,6 +83,33 @@ do { \
 /* The cluster reads as all zeros */
 #define QCOW_OFLAG_ZERO (1ULL << 0)
 
+#define QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER 32
+
+/* The subcluster X [0..31] is allocated */
+#define QCOW_OFLAG_SUB_ALLOC(X) (1ULL << (X))
+/* The subcluster X [0..31] reads as zeroes */
+#define QCOW_OFLAG_SUB_ZERO(X) (QCOW_OFLAG_SUB_ALLOC(X) << 32)
+/* Subclusters [X, Y) (0 <= X <= Y <= 32) are allocated */
+#define QCOW_OFLAG_SUB_ALLOC_RANGE(X, Y) \
+	(QCOW_OFLAG_SUB_ALLOC(Y) - QCOW_OFLAG_SUB_ALLOC(X))
+/* Subclusters [X, Y) (0 <= X <= Y <= 32) read as zeroes */
+#define QCOW_OFLAG_SUB_ZERO_RANGE(X, Y) \
+	(QCOW_OFLAG_SUB_ALLOC_RANGE(X, Y) << 32)
+/* L2 entry bitmap with all allocation bits set */
+#define QCOW_L2_BITMAP_ALL_ALLOC (QCOW_OFLAG_SUB_ALLOC_RANGE(0, 32))
+/* L2 entry bitmap with all "read as zeroes" bits set */
+#define QCOW_L2_BITMAP_ALL_ZEROES (QCOW_OFLAG_SUB_ZERO_RANGE(0, 32))
+
+/* Size of normal and extended L2 entries */
+#define QCOW_L2E_SIZE_NORMAL (sizeof(u64))
+#define QCOW_L2E_SIZE_EXTENDED (sizeof(u64) * 2)
+
+/* Size of L1 table entries */
+#define QCOW_L1E_SIZE (sizeof(u64))
+
+/* Size of reftable entries */
+#define QCOW_REFTABLE_ENTRY_SIZE (sizeof(u64))
+
 #define QCOW_MIN_CLUSTER_BITS 9
 #define QCOW_MAX_CLUSTER_BITS 21
 
@@ -104,7 +134,7 @@ do { \
 /* Buffer size for debugfs file buffer to receive and display offset and
 * cluster offset information */
 #define QCOW_OFFSET_BUF_LEN 32
-#define QCOW_CLUSTER_BUF_LEN 128
+#define QCOW_CLUSTER_BUF_LEN 256
 
 struct xloop_file_fmt_qcow_header {
 	u32 magic;
@@ -128,6 +158,12 @@ struct xloop_file_fmt_qcow_header {
 
 	u32 refcount_order;
 	u32 header_length;
+
+	/* Additional fields */
+	u8 compression_type;
+
+	/* header must be a multiple of 8 */
+	u8 padding[7];
 } __attribute__((packed));
 
 struct xloop_file_fmt_qcow_snapshot_header {
@@ -144,11 +180,11 @@ struct xloop_file_fmt_qcow_snapshot_header {
 	u64 vm_clock_nsec;
 
 	u32 vm_state_size;
-	/* for extension */
-	u32 extra_data_size;
-	/* extra data follows */
-	/* id_str follows */
-	/* name follows */
+
+	/* Size of all extra data, including QCowSnapshotExtraData if
+	 * available */
+	u32 extra_data_size;
+	/* Data beyond QCowSnapshotExtraData, if any */
+	void *unknown_extra_data;
 } __attribute__((packed));
 
 enum {
@@ -162,13 +198,19 @@ enum {
 	QCOW_INCOMPAT_DIRTY_BITNR = 0,
 	QCOW_INCOMPAT_CORRUPT_BITNR = 1,
 	QCOW_INCOMPAT_DATA_FILE_BITNR = 2,
+	QCOW_INCOMPAT_COMPRESSION_BITNR = 3,
+	QCOW_INCOMPAT_EXTL2_BITNR = 4,
 	QCOW_INCOMPAT_DIRTY = 1 << QCOW_INCOMPAT_DIRTY_BITNR,
 	QCOW_INCOMPAT_CORRUPT = 1 << QCOW_INCOMPAT_CORRUPT_BITNR,
 	QCOW_INCOMPAT_DATA_FILE = 1 << QCOW_INCOMPAT_DATA_FILE_BITNR,
+	QCOW_INCOMPAT_COMPRESSION = 1 << QCOW_INCOMPAT_COMPRESSION_BITNR,
+	QCOW_INCOMPAT_EXTL2 = 1 << QCOW_INCOMPAT_EXTL2_BITNR,
 
 	QCOW_INCOMPAT_MASK = QCOW_INCOMPAT_DIRTY
-		| QCOW_INCOMPAT_CORRUPT
-		| QCOW_INCOMPAT_DATA_FILE,
+		| QCOW_INCOMPAT_CORRUPT
+		| QCOW_INCOMPAT_DATA_FILE
+		| QCOW_INCOMPAT_COMPRESSION
+		| QCOW_INCOMPAT_EXTL2,
 };
 
 /* compatible feature bits */
@@ -190,12 +232,19 @@ enum {
 	QCOW_AUTOCLEAR_DATA_FILE_RAW,
 };
 
+enum xloop_file_fmt_qcow_compression_type {
+	QCOW_COMPRESSION_TYPE_ZLIB,
+	QCOW_COMPRESSION_TYPE_ZSTD,
+};
+
 struct xloop_file_fmt_qcow_data {
 	u64 size;
 	int cluster_bits;
 	int cluster_size;
-	int cluster_sectors;
 	int l2_slice_size;
+	int subcluster_bits;
+	int subcluster_size;
+	int subclusters_per_cluster;
 	int l2_bits;
 	int l2_size;
 	int l1_size;
@@ -237,10 +286,27 @@ struct xloop_file_fmt_qcow_data {
 	u64 compatible_features;
 	u64 autoclear_features;
 
-	struct z_stream_s *strm;
+	/* ZLIB specific data */
+	z_streamp zlib_dstrm;
+
+	/* ZSTD specific data */
+#ifdef CONFIG_ZSTD_DECOMPRESS
+	void *zstd_dworkspace;
+	ZSTD_DStream *zstd_dstrm;
+#endif
+
+	/* used to cache last compressed QCOW cluster */
 	u8 *cmp_out_buf;
 	u64 cmp_last_coffset;
 
+	/*
+	 * Compression type used for the image. Default: 0 - ZLIB
+	 * The image compression type is set on image creation.
+	 * For now, the only way to change the compression type
+	 * is to convert the image with the desired compression type set.
+	 */
+	enum xloop_file_fmt_qcow_compression_type compression_type;
+
 	/* debugfs entries */
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dbgfs_dir;
@@ -265,6 +331,34 @@ struct xloop_file_fmt_qcow_cow_region {
 	unsigned nb_bytes;
 };
 
+/*
+ * In images with standard L2 entries all clusters are treated as if
+ * they had one subcluster so xloop_file_fmt_qcow_cluster_type and
+ * xloop_file_fmt_qcow_subcluster_type can be mapped to each other and
+ * have the exact same meaning (QCOW_SUBCLUSTER_UNALLOCATED_ALLOC cannot
+ * happen in these images).
+ *
+ * In images with extended L2 entries xloop_file_fmt_qcow_cluster_type
+ * refers to the complete cluster and xloop_file_fmt_qcow_subcluster_type
+ * to each of the individual subclusters, so there are several possible
+ * combinations:
+ *
+ *     |--------------+---------------------------|
+ *     | Cluster type | Possible subcluster types |
+ *     |--------------+---------------------------|
+ *     | UNALLOCATED  |      UNALLOCATED_PLAIN    |
+ *     |              |          ZERO_PLAIN       |
+ *     |--------------+---------------------------|
+ *     | NORMAL       |      UNALLOCATED_ALLOC    |
+ *     |              |          ZERO_ALLOC       |
+ *     |              |            NORMAL         |
+ *     |--------------+---------------------------|
+ *     | COMPRESSED   |         COMPRESSED        |
+ *     |--------------+---------------------------|
+ *
+ * QCOW_SUBCLUSTER_INVALID means that the L2 entry is incorrect and
+ * the image should be marked corrupt.
+ */
 enum xloop_file_fmt_qcow_cluster_type {
 	QCOW_CLUSTER_UNALLOCATED,
 	QCOW_CLUSTER_ZERO_PLAIN,
@@ -273,6 +367,16 @@ enum xloop_file_fmt_qcow_cluster_type {
 	QCOW_CLUSTER_COMPRESSED,
 };
 
+enum xloop_file_fmt_qcow_subcluster_type {
+	QCOW_SUBCLUSTER_UNALLOCATED_PLAIN,
+	QCOW_SUBCLUSTER_UNALLOCATED_ALLOC,
+	QCOW_SUBCLUSTER_ZERO_PLAIN,
+	QCOW_SUBCLUSTER_ZERO_ALLOC,
+	QCOW_SUBCLUSTER_NORMAL,
+	QCOW_SUBCLUSTER_COMPRESSED,
+	QCOW_SUBCLUSTER_INVALID,
+};
+
 enum xloop_file_fmt_qcow_metadata_overlap {
 	QCOW_OL_MAIN_HEADER_BITNR = 0,
 	QCOW_OL_ACTIVE_L1_BITNR = 1,
@@ -314,25 +418,51 @@ enum xloop_file_fmt_qcow_metadata_overlap {
 #define QCOW_OL_ALL \
 	(QCOW_OL_CACHED | QCOW_OL_INACTIVE_L2)
 
-#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
-#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
-#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
+#define QCOW_L1E_OFFSET_MASK 0x00fffffffffffe00ULL
+#define QCOW_L2E_OFFSET_MASK 0x00fffffffffffe00ULL
+#define QCOW_L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
 
-#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
+static inline bool xloop_file_fmt_qcow_has_subclusters(
+	struct xloop_file_fmt_qcow_data *qcow_data)
+{
+	return qcow_data->incompatible_features & QCOW_INCOMPAT_EXTL2;
+}
+
+static inline size_t xloop_file_fmt_qcow_l2_entry_size(
+	struct xloop_file_fmt_qcow_data *qcow_data)
+{
+	return xloop_file_fmt_qcow_has_subclusters(qcow_data) ?
+		QCOW_L2E_SIZE_EXTENDED : QCOW_L2E_SIZE_NORMAL;
+}
+
+static inline u64 xloop_file_fmt_qcow_get_l2_entry(
+	struct xloop_file_fmt_qcow_data *qcow_data, u64 *l2_slice, int idx)
+{
+	idx *= xloop_file_fmt_qcow_l2_entry_size(qcow_data) / sizeof(u64);
+	return be64_to_cpu(l2_slice[idx]);
+}
 
-#define INV_OFFSET (-1ULL)
+static inline u64 xloop_file_fmt_qcow_get_l2_bitmap(
+	struct xloop_file_fmt_qcow_data *qcow_data, u64 *l2_slice, int idx)
+{
+	if (xloop_file_fmt_qcow_has_subclusters(qcow_data)) {
+		idx *= xloop_file_fmt_qcow_l2_entry_size(qcow_data) / sizeof(u64);
+		return be64_to_cpu(l2_slice[idx + 1]);
+	} else {
+		return 0; /* For convenience only; this value has no meaning. */
+	}
+}
 
 static inline bool xloop_file_fmt_qcow_has_data_file(
-	struct xloop_file_fmt *xlo_fmt)
+	struct xloop_file_fmt_qcow_data *qcow_data)
 {
 	/* At the moment, there is no support for copy on write! */
 	return false;
 }
 
 static inline bool xloop_file_fmt_qcow_data_file_is_raw(
-	struct xloop_file_fmt *xlo_fmt)
+	struct xloop_file_fmt_qcow_data *qcow_data)
 {
-	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
 	return !!(qcow_data->autoclear_features &
 		QCOW_AUTOCLEAR_DATA_FILE_RAW);
 }
@@ -349,6 +479,12 @@ static inline s64 xloop_file_fmt_qcow_offset_into_cluster(
 	return offset & (qcow_data->cluster_size - 1);
 }
 
+static inline s64 xloop_file_fmt_qcow_offset_into_subcluster(
+	struct xloop_file_fmt_qcow_data *qcow_data, s64 offset)
+{
+	return offset & (qcow_data->subcluster_size - 1);
+}
+
 static inline s64 xloop_file_fmt_qcow_size_to_clusters(
 	struct xloop_file_fmt_qcow_data *qcow_data, u64 size)
 {
@@ -382,6 +518,13 @@ static inline int xloop_file_fmt_qcow_offset_to_l2_slice_index(
 		(qcow_data->l2_slice_size - 1);
 }
 
+static inline int xloop_file_fmt_qcow_offset_to_sc_index(
+	struct xloop_file_fmt_qcow_data *qcow_data, s64 offset)
+{
+	return (offset >> qcow_data->subcluster_bits) &
+		(qcow_data->subclusters_per_cluster - 1);
+}
+
 static inline s64 xloop_file_fmt_qcow_vm_state_offset(
 	struct xloop_file_fmt_qcow_data *qcow_data)
 {
@@ -390,22 +533,25 @@ static inline s64 xloop_file_fmt_qcow_vm_state_offset(
 }
 
 static inline enum xloop_file_fmt_qcow_cluster_type
-xloop_file_fmt_qcow_get_cluster_type(struct xloop_file_fmt *xlo_fmt, u64 l2_entry)
+xloop_file_fmt_qcow_get_cluster_type(struct xloop_file_fmt *xlo_fmt,
+	u64 l2_entry)
 {
+	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+
 	if (l2_entry & QCOW_OFLAG_COMPRESSED) {
 		return QCOW_CLUSTER_COMPRESSED;
 	} else if (l2_entry & QCOW_OFLAG_ZERO) {
-		if (l2_entry & L2E_OFFSET_MASK) {
+		if (l2_entry & QCOW_L2E_OFFSET_MASK) {
 			return QCOW_CLUSTER_ZERO_ALLOC;
 		}
 		return QCOW_CLUSTER_ZERO_PLAIN;
-	} else if (!(l2_entry & L2E_OFFSET_MASK)) {
+	} else if (!(l2_entry & QCOW_L2E_OFFSET_MASK)) {
 		/* Offset 0 generally means unallocated, but it is ambiguous
 		 * with external data files because 0 is a valid offset there.
 		 * However, all clusters in external data files always have
 		 * refcount 1, so we can rely on QCOW_OFLAG_COPIED to
 		 * disambiguate. */
-		if (xloop_file_fmt_qcow_has_data_file(xlo_fmt) &&
+		if (xloop_file_fmt_qcow_has_data_file(qcow_data) &&
 			(l2_entry & QCOW_OFLAG_COPIED)) {
 			return QCOW_CLUSTER_NORMAL;
 		} else {
@@ -416,4 +562,85 @@ xloop_file_fmt_qcow_get_cluster_type(struct xloop_file_fmt *xlo_fmt, u64 l2_entr
 	}
 }
 
+/*
+ * In an image without subclusters @l2_bitmap is ignored and
+ * @sc_index must be 0.
+ * Return QCOW_SUBCLUSTER_INVALID if an invalid l2 entry is detected
+ * (this checks the whole entry and bitmap, not only the bits related
+ * to subcluster @sc_index).
+ */
+static inline enum xloop_file_fmt_qcow_subcluster_type
+xloop_file_fmt_qcow_get_subcluster_type(struct xloop_file_fmt *xlo_fmt,
+	u64 l2_entry, u64 l2_bitmap, unsigned int sc_index)
+{
+	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+	enum xloop_file_fmt_qcow_cluster_type type =
+		xloop_file_fmt_qcow_get_cluster_type(xlo_fmt, l2_entry);
+	ASSERT(sc_index < qcow_data->subclusters_per_cluster);
+
+	if (xloop_file_fmt_qcow_has_subclusters(qcow_data)) {
+		switch (type) {
+		case QCOW_CLUSTER_COMPRESSED:
+			return QCOW_SUBCLUSTER_COMPRESSED;
+		case QCOW_CLUSTER_NORMAL:
+			if ((l2_bitmap >> 32) & l2_bitmap) {
+				return QCOW_SUBCLUSTER_INVALID;
+			} else if (l2_bitmap & QCOW_OFLAG_SUB_ZERO(sc_index)) {
+				return QCOW_SUBCLUSTER_ZERO_ALLOC;
+			} else if (l2_bitmap & QCOW_OFLAG_SUB_ALLOC(sc_index)) {
+				return QCOW_SUBCLUSTER_NORMAL;
+			} else {
+				return QCOW_SUBCLUSTER_UNALLOCATED_ALLOC;
+			}
+		case QCOW_CLUSTER_UNALLOCATED:
+			if (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC) {
+				return QCOW_SUBCLUSTER_INVALID;
+			} else if (l2_bitmap & QCOW_OFLAG_SUB_ZERO(sc_index)) {
+				return QCOW_SUBCLUSTER_ZERO_PLAIN;
+			} else {
+				return QCOW_SUBCLUSTER_UNALLOCATED_PLAIN;
+			}
+		default:
+			/* not reachable */
+			ASSERT(false);
+			return QCOW_SUBCLUSTER_INVALID;
+		}
+	} else {
+		switch (type) {
+		case QCOW_CLUSTER_COMPRESSED:
+			return QCOW_SUBCLUSTER_COMPRESSED;
+		case QCOW_CLUSTER_ZERO_PLAIN:
+			return QCOW_SUBCLUSTER_ZERO_PLAIN;
+		case QCOW_CLUSTER_ZERO_ALLOC:
+			return QCOW_SUBCLUSTER_ZERO_ALLOC;
+		case QCOW_CLUSTER_NORMAL:
+			return QCOW_SUBCLUSTER_NORMAL;
+		case QCOW_CLUSTER_UNALLOCATED:
+			return QCOW_SUBCLUSTER_UNALLOCATED_PLAIN;
+		default:
+			/* not reachable */
+			ASSERT(false);
+			return QCOW_SUBCLUSTER_INVALID;
+		}
+	}
+}
+
+#ifdef CONFIG_DEBUG_FS
+static inline const char *xloop_file_fmt_qcow_get_subcluster_name(
+	const enum xloop_file_fmt_qcow_subcluster_type type)
+{
+	static const char *subcluster_names[] = {
+		"QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN",
+		"QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC",
+		"QCOW2_SUBCLUSTER_ZERO_PLAIN",
+		"QCOW2_SUBCLUSTER_ZERO_ALLOC",
+		"QCOW2_SUBCLUSTER_NORMAL",
+		"QCOW2_SUBCLUSTER_COMPRESSED",
+		"QCOW2_SUBCLUSTER_INVALID"
+	};
+
+	return subcluster_names[type];
+}
+#endif
+
 #endif
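A note on the extended-L2 bitmap arithmetic: the QCOW_OFLAG_SUB_* macros pack 32 allocation bits into the low half of the 64-bit bitmap and 32 "reads as zeroes" bits into the high half, and the patch counts contiguous runs with __builtin_ctz on a masked value. The following stand-alone user-space sketch (hypothetical, not part of the patch) mirrors those macros and the counting trick used by __xloop_file_fmt_qcow_get_subcluster_range_type() for the NORMAL case:

/* Hypothetical user-space sketch mirroring the QCOW_OFLAG_SUB_* macros
 * added in xloop_file_fmt_qcow_main.h; names are local stand-ins. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define SUB_ALLOC(x)          (1ULL << (x))               /* subcluster x allocated */
#define SUB_ZERO(x)           (SUB_ALLOC(x) << 32)        /* subcluster x reads as zero */
#define SUB_ALLOC_RANGE(x, y) (SUB_ALLOC(y) - SUB_ALLOC(x)) /* bits [x, y) set */

/* Count contiguous allocated subclusters starting at sc_from: set all
 * bits below sc_from, then the first clear allocation bit marks the end
 * of the run, found via ctz on the inverted bitmap. */
static int count_normal_run(uint64_t l2_bitmap, unsigned int sc_from)
{
	uint32_t val = (uint32_t)(l2_bitmap | SUB_ALLOC_RANGE(0, sc_from));

	assert(~val != 0); /* __builtin_ctz(0) is undefined */
	return __builtin_ctz(~val) - sc_from;
}

int main(void)
{
	/* Subclusters 0-4 allocated, 5 unallocated, 6-7 allocated. */
	uint64_t bitmap = SUB_ALLOC_RANGE(0, 5) | SUB_ALLOC(6) | SUB_ALLOC(7);

	printf("run from 0: %d\n", count_normal_run(bitmap, 0)); /* 5 */
	printf("run from 2: %d\n", count_normal_run(bitmap, 2)); /* 3 */
	printf("run from 6: %d\n", count_normal_run(bitmap, 6)); /* 2 */
	return 0;
}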
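Likewise, the offset decomposition done by xloop_file_fmt_qcow_get_host_offset() is plain shift-and-mask arithmetic over the inline helpers in xloop_file_fmt_qcow_main.h. A minimal sketch, assuming 64 KiB clusters with extended L2 entries (32 subclusters of 2 KiB each, 16-byte L2 entries) and 4 KiB cache slices; the concrete values are made up for illustration:

/* Hypothetical sketch: guest offset -> L1/L2/slice/subcluster indices. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const int cluster_bits = 16;                  /* 64 KiB clusters */
	const int subclusters_per_cluster = 32;       /* extended L2 */
	const int subcluster_bits = cluster_bits - 5; /* 2 KiB subclusters */
	const int l2_slice_size = 4096 / 16;          /* 16-byte entries per 4 KiB slice */
	const int l2_bits = cluster_bits - 4;         /* entries per one-cluster L2 table */

	uint64_t offset = 0x12345678ULL;              /* example guest offset */

	uint64_t l1_index = offset >> (l2_bits + cluster_bits);
	uint64_t l2_index = (offset >> cluster_bits) & ((1ULL << l2_bits) - 1);
	unsigned int slice_index = (unsigned int)(l2_index & (l2_slice_size - 1));
	unsigned int sc_index = (unsigned int)
		((offset >> subcluster_bits) & (subclusters_per_cluster - 1));
	uint64_t in_cluster = offset & ((1ULL << cluster_bits) - 1);

	/* prints: l1=1 l2=564 slice=52 sc=10 in_cluster=22136 */
	printf("l1=%llu l2=%llu slice=%u sc=%u in_cluster=%llu\n",
	       (unsigned long long)l1_index, (unsigned long long)l2_index,
	       slice_index, sc_index, (unsigned long long)in_cluster);
	return 0;
}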
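Finally, the compressed-cluster branch of the new debugfs handler unpacks a descriptor that carries both a host offset and a sector count in one 64-bit word. A small sketch of that decoding, assuming 64 KiB clusters and a made-up descriptor value; the shift and mask derivation follows the qcow2 specification, and 512 corresponds to QCOW_COMPRESSED_SECTOR_SIZE in the driver:

/* Hypothetical sketch: decoding a compressed cluster descriptor. */
#include <stdint.h>
#include <stdio.h>

#define COMPRESSED_SECTOR_SIZE 512ULL

int main(void)
{
	const int cluster_bits = 16;                     /* 64 KiB clusters */
	const int csize_shift = 62 - (cluster_bits - 8); /* = 54 here */
	const uint64_t csize_mask = (1ULL << (cluster_bits - 8)) - 1;
	const uint64_t cluster_offset_mask = (1ULL << csize_shift) - 1;

	/* made-up descriptor: host offset 0x30200, 2 additional sectors */
	uint64_t desc = (2ULL << csize_shift) | 0x30200ULL;

	uint64_t coffset = desc & cluster_offset_mask;
	int nb_csectors = (int)((desc >> csize_shift) & csize_mask) + 1;
	int csize = (int)(nb_csectors * COMPRESSED_SECTOR_SIZE -
			  (coffset & (COMPRESSED_SECTOR_SIZE - 1)));

	/* prints: coffset=197120 sectors=3 csize=1024 */
	printf("coffset=%llu sectors=%d csize=%d\n",
	       (unsigned long long)coffset, nb_csectors, csize);
	return 0;
}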