author    Manuel Bentele  2020-10-07 11:01:56 +0200
committer Manuel Bentele  2020-10-07 11:01:56 +0200
commit    e19fe3beba17c3b85c4d776deaa7834565d51490
tree      88eea1ad0e5dc5b1a2254a3f459fa9b461b80662
parent    Added RPM package creation and moved to relative CMake installation paths
Updated QCOW file format driver with upstream patches
Ported recent upstream patches from QEMU's qcow2 file format to xloop's
QCOW file format driver. The following changes have been made to the
QCOW file format driver:

- added support for reading extended L2 entries
- added a decompression interface to support various compression types
- implemented the ZSTD compression type for QCOW images
-rw-r--r--  kernel/cmake/kernel.cmake            |   2
-rw-r--r--  kernel/xloop_file_fmt_qcow_cluster.c | 281
-rw-r--r--  kernel/xloop_file_fmt_qcow_cluster.h |   7
-rw-r--r--  kernel/xloop_file_fmt_qcow_main.c    | 469
-rw-r--r--  kernel/xloop_file_fmt_qcow_main.h    | 269
5 files changed, 835 insertions(+), 193 deletions(-)
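
For orientation before the diffs: extended L2 tables pair each 64-bit L2
entry with a 64-bit subcluster bitmap, so one entry occupies 16 bytes
instead of 8 (QCOW_L2E_SIZE_EXTENDED below). A minimal userspace sketch of
the decode, mirroring the xloop_file_fmt_qcow_get_l2_entry()/_get_l2_bitmap()
helpers this patch adds; be64toh() stands in for the kernel's be64_to_cpu():

#include <endian.h>   /* be64toh(); the driver uses be64_to_cpu() */
#include <stdint.h>

/* With extended L2 entries, slot idx of an L2 slice holds two
 * big-endian 64-bit words: the entry itself and its subcluster
 * bitmap (low 32 bits: allocated, high 32 bits: reads-as-zero). */
static uint64_t ext_l2_entry(const uint64_t *l2_slice, int idx)
{
        return be64toh(l2_slice[2 * idx]);
}

static uint64_t ext_l2_bitmap(const uint64_t *l2_slice, int idx)
{
        return be64toh(l2_slice[2 * idx + 1]);
}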
diff --git a/kernel/cmake/kernel.cmake b/kernel/cmake/kernel.cmake
index 385cb40..3ac7633 100644
--- a/kernel/cmake/kernel.cmake
+++ b/kernel/cmake/kernel.cmake
@@ -22,7 +22,7 @@ macro(add_kernel_module MODULE_NAME KERNEL_DIR MODULE_MACRO MODULE_SOURCE_FILES
endif()
# define build command
set(MODULE_BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} ${MODULE_MACRO}
- -C /${KERNEL_DIR}/build
+ -C ${KERNEL_DIR}/build
M=${CMAKE_CURRENT_BINARY_DIR}/${MODULE_NAME} modules
EXTRA_CFLAGS=${KERNEL_C_FLAGS}
KBUILD_EXTRA_SYMBOLS=${MODULE_EXTRA_SYMBOLS})
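
Note: the parent commit moved the build system to relative paths, and the
hard-coded leading slash in "-C /${KERNEL_DIR}/build" would anchor any
relative KERNEL_DIR at the filesystem root; presumably that is what this
one-character fix addresses, letting both absolute and relative kernel
directories resolve correctly.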
diff --git a/kernel/xloop_file_fmt_qcow_cluster.c b/kernel/xloop_file_fmt_qcow_cluster.c
index deef22b..8394c76 100644
--- a/kernel/xloop_file_fmt_qcow_cluster.c
+++ b/kernel/xloop_file_fmt_qcow_cluster.c
@@ -21,6 +21,14 @@
#include "xloop_file_fmt_qcow_cluster.h"
/*
+ * __xloop_file_fmt_qcow_cluster_l2_load
+ *
+ * @xlo_fmt: QCOW file format
+ * @offset: A guest offset, used to calculate what slice of the L2
+ * table to load.
+ * @l2_offset: Offset to the L2 table in the image file.
+ * @l2_slice: Location to store the pointer to the L2 slice.
+ *
* Loads a L2 slice into memory (L2 slices are the parts of L2 tables
* that are loaded by the qcow2 cache). If the slice is in the cache,
* the cache is used; otherwise the L2 slice is loaded from the image
@@ -31,7 +39,7 @@ static int __xloop_file_fmt_qcow_cluster_l2_load(struct xloop_file_fmt *xlo_fmt,
{
struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
- int start_of_slice = sizeof(u64) * (
+ int start_of_slice = xloop_file_fmt_qcow_l2_entry_size(qcow_data) * (
xloop_file_fmt_qcow_offset_to_l2_index(qcow_data, offset) -
xloop_file_fmt_qcow_offset_to_l2_slice_index(qcow_data, offset)
);
@@ -42,93 +50,159 @@ static int __xloop_file_fmt_qcow_cluster_l2_load(struct xloop_file_fmt *xlo_fmt,
}
/*
- * Checks how many clusters in a given L2 slice are contiguous in the image
- * file. As soon as one of the flags in the bitmask stop_flags changes compared
- * to the first cluster, the search is stopped and the cluster is not counted
- * as contiguous. (This allows it, for example, to stop at the first compressed
- * cluster which may require a different handling)
+ * For a given L2 entry, count the number of contiguous subclusters of
+ * the same type starting from @sc_from. Compressed clusters are
+ * treated as if they were divided into subclusters of size
+ * qcow_data->subcluster_size.
+ *
+ * Return the number of contiguous subclusters and set @type to the
+ * subcluster type.
+ *
+ * If the L2 entry is invalid return -errno and set @type to
+ * QCOW_SUBCLUSTER_INVALID.
*/
-static int __xloop_file_fmt_qcow_cluster_count_contiguous(
- struct xloop_file_fmt *xlo_fmt, int nb_clusters, int cluster_size,
- u64 *l2_slice, u64 stop_flags)
+static int __xloop_file_fmt_qcow_get_subcluster_range_type(
+ struct xloop_file_fmt *xlo_fmt, u64 l2_entry, u64 l2_bitmap,
+ unsigned int sc_from, enum xloop_file_fmt_qcow_subcluster_type *type)
{
- int i;
- enum xloop_file_fmt_qcow_cluster_type first_cluster_type;
- u64 mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
- u64 first_entry = be64_to_cpu(l2_slice[0]);
- u64 offset = first_entry & mask;
-
- first_cluster_type = xloop_file_fmt_qcow_get_cluster_type(xlo_fmt,
- first_entry);
- if (first_cluster_type == QCOW_CLUSTER_UNALLOCATED) {
+ struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+ u32 val;
+
+ *type = xloop_file_fmt_qcow_get_subcluster_type(xlo_fmt, l2_entry,
+ l2_bitmap, sc_from);
+
+ if (*type == QCOW_SUBCLUSTER_INVALID) {
+ return -EINVAL;
+ } else if (!xloop_file_fmt_qcow_has_subclusters(qcow_data) ||
+ *type == QCOW_SUBCLUSTER_COMPRESSED) {
+ return qcow_data->subclusters_per_cluster - sc_from;
+ }
+
+ switch (*type) {
+ case QCOW_SUBCLUSTER_NORMAL:
+ val = l2_bitmap | QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from);
+ return __builtin_ctz(~val) - sc_from;
+
+ case QCOW_SUBCLUSTER_ZERO_PLAIN:
+ case QCOW_SUBCLUSTER_ZERO_ALLOC:
+ val = (l2_bitmap | QCOW_OFLAG_SUB_ZERO_RANGE(0, sc_from)) >> 32;
+ return __builtin_ctz(~val) - sc_from;
+
+ case QCOW_SUBCLUSTER_UNALLOCATED_PLAIN:
+ case QCOW_SUBCLUSTER_UNALLOCATED_ALLOC:
+ val = ((l2_bitmap >> 32) | l2_bitmap)
+ & ~QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from);
+ return __builtin_ctz(val) - sc_from;
+
+ default:
+ /* not reachable */
+ ASSERT(false);
+ *type = QCOW_SUBCLUSTER_INVALID;
return 0;
- }
-
- /* must be allocated */
- ASSERT(first_cluster_type == QCOW_CLUSTER_NORMAL ||
- first_cluster_type == QCOW_CLUSTER_ZERO_ALLOC);
-
- for (i = 0; i < nb_clusters; i++) {
- u64 l2_entry = be64_to_cpu(l2_slice[i]) & mask;
- if (offset + (u64) i * cluster_size != l2_entry) {
- break;
- }
- }
-
- return i;
+ }
}
/*
- * Checks how many consecutive unallocated clusters in a given L2
- * slice have the same cluster type.
+ * Return the number of contiguous subclusters of the exact same type
+ * in a given L2 slice, starting from cluster @l2_index, subcluster
+ * @sc_index. Allocated subclusters are required to be contiguous in
+ * the image file.
+ * At most @nb_clusters are checked (note that this means clusters,
+ * not subclusters).
+ * Compressed clusters are always processed one by one but for the
+ * purpose of this count they are treated as if they were divided into
+ * subclusters of size qcow_data->subcluster_size.
+ * On failure return -errno and update @l2_index to point to the
+ * invalid entry.
*/
-static int __xloop_file_fmt_qcow_cluster_count_contiguous_unallocated(
- struct xloop_file_fmt *xlo_fmt, int nb_clusters, u64 *l2_slice,
- enum xloop_file_fmt_qcow_cluster_type wanted_type)
+static int __xloop_file_fmt_qcow_count_contiguous_subclusters(
+ struct xloop_file_fmt *xlo_fmt, int nb_clusters, unsigned int sc_index,
+ u64 *l2_slice, unsigned int *l2_index)
{
- int i;
-
- ASSERT(wanted_type == QCOW_CLUSTER_ZERO_PLAIN ||
- wanted_type == QCOW_CLUSTER_UNALLOCATED);
-
- for (i = 0; i < nb_clusters; i++) {
- u64 entry = be64_to_cpu(l2_slice[i]);
- enum xloop_file_fmt_qcow_cluster_type type =
- xloop_file_fmt_qcow_get_cluster_type(xlo_fmt, entry);
-
- if (type != wanted_type) {
- break;
- }
- }
-
- return i;
+ struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+ int i, count = 0;
+ bool check_offset = false;
+ u64 expected_offset = 0;
+ enum xloop_file_fmt_qcow_subcluster_type expected_type =
+ QCOW_SUBCLUSTER_NORMAL;
+ enum xloop_file_fmt_qcow_subcluster_type type;
+
+ ASSERT(*l2_index + nb_clusters <= qcow_data->l2_slice_size);
+
+ for (i = 0; i < nb_clusters; i++) {
+ unsigned int first_sc = (i == 0) ? sc_index : 0;
+ u64 l2_entry = xloop_file_fmt_qcow_get_l2_entry(qcow_data, l2_slice,
+ *l2_index + i);
+ u64 l2_bitmap = xloop_file_fmt_qcow_get_l2_bitmap(qcow_data, l2_slice,
+ *l2_index + i);
+ int ret = __xloop_file_fmt_qcow_get_subcluster_range_type(xlo_fmt,
+ l2_entry, l2_bitmap, first_sc, &type);
+ if (ret < 0) {
+ *l2_index += i; /* Point to the invalid entry */
+ return -EIO;
+ }
+ if (i == 0) {
+ if (type == QCOW_SUBCLUSTER_COMPRESSED) {
+ /* Compressed clusters are always processed one by one */
+ return ret;
+ }
+ expected_type = type;
+ expected_offset = l2_entry & QCOW_L2E_OFFSET_MASK;
+ check_offset = (type == QCOW_SUBCLUSTER_NORMAL ||
+ type == QCOW_SUBCLUSTER_ZERO_ALLOC ||
+ type == QCOW_SUBCLUSTER_UNALLOCATED_ALLOC);
+ } else if (type != expected_type) {
+ break;
+ } else if (check_offset) {
+ expected_offset += qcow_data->cluster_size;
+ if (expected_offset != (l2_entry & QCOW_L2E_OFFSET_MASK)) {
+ break;
+ }
+ }
+ count += ret;
+ /* Stop if there are type changes before the end of the cluster */
+ if (first_sc + ret < qcow_data->subclusters_per_cluster) {
+ break;
+ }
+ }
+
+ return count;
}
/*
- * For a given offset of the virtual disk, find the cluster type and offset in
- * the qcow2 file. The offset is stored in *cluster_offset.
+ * xloop_file_fmt_qcow_get_host_offset
+ *
+ * For a given offset of the virtual disk find the equivalent host
+ * offset in the qcow2 file and store it in *host_offset. Neither
+ * offset needs to be aligned to a cluster boundary.
+ *
+ * If the cluster is unallocated then *host_offset will be 0.
+ * If the cluster is compressed then *host_offset will contain the
+ * complete compressed cluster descriptor.
*
* On entry, *bytes is the maximum number of contiguous bytes starting at
* offset that we are interested in.
*
* On exit, *bytes is the number of bytes starting at offset that have the same
- * cluster type and (if applicable) are stored contiguously in the image file.
- * Compressed clusters are always returned one by one.
+ * subcluster type and (if applicable) are stored contiguously in the image
+ * file. The subcluster type is stored in *subcluster_type.
+ * Compressed clusters are always processed one by one.
*
- * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
- * cases.
+ * Returns 0 on success, -errno in error cases.
*/
-int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
- u64 offset, unsigned int *bytes, u64 *cluster_offset)
+int xloop_file_fmt_qcow_get_host_offset(struct xloop_file_fmt *xlo_fmt,
+ u64 offset, unsigned int *bytes, u64 *host_offset,
+ enum xloop_file_fmt_qcow_subcluster_type *subcluster_type)
{
struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
- unsigned int l2_index;
- u64 l1_index, l2_offset, *l2_slice;
- int c;
+ unsigned int l2_index, sc_index;
+ u64 l1_index, l2_offset, *l2_slice, l2_entry, l2_bitmap;
+ int sc;
unsigned int offset_in_cluster;
u64 bytes_available, bytes_needed, nb_clusters;
- enum xloop_file_fmt_qcow_cluster_type type;
+ enum xloop_file_fmt_qcow_subcluster_type type;
int ret;
+ u64 host_cluster_offset;
offset_in_cluster = xloop_file_fmt_qcow_offset_into_cluster(qcow_data,
offset);
@@ -146,18 +220,18 @@ int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
bytes_needed = bytes_available;
}
- *cluster_offset = 0;
+ *host_offset = 0;
/* seek to the l2 offset in the l1 table */
l1_index = xloop_file_fmt_qcow_offset_to_l1_index(qcow_data, offset);
if (l1_index >= qcow_data->l1_size) {
- type = QCOW_CLUSTER_UNALLOCATED;
+ type = QCOW_SUBCLUSTER_UNALLOCATED_PLAIN;
goto out;
}
- l2_offset = qcow_data->l1_table[l1_index] & L1E_OFFSET_MASK;
+ l2_offset = qcow_data->l1_table[l1_index] & QCOW_L1E_OFFSET_MASK;
if (!l2_offset) {
- type = QCOW_CLUSTER_UNALLOCATED;
+ type = QCOW_SUBCLUSTER_UNALLOCATED_PLAIN;
goto out;
}
@@ -177,7 +251,11 @@ int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
/* find the cluster offset for the given disk offset */
l2_index = xloop_file_fmt_qcow_offset_to_l2_slice_index(qcow_data,
offset);
- *cluster_offset = be64_to_cpu(l2_slice[l2_index]);
+ sc_index = xloop_file_fmt_qcow_offset_to_sc_index(qcow_data, offset);
+ l2_entry = xloop_file_fmt_qcow_get_l2_entry(qcow_data, l2_slice,
+ l2_index);
+ l2_bitmap = xloop_file_fmt_qcow_get_l2_bitmap(qcow_data, l2_slice,
+ l2_index);
nb_clusters = xloop_file_fmt_qcow_size_to_clusters(qcow_data,
bytes_needed);
@@ -186,10 +264,11 @@ int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
* assertion is always true */
ASSERT(nb_clusters <= INT_MAX);
- type = xloop_file_fmt_qcow_get_cluster_type(xlo_fmt, *cluster_offset);
+ type = xloop_file_fmt_qcow_get_subcluster_type(xlo_fmt, l2_entry,
+ l2_bitmap, sc_index);
if (qcow_data->qcow_version < 3 && (
- type == QCOW_CLUSTER_ZERO_PLAIN ||
- type == QCOW_CLUSTER_ZERO_ALLOC)) {
+ type == QCOW_SUBCLUSTER_ZERO_PLAIN ||
+ type == QCOW_SUBCLUSTER_ZERO_ALLOC)) {
dev_err_ratelimited(xloop_file_fmt_to_dev(xlo_fmt), "zero cluster "
"entry found in pre-v3 image (L2 offset: %llx, L2 index: %x)\n",
l2_offset, l2_index);
@@ -197,45 +276,39 @@ int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
goto fail;
}
switch (type) {
- case QCOW_CLUSTER_COMPRESSED:
- if (xloop_file_fmt_qcow_has_data_file(xlo_fmt)) {
+ case QCOW_SUBCLUSTER_INVALID:
+ break; /* This is handled by count_contiguous_subclusters() below */
+ case QCOW_SUBCLUSTER_COMPRESSED:
+ if (xloop_file_fmt_qcow_has_data_file(qcow_data)) {
dev_err_ratelimited(xloop_file_fmt_to_dev(xlo_fmt), "compressed "
"cluster entry found in image with external data file "
"(L2 offset: %llx, L2 index: %x)\n", l2_offset, l2_index);
ret = -EIO;
goto fail;
}
- /* Compressed clusters can only be processed one by one */
- c = 1;
- *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
+ *host_offset = l2_entry & QCOW_L2E_COMPRESSED_OFFSET_SIZE_MASK;
break;
- case QCOW_CLUSTER_ZERO_PLAIN:
- case QCOW_CLUSTER_UNALLOCATED:
- /* how many empty clusters ? */
- c = __xloop_file_fmt_qcow_cluster_count_contiguous_unallocated(
- xlo_fmt, nb_clusters, &l2_slice[l2_index], type);
- *cluster_offset = 0;
+ case QCOW_SUBCLUSTER_ZERO_PLAIN:
+ case QCOW_SUBCLUSTER_UNALLOCATED_PLAIN:
break;
- case QCOW_CLUSTER_ZERO_ALLOC:
- case QCOW_CLUSTER_NORMAL:
- /* how many allocated clusters ? */
- c = __xloop_file_fmt_qcow_cluster_count_contiguous(xlo_fmt,
- nb_clusters, qcow_data->cluster_size,
- &l2_slice[l2_index], QCOW_OFLAG_ZERO);
- *cluster_offset &= L2E_OFFSET_MASK;
+ case QCOW_SUBCLUSTER_ZERO_ALLOC:
+ case QCOW_SUBCLUSTER_NORMAL:
+ case QCOW_SUBCLUSTER_UNALLOCATED_ALLOC:
+ host_cluster_offset = l2_entry & QCOW_L2E_OFFSET_MASK;
+ *host_offset = host_cluster_offset + offset_in_cluster;
if (xloop_file_fmt_qcow_offset_into_cluster(qcow_data,
- *cluster_offset)) {
+ host_cluster_offset)) {
dev_err_ratelimited(xloop_file_fmt_to_dev(xlo_fmt), "cluster "
"allocation offset %llx unaligned (L2 offset: %llx, "
- "L2 index: %x)\n", *cluster_offset, l2_offset, l2_index);
+ "L2 index: %x)\n", host_cluster_offset, l2_offset, l2_index);
ret = -EIO;
goto fail;
}
- if (xloop_file_fmt_qcow_has_data_file(xlo_fmt) &&
- *cluster_offset != offset - offset_in_cluster) {
+ if (xloop_file_fmt_qcow_has_data_file(qcow_data) &&
+ *host_offset != offset) {
dev_err_ratelimited(xloop_file_fmt_to_dev(xlo_fmt), "external "
"data file host cluster offset %llx does not match guest "
- "cluster offset: %llx, L2 index: %x)\n", *cluster_offset,
+ "cluster offset: %llx, L2 index: %x)\n", host_cluster_offset,
offset - offset_in_cluster, l2_index);
ret = -EIO;
goto fail;
@@ -245,9 +318,19 @@ int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
BUG();
}
+ sc = __xloop_file_fmt_qcow_count_contiguous_subclusters(xlo_fmt,
+ nb_clusters, sc_index, l2_slice, &l2_index);
+
+ if (sc < 0) {
+ dev_err_ratelimited(xloop_file_fmt_to_dev(xlo_fmt), "invalid cluster "
+ "entry found (L2 offset: %#llx, L2 index: %#x)", l2_offset,
+ l2_index);
+ ret = -EIO;
+ goto fail;
+ }
xloop_file_fmt_qcow_cache_put(xlo_fmt, (void **) &l2_slice);
- bytes_available = (s64) c * qcow_data->cluster_size;
+ bytes_available = ((s64) sc + sc_index) << qcow_data->subcluster_bits;
out:
if (bytes_available > bytes_needed) {
@@ -260,7 +343,9 @@ out:
ASSERT(bytes_available - offset_in_cluster <= UINT_MAX);
*bytes = bytes_available - offset_in_cluster;
- return type;
+ *subcluster_type = type;
+
+ return 0;
fail:
xloop_file_fmt_qcow_cache_put(xlo_fmt, (void **) &l2_slice);
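
To make the new bitmap arithmetic concrete: a run of NORMAL subclusters
starting at @sc_from is measured by force-setting the allocation bits below
sc_from and counting trailing one-bits. A standalone sketch of the same
idea (using 64-bit ctz for simplicity, where the driver works on a u32
slice of the bitmap):

#include <assert.h>
#include <stdint.h>

/* Subclusters [x, y) are allocated (low 32 bits of the L2 bitmap). */
#define SUB_ALLOC(x)          (1ULL << (x))
#define SUB_ALLOC_RANGE(x, y) (SUB_ALLOC(y) - SUB_ALLOC(x))

/* Length of the run of NORMAL (allocated, non-zero) subclusters that
 * starts at sc_from: mark everything before sc_from as allocated, then
 * count trailing one-bits via ctz of the complement. */
static int normal_run_len(uint64_t l2_bitmap, unsigned int sc_from)
{
        uint64_t val = l2_bitmap | SUB_ALLOC_RANGE(0, sc_from);

        return __builtin_ctzll(~val) - sc_from;
}

int main(void)
{
        /* Subclusters 0..7 allocated, the rest unallocated. */
        uint64_t bitmap = SUB_ALLOC_RANGE(0, 8);

        assert(normal_run_len(bitmap, 0) == 8);
        assert(normal_run_len(bitmap, 3) == 5);
        return 0;
}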
diff --git a/kernel/xloop_file_fmt_qcow_cluster.h b/kernel/xloop_file_fmt_qcow_cluster.h
index ff3e0a1..a3716f5 100644
--- a/kernel/xloop_file_fmt_qcow_cluster.h
+++ b/kernel/xloop_file_fmt_qcow_cluster.h
@@ -15,9 +15,8 @@
#include "xloop_file_fmt.h"
-extern int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
- u64 offset,
- unsigned int *bytes,
- u64 *cluster_offset);
+extern int xloop_file_fmt_qcow_get_host_offset(struct xloop_file_fmt *xlo_fmt,
+ u64 offset, unsigned int *bytes, u64 *host_offset,
+ enum xloop_file_fmt_qcow_subcluster_type *subcluster_type);
#endif
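
The renaming here also changes the calling convention: the helper now
returns 0 or -errno and reports the subcluster type through an
out-parameter, whereas the old xloop_file_fmt_qcow_cluster_get_offset()
returned the cluster type itself. A hypothetical caller fragment, with
names borrowed from __qcow_file_fmt_read_bvec() below (not standalone):

unsigned int cur_bytes = bytes;          /* in: wanted, out: contiguous */
u64 host_offset;
enum xloop_file_fmt_qcow_subcluster_type type;
int ret;

ret = xloop_file_fmt_qcow_get_host_offset(xlo_fmt, pos, &cur_bytes,
                                          &host_offset, &type);
if (ret < 0)
        return ret;                      /* -EIO on a corrupt L2 entry */

if (type == QCOW_SUBCLUSTER_NORMAL)
        pos_read = host_offset;          /* already includes the offset
                                            into the cluster */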
diff --git a/kernel/xloop_file_fmt_qcow_main.c b/kernel/xloop_file_fmt_qcow_main.c
index 55d2f32..fbc49f5 100644
--- a/kernel/xloop_file_fmt_qcow_main.c
+++ b/kernel/xloop_file_fmt_qcow_main.c
@@ -23,12 +23,23 @@
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/zlib.h>
+#ifdef CONFIG_ZSTD_DECOMPRESS
+#include <linux/zstd.h>
+#endif
#include "xloop_file_fmt.h"
#include "xloop_file_fmt_qcow_main.h"
#include "xloop_file_fmt_qcow_cache.h"
#include "xloop_file_fmt_qcow_cluster.h"
+#ifdef CONFIG_ZSTD_DECOMPRESS
+#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27
+#define ZSTD_MAXWINDOWSIZE ((U32_C(1) << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1)
+#endif
+
+typedef ssize_t (*qcow_file_fmt_decompress_fn)(struct xloop_file_fmt *xlo_fmt,
+ void *dest, size_t dest_size, const void *src, size_t src_size);
+
static int __qcow_file_fmt_header_read(struct xloop_file_fmt *xlo_fmt,
struct file *file, struct xloop_file_fmt_qcow_header *header)
{
@@ -135,32 +146,70 @@ static int __qcow_file_fmt_compression_init(struct xloop_file_fmt *xlo_fmt)
{
struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
int ret = 0;
+#ifdef CONFIG_ZSTD_DECOMPRESS
+ size_t workspace_size;
+#endif
- qcow_data->strm = kzalloc(sizeof(*qcow_data->strm), GFP_KERNEL);
- if (!qcow_data->strm) {
+ /* create workspace for ZLIB decompression stream */
+ qcow_data->zlib_dstrm = kzalloc(sizeof(*qcow_data->zlib_dstrm), GFP_KERNEL);
+ if (!qcow_data->zlib_dstrm) {
ret = -ENOMEM;
goto out;
}
- qcow_data->strm->workspace = vzalloc(zlib_inflate_workspacesize());
- if (!qcow_data->strm->workspace) {
+ qcow_data->zlib_dstrm->workspace = vzalloc(zlib_inflate_workspacesize());
+ if (!qcow_data->zlib_dstrm->workspace) {
+ ret = -ENOMEM;
+ goto out_free_zlib_dstrm;
+ }
+
+ /* set up ZLIB decompression stream */
+ ret = zlib_inflateInit2(qcow_data->zlib_dstrm, -12);
+ if (ret != Z_OK) {
+ ret = -EIO;
+ goto out_free_zlib_dworkspace;
+ }
+
+#ifdef CONFIG_ZSTD_DECOMPRESS
+ /* create workspace for ZSTD decompression stream */
+ workspace_size = ZSTD_DStreamWorkspaceBound(ZSTD_MAXWINDOWSIZE);
+ qcow_data->zstd_dworkspace = vzalloc(workspace_size);
+ if (!qcow_data->zstd_dworkspace) {
ret = -ENOMEM;
- goto out_free_strm;
+ goto out_free_zlib_dworkspace;
+ }
+
+ /* set up ZSTD decompression stream */
+ qcow_data->zstd_dstrm = ZSTD_initDStream(ZSTD_MAXWINDOWSIZE,
+ qcow_data->zstd_dworkspace, workspace_size);
+ if (!qcow_data->zstd_dstrm) {
+ ret = -EINVAL;
+ goto out_free_zstd_dworkspace;
}
+#endif
+ /* create cache for last compressed QCOW cluster */
qcow_data->cmp_last_coffset = ULLONG_MAX;
qcow_data->cmp_out_buf = vmalloc(qcow_data->cluster_size);
if (!qcow_data->cmp_out_buf) {
ret = -ENOMEM;
- goto out_free_workspace;
+#ifdef CONFIG_ZSTD_DECOMPRESS
+ goto out_free_zstd_dworkspace;
+#else
+ goto out_free_zlib_dworkspace;
+#endif
}
return ret;
-out_free_workspace:
- vfree(qcow_data->strm->workspace);
-out_free_strm:
- kfree(qcow_data->strm);
+#ifdef CONFIG_ZSTD_DECOMPRESS
+out_free_zstd_dworkspace:
+ vfree(qcow_data->zstd_dworkspace);
+#endif
+out_free_zlib_dworkspace:
+ vfree(qcow_data->zlib_dstrm->workspace);
+out_free_zlib_dstrm:
+ kfree(qcow_data->zlib_dstrm);
out:
return ret;
}
@@ -169,8 +218,17 @@ static void __qcow_file_fmt_compression_exit(struct xloop_file_fmt *xlo_fmt)
{
struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
- vfree(qcow_data->strm->workspace);
- kfree(qcow_data->strm);
+ /* ZLIB specific cleanup */
+ zlib_inflateEnd(qcow_data->zlib_dstrm);
+ vfree(qcow_data->zlib_dstrm->workspace);
+ kfree(qcow_data->zlib_dstrm);
+
+ /* ZSTD specific cleanup */
+#ifdef CONFIG_ZSTD_DECOMPRESS
+ vfree(qcow_data->zstd_dworkspace);
+#endif
+
+ /* last compressed QCOW cluster cleanup */
vfree(qcow_data->cmp_out_buf);
}
@@ -227,6 +285,13 @@ static void __qcow_file_fmt_header_to_buf(struct xloop_file_fmt *xlo_fmt,
header->header_length);
}
+ if (header->header_length > offsetof(struct xloop_file_fmt_qcow_header,
+ compression_type)) {
+ len += sprintf(header_buf + len,
+ "compression_type: %d\n",
+ header->compression_type);
+ }
+
ASSERT(len < QCOW_HEADER_BUF_LEN);
}
@@ -253,10 +318,12 @@ static ssize_t __qcow_file_fmt_dbgfs_ofs_read(struct file *file,
struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
unsigned int cur_bytes = 1;
u64 offset = 0;
- u64 cluster_offset = 0;
+ u64 coffset = 0;
+ u64 host_offset = 0;
s64 offset_in_cluster = 0;
+ enum xloop_file_fmt_qcow_subcluster_type type;
ssize_t len = 0;
- int ret = 0;
+ int ret = 0, csize = 0, nb_csectors = 0;
/* read the share debugfs offset */
ret = mutex_lock_interruptible(&qcow_data->dbgfs_qcow_offset_mutex);
@@ -267,8 +334,8 @@ static ssize_t __qcow_file_fmt_dbgfs_ofs_read(struct file *file,
mutex_unlock(&qcow_data->dbgfs_qcow_offset_mutex);
/* calculate and print the cluster offset */
- ret = xloop_file_fmt_qcow_cluster_get_offset(xlo_fmt,
- offset, &cur_bytes, &cluster_offset);
+ ret = xloop_file_fmt_qcow_get_host_offset(xlo_fmt,
+ offset, &cur_bytes, &host_offset, &type);
if (ret < 0)
return -EINVAL;
@@ -276,8 +343,26 @@ static ssize_t __qcow_file_fmt_dbgfs_ofs_read(struct file *file,
offset);
len = sprintf(qcow_data->dbgfs_file_qcow_cluster_buf,
- "offset: %lld\ncluster_offset: %lld\noffset_in_cluster: %lld\n",
- offset, cluster_offset, offset_in_cluster);
+ "cluster type: %s\n"
+ "cluster offset host: %lld\n"
+ "cluster offset guest: %lld\n"
+ "cluster offset in-cluster: %lld\n",
+ xloop_file_fmt_qcow_get_subcluster_name(type),
+ host_offset, offset, offset_in_cluster);
+
+ if (type == QCOW_SUBCLUSTER_COMPRESSED) {
+ coffset = host_offset & qcow_data->cluster_offset_mask;
+ nb_csectors = ((host_offset >> qcow_data->csize_shift) &
+ qcow_data->csize_mask) + 1;
+ csize = nb_csectors * QCOW_COMPRESSED_SECTOR_SIZE -
+ (coffset & ~QCOW_COMPRESSED_SECTOR_MASK);
+
+ len += sprintf(qcow_data->dbgfs_file_qcow_cluster_buf + len,
+ "cluster compressed offset: %lld\n"
+ "cluster compressed sectors: %d\n"
+ "cluster compressed size: %d\n",
+ coffset, nb_csectors, csize);
+ }
ASSERT(len < QCOW_CLUSTER_BUF_LEN);
@@ -385,6 +470,44 @@ static void __qcow_file_fmt_dbgfs_exit(struct xloop_file_fmt *xlo_fmt)
}
#endif
+static int __qcow_file_fmt_validate_compression_type(
+ struct xloop_file_fmt *xlo_fmt)
+{
+ struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+
+ switch (qcow_data->compression_type) {
+ case QCOW_COMPRESSION_TYPE_ZLIB:
+#ifdef CONFIG_ZSTD_DECOMPRESS
+ case QCOW_COMPRESSION_TYPE_ZSTD:
+#endif
+ break;
+ default:
+ dev_err(xloop_file_fmt_to_dev(xlo_fmt), "unknown compression type: %u",
+ qcow_data->compression_type);
+ return -ENOTSUPP;
+ }
+
+ /*
+ * if the compression type differs from QCOW_COMPRESSION_TYPE_ZLIB
+ * the incompatible feature flag must be set
+ */
+ if (qcow_data->compression_type == QCOW_COMPRESSION_TYPE_ZLIB) {
+ if (qcow_data->incompatible_features & QCOW_INCOMPAT_COMPRESSION) {
+ dev_err(xloop_file_fmt_to_dev(xlo_fmt), "compression type "
+ "incompatible feature bit must not be set\n");
+ return -EINVAL;
+ }
+ } else {
+ if (!(qcow_data->incompatible_features & QCOW_INCOMPAT_COMPRESSION)) {
+ dev_err(xloop_file_fmt_to_dev(xlo_fmt), "compression type "
+ "incompatible feature bit must be set\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
{
struct xloop_file_fmt_qcow_data *qcow_data;
@@ -393,6 +516,10 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
u64 l1_vm_state_index;
u64 l2_cache_size;
u64 l2_cache_entry_size;
+ u64 virtual_disk_size;
+ u64 max_l2_entries;
+ u64 max_l2_cache;
+ u64 l2_cache_max_setting;
ssize_t len;
unsigned int i;
int ret = 0;
@@ -428,8 +555,6 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
qcow_data->cluster_bits = header.cluster_bits;
qcow_data->cluster_size = 1 << qcow_data->cluster_bits;
- qcow_data->cluster_sectors = 1 <<
- (qcow_data->cluster_bits - SECTOR_SHIFT);
if (header.header_length > qcow_data->cluster_size) {
dev_err(xloop_file_fmt_to_dev(xlo_fmt), "QCOW header exceeds cluster "
@@ -457,6 +582,25 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
qcow_data->compatible_features = header.compatible_features;
qcow_data->autoclear_features = header.autoclear_features;
+ /*
+ * Handle compression type
+ * Older qcow2 images don't contain the compression type header.
+ * Distinguish them by the header length and use
+ * the only valid (default) compression type in that case
+ */
+ if (header.header_length > offsetof(struct xloop_file_fmt_qcow_header,
+ compression_type)) {
+ qcow_data->compression_type = header.compression_type;
+ } else {
+ qcow_data->compression_type = QCOW_COMPRESSION_TYPE_ZLIB;
+ }
+
+ ret = __qcow_file_fmt_validate_compression_type(xlo_fmt);
+ if (ret) {
+ goto free_qcow_data;
+ }
+
+ /* check for incompatible features */
if (qcow_data->incompatible_features & QCOW_INCOMPAT_DIRTY) {
dev_err(xloop_file_fmt_to_dev(xlo_fmt), "image contains inconsistent "
"refcounts\n");
@@ -472,12 +616,31 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
}
if (qcow_data->incompatible_features & QCOW_INCOMPAT_DATA_FILE) {
- dev_err(xloop_file_fmt_to_dev(xlo_fmt), "clusters in the external "
- "data file are not refcounted\n");
- ret = -EACCES;
+ dev_err(xloop_file_fmt_to_dev(xlo_fmt), "data-file is required for "
+ "this image\n");
+ ret = -EINVAL;
goto free_qcow_data;
}
+ qcow_data->subclusters_per_cluster =
+ xloop_file_fmt_qcow_has_subclusters(qcow_data) ?
+ QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER : 1;
+ qcow_data->subcluster_size =
+ qcow_data->cluster_size / qcow_data->subclusters_per_cluster;
+ /*
+ * check that subcluster_size is non-zero, since __builtin_ctz(0) is
+ * undefined
+ */
+ ASSERT(qcow_data->subcluster_size != 0);
+ qcow_data->subcluster_bits = __builtin_ctz(qcow_data->subcluster_size);
+
+ if (qcow_data->subcluster_size < (1 << QCOW_MIN_CLUSTER_BITS)) {
+ dev_err(xloop_file_fmt_to_dev(xlo_fmt), "unsupported subcluster "
+ "size: %d\n", qcow_data->subcluster_size);
+ ret = -EINVAL;
+ goto free_qcow_data;
+ }
+
/* Check support for various header values */
if (header.refcount_order > 6) {
dev_err(xloop_file_fmt_to_dev(xlo_fmt), "reference count entry width "
@@ -498,8 +661,13 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
goto free_qcow_data;
}
- /* L2 is always one cluster */
- qcow_data->l2_bits = qcow_data->cluster_bits - 3;
+ /*
+ * check that xloop_file_fmt_qcow_l2_entry_size(qcow_data) is non-zero,
+ * since __builtin_ctz(0) is undefined
+ */
+ ASSERT(xloop_file_fmt_qcow_l2_entry_size(qcow_data) != 0);
+ qcow_data->l2_bits = qcow_data->cluster_bits -
+ __builtin_ctz(xloop_file_fmt_qcow_l2_entry_size(qcow_data));
qcow_data->l2_size = 1 << qcow_data->l2_bits;
/* 2^(qcow_data->refcount_order - 3) is the refcount width in bytes */
qcow_data->refcount_block_bits = qcow_data->cluster_bits -
@@ -544,7 +712,7 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
/* read the level 1 table */
ret = __qcow_file_fmt_validate_table(xlo_fmt, header.l1_table_offset,
- header.l1_size, sizeof(u64), QCOW_MAX_L1_SIZE,
+ header.l1_size, QCOW_L1E_SIZE, QCOW_MAX_L1_SIZE,
"Active L1 table");
if (ret < 0) {
goto free_qcow_data;
@@ -571,7 +739,7 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
if (qcow_data->l1_size > 0) {
qcow_data->l1_table = vzalloc(round_up(qcow_data->l1_size *
- sizeof(u64), 512));
+ QCOW_L1E_SIZE, 512));
if (qcow_data->l1_table == NULL) {
dev_err(xloop_file_fmt_to_dev(xlo_fmt), "could not allocate "
"L1 table\n");
@@ -579,7 +747,7 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
goto free_qcow_data;
}
len = kernel_read(xlo->xlo_backing_file, qcow_data->l1_table,
- qcow_data->l1_size * sizeof(u64),
+ qcow_data->l1_size * QCOW_L1E_SIZE,
&qcow_data->l1_table_offset);
if (len < 0) {
dev_err(xloop_file_fmt_to_dev(xlo_fmt), "could not read "
@@ -604,13 +772,21 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
goto free_l1_table;
}
-
/* create cache for L2 */
- l2_cache_size = qcow_data->size / (qcow_data->cluster_size / 8);
- l2_cache_entry_size = min(qcow_data->cluster_size, (int)4096);
+ virtual_disk_size = qcow_data->size;
+ max_l2_entries = DIV_ROUND_UP(virtual_disk_size, qcow_data->cluster_size);
+ max_l2_cache = round_up(
+ max_l2_entries * xloop_file_fmt_qcow_l2_entry_size(qcow_data),
+ qcow_data->cluster_size);
- /* limit the L2 size to maximum QCOW_DEFAULT_L2_CACHE_MAX_SIZE */
- l2_cache_size = min(l2_cache_size, (u64)QCOW_DEFAULT_L2_CACHE_MAX_SIZE);
+ /* define the maximum L2 cache size */
+ l2_cache_max_setting = QCOW_DEFAULT_L2_CACHE_MAX_SIZE;
+
+ /* limit the L2 cache size to maximum l2_cache_max_setting */
+ l2_cache_size = min(max_l2_cache, l2_cache_max_setting);
+
+ /* determine the size of a cache entry */
+ l2_cache_entry_size = min(qcow_data->cluster_size, (int)PAGE_SIZE);
/* calculate the number of cache tables */
l2_cache_size /= l2_cache_entry_size;
@@ -624,7 +800,8 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
goto free_l1_table;
}
- qcow_data->l2_slice_size = l2_cache_entry_size / sizeof(u64);
+ qcow_data->l2_slice_size =
+ l2_cache_entry_size / xloop_file_fmt_qcow_l2_entry_size(qcow_data);
qcow_data->l2_table_cache = xloop_file_fmt_qcow_cache_create(xlo_fmt,
l2_cache_size, l2_cache_entry_size);
@@ -681,39 +858,195 @@ static void qcow_file_fmt_exit(struct xloop_file_fmt *xlo_fmt)
}
}
-static ssize_t __qcow_file_fmt_buffer_decompress(struct xloop_file_fmt *xlo_fmt,
+/*
+ * __qcow_file_fmt_zlib_decompress()
+ *
+ * Decompress some data (not more than @src_size bytes) to produce exactly
+ * @dest_size bytes using the zlib compression method
+ *
+ * @xlo_fmt - QCOW file format
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: 0 on success
+ * -EIO on failure
+ */
+static ssize_t __qcow_file_fmt_zlib_decompress(struct xloop_file_fmt *xlo_fmt,
void *dest,
size_t dest_size,
const void *src,
size_t src_size)
{
struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
- int ret = 0;
+ u8 zerostuff = 0;
+ ssize_t ret = 0;
- qcow_data->strm->avail_in = src_size;
- qcow_data->strm->next_in = (void *) src;
- qcow_data->strm->avail_out = dest_size;
- qcow_data->strm->next_out = dest;
-
- ret = zlib_inflateInit2(qcow_data->strm, -12);
+ ret = zlib_inflateReset(qcow_data->zlib_dstrm);
if (ret != Z_OK) {
- return -1;
+ ret = -EINVAL;
+ goto out;
}
- ret = zlib_inflate(qcow_data->strm, Z_FINISH);
- if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR)
- || qcow_data->strm->avail_out != 0) {
- /* We approve Z_BUF_ERROR because we need @dest buffer to be
- * filled, but @src buffer may be processed partly (because in
- * qcow2 we know size of compressed data with precision of one
- * sector) */
- ret = -1;
- } else {
- ret = 0;
+ qcow_data->zlib_dstrm->avail_in = src_size;
+ qcow_data->zlib_dstrm->next_in = (void *)src;
+ qcow_data->zlib_dstrm->avail_out = dest_size;
+ qcow_data->zlib_dstrm->next_out = dest;
+
+ ret = zlib_inflate(qcow_data->zlib_dstrm, Z_SYNC_FLUSH);
+ /*
+ * Work around a bug in zlib, which sometimes wants to taste an extra
+ * byte when being used in the (undocumented) raw deflate mode.
+ * (From USAGI).
+ */
+ if (ret == Z_OK && !qcow_data->zlib_dstrm->avail_in &&
+ qcow_data->zlib_dstrm->avail_out) {
+ qcow_data->zlib_dstrm->next_in = &zerostuff;
+ qcow_data->zlib_dstrm->avail_in = 1;
+ ret = zlib_inflate(qcow_data->zlib_dstrm, Z_FINISH);
+ }
+ if (ret != Z_STREAM_END) {
+ ret = -EIO;
+ goto out;
}
+
+out:
return ret;
}
+#ifdef CONFIG_ZSTD_DECOMPRESS
+/*
+ * __qcow_file_fmt_zstd_decompress()
+ *
+ * Decompress some data (not more than @src_size bytes) to produce exactly
+ * @dest_size bytes using zstd compression method
+ *
+ * @xlo_fmt - QCOW file format
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: 0 on success
+ * -EIO on any error
+ */
+static ssize_t __qcow_file_fmt_zstd_decompress(struct xloop_file_fmt *xlo_fmt,
+ void *dest,
+ size_t dest_size,
+ const void *src,
+ size_t src_size)
+{
+ struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+ size_t zstd_ret = 0;
+ ssize_t ret = 0;
+
+ ZSTD_outBuffer output = {
+ .dst = dest,
+ .size = dest_size,
+ .pos = 0
+ };
+
+ ZSTD_inBuffer input = {
+ .src = src,
+ .size = src_size,
+ .pos = 0
+ };
+
+ zstd_ret = ZSTD_resetDStream(qcow_data->zstd_dstrm);
+
+ if (ZSTD_isError(zstd_ret)) {
+ ret = -EIO;
+ goto out;
+ }
+
+ /*
+ * The compressed stream from the input buffer may consist of more
+ * than one zstd frame. So we iterate until we get a fully
+ * uncompressed cluster.
+ * From zstd docs related to ZSTD_decompressStream:
+ * "return : 0 when a frame is completely decoded and fully flushed"
+ * We suppose that this means: each time ZSTD_decompressStream reads
+ * only ONE full frame and returns 0 if and only if that frame
+ * is completely decoded and flushed. Only after returning 0,
+ * ZSTD_decompressStream reads another ONE full frame.
+ */
+ while (output.pos < output.size) {
+ size_t last_in_pos = input.pos;
+ size_t last_out_pos = output.pos;
+ zstd_ret = ZSTD_decompressStream(qcow_data->zstd_dstrm, &output, &input);
+
+ if (ZSTD_isError(zstd_ret)) {
+ ret = -EIO;
+ break;
+ }
+
+ /*
+ * The ZSTD manual is vague about what to do if it reads
+ * the buffer partially, and we don't want to get stuck
+ * in an infinite loop where ZSTD_decompressStream
+ * returns > 0 waiting for another input chunk. So, we add
+ * a check which ensures that the loop makes some progress
+ * on each step.
+ */
+ if (last_in_pos >= input.pos &&
+ last_out_pos >= output.pos) {
+ ret = -EIO;
+ break;
+ }
+ }
+ /*
+ * Make sure that the frame has been fully flushed here; if not, we
+ * somehow managed to get an uncompressed cluster greater than the
+ * cluster size, possibly because the image is damaged.
+ */
+ if (zstd_ret > 0) {
+ ret = -EIO;
+ }
+
+out:
+ ASSERT(ret == 0 || ret == -EIO);
+ return ret;
+}
+#endif
+
+/*
+ * __qcow_file_fmt_buffer_decompress()
+ *
+ * Decompress @src_size bytes of data using the compression
+ * method defined by the image compression type
+ *
+ * @xlo_fmt - QCOW file format
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: a non-negative value on success
+ * a negative error code on failure
+ */
+static ssize_t __qcow_file_fmt_buffer_decompress(struct xloop_file_fmt *xlo_fmt,
+ void *dest,
+ size_t dest_size,
+ const void *src,
+ size_t src_size)
+{
+ struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+ qcow_file_fmt_decompress_fn decompress_fn;
+
+ switch (qcow_data->compression_type) {
+ case QCOW_COMPRESSION_TYPE_ZLIB:
+ decompress_fn = __qcow_file_fmt_zlib_decompress;
+ break;
+
+#ifdef CONFIG_ZSTD_DECOMPRESS
+ case QCOW_COMPRESSION_TYPE_ZSTD:
+ decompress_fn = __qcow_file_fmt_zstd_decompress;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+
+ return decompress_fn(xlo_fmt, dest, dest_size, src, src_size);
+}
+
+
static int __qcow_file_fmt_read_compressed(struct xloop_file_fmt *xlo_fmt,
struct bio_vec *bvec,
u64 file_cluster_offset,
@@ -783,8 +1116,9 @@ static int __qcow_file_fmt_read_bvec(struct xloop_file_fmt *xlo_fmt,
int ret;
unsigned int cur_bytes; /* number of bytes in current iteration */
u64 bytes;
- u64 cluster_offset = 0;
+ u64 host_offset = 0;
u64 bytes_done = 0;
+ enum xloop_file_fmt_qcow_subcluster_type type;
void *data;
unsigned long irq_flags;
ssize_t len;
@@ -797,8 +1131,8 @@ static int __qcow_file_fmt_read_bvec(struct xloop_file_fmt *xlo_fmt,
/* prepare next request */
cur_bytes = bytes;
- ret = xloop_file_fmt_qcow_cluster_get_offset(xlo_fmt, *ppos,
- &cur_bytes, &cluster_offset);
+ ret = xloop_file_fmt_qcow_get_host_offset(xlo_fmt, *ppos,
+ &cur_bytes, &host_offset, &type);
if (ret < 0) {
goto fail;
}
@@ -806,32 +1140,28 @@ static int __qcow_file_fmt_read_bvec(struct xloop_file_fmt *xlo_fmt,
offset_in_cluster = xloop_file_fmt_qcow_offset_into_cluster(
qcow_data, *ppos);
- switch (ret) {
- case QCOW_CLUSTER_UNALLOCATED:
- case QCOW_CLUSTER_ZERO_PLAIN:
- case QCOW_CLUSTER_ZERO_ALLOC:
+ switch (type) {
+ case QCOW_SUBCLUSTER_ZERO_PLAIN:
+ case QCOW_SUBCLUSTER_ZERO_ALLOC:
+ case QCOW_SUBCLUSTER_UNALLOCATED_PLAIN:
+ case QCOW_SUBCLUSTER_UNALLOCATED_ALLOC:
data = bvec_kmap_irq(bvec, &irq_flags) + bytes_done;
memset(data, 0, cur_bytes);
flush_dcache_page(bvec->bv_page);
bvec_kunmap_irq(data, &irq_flags);
break;
- case QCOW_CLUSTER_COMPRESSED:
+ case QCOW_SUBCLUSTER_COMPRESSED:
ret = __qcow_file_fmt_read_compressed(xlo_fmt, bvec,
- cluster_offset, *ppos, cur_bytes, bytes_done);
+ host_offset, *ppos, cur_bytes, bytes_done);
if (ret < 0) {
goto fail;
}
break;
- case QCOW_CLUSTER_NORMAL:
- if ((cluster_offset & 511) != 0) {
- ret = -EIO;
- goto fail;
- }
-
- pos_read = cluster_offset + offset_in_cluster;
+ case QCOW_SUBCLUSTER_NORMAL:
+ pos_read = host_offset;
data = bvec_kmap_irq(bvec, &irq_flags) + bytes_done;
len = kernel_read(xlo->xlo_backing_file, data, cur_bytes,
@@ -842,6 +1172,7 @@ static int __qcow_file_fmt_read_bvec(struct xloop_file_fmt *xlo_fmt,
if (len < 0)
return len;
+ ASSERT(len == cur_bytes);
break;
default:
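
The heart of the new ZSTD path is the progress-guarded streaming loop. A
minimal userspace sketch of the same pattern against libzstd (link with
-lzstd); note the in-kernel build uses the workspace-based API from
linux/zstd.h instead, as in the diff above:

#include <stddef.h>
#include <zstd.h>          /* userspace libzstd */

/* Decompress src into exactly dst_size bytes; returns 0 on success,
 * -1 on error or on a stalled stream (the driver returns -EIO). */
static int zstd_decompress_cluster(ZSTD_DStream *dstrm,
                                   void *dst, size_t dst_size,
                                   const void *src, size_t src_size)
{
        ZSTD_outBuffer out = { .dst = dst, .size = dst_size, .pos = 0 };
        ZSTD_inBuffer in   = { .src = src, .size = src_size, .pos = 0 };
        size_t ret = 0;

        if (ZSTD_isError(ZSTD_initDStream(dstrm)))
                return -1;

        /* A cluster may span several zstd frames, so keep going until
         * the output buffer is full. */
        while (out.pos < out.size) {
                size_t last_in = in.pos, last_out = out.pos;

                ret = ZSTD_decompressStream(dstrm, &out, &in);
                if (ZSTD_isError(ret))
                        return -1;
                /* Progress guard: every iteration must consume input
                 * or produce output, or we would loop forever. */
                if (in.pos == last_in && out.pos == last_out)
                        return -1;
        }

        /* ret != 0 here means the final frame was not fully flushed,
         * i.e. the decompressed data would exceed the cluster size. */
        return ret == 0 ? 0 : -1;
}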
diff --git a/kernel/xloop_file_fmt_qcow_main.h b/kernel/xloop_file_fmt_qcow_main.h
index e6031be..023c679 100644
--- a/kernel/xloop_file_fmt_qcow_main.h
+++ b/kernel/xloop_file_fmt_qcow_main.h
@@ -19,6 +19,9 @@
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/zlib.h>
+#ifdef CONFIG_ZSTD_DECOMPRESS
+#include <linux/zstd.h>
+#endif
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
@@ -80,6 +83,33 @@ do { \
/* The cluster reads as all zeros */
#define QCOW_OFLAG_ZERO (1ULL << 0)
+#define QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER 32
+
+/* The subcluster X [0..31] is allocated */
+#define QCOW_OFLAG_SUB_ALLOC(X) (1ULL << (X))
+/* The subcluster X [0..31] reads as zeroes */
+#define QCOW_OFLAG_SUB_ZERO(X) (QCOW_OFLAG_SUB_ALLOC(X) << 32)
+/* Subclusters [X, Y) (0 <= X <= Y <= 32) are allocated */
+#define QCOW_OFLAG_SUB_ALLOC_RANGE(X, Y) \
+ (QCOW_OFLAG_SUB_ALLOC(Y) - QCOW_OFLAG_SUB_ALLOC(X))
+/* Subclusters [X, Y) (0 <= X <= Y <= 32) read as zeroes */
+#define QCOW_OFLAG_SUB_ZERO_RANGE(X, Y) \
+ (QCOW_OFLAG_SUB_ALLOC_RANGE(X, Y) << 32)
+/* L2 entry bitmap with all allocation bits set */
+#define QCOW_L2_BITMAP_ALL_ALLOC (QCOW_OFLAG_SUB_ALLOC_RANGE(0, 32))
+/* L2 entry bitmap with all "read as zeroes" bits set */
+#define QCOW_L2_BITMAP_ALL_ZEROES (QCOW_OFLAG_SUB_ZERO_RANGE(0, 32))
+
+/* Size of normal and extended L2 entries */
+#define QCOW_L2E_SIZE_NORMAL (sizeof(u64))
+#define QCOW_L2E_SIZE_EXTENDED (sizeof(u64) * 2)
+
+/* Size of L1 table entries */
+#define QCOW_L1E_SIZE (sizeof(u64))
+
+/* Size of reftable entries */
+#define QCOW_REFTABLE_ENTRY_SIZE (sizeof(u64))
+
#define QCOW_MIN_CLUSTER_BITS 9
#define QCOW_MAX_CLUSTER_BITS 21
@@ -104,7 +134,7 @@ do { \
/* Buffer size for debugfs file buffer to receive and display offset and
* cluster offset information */
#define QCOW_OFFSET_BUF_LEN 32
-#define QCOW_CLUSTER_BUF_LEN 128
+#define QCOW_CLUSTER_BUF_LEN 256
struct xloop_file_fmt_qcow_header {
u32 magic;
@@ -128,6 +158,12 @@ struct xloop_file_fmt_qcow_header {
u32 refcount_order;
u32 header_length;
+
+ /* Additional fields */
+ u8 compression_type;
+
+ /* header must be a multiple of 8 */
+ u8 padding[7];
} __attribute__((packed));
struct xloop_file_fmt_qcow_snapshot_header {
@@ -144,11 +180,11 @@ struct xloop_file_fmt_qcow_snapshot_header {
u64 vm_clock_nsec;
u32 vm_state_size;
- /* for extension */
- u32 extra_data_size;
- /* extra data follows */
- /* id_str follows */
- /* name follows */
+
+ /* Size of all extra data, including QCowSnapshotExtraData if available */
+ u32 extra_data_size;
+ /* Data beyond QCowSnapshotExtraData, if any */
+ void *unknown_extra_data;
} __attribute__((packed));
enum {
@@ -162,13 +198,19 @@ enum {
QCOW_INCOMPAT_DIRTY_BITNR = 0,
QCOW_INCOMPAT_CORRUPT_BITNR = 1,
QCOW_INCOMPAT_DATA_FILE_BITNR = 2,
+ QCOW_INCOMPAT_COMPRESSION_BITNR = 3,
+ QCOW_INCOMPAT_EXTL2_BITNR = 4,
QCOW_INCOMPAT_DIRTY = 1 << QCOW_INCOMPAT_DIRTY_BITNR,
QCOW_INCOMPAT_CORRUPT = 1 << QCOW_INCOMPAT_CORRUPT_BITNR,
QCOW_INCOMPAT_DATA_FILE = 1 << QCOW_INCOMPAT_DATA_FILE_BITNR,
+ QCOW_INCOMPAT_COMPRESSION = 1 << QCOW_INCOMPAT_COMPRESSION_BITNR,
+ QCOW_INCOMPAT_EXTL2 = 1 << QCOW_INCOMPAT_EXTL2_BITNR,
QCOW_INCOMPAT_MASK = QCOW_INCOMPAT_DIRTY
| QCOW_INCOMPAT_CORRUPT
- | QCOW_INCOMPAT_DATA_FILE,
+ | QCOW_INCOMPAT_DATA_FILE
+ | QCOW_INCOMPAT_COMPRESSION
+ | QCOW_INCOMPAT_EXTL2,
};
/* compatible feature bits */
@@ -190,12 +232,19 @@ enum {
QCOW_AUTOCLEAR_DATA_FILE_RAW,
};
+enum xloop_file_fmt_qcow_compression_type {
+ QCOW_COMPRESSION_TYPE_ZLIB,
+ QCOW_COMPRESSION_TYPE_ZSTD,
+};
+
struct xloop_file_fmt_qcow_data {
u64 size;
int cluster_bits;
int cluster_size;
- int cluster_sectors;
int l2_slice_size;
+ int subcluster_bits;
+ int subcluster_size;
+ int subclusters_per_cluster;
int l2_bits;
int l2_size;
int l1_size;
@@ -237,10 +286,27 @@ struct xloop_file_fmt_qcow_data {
u64 compatible_features;
u64 autoclear_features;
- struct z_stream_s *strm;
+ /* ZLIB specific data */
+ z_streamp zlib_dstrm;
+
+ /* ZSTD specific data */
+#ifdef CONFIG_ZSTD_DECOMPRESS
+ void *zstd_dworkspace;
+ ZSTD_DStream *zstd_dstrm;
+#endif
+
+ /* used to cache last compressed QCOW cluster */
u8 *cmp_out_buf;
u64 cmp_last_coffset;
+ /*
+ * Compression type used for the image. Default: 0 - ZLIB
+ * The image compression type is set on image creation.
+ * For now, the only way to change the compression type
+ * is to convert the image with the desired compression type set.
+ */
+ enum xloop_file_fmt_qcow_compression_type compression_type;
+
/* debugfs entries */
#ifdef CONFIG_DEBUG_FS
struct dentry *dbgfs_dir;
@@ -265,6 +331,34 @@ struct xloop_file_fmt_qcow_cow_region {
unsigned nb_bytes;
};
+/*
+ * In images with standard L2 entries all clusters are treated as if
+ * they had one subcluster so xloop_file_fmt_qcow_cluster_type and
+ * xloop_file_fmt_qcow_subcluster_type can be mapped to each other and
+ * have the exact same meaning (QCOW_SUBCLUSTER_UNALLOCATED_ALLOC cannot
+ * happen in these images).
+ *
+ * In images with extended L2 entries xloop_file_fmt_qcow_cluster_type
+ * refers to the complete cluster and xloop_file_fmt_qcow_subcluster_type
+ * to each of the individual subclusters, so there are several possible
+ * combinations:
+ *
+ * |--------------+---------------------------|
+ * | Cluster type | Possible subcluster types |
+ * |--------------+---------------------------|
+ * | UNALLOCATED | UNALLOCATED_PLAIN |
+ * | | ZERO_PLAIN |
+ * |--------------+---------------------------|
+ * | NORMAL | UNALLOCATED_ALLOC |
+ * | | ZERO_ALLOC |
+ * | | NORMAL |
+ * |--------------+---------------------------|
+ * | COMPRESSED | COMPRESSED |
+ * |--------------+---------------------------|
+ *
+ * QCOW_SUBCLUSTER_INVALID means that the L2 entry is incorrect and
+ * the image should be marked corrupt.
+ */
enum xloop_file_fmt_qcow_cluster_type {
QCOW_CLUSTER_UNALLOCATED,
QCOW_CLUSTER_ZERO_PLAIN,
@@ -273,6 +367,16 @@ enum xloop_file_fmt_qcow_cluster_type {
QCOW_CLUSTER_COMPRESSED,
};
+enum xloop_file_fmt_qcow_subcluster_type {
+ QCOW_SUBCLUSTER_UNALLOCATED_PLAIN,
+ QCOW_SUBCLUSTER_UNALLOCATED_ALLOC,
+ QCOW_SUBCLUSTER_ZERO_PLAIN,
+ QCOW_SUBCLUSTER_ZERO_ALLOC,
+ QCOW_SUBCLUSTER_NORMAL,
+ QCOW_SUBCLUSTER_COMPRESSED,
+ QCOW_SUBCLUSTER_INVALID,
+};
+
enum xloop_file_fmt_qcow_metadata_overlap {
QCOW_OL_MAIN_HEADER_BITNR = 0,
QCOW_OL_ACTIVE_L1_BITNR = 1,
@@ -314,25 +418,51 @@ enum xloop_file_fmt_qcow_metadata_overlap {
#define QCOW_OL_ALL \
(QCOW_OL_CACHED | QCOW_OL_INACTIVE_L2)
-#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
-#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
-#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
+#define QCOW_L1E_OFFSET_MASK 0x00fffffffffffe00ULL
+#define QCOW_L2E_OFFSET_MASK 0x00fffffffffffe00ULL
+#define QCOW_L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
-#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
+static inline bool xloop_file_fmt_qcow_has_subclusters(
+ struct xloop_file_fmt_qcow_data *qcow_data)
+{
+ return qcow_data->incompatible_features & QCOW_INCOMPAT_EXTL2;
+}
+
+static inline size_t xloop_file_fmt_qcow_l2_entry_size(
+ struct xloop_file_fmt_qcow_data *qcow_data)
+{
+ return xloop_file_fmt_qcow_has_subclusters(qcow_data) ?
+ QCOW_L2E_SIZE_EXTENDED : QCOW_L2E_SIZE_NORMAL;
+}
+
+static inline u64 xloop_file_fmt_qcow_get_l2_entry(
+ struct xloop_file_fmt_qcow_data *qcow_data, u64 *l2_slice, int idx)
+{
+ idx *= xloop_file_fmt_qcow_l2_entry_size(qcow_data) / sizeof(u64);
+ return be64_to_cpu(l2_slice[idx]);
+}
-#define INV_OFFSET (-1ULL)
+static inline u64 xloop_file_fmt_qcow_get_l2_bitmap(
+ struct xloop_file_fmt_qcow_data *qcow_data, u64 *l2_slice, int idx)
+{
+ if (xloop_file_fmt_qcow_has_subclusters(qcow_data)) {
+ idx *= xloop_file_fmt_qcow_l2_entry_size(qcow_data) / sizeof(u64);
+ return be64_to_cpu(l2_slice[idx + 1]);
+ } else {
+ return 0; /* For convenience only; this value has no meaning. */
+ }
+}
static inline bool xloop_file_fmt_qcow_has_data_file(
- struct xloop_file_fmt *xlo_fmt)
+ struct xloop_file_fmt_qcow_data *qcow_data)
{
/* At the moment, there is no support for copy on write! */
return false;
}
static inline bool xloop_file_fmt_qcow_data_file_is_raw(
- struct xloop_file_fmt *xlo_fmt)
+ struct xloop_file_fmt_qcow_data *qcow_data)
{
- struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
return !!(qcow_data->autoclear_features &
QCOW_AUTOCLEAR_DATA_FILE_RAW);
}
@@ -349,6 +479,12 @@ static inline s64 xloop_file_fmt_qcow_offset_into_cluster(
return offset & (qcow_data->cluster_size - 1);
}
+static inline s64 xloop_file_fmt_qcow_offset_into_subcluster(
+ struct xloop_file_fmt_qcow_data *qcow_data, s64 offset)
+{
+ return offset & (qcow_data->subcluster_size - 1);
+}
+
static inline s64 xloop_file_fmt_qcow_size_to_clusters(
struct xloop_file_fmt_qcow_data *qcow_data, u64 size)
{
@@ -382,6 +518,13 @@ static inline int xloop_file_fmt_qcow_offset_to_l2_slice_index(
(qcow_data->l2_slice_size - 1);
}
+static inline int xloop_file_fmt_qcow_offset_to_sc_index(
+ struct xloop_file_fmt_qcow_data *qcow_data, s64 offset)
+{
+ return (offset >> qcow_data->subcluster_bits) &
+ (qcow_data->subclusters_per_cluster - 1);
+}
+
static inline s64 xloop_file_fmt_qcow_vm_state_offset(
struct xloop_file_fmt_qcow_data *qcow_data)
{
@@ -390,22 +533,25 @@ static inline s64 xloop_file_fmt_qcow_vm_state_offset(
}
static inline enum xloop_file_fmt_qcow_cluster_type
-xloop_file_fmt_qcow_get_cluster_type(struct xloop_file_fmt *xlo_fmt, u64 l2_entry)
+xloop_file_fmt_qcow_get_cluster_type(struct xloop_file_fmt *xlo_fmt,
+ u64 l2_entry)
{
+ struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+
if (l2_entry & QCOW_OFLAG_COMPRESSED) {
return QCOW_CLUSTER_COMPRESSED;
} else if (l2_entry & QCOW_OFLAG_ZERO) {
- if (l2_entry & L2E_OFFSET_MASK) {
+ if (l2_entry & QCOW_L2E_OFFSET_MASK) {
return QCOW_CLUSTER_ZERO_ALLOC;
}
return QCOW_CLUSTER_ZERO_PLAIN;
- } else if (!(l2_entry & L2E_OFFSET_MASK)) {
+ } else if (!(l2_entry & QCOW_L2E_OFFSET_MASK)) {
/* Offset 0 generally means unallocated, but it is ambiguous
* with external data files because 0 is a valid offset there.
* However, all clusters in external data files always have
* refcount 1, so we can rely on QCOW_OFLAG_COPIED to
* disambiguate. */
- if (xloop_file_fmt_qcow_has_data_file(xlo_fmt) &&
+ if (xloop_file_fmt_qcow_has_data_file(qcow_data) &&
(l2_entry & QCOW_OFLAG_COPIED)) {
return QCOW_CLUSTER_NORMAL;
} else {
@@ -416,4 +562,85 @@ xloop_file_fmt_qcow_get_cluster_type(struct xloop_file_fmt *xlo_fmt, u64 l2_entr
}
}
+/*
+ * In an image without subclusters @l2_bitmap is ignored and
+ * @sc_index must be 0.
+ * Return QCOW_SUBCLUSTER_INVALID if an invalid l2 entry is detected
+ * (this checks the whole entry and bitmap, not only the bits related
+ * to subcluster @sc_index).
+ */
+static inline enum xloop_file_fmt_qcow_subcluster_type
+xloop_file_fmt_qcow_get_subcluster_type(struct xloop_file_fmt *xlo_fmt,
+ u64 l2_entry, u64 l2_bitmap, unsigned int sc_index)
+{
+ struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+ enum xloop_file_fmt_qcow_cluster_type type =
+ xloop_file_fmt_qcow_get_cluster_type(xlo_fmt, l2_entry);
+ ASSERT(sc_index < qcow_data->subclusters_per_cluster);
+
+ if (xloop_file_fmt_qcow_has_subclusters(qcow_data)) {
+ switch (type) {
+ case QCOW_CLUSTER_COMPRESSED:
+ return QCOW_SUBCLUSTER_COMPRESSED;
+ case QCOW_CLUSTER_NORMAL:
+ if ((l2_bitmap >> 32) & l2_bitmap) {
+ return QCOW_SUBCLUSTER_INVALID;
+ } else if (l2_bitmap & QCOW_OFLAG_SUB_ZERO(sc_index)) {
+ return QCOW_SUBCLUSTER_ZERO_ALLOC;
+ } else if (l2_bitmap & QCOW_OFLAG_SUB_ALLOC(sc_index)) {
+ return QCOW_SUBCLUSTER_NORMAL;
+ } else {
+ return QCOW_SUBCLUSTER_UNALLOCATED_ALLOC;
+ }
+ case QCOW_CLUSTER_UNALLOCATED:
+ if (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC) {
+ return QCOW_SUBCLUSTER_INVALID;
+ } else if (l2_bitmap & QCOW_OFLAG_SUB_ZERO(sc_index)) {
+ return QCOW_SUBCLUSTER_ZERO_PLAIN;
+ } else {
+ return QCOW_SUBCLUSTER_UNALLOCATED_PLAIN;
+ }
+ default:
+ /* not reachable */
+ ASSERT(false);
+ return QCOW_SUBCLUSTER_INVALID;
+ }
+ } else {
+ switch (type) {
+ case QCOW_CLUSTER_COMPRESSED:
+ return QCOW_SUBCLUSTER_COMPRESSED;
+ case QCOW_CLUSTER_ZERO_PLAIN:
+ return QCOW_SUBCLUSTER_ZERO_PLAIN;
+ case QCOW_CLUSTER_ZERO_ALLOC:
+ return QCOW_SUBCLUSTER_ZERO_ALLOC;
+ case QCOW_CLUSTER_NORMAL:
+ return QCOW_SUBCLUSTER_NORMAL;
+ case QCOW_CLUSTER_UNALLOCATED:
+ return QCOW_SUBCLUSTER_UNALLOCATED_PLAIN;
+ default:
+ /* not reachable */
+ ASSERT(false);
+ return QCOW_SUBCLUSTER_INVALID;
+ }
+ }
+}
+
+#ifdef CONFIG_DEBUG_FS
+static inline const char *xloop_file_fmt_qcow_get_subcluster_name(
+ const enum xloop_file_fmt_qcow_subcluster_type type)
+{
+ static const char *subcluster_names[] = {
+ "QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN",
+ "QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC",
+ "QCOW2_SUBCLUSTER_ZERO_PLAIN",
+ "QCOW2_SUBCLUSTER_ZERO_ALLOC",
+ "QCOW2_SUBCLUSTER_NORMAL",
+ "QCOW2_SUBCLUSTER_COMPRESSED",
+ "QCOW2_SUBCLUSTER_INVALID"
+ };
+
+ return subcluster_names[type];
+}
+#endif
+
#endif
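
Finally, the subcluster index arithmetic added in
xloop_file_fmt_qcow_offset_to_sc_index() is easiest to check with concrete
numbers. A standalone sketch assuming 64 KiB clusters with extended L2
entries, i.e. 32 subclusters of 2 KiB each:

#include <assert.h>
#include <stdint.h>

#define SUBCLUSTERS_PER_CLUSTER 32  /* QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER */

int main(void)
{
        int cluster_bits    = 16;                             /* 64 KiB */
        int cluster_size    = 1 << cluster_bits;
        int subcluster_size = cluster_size / SUBCLUSTERS_PER_CLUSTER;
        int subcluster_bits = __builtin_ctz(subcluster_size); /* 11 */

        /* Guest offset: cluster 5, subcluster 3, byte 123 within it. */
        uint64_t offset = (5ULL << cluster_bits) + 3 * subcluster_size + 123;

        int sc_index = (offset >> subcluster_bits) &
                       (SUBCLUSTERS_PER_CLUSTER - 1);

        assert(sc_index == 3);
        assert((offset & (subcluster_size - 1)) == 123);
        return 0;
}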