summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- kernel/cmake/kernel.cmake 2
-rw-r--r-- kernel/xloop_file_fmt_qcow_cluster.c 281
-rw-r--r-- kernel/xloop_file_fmt_qcow_cluster.h 7
-rw-r--r-- kernel/xloop_file_fmt_qcow_main.c 469
-rw-r--r-- kernel/xloop_file_fmt_qcow_main.h 269
5 files changed, 835 insertions, 193 deletions
diff --git a/kernel/cmake/kernel.cmake b/kernel/cmake/kernel.cmake
index 385cb40..3ac7633 100644
--- a/kernel/cmake/kernel.cmake
+++ b/kernel/cmake/kernel.cmake
@@ -22,7 +22,7 @@ macro(add_kernel_module MODULE_NAME KERNEL_DIR MODULE_MACRO MODULE_SOURCE_FILES
endif()
# define build command
set(MODULE_BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} ${MODULE_MACRO}
- -C /${KERNEL_DIR}/build
+ -C ${KERNEL_DIR}/build
M=${CMAKE_CURRENT_BINARY_DIR}/${MODULE_NAME} modules
EXTRA_CFLAGS=${KERNEL_C_FLAGS}
KBUILD_EXTRA_SYMBOLS=${MODULE_EXTRA_SYMBOLS})
diff --git a/kernel/xloop_file_fmt_qcow_cluster.c b/kernel/xloop_file_fmt_qcow_cluster.c
index deef22b..8394c76 100644
--- a/kernel/xloop_file_fmt_qcow_cluster.c
+++ b/kernel/xloop_file_fmt_qcow_cluster.c
@@ -21,6 +21,14 @@
#include "xloop_file_fmt_qcow_cluster.h"
/*
+ * __xloop_file_fmt_qcow_cluster_l2_load
+ *
+ * @xlo_fmt: QCOW file format
+ * @offset: A guest offset, used to calculate what slice of the L2
+ * table to load.
+ * @l2_offset: Offset to the L2 table in the image file.
+ * @l2_slice: Location to store the pointer to the L2 slice.
+ *
* Loads a L2 slice into memory (L2 slices are the parts of L2 tables
* that are loaded by the qcow2 cache). If the slice is in the cache,
* the cache is used; otherwise the L2 slice is loaded from the image
@@ -31,7 +39,7 @@ static int __xloop_file_fmt_qcow_cluster_l2_load(struct xloop_file_fmt *xlo_fmt,
{
struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
- int start_of_slice = sizeof(u64) * (
+ int start_of_slice = xloop_file_fmt_qcow_l2_entry_size(qcow_data) * (
xloop_file_fmt_qcow_offset_to_l2_index(qcow_data, offset) -
xloop_file_fmt_qcow_offset_to_l2_slice_index(qcow_data, offset)
);
@@ -42,93 +50,159 @@ static int __xloop_file_fmt_qcow_cluster_l2_load(struct xloop_file_fmt *xlo_fmt,
}
/*
- * Checks how many clusters in a given L2 slice are contiguous in the image
- * file. As soon as one of the flags in the bitmask stop_flags changes compared
- * to the first cluster, the search is stopped and the cluster is not counted
- * as contiguous. (This allows it, for example, to stop at the first compressed
- * cluster which may require a different handling)
+ * For a given L2 entry, count the number of contiguous subclusters of
+ * the same type starting from @sc_from. Compressed clusters are
+ * treated as if they were divided into subclusters of size
+ * qcow_data->subcluster_size.
+ *
+ * Return the number of contiguous subclusters and set @type to the
+ * subcluster type.
+ *
+ * If the L2 entry is invalid return -errno and set @type to
+ * QCOW_SUBCLUSTER_INVALID.
*/
-static int __xloop_file_fmt_qcow_cluster_count_contiguous(
- struct xloop_file_fmt *xlo_fmt, int nb_clusters, int cluster_size,
- u64 *l2_slice, u64 stop_flags)
+static int __xloop_file_fmt_qcow_get_subcluster_range_type(
+	struct xloop_file_fmt *xlo_fmt, u64 l2_entry, u64 l2_bitmap,
+	unsigned int sc_from, enum xloop_file_fmt_qcow_subcluster_type *type)
 {
-	int i;
-	enum xloop_file_fmt_qcow_cluster_type first_cluster_type;
-	u64 mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
-	u64 first_entry = be64_to_cpu(l2_slice[0]);
-	u64 offset = first_entry & mask;
-
-	first_cluster_type = xloop_file_fmt_qcow_get_cluster_type(xlo_fmt,
-		first_entry);
-	if (first_cluster_type == QCOW_CLUSTER_UNALLOCATED) {
+	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+	u32 val;
+
+	*type = xloop_file_fmt_qcow_get_subcluster_type(xlo_fmt, l2_entry,
+		l2_bitmap, sc_from);
+
+	if (*type == QCOW_SUBCLUSTER_INVALID) {
+		return -EINVAL;
+	} else if (!xloop_file_fmt_qcow_has_subclusters(qcow_data) ||
+		*type == QCOW_SUBCLUSTER_COMPRESSED) {
+		/* no per-subcluster state to scan: the rest of the cluster counts */
+		return qcow_data->subclusters_per_cluster - sc_from;
+	}
+
+	/*
+	 * Each case builds a 32-bit word in which the lowest set bit marks
+	 * the first subcluster at or after @sc_from that ends the run.
+	 * Bits below @sc_from are forced so that they can never match.
+	 */
+	switch (*type) {
+	case QCOW_SUBCLUSTER_NORMAL:
+		/* run of set bits in the low bitmap half -> invert and scan */
+		val = ~(l2_bitmap | QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from));
+		break;
+
+	case QCOW_SUBCLUSTER_ZERO_PLAIN:
+	case QCOW_SUBCLUSTER_ZERO_ALLOC:
+		/* run of set bits in the high bitmap half -> invert and scan */
+		val = ~((l2_bitmap | QCOW_OFLAG_SUB_ZERO_RANGE(0, sc_from)) >> 32);
+		break;
+
+	case QCOW_SUBCLUSTER_UNALLOCATED_PLAIN:
+	case QCOW_SUBCLUSTER_UNALLOCATED_ALLOC:
+		/* run ends at the first bit set in either bitmap half */
+		val = ((l2_bitmap >> 32) | l2_bitmap)
+			& ~QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from);
+		break;
+
+	default:
+		/* not reachable */
+		ASSERT(false);
+		*type = QCOW_SUBCLUSTER_INVALID;
 		return 0;
-	}
-
-	/* must be allocated */
-	ASSERT(first_cluster_type == QCOW_CLUSTER_NORMAL ||
-		first_cluster_type == QCOW_CLUSTER_ZERO_ALLOC);
-
-	for (i = 0; i < nb_clusters; i++) {
-		u64 l2_entry = be64_to_cpu(l2_slice[i]) & mask;
-		if (offset + (u64) i * cluster_size != l2_entry) {
-			break;
-		}
-	}
-
-	return i;
+	}
+
+	/*
+	 * __builtin_ctz(0) is undefined. val == 0 means that no subcluster
+	 * up to the end of the bitmap terminates the run (e.g. a fully
+	 * allocated or fully unallocated cluster), so the run extends over
+	 * all remaining bits of the 32-bit word.
+	 */
+	return (val ? (unsigned int)__builtin_ctz(val)
+		: (unsigned int)(8 * sizeof(val))) - sc_from;
 }
/*
- * Checks how many consecutive unallocated clusters in a given L2
- * slice have the same cluster type.
+ * Return the number of contiguous subclusters of the exact same type
+ * in a given L2 slice, starting from cluster @l2_index, subcluster
+ * @sc_index. Allocated subclusters are required to be contiguous in
+ * the image file.
+ * At most @nb_clusters are checked (note that this means clusters,
+ * not subclusters).
+ * Compressed clusters are always processed one by one but for the
+ * purpose of this count they are treated as if they were divided into
+ * subclusters of size qcow_data->subcluster_size.
+ * On failure return -errno and update @l2_index to point to the
+ * invalid entry.
*/
-static int __xloop_file_fmt_qcow_cluster_count_contiguous_unallocated(
- struct xloop_file_fmt *xlo_fmt, int nb_clusters, u64 *l2_slice,
- enum xloop_file_fmt_qcow_cluster_type wanted_type)
+static int __xloop_file_fmt_qcow_count_contiguous_subclusters(
+ struct xloop_file_fmt *xlo_fmt, int nb_clusters, unsigned int sc_index,
+ u64 *l2_slice, unsigned int *l2_index)
{
- int i;
-
- ASSERT(wanted_type == QCOW_CLUSTER_ZERO_PLAIN ||
- wanted_type == QCOW_CLUSTER_UNALLOCATED);
-
- for (i = 0; i < nb_clusters; i++) {
- u64 entry = be64_to_cpu(l2_slice[i]);
- enum xloop_file_fmt_qcow_cluster_type type =
- xloop_file_fmt_qcow_get_cluster_type(xlo_fmt, entry);
-
- if (type != wanted_type) {
- break;
- }
- }
-
- return i;
+ struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+ int i, count = 0; /* count: contiguous subclusters accumulated so far */
+ bool check_offset = false; /* set when host offsets of successive clusters must be adjacent */
+ u64 expected_offset = 0; /* host cluster offset the entry under inspection has to carry */
+ enum xloop_file_fmt_qcow_subcluster_type expected_type =
+ QCOW_SUBCLUSTER_NORMAL; /* overwritten with the type of the first entry */
+ enum xloop_file_fmt_qcow_subcluster_type type;
+
+ ASSERT(*l2_index + nb_clusters <= qcow_data->l2_slice_size); /* scanned entries must stay inside the slice */
+
+ for (i = 0; i < nb_clusters; i++) {
+ unsigned int first_sc = (i == 0) ? sc_index : 0; /* only the first cluster is entered mid-cluster */
+ u64 l2_entry = xloop_file_fmt_qcow_get_l2_entry(qcow_data, l2_slice,
+ *l2_index + i);
+ u64 l2_bitmap = xloop_file_fmt_qcow_get_l2_bitmap(qcow_data, l2_slice,
+ *l2_index + i);
+ int ret = __xloop_file_fmt_qcow_get_subcluster_range_type(xlo_fmt,
+ l2_entry, l2_bitmap, first_sc, &type); /* ret: run length inside this cluster, or -errno */
+ if (ret < 0) {
+ *l2_index += i; /* Point to the invalid entry */
+ return -EIO; /* any helper error is reported to the caller as -EIO */
+ }
+ if (i == 0) {
+ if (type == QCOW_SUBCLUSTER_COMPRESSED) {
+ /* Compressed clusters are always processed one by one */
+ return ret;
+ }
+ expected_type = type; /* the first entry defines what the following ones must match */
+ expected_offset = l2_entry & QCOW_L2E_OFFSET_MASK;
+ check_offset = (type == QCOW_SUBCLUSTER_NORMAL ||
+ type == QCOW_SUBCLUSTER_ZERO_ALLOC ||
+ type == QCOW_SUBCLUSTER_UNALLOCATED_ALLOC);
+ } else if (type != expected_type) {
+ break; /* type changed at a cluster boundary: the run ends here */
+ } else if (check_offset) {
+ expected_offset += qcow_data->cluster_size; /* next cluster must follow directly in the image file */
+ if (expected_offset != (l2_entry & QCOW_L2E_OFFSET_MASK)) {
+ break;
+ }
+ }
+ count += ret;
+ /* Stop if there are type changes before the end of the cluster */
+ if (first_sc + ret < qcow_data->subclusters_per_cluster) {
+ break;
+ }
+ }
+
+ return count;
}
/*
- * For a given offset of the virtual disk, find the cluster type and offset in
- * the qcow2 file. The offset is stored in *cluster_offset.
+ * xloop_file_fmt_qcow_get_host_offset
+ *
+ * For a given offset of the virtual disk find the equivalent host
+ * offset in the qcow2 file and store it in *host_offset. Neither
+ * offset needs to be aligned to a cluster boundary.
+ *
+ * If the cluster is unallocated then *host_offset will be 0.
+ * If the cluster is compressed then *host_offset will contain the
+ * complete compressed cluster descriptor.
*
* On entry, *bytes is the maximum number of contiguous bytes starting at
* offset that we are interested in.
*
* On exit, *bytes is the number of bytes starting at offset that have the same
- * cluster type and (if applicable) are stored contiguously in the image file.
- * Compressed clusters are always returned one by one.
+ * subcluster type and (if applicable) are stored contiguously in the image
+ * file. The subcluster type is stored in *subcluster_type.
+ * Compressed clusters are always processed one by one.
*
- * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
- * cases.
+ * Returns 0 on success, -errno in error cases.
*/
-int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
- u64 offset, unsigned int *bytes, u64 *cluster_offset)
+int xloop_file_fmt_qcow_get_host_offset(struct xloop_file_fmt *xlo_fmt,
+ u64 offset, unsigned int *bytes, u64 *host_offset,
+ enum xloop_file_fmt_qcow_subcluster_type *subcluster_type)
{
struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
- unsigned int l2_index;
- u64 l1_index, l2_offset, *l2_slice;
- int c;
+ unsigned int l2_index, sc_index;
+ u64 l1_index, l2_offset, *l2_slice, l2_entry, l2_bitmap;
+ int sc;
unsigned int offset_in_cluster;
u64 bytes_available, bytes_needed, nb_clusters;
- enum xloop_file_fmt_qcow_cluster_type type;
+ enum xloop_file_fmt_qcow_subcluster_type type;
int ret;
+ u64 host_cluster_offset;
offset_in_cluster = xloop_file_fmt_qcow_offset_into_cluster(qcow_data,
offset);
@@ -146,18 +220,18 @@ int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
bytes_needed = bytes_available;
}
- *cluster_offset = 0;
+ *host_offset = 0;
/* seek to the l2 offset in the l1 table */
l1_index = xloop_file_fmt_qcow_offset_to_l1_index(qcow_data, offset);
if (l1_index >= qcow_data->l1_size) {
- type = QCOW_CLUSTER_UNALLOCATED;
+ type = QCOW_SUBCLUSTER_UNALLOCATED_PLAIN;
goto out;
}
- l2_offset = qcow_data->l1_table[l1_index] & L1E_OFFSET_MASK;
+ l2_offset = qcow_data->l1_table[l1_index] & QCOW_L1E_OFFSET_MASK;
if (!l2_offset) {
- type = QCOW_CLUSTER_UNALLOCATED;
+ type = QCOW_SUBCLUSTER_UNALLOCATED_PLAIN;
goto out;
}
@@ -177,7 +251,11 @@ int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
/* find the cluster offset for the given disk offset */
l2_index = xloop_file_fmt_qcow_offset_to_l2_slice_index(qcow_data,
offset);
- *cluster_offset = be64_to_cpu(l2_slice[l2_index]);
+ sc_index = xloop_file_fmt_qcow_offset_to_sc_index(qcow_data, offset);
+ l2_entry = xloop_file_fmt_qcow_get_l2_entry(qcow_data, l2_slice,
+ l2_index);
+ l2_bitmap = xloop_file_fmt_qcow_get_l2_bitmap(qcow_data, l2_slice,
+ l2_index);
nb_clusters = xloop_file_fmt_qcow_size_to_clusters(qcow_data,
bytes_needed);
@@ -186,10 +264,11 @@ int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
* assertion is always true */
ASSERT(nb_clusters <= INT_MAX);
- type = xloop_file_fmt_qcow_get_cluster_type(xlo_fmt, *cluster_offset);
+ type = xloop_file_fmt_qcow_get_subcluster_type(xlo_fmt, l2_entry,
+ l2_bitmap, sc_index);
if (qcow_data->qcow_version < 3 && (
- type == QCOW_CLUSTER_ZERO_PLAIN ||
- type == QCOW_CLUSTER_ZERO_ALLOC)) {
+ type == QCOW_SUBCLUSTER_ZERO_PLAIN ||
+ type == QCOW_SUBCLUSTER_ZERO_ALLOC)) {
dev_err_ratelimited(xloop_file_fmt_to_dev(xlo_fmt), "zero cluster "
"entry found in pre-v3 image (L2 offset: %llx, L2 index: %x)\n",
l2_offset, l2_index);
@@ -197,45 +276,39 @@ int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
goto fail;
}
switch (type) {
- case QCOW_CLUSTER_COMPRESSED:
- if (xloop_file_fmt_qcow_has_data_file(xlo_fmt)) {
+ case QCOW_SUBCLUSTER_INVALID:
+ break; /* This is handled by count_contiguous_subclusters() below */
+ case QCOW_SUBCLUSTER_COMPRESSED:
+ if (xloop_file_fmt_qcow_has_data_file(qcow_data)) {
dev_err_ratelimited(xloop_file_fmt_to_dev(xlo_fmt), "compressed "
"cluster entry found in image with external data file "
"(L2 offset: %llx, L2 index: %x)\n", l2_offset, l2_index);
ret = -EIO;
goto fail;
}
- /* Compressed clusters can only be processed one by one */
- c = 1;
- *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
+ *host_offset = l2_entry & QCOW_L2E_COMPRESSED_OFFSET_SIZE_MASK;
break;
- case QCOW_CLUSTER_ZERO_PLAIN:
- case QCOW_CLUSTER_UNALLOCATED:
- /* how many empty clusters ? */
- c = __xloop_file_fmt_qcow_cluster_count_contiguous_unallocated(
- xlo_fmt, nb_clusters, &l2_slice[l2_index], type);
- *cluster_offset = 0;
+ case QCOW_SUBCLUSTER_ZERO_PLAIN:
+ case QCOW_SUBCLUSTER_UNALLOCATED_PLAIN:
break;
- case QCOW_CLUSTER_ZERO_ALLOC:
- case QCOW_CLUSTER_NORMAL:
- /* how many allocated clusters ? */
- c = __xloop_file_fmt_qcow_cluster_count_contiguous(xlo_fmt,
- nb_clusters, qcow_data->cluster_size,
- &l2_slice[l2_index], QCOW_OFLAG_ZERO);
- *cluster_offset &= L2E_OFFSET_MASK;
+ case QCOW_SUBCLUSTER_ZERO_ALLOC:
+ case QCOW_SUBCLUSTER_NORMAL:
+ case QCOW_SUBCLUSTER_UNALLOCATED_ALLOC:
+ host_cluster_offset = l2_entry & QCOW_L2E_OFFSET_MASK;
+ *host_offset = host_cluster_offset + offset_in_cluster;
if (xloop_file_fmt_qcow_offset_into_cluster(qcow_data,
- *cluster_offset)) {
+ host_cluster_offset)) {
dev_err_ratelimited(xloop_file_fmt_to_dev(xlo_fmt), "cluster "
"allocation offset %llx unaligned (L2 offset: %llx, "
- "L2 index: %x)\n", *cluster_offset, l2_offset, l2_index);
+ "L2 index: %x)\n", host_cluster_offset, l2_offset, l2_index);
ret = -EIO;
goto fail;
}
- if (xloop_file_fmt_qcow_has_data_file(xlo_fmt) &&
- *cluster_offset != offset - offset_in_cluster) {
+ if (xloop_file_fmt_qcow_has_data_file(qcow_data) &&
+ *host_offset != offset) {
dev_err_ratelimited(xloop_file_fmt_to_dev(xlo_fmt), "external "
"data file host cluster offset %llx does not match guest "
- "cluster offset: %llx, L2 index: %x)\n", *cluster_offset,
+ "cluster offset: %llx, L2 index: %x)\n", host_cluster_offset,
offset - offset_in_cluster, l2_index);
ret = -EIO;
goto fail;
@@ -245,9 +318,19 @@ int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
BUG();
}
+ sc = __xloop_file_fmt_qcow_count_contiguous_subclusters(xlo_fmt,
+ nb_clusters, sc_index, l2_slice, &l2_index);
+
+ if (sc < 0) {
+ dev_err_ratelimited(xloop_file_fmt_to_dev(xlo_fmt), "invalid cluster "
+ "entry found (L2 offset: %#llx, L2 index: %#x)", l2_offset,
+ l2_index);
+ ret = -EIO;
+ goto fail;
+ }
xloop_file_fmt_qcow_cache_put(xlo_fmt, (void **) &l2_slice);
- bytes_available = (s64) c * qcow_data->cluster_size;
+ bytes_available = ((s64) sc + sc_index) << qcow_data->subcluster_bits;
out:
if (bytes_available > bytes_needed) {
@@ -260,7 +343,9 @@ out:
ASSERT(bytes_available - offset_in_cluster <= UINT_MAX);
*bytes = bytes_available - offset_in_cluster;
- return type;
+ *subcluster_type = type;
+
+ return 0;
fail:
xloop_file_fmt_qcow_cache_put(xlo_fmt, (void **) &l2_slice);
diff --git a/kernel/xloop_file_fmt_qcow_cluster.h b/kernel/xloop_file_fmt_qcow_cluster.h
index ff3e0a1..a3716f5 100644
--- a/kernel/xloop_file_fmt_qcow_cluster.h
+++ b/kernel/xloop_file_fmt_qcow_cluster.h
@@ -15,9 +15,8 @@
#include "xloop_file_fmt.h"
-extern int xloop_file_fmt_qcow_cluster_get_offset(struct xloop_file_fmt *xlo_fmt,
- u64 offset,
- unsigned int *bytes,
- u64 *cluster_offset);
+extern int xloop_file_fmt_qcow_get_host_offset(struct xloop_file_fmt *xlo_fmt,
+ u64 offset, unsigned int *bytes, u64 *host_offset,
+ enum xloop_file_fmt_qcow_subcluster_type *subcluster_type);
#endif
diff --git a/kernel/xloop_file_fmt_qcow_main.c b/kernel/xloop_file_fmt_qcow_main.c
index 55d2f32..fbc49f5 100644
--- a/kernel/xloop_file_fmt_qcow_main.c
+++ b/kernel/xloop_file_fmt_qcow_main.c
@@ -23,12 +23,23 @@
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/zlib.h>
+#ifdef CONFIG_ZSTD_DECOMPRESS
+#include <linux/zstd.h>
+#endif
#include "xloop_file_fmt.h"
#include "xloop_file_fmt_qcow_main.h"
#include "xloop_file_fmt_qcow_cache.h"
#include "xloop_file_fmt_qcow_cluster.h"
+#ifdef CONFIG_ZSTD_DECOMPRESS
+#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27
+#define ZSTD_MAXWINDOWSIZE ((U32_C(1) << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1)
+#endif
+
+typedef ssize_t (*qcow_file_fmt_decompress_fn)(struct xloop_file_fmt *xlo_fmt,
+ void *dest, size_t dest_size, const void *src, size_t src_size);
+
static int __qcow_file_fmt_header_read(struct xloop_file_fmt *xlo_fmt,
struct file *file, struct xloop_file_fmt_qcow_header *header)
{
@@ -135,32 +146,70 @@ static int __qcow_file_fmt_compression_init(struct xloop_file_fmt *xlo_fmt)
{
struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
int ret = 0;
+#ifdef CONFIG_ZSTD_DECOMPRESS
+ size_t workspace_size;
+#endif
- qcow_data->strm = kzalloc(sizeof(*qcow_data->strm), GFP_KERNEL);
- if (!qcow_data->strm) {
+ /* create workspace for ZLIB decompression stream */
+ qcow_data->zlib_dstrm = kzalloc(sizeof(*qcow_data->zlib_dstrm), GFP_KERNEL);
+ if (!qcow_data->zlib_dstrm) {
ret = -ENOMEM;
goto out;
}
- qcow_data->strm->workspace = vzalloc(zlib_inflate_workspacesize());
- if (!qcow_data->strm->workspace) {
+ qcow_data->zlib_dstrm->workspace = vzalloc(zlib_inflate_workspacesize());
+ if (!qcow_data->zlib_dstrm->workspace) {
+ ret = -ENOMEM;
+ goto out_free_zlib_dstrm;
+ }
+
+ /* set up ZLIB decompression stream */
+ ret = zlib_inflateInit2(qcow_data->zlib_dstrm, -12);
+ if (ret != Z_OK) {
+ ret = -EIO;
+ goto out_free_zlib_dworkspace;
+ }
+
+#ifdef CONFIG_ZSTD_DECOMPRESS
+ /* create workspace for ZSTD decompression stream */
+ workspace_size = ZSTD_DStreamWorkspaceBound(ZSTD_MAXWINDOWSIZE);
+ qcow_data->zstd_dworkspace = vzalloc(workspace_size);
+ if (!qcow_data->zstd_dworkspace) {
ret = -ENOMEM;
- goto out_free_strm;
+ goto out_free_zlib_dworkspace;
+ }
+
+ /* set up ZSTD decompression stream */
+ qcow_data->zstd_dstrm = ZSTD_initDStream(ZSTD_MAXWINDOWSIZE,
+ qcow_data->zstd_dworkspace, workspace_size);
+ if (!qcow_data->zstd_dstrm) {
+ ret = -EINVAL;
+ goto out_free_zstd_dworkspace;
}
+#endif
+ /* create cache for last compressed QCOW cluster */
qcow_data->cmp_last_coffset = ULLONG_MAX;
qcow_data->cmp_out_buf = vmalloc(qcow_data->cluster_size);
if (!qcow_data->cmp_out_buf) {
ret = -ENOMEM;
- goto out_free_workspace;
+#ifdef CONFIG_ZSTD_DECOMPRESS
+ goto out_free_zstd_dworkspace;
+#else
+ goto out_free_zlib_dworkspace;
+#endif
}
return ret;
-out_free_workspace:
- vfree(qcow_data->strm->workspace);
-out_free_strm:
- kfree(qcow_data->strm);
+#ifdef CONFIG_ZSTD_DECOMPRESS
+out_free_zstd_dworkspace:
+ vfree(qcow_data->zstd_dworkspace);
+#endif
+out_free_zlib_dworkspace:
+ vfree(qcow_data->zlib_dstrm->workspace);
+out_free_zlib_dstrm:
+ kfree(qcow_data->zlib_dstrm);
out:
return ret;
}
@@ -169,8 +218,17 @@ static void __qcow_file_fmt_compression_exit(struct xloop_file_fmt *xlo_fmt)
{
struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
- vfree(qcow_data->strm->workspace);
- kfree(qcow_data->strm);
+ /* ZLIB specific cleanup */
+ zlib_inflateEnd(qcow_data->zlib_dstrm);
+ vfree(qcow_data->zlib_dstrm->workspace);
+ kfree(qcow_data->zlib_dstrm);
+
+ /* ZSTD specific cleanup */
+#ifdef CONFIG_ZSTD_DECOMPRESS
+ vfree(qcow_data->zstd_dworkspace);
+#endif
+
+ /* last compressed QCOW cluster cleanup */
vfree(qcow_data->cmp_out_buf);
}
@@ -227,6 +285,13 @@ static void __qcow_file_fmt_header_to_buf(struct xloop_file_fmt *xlo_fmt,
header->header_length);
}
+ if (header->header_length > offsetof(struct xloop_file_fmt_qcow_header,
+ compression_type)) {
+ len += sprintf(header_buf + len,
+ "compression_type: %d\n",
+ header->compression_type);
+ }
+
ASSERT(len < QCOW_HEADER_BUF_LEN);
}
@@ -253,10 +318,12 @@ static ssize_t __qcow_file_fmt_dbgfs_ofs_read(struct file *file,
struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
unsigned int cur_bytes = 1;
u64 offset = 0;
- u64 cluster_offset = 0;
+ u64 coffset = 0;
+ u64 host_offset = 0;
s64 offset_in_cluster = 0;
+ enum xloop_file_fmt_qcow_subcluster_type type;
ssize_t len = 0;
- int ret = 0;
+ int ret = 0, csize = 0, nb_csectors = 0;
/* read the share debugfs offset */
ret = mutex_lock_interruptible(&qcow_data->dbgfs_qcow_offset_mutex);
@@ -267,8 +334,8 @@ static ssize_t __qcow_file_fmt_dbgfs_ofs_read(struct file *file,
mutex_unlock(&qcow_data->dbgfs_qcow_offset_mutex);
/* calculate and print the cluster offset */
- ret = xloop_file_fmt_qcow_cluster_get_offset(xlo_fmt,
- offset, &cur_bytes, &cluster_offset);
+ ret = xloop_file_fmt_qcow_get_host_offset(xlo_fmt,
+ offset, &cur_bytes, &host_offset, &type);
if (ret < 0)
return -EINVAL;
@@ -276,8 +343,26 @@ static ssize_t __qcow_file_fmt_dbgfs_ofs_read(struct file *file,
offset);
len = sprintf(qcow_data->dbgfs_file_qcow_cluster_buf,
- "offset: %lld\ncluster_offset: %lld\noffset_in_cluster: %lld\n",
- offset, cluster_offset, offset_in_cluster);
+ "cluster type: %s\n"
+ "cluster offset host: %lld\n"
+ "cluster offset guest: %lld\n"
+ "cluster offset in-cluster: %lld\n",
+ xloop_file_fmt_qcow_get_subcluster_name(type),
+ host_offset, offset, offset_in_cluster);
+
+ if (type == QCOW_SUBCLUSTER_COMPRESSED) {
+ coffset = host_offset & qcow_data->cluster_offset_mask;
+ nb_csectors = ((host_offset >> qcow_data->csize_shift) &
+ qcow_data->csize_mask) + 1;
+ csize = nb_csectors * QCOW_COMPRESSED_SECTOR_SIZE -
+ (coffset & ~QCOW_COMPRESSED_SECTOR_MASK);
+
+ len += sprintf(qcow_data->dbgfs_file_qcow_cluster_buf + len,
+ "cluster compressed offset: %lld\n"
+ "cluster compressed sectors: %d\n"
+ "cluster compressed size: %d\n",
+ coffset, nb_csectors, csize);
+ }
ASSERT(len < QCOW_CLUSTER_BUF_LEN);
@@ -385,6 +470,44 @@ static void __qcow_file_fmt_dbgfs_exit(struct xloop_file_fmt *xlo_fmt)
}
#endif
+/*
+ * __qcow_file_fmt_validate_compression_type()
+ *
+ * Checks that qcow_data->compression_type is one this build can handle
+ * and that it agrees with the QCOW_INCOMPAT_COMPRESSION feature bit:
+ * the bit must be clear for ZLIB and set for every other type.
+ *
+ * @xlo_fmt - QCOW file format
+ *
+ * Returns: 0 if the compression type is valid
+ *          -ENOTSUPP if the compression type is not handled
+ *          -EINVAL if the feature bit contradicts the compression type
+ */
+static int __qcow_file_fmt_validate_compression_type(
+	struct xloop_file_fmt *xlo_fmt)
+{
+	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+	bool is_zlib;
+	bool compression_bit_set;
+
+	switch (qcow_data->compression_type) {
+	case QCOW_COMPRESSION_TYPE_ZLIB:
+#ifdef CONFIG_ZSTD_DECOMPRESS
+	case QCOW_COMPRESSION_TYPE_ZSTD:
+#endif
+		break;
+	default:
+		/* terminate the message with a newline like every other dev_err */
+		dev_err(xloop_file_fmt_to_dev(xlo_fmt), "unknown compression type: %u\n",
+			qcow_data->compression_type);
+		return -ENOTSUPP;
+	}
+
+	/*
+	 * if the compression type differs from QCOW_COMPRESSION_TYPE_ZLIB
+	 * the incompatible feature flag must be set
+	 */
+	is_zlib = (qcow_data->compression_type == QCOW_COMPRESSION_TYPE_ZLIB);
+	compression_bit_set =
+		!!(qcow_data->incompatible_features & QCOW_INCOMPAT_COMPRESSION);
+
+	if (is_zlib && compression_bit_set) {
+		dev_err(xloop_file_fmt_to_dev(xlo_fmt), "compression type "
+			"incompatible feature bit must not be set\n");
+		return -EINVAL;
+	}
+
+	if (!is_zlib && !compression_bit_set) {
+		dev_err(xloop_file_fmt_to_dev(xlo_fmt), "compression type "
+			"incompatible feature bit must be set\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
{
struct xloop_file_fmt_qcow_data *qcow_data;
@@ -393,6 +516,10 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
u64 l1_vm_state_index;
u64 l2_cache_size;
u64 l2_cache_entry_size;
+ u64 virtual_disk_size;
+ u64 max_l2_entries;
+ u64 max_l2_cache;
+ u64 l2_cache_max_setting;
ssize_t len;
unsigned int i;
int ret = 0;
@@ -428,8 +555,6 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
qcow_data->cluster_bits = header.cluster_bits;
qcow_data->cluster_size = 1 << qcow_data->cluster_bits;
- qcow_data->cluster_sectors = 1 <<
- (qcow_data->cluster_bits - SECTOR_SHIFT);
if (header.header_length > qcow_data->cluster_size) {
dev_err(xloop_file_fmt_to_dev(xlo_fmt), "QCOW header exceeds cluster "
@@ -457,6 +582,25 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
qcow_data->compatible_features = header.compatible_features;
qcow_data->autoclear_features = header.autoclear_features;
+ /*
+ * Handle compression type
+ * Older qcow2 images don't contain the compression type header.
+ * Distinguish them by the header length and use
+ * the only valid (default) compression type in that case
+ */
+ if (header.header_length > offsetof(struct xloop_file_fmt_qcow_header,
+ compression_type)) {
+ qcow_data->compression_type = header.compression_type;
+ } else {
+ qcow_data->compression_type = QCOW_COMPRESSION_TYPE_ZLIB;
+ }
+
+ ret = __qcow_file_fmt_validate_compression_type(xlo_fmt);
+ if (ret) {
+ goto free_qcow_data;
+ }
+
+ /* check for incompatible features */
if (qcow_data->incompatible_features & QCOW_INCOMPAT_DIRTY) {
dev_err(xloop_file_fmt_to_dev(xlo_fmt), "image contains inconsistent "
"refcounts\n");
@@ -472,12 +616,31 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
}
if (qcow_data->incompatible_features & QCOW_INCOMPAT_DATA_FILE) {
- dev_err(xloop_file_fmt_to_dev(xlo_fmt), "clusters in the external "
- "data file are not refcounted\n");
- ret = -EACCES;
+ dev_err(xloop_file_fmt_to_dev(xlo_fmt), "data-file is required for "
+ "this image\n");
+ ret = -EINVAL;
goto free_qcow_data;
}
+ qcow_data->subclusters_per_cluster =
+ xloop_file_fmt_qcow_has_subclusters(qcow_data) ?
+ QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER : 1;
+ qcow_data->subcluster_size =
+ qcow_data->cluster_size / qcow_data->subclusters_per_cluster;
+ /*
+ * check if subcluster_size is non-zero to avoid unknown results of
+ * __builtin_ctz
+ */
+ ASSERT(qcow_data->subcluster_size != 0);
+ qcow_data->subcluster_bits = __builtin_ctz(qcow_data->subcluster_size);
+
+ if (qcow_data->subcluster_size < (1 << QCOW_MIN_CLUSTER_BITS)) {
+ dev_err(xloop_file_fmt_to_dev(xlo_fmt), "unsupported subcluster "
+ "size: %d\n", qcow_data->subcluster_size);
+ ret = -EINVAL;
+ goto free_qcow_data;
+ }
+
/* Check support for various header values */
if (header.refcount_order > 6) {
dev_err(xloop_file_fmt_to_dev(xlo_fmt), "reference count entry width "
@@ -498,8 +661,13 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
goto free_qcow_data;
}
- /* L2 is always one cluster */
- qcow_data->l2_bits = qcow_data->cluster_bits - 3;
+ /*
+ * check if xloop_file_fmt_qcow_l2_entry_size(qcow_data) is non-zero to
+ * avoid unknown results of __builtin_ctz
+ */
+ ASSERT(xloop_file_fmt_qcow_l2_entry_size(qcow_data) != 0);
+ qcow_data->l2_bits = qcow_data->cluster_bits -
+ __builtin_ctz(xloop_file_fmt_qcow_l2_entry_size(qcow_data));
qcow_data->l2_size = 1 << qcow_data->l2_bits;
/* 2^(qcow_data->refcount_order - 3) is the refcount width in bytes */
qcow_data->refcount_block_bits = qcow_data->cluster_bits -
@@ -544,7 +712,7 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
/* read the level 1 table */
ret = __qcow_file_fmt_validate_table(xlo_fmt, header.l1_table_offset,
- header.l1_size, sizeof(u64), QCOW_MAX_L1_SIZE,
+ header.l1_size, QCOW_L1E_SIZE, QCOW_MAX_L1_SIZE,
"Active L1 table");
if (ret < 0) {
goto free_qcow_data;
@@ -571,7 +739,7 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
if (qcow_data->l1_size > 0) {
qcow_data->l1_table = vzalloc(round_up(qcow_data->l1_size *
- sizeof(u64), 512));
+ QCOW_L1E_SIZE, 512));
if (qcow_data->l1_table == NULL) {
dev_err(xloop_file_fmt_to_dev(xlo_fmt), "could not allocate "
"L1 table\n");
@@ -579,7 +747,7 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
goto free_qcow_data;
}
len = kernel_read(xlo->xlo_backing_file, qcow_data->l1_table,
- qcow_data->l1_size * sizeof(u64),
+ qcow_data->l1_size * QCOW_L1E_SIZE,
&qcow_data->l1_table_offset);
if (len < 0) {
dev_err(xloop_file_fmt_to_dev(xlo_fmt), "could not read "
@@ -604,13 +772,21 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
goto free_l1_table;
}
-
/* create cache for L2 */
- l2_cache_size = qcow_data->size / (qcow_data->cluster_size / 8);
- l2_cache_entry_size = min(qcow_data->cluster_size, (int)4096);
+ virtual_disk_size = qcow_data->size;
+ max_l2_entries = DIV_ROUND_UP(virtual_disk_size, qcow_data->cluster_size);
+ max_l2_cache = round_up(
+ max_l2_entries * xloop_file_fmt_qcow_l2_entry_size(qcow_data),
+ qcow_data->cluster_size);
- /* limit the L2 size to maximum QCOW_DEFAULT_L2_CACHE_MAX_SIZE */
- l2_cache_size = min(l2_cache_size, (u64)QCOW_DEFAULT_L2_CACHE_MAX_SIZE);
+ /* define the maximum L2 cache size */
+ l2_cache_max_setting = QCOW_DEFAULT_L2_CACHE_MAX_SIZE;
+
+ /* limit the L2 cache size to maximum l2_cache_max_setting */
+ l2_cache_size = min(max_l2_cache, l2_cache_max_setting);
+
+ /* determine the size of a cache entry */
+ l2_cache_entry_size = min(qcow_data->cluster_size, (int)PAGE_SIZE);
/* calculate the number of cache tables */
l2_cache_size /= l2_cache_entry_size;
@@ -624,7 +800,8 @@ static int qcow_file_fmt_init(struct xloop_file_fmt *xlo_fmt)
goto free_l1_table;
}
- qcow_data->l2_slice_size = l2_cache_entry_size / sizeof(u64);
+ qcow_data->l2_slice_size =
+ l2_cache_entry_size / xloop_file_fmt_qcow_l2_entry_size(qcow_data);
qcow_data->l2_table_cache = xloop_file_fmt_qcow_cache_create(xlo_fmt,
l2_cache_size, l2_cache_entry_size);
@@ -681,39 +858,195 @@ static void qcow_file_fmt_exit(struct xloop_file_fmt *xlo_fmt)
}
}
-static ssize_t __qcow_file_fmt_buffer_decompress(struct xloop_file_fmt *xlo_fmt,
+/*
+ * __qcow_file_fmt_zlib_decompress()
+ *
+ * Decompress some data (not more than @src_size bytes) to produce exactly
+ * @dest_size bytes using zlib compression method
+ *
+ * @xlo_fmt - QCOW file format
+ * @dest - destination buffer, @dest_size bytes
+ * @dest_size - number of bytes the decompressed data must fill
+ * @src - source buffer, @src_size bytes
+ * @src_size - number of compressed bytes available in @src
+ *
+ * Returns: 0 on success
+ *          -EINVAL if the inflate stream could not be reset
+ *          -EIO if the stream did not end cleanly or produced fewer than
+ *          @dest_size bytes
+ */
+static ssize_t __qcow_file_fmt_zlib_decompress(struct xloop_file_fmt *xlo_fmt,
 	void *dest,
 	size_t dest_size,
 	const void *src,
 	size_t src_size)
 {
 	struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
-	int ret = 0;
+	u8 zerostuff = 0;
+	ssize_t ret = 0;
-	qcow_data->strm->avail_in = src_size;
-	qcow_data->strm->next_in = (void *) src;
-	qcow_data->strm->avail_out = dest_size;
-	qcow_data->strm->next_out = dest;
-
-	ret = zlib_inflateInit2(qcow_data->strm, -12);
+	/* reuse the stream that was set up once at initialization time */
+	ret = zlib_inflateReset(qcow_data->zlib_dstrm);
 	if (ret != Z_OK) {
-		return -1;
+		ret = -EINVAL;
+		goto out;
 	}
-	ret = zlib_inflate(qcow_data->strm, Z_FINISH);
-	if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR)
-		|| qcow_data->strm->avail_out != 0) {
-		/* We approve Z_BUF_ERROR because we need @dest buffer to be
-		 * filled, but @src buffer may be processed partly (because in
-		 * qcow2 we know size of compressed data with precision of one
-		 * sector) */
-		ret = -1;
-	} else {
-		ret = 0;
+	qcow_data->zlib_dstrm->avail_in = src_size;
+	qcow_data->zlib_dstrm->next_in = (void *)src;
+	qcow_data->zlib_dstrm->avail_out = dest_size;
+	qcow_data->zlib_dstrm->next_out = dest;
+
+	ret = zlib_inflate(qcow_data->zlib_dstrm, Z_SYNC_FLUSH);
+	/*
+	 * Work around a bug in zlib, which sometimes wants to taste an extra
+	 * byte when being used in the (undocumented) raw deflate mode.
+	 * (From USAGI).
+	 */
+	if (ret == Z_OK && !qcow_data->zlib_dstrm->avail_in &&
+		qcow_data->zlib_dstrm->avail_out) {
+		qcow_data->zlib_dstrm->next_in = &zerostuff;
+		qcow_data->zlib_dstrm->avail_in = 1;
+		ret = zlib_inflate(qcow_data->zlib_dstrm, Z_FINISH);
+	}
+	/*
+	 * The stream has to end and @dest has to be filled completely,
+	 * otherwise the caller would consume a partially written buffer.
+	 */
+	if (ret != Z_STREAM_END || qcow_data->zlib_dstrm->avail_out != 0) {
+		ret = -EIO;
+		goto out;
 	}
+
+	/* report success as 0 instead of leaking the raw Z_STREAM_END code */
+	ret = 0;
+
+out:
 	return ret;
 }
+#ifdef CONFIG_ZSTD_DECOMPRESS
+/*
+ * __qcow_file_fmt_zstd_decompress()
+ *
+ * Decompress some data (not more than @src_size bytes) to produce exactly
+ * @dest_size bytes using zstd compression method. The decompression
+ * stream stored in the QCOW data (zstd_dstrm) is reset at the beginning
+ * of every call.
+ *
+ * @xlo_fmt - QCOW file format
+ * @dest - destination buffer, @dest_size bytes
+ * @dest_size - number of bytes that must be produced in @dest
+ * @src - source buffer, @src_size bytes
+ * @src_size - number of bytes available in @src
+ *
+ * Returns: 0 on success
+ * -EIO on any error
+ */
+static ssize_t __qcow_file_fmt_zstd_decompress(struct xloop_file_fmt *xlo_fmt,
+ void *dest,
+ size_t dest_size,
+ const void *src,
+ size_t src_size)
+{
+ struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+ size_t zstd_ret = 0;
+ ssize_t ret = 0;
+
+ ZSTD_outBuffer output = {
+ .dst = dest,
+ .size = dest_size,
+ .pos = 0
+ };
+
+ ZSTD_inBuffer input = {
+ .src = src,
+ .size = src_size,
+ .pos = 0
+ };
+
+ zstd_ret = ZSTD_resetDStream(qcow_data->zstd_dstrm);
+
+ if (ZSTD_isError(zstd_ret)) {
+ ret = -EIO;
+ goto out;
+ }
+
+ /*
+ * The compressed stream from the input buffer may consist of more
+ * than one zstd frame. So we iterate until we get a fully
+ * uncompressed cluster.
+ * From zstd docs related to ZSTD_decompressStream:
+ * "return : 0 when a frame is completely decoded and fully flushed"
+ * We suppose that this means: each time ZSTD_decompressStream reads
+ * only ONE full frame and returns 0 if and only if that frame
+ * is completely decoded and flushed. Only after returning 0,
+ * ZSTD_decompressStream reads another ONE full frame.
+ */
+ while (output.pos < output.size) {
+ size_t last_in_pos = input.pos;
+ size_t last_out_pos = output.pos;
+ zstd_ret = ZSTD_decompressStream(qcow_data->zstd_dstrm, &output, &input);
+
+ if (ZSTD_isError(zstd_ret)) {
+ ret = -EIO;
+ break;
+ }
+
+ /*
+ * The ZSTD manual is vague about what to do if it reads
+ * the buffer partially, and we don't want to get stuck
+ * in an infinite loop where ZSTD_decompressStream
+ * returns > 0 waiting for another input chunk. So, we add
+ * a check which ensures that the loop makes some progress
+ * on each step: if neither the input nor the output
+ * position advanced, the call is treated as a failure.
+ */
+ if (last_in_pos >= input.pos &&
+ last_out_pos >= output.pos) {
+ ret = -EIO;
+ break;
+ }
+ }
+ /*
+ * Make sure that we have the frame fully flushed here.
+ * If not, we somehow managed to get an uncompressed cluster
+ * greater than the cluster size, possibly because it is
+ * damaged.
+ */
+ if (zstd_ret > 0) {
+ ret = -EIO;
+ }
+
+out:
+ ASSERT(ret == 0 || ret == -EIO);
+ return ret;
+}
+#endif
+
+/*
+ * __qcow_file_fmt_buffer_decompress()
+ *
+ * Decompress @src_size bytes of data using the compression
+ * method defined by the image compression type. ZSTD is only
+ * available if CONFIG_ZSTD_DECOMPRESS is defined.
+ *
+ * @xlo_fmt - QCOW file format
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: the result of the decompression routine selected by the
+ * image compression type
+ * -EINVAL if the compression type is not supported
+ */
+static ssize_t __qcow_file_fmt_buffer_decompress(struct xloop_file_fmt *xlo_fmt,
+ void *dest,
+ size_t dest_size,
+ const void *src,
+ size_t src_size)
+{
+ struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+
+ /* hand the buffers over to the routine matching the image */
+ switch (qcow_data->compression_type) {
+ case QCOW_COMPRESSION_TYPE_ZLIB:
+ return __qcow_file_fmt_zlib_decompress(xlo_fmt, dest, dest_size,
+ src, src_size);
+
+#ifdef CONFIG_ZSTD_DECOMPRESS
+ case QCOW_COMPRESSION_TYPE_ZSTD:
+ return __qcow_file_fmt_zstd_decompress(xlo_fmt, dest, dest_size,
+ src, src_size);
+#endif
+
+ default:
+ return -EINVAL;
+ }
+}
+
+
static int __qcow_file_fmt_read_compressed(struct xloop_file_fmt *xlo_fmt,
struct bio_vec *bvec,
u64 file_cluster_offset,
@@ -783,8 +1116,9 @@ static int __qcow_file_fmt_read_bvec(struct xloop_file_fmt *xlo_fmt,
int ret;
unsigned int cur_bytes; /* number of bytes in current iteration */
u64 bytes;
- u64 cluster_offset = 0;
+ u64 host_offset = 0;
u64 bytes_done = 0;
+ enum xloop_file_fmt_qcow_subcluster_type type;
void *data;
unsigned long irq_flags;
ssize_t len;
@@ -797,8 +1131,8 @@ static int __qcow_file_fmt_read_bvec(struct xloop_file_fmt *xlo_fmt,
/* prepare next request */
cur_bytes = bytes;
- ret = xloop_file_fmt_qcow_cluster_get_offset(xlo_fmt, *ppos,
- &cur_bytes, &cluster_offset);
+ ret = xloop_file_fmt_qcow_get_host_offset(xlo_fmt, *ppos,
+ &cur_bytes, &host_offset, &type);
if (ret < 0) {
goto fail;
}
@@ -806,32 +1140,28 @@ static int __qcow_file_fmt_read_bvec(struct xloop_file_fmt *xlo_fmt,
offset_in_cluster = xloop_file_fmt_qcow_offset_into_cluster(
qcow_data, *ppos);
- switch (ret) {
- case QCOW_CLUSTER_UNALLOCATED:
- case QCOW_CLUSTER_ZERO_PLAIN:
- case QCOW_CLUSTER_ZERO_ALLOC:
+ switch (type) {
+ case QCOW_SUBCLUSTER_ZERO_PLAIN:
+ case QCOW_SUBCLUSTER_ZERO_ALLOC:
+ case QCOW_SUBCLUSTER_UNALLOCATED_PLAIN:
+ case QCOW_SUBCLUSTER_UNALLOCATED_ALLOC:
data = bvec_kmap_irq(bvec, &irq_flags) + bytes_done;
memset(data, 0, cur_bytes);
flush_dcache_page(bvec->bv_page);
bvec_kunmap_irq(data, &irq_flags);
break;
- case QCOW_CLUSTER_COMPRESSED:
+ case QCOW_SUBCLUSTER_COMPRESSED:
ret = __qcow_file_fmt_read_compressed(xlo_fmt, bvec,
- cluster_offset, *ppos, cur_bytes, bytes_done);
+ host_offset, *ppos, cur_bytes, bytes_done);
if (ret < 0) {
goto fail;
}
break;
- case QCOW_CLUSTER_NORMAL:
- if ((cluster_offset & 511) != 0) {
- ret = -EIO;
- goto fail;
- }
-
- pos_read = cluster_offset + offset_in_cluster;
+ case QCOW_SUBCLUSTER_NORMAL:
+ pos_read = host_offset;
data = bvec_kmap_irq(bvec, &irq_flags) + bytes_done;
len = kernel_read(xlo->xlo_backing_file, data, cur_bytes,
@@ -842,6 +1172,7 @@ static int __qcow_file_fmt_read_bvec(struct xloop_file_fmt *xlo_fmt,
if (len < 0)
return len;
+ ASSERT(len == cur_bytes);
break;
default:
diff --git a/kernel/xloop_file_fmt_qcow_main.h b/kernel/xloop_file_fmt_qcow_main.h
index e6031be..023c679 100644
--- a/kernel/xloop_file_fmt_qcow_main.h
+++ b/kernel/xloop_file_fmt_qcow_main.h
@@ -19,6 +19,9 @@
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/zlib.h>
+#ifdef CONFIG_ZSTD_DECOMPRESS
+#include <linux/zstd.h>
+#endif
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
@@ -80,6 +83,33 @@ do { \
/* The cluster reads as all zeros */
#define QCOW_OFLAG_ZERO (1ULL << 0)
+#define QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER 32
+
+/* Bitmap bit (bits 0..31) marking the subcluster X [0..31] as allocated */
+#define QCOW_OFLAG_SUB_ALLOC(X) (1ULL << (X))
+/* Bitmap bit (bits 32..63) marking the subcluster X [0..31] as reading as zeroes */
+#define QCOW_OFLAG_SUB_ZERO(X) (QCOW_OFLAG_SUB_ALLOC(X) << 32)
+/* Subclusters [X, Y) (0 <= X <= Y <= 32) are allocated */
+#define QCOW_OFLAG_SUB_ALLOC_RANGE(X, Y) \
+ (QCOW_OFLAG_SUB_ALLOC(Y) - QCOW_OFLAG_SUB_ALLOC(X))
+/* Subclusters [X, Y) (0 <= X <= Y <= 32) read as zeroes */
+#define QCOW_OFLAG_SUB_ZERO_RANGE(X, Y) \
+ (QCOW_OFLAG_SUB_ALLOC_RANGE(X, Y) << 32)
+/* L2 entry bitmap with all allocation bits (the lower 32 bits) set */
+#define QCOW_L2_BITMAP_ALL_ALLOC (QCOW_OFLAG_SUB_ALLOC_RANGE(0, 32))
+/* L2 entry bitmap with all "read as zeroes" bits (the upper 32 bits) set */
+#define QCOW_L2_BITMAP_ALL_ZEROES (QCOW_OFLAG_SUB_ZERO_RANGE(0, 32))
+
+/*
+ * Size in bytes of normal and extended L2 entries; an extended entry
+ * occupies two u64 words.
+ */
+#define QCOW_L2E_SIZE_NORMAL (sizeof(u64))
+#define QCOW_L2E_SIZE_EXTENDED (sizeof(u64) * 2)
+
+/* Size in bytes of L1 table entries */
+#define QCOW_L1E_SIZE (sizeof(u64))
+
+/* Size in bytes of reftable entries */
+#define QCOW_REFTABLE_ENTRY_SIZE (sizeof(u64))
+
#define QCOW_MIN_CLUSTER_BITS 9
#define QCOW_MAX_CLUSTER_BITS 21
@@ -104,7 +134,7 @@ do { \
/* Buffer size for debugfs file buffer to receive and display offset and
* cluster offset information */
#define QCOW_OFFSET_BUF_LEN 32
-#define QCOW_CLUSTER_BUF_LEN 128
+#define QCOW_CLUSTER_BUF_LEN 256
struct xloop_file_fmt_qcow_header {
u32 magic;
@@ -128,6 +158,12 @@ struct xloop_file_fmt_qcow_header {
u32 refcount_order;
u32 header_length;
+
+ /* Additional fields */
+ u8 compression_type;
+
+ /* header must be a multiple of 8 */
+ u8 padding[7];
} __attribute__((packed));
struct xloop_file_fmt_qcow_snapshot_header {
@@ -144,11 +180,11 @@ struct xloop_file_fmt_qcow_snapshot_header {
u64 vm_clock_nsec;
u32 vm_state_size;
- /* for extension */
- u32 extra_data_size;
- /* extra data follows */
- /* id_str follows */
- /* name follows */
+
+ /* Size of all extra data, including QCowSnapshotExtraData if available */
+ u32 extra_data_size;
+ /* Data beyond QCowSnapshotExtraData, if any */
+ void *unknown_extra_data;
} __attribute__((packed));
enum {
@@ -162,13 +198,19 @@ enum {
QCOW_INCOMPAT_DIRTY_BITNR = 0,
QCOW_INCOMPAT_CORRUPT_BITNR = 1,
QCOW_INCOMPAT_DATA_FILE_BITNR = 2,
+ QCOW_INCOMPAT_COMPRESSION_BITNR = 3,
+ QCOW_INCOMPAT_EXTL2_BITNR = 4,
QCOW_INCOMPAT_DIRTY = 1 << QCOW_INCOMPAT_DIRTY_BITNR,
QCOW_INCOMPAT_CORRUPT = 1 << QCOW_INCOMPAT_CORRUPT_BITNR,
QCOW_INCOMPAT_DATA_FILE = 1 << QCOW_INCOMPAT_DATA_FILE_BITNR,
+ QCOW_INCOMPAT_COMPRESSION = 1 << QCOW_INCOMPAT_COMPRESSION_BITNR,
+ QCOW_INCOMPAT_EXTL2 = 1 << QCOW_INCOMPAT_EXTL2_BITNR,
QCOW_INCOMPAT_MASK = QCOW_INCOMPAT_DIRTY
| QCOW_INCOMPAT_CORRUPT
- | QCOW_INCOMPAT_DATA_FILE,
+ | QCOW_INCOMPAT_DATA_FILE
+ | QCOW_INCOMPAT_COMPRESSION
+ | QCOW_INCOMPAT_EXTL2,
};
/* compatible feature bits */
@@ -190,12 +232,19 @@ enum {
QCOW_AUTOCLEAR_DATA_FILE_RAW,
};
+/* Compression methods that can be selected for a QCOW image */
+enum xloop_file_fmt_qcow_compression_type {
+ QCOW_COMPRESSION_TYPE_ZLIB = 0,
+ QCOW_COMPRESSION_TYPE_ZSTD = 1,
+};
+
struct xloop_file_fmt_qcow_data {
u64 size;
int cluster_bits;
int cluster_size;
- int cluster_sectors;
int l2_slice_size;
+ int subcluster_bits;
+ int subcluster_size;
+ int subclusters_per_cluster;
int l2_bits;
int l2_size;
int l1_size;
@@ -237,10 +286,27 @@ struct xloop_file_fmt_qcow_data {
u64 compatible_features;
u64 autoclear_features;
- struct z_stream_s *strm;
+ /* ZLIB specific data */
+ z_streamp zlib_dstrm;
+
+ /* ZSTD specific data */
+#ifdef CONFIG_ZSTD_DECOMPRESS
+ void *zstd_dworkspace;
+ ZSTD_DStream *zstd_dstrm;
+#endif
+
+ /* used to cache last compressed QCOW cluster */
u8 *cmp_out_buf;
u64 cmp_last_coffset;
+ /*
+ * Compression type used for the image. Default: 0 - ZLIB
+ * The image compression type is set on image creation.
+ * For now, the only way to change the compression type
+ * is to convert the image with the desired compression type set.
+ */
+ enum xloop_file_fmt_qcow_compression_type compression_type;
+
/* debugfs entries */
#ifdef CONFIG_DEBUG_FS
struct dentry *dbgfs_dir;
@@ -265,6 +331,34 @@ struct xloop_file_fmt_qcow_cow_region {
unsigned nb_bytes;
};
+/*
+ * In images with standard L2 entries all clusters are treated as if
+ * they had one subcluster so xloop_file_fmt_qcow_cluster_type and
+ * xloop_file_fmt_qcow_subcluster_type can be mapped to each other and
+ * have the exact same meaning (QCOW_SUBCLUSTER_UNALLOCATED_ALLOC cannot
+ * happen in these images).
+ *
+ * In images with extended L2 entries xloop_file_fmt_qcow_cluster_type
+ * refers to the complete cluster and xloop_file_fmt_qcow_subcluster_type
+ * to each of the individual subclusters, so there are several possible
+ * combinations:
+ *
+ * |--------------+---------------------------|
+ * | Cluster type | Possible subcluster types |
+ * |--------------+---------------------------|
+ * | UNALLOCATED | UNALLOCATED_PLAIN |
+ * | | ZERO_PLAIN |
+ * |--------------+---------------------------|
+ * | NORMAL | UNALLOCATED_ALLOC |
+ * | | ZERO_ALLOC |
+ * | | NORMAL |
+ * |--------------+---------------------------|
+ * | COMPRESSED | COMPRESSED |
+ * |--------------+---------------------------|
+ *
+ * QCOW_SUBCLUSTER_INVALID means that the L2 entry is incorrect and
+ * the image should be marked corrupt.
+ */
enum xloop_file_fmt_qcow_cluster_type {
QCOW_CLUSTER_UNALLOCATED,
QCOW_CLUSTER_ZERO_PLAIN,
@@ -273,6 +367,16 @@ enum xloop_file_fmt_qcow_cluster_type {
QCOW_CLUSTER_COMPRESSED,
};
+/*
+ * Type of a single subcluster. The numeric values are used as index
+ * into the name table of xloop_file_fmt_qcow_get_subcluster_name(),
+ * so the order must not be changed without adapting that table.
+ */
+enum xloop_file_fmt_qcow_subcluster_type {
+ QCOW_SUBCLUSTER_UNALLOCATED_PLAIN = 0,
+ QCOW_SUBCLUSTER_UNALLOCATED_ALLOC = 1,
+ QCOW_SUBCLUSTER_ZERO_PLAIN = 2,
+ QCOW_SUBCLUSTER_ZERO_ALLOC = 3,
+ QCOW_SUBCLUSTER_NORMAL = 4,
+ QCOW_SUBCLUSTER_COMPRESSED = 5,
+ QCOW_SUBCLUSTER_INVALID = 6,
+};
+
enum xloop_file_fmt_qcow_metadata_overlap {
QCOW_OL_MAIN_HEADER_BITNR = 0,
QCOW_OL_ACTIVE_L1_BITNR = 1,
@@ -314,25 +418,51 @@ enum xloop_file_fmt_qcow_metadata_overlap {
#define QCOW_OL_ALL \
(QCOW_OL_CACHED | QCOW_OL_INACTIVE_L2)
-#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
-#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
-#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
+#define QCOW_L1E_OFFSET_MASK 0x00fffffffffffe00ULL
+#define QCOW_L2E_OFFSET_MASK 0x00fffffffffffe00ULL
+#define QCOW_L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
-#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
+/*
+ * Returns true if the QCOW_INCOMPAT_EXTL2 bit is set in the incompatible
+ * features of the image, false otherwise.
+ */
+static inline bool xloop_file_fmt_qcow_has_subclusters(
+ struct xloop_file_fmt_qcow_data *qcow_data)
+{
+ return (qcow_data->incompatible_features & QCOW_INCOMPAT_EXTL2) != 0;
+}
+
+/*
+ * Returns the size in bytes of one L2 entry: QCOW_L2E_SIZE_EXTENDED for
+ * images with subclusters, QCOW_L2E_SIZE_NORMAL otherwise.
+ */
+static inline size_t xloop_file_fmt_qcow_l2_entry_size(
+ struct xloop_file_fmt_qcow_data *qcow_data)
+{
+ if (xloop_file_fmt_qcow_has_subclusters(qcow_data)) {
+ return QCOW_L2E_SIZE_EXTENDED;
+ }
+
+ return QCOW_L2E_SIZE_NORMAL;
+}
+
+/*
+ * Returns the L2 entry with index @idx of @l2_slice, converted from
+ * big endian to CPU byte order. @idx counts L2 entries, so it is scaled
+ * by the number of u64 words one entry occupies.
+ */
+static inline u64 xloop_file_fmt_qcow_get_l2_entry(
+ struct xloop_file_fmt_qcow_data *qcow_data, u64 *l2_slice, int idx)
+{
+ const size_t words_per_entry =
+ xloop_file_fmt_qcow_l2_entry_size(qcow_data) / sizeof(u64);
+
+ idx *= words_per_entry;
+ return be64_to_cpu(l2_slice[idx]);
+}
-#define INV_OFFSET (-1ULL)
+/*
+ * Returns the subcluster bitmap of the L2 entry with index @idx of
+ * @l2_slice, converted from big endian to CPU byte order. The bitmap is
+ * the u64 word following the L2 entry itself. Images without
+ * subclusters have no bitmap, 0 is returned for them.
+ */
+static inline u64 xloop_file_fmt_qcow_get_l2_bitmap(
+ struct xloop_file_fmt_qcow_data *qcow_data, u64 *l2_slice, int idx)
+{
+ if (!xloop_file_fmt_qcow_has_subclusters(qcow_data)) {
+ /* For convenience only; this value has no meaning. */
+ return 0;
+ }
+
+ idx *= xloop_file_fmt_qcow_l2_entry_size(qcow_data) / sizeof(u64);
+ return be64_to_cpu(l2_slice[idx + 1]);
+}
static inline bool xloop_file_fmt_qcow_has_data_file(
- struct xloop_file_fmt *xlo_fmt)
+ struct xloop_file_fmt_qcow_data *qcow_data)
{
/* At the moment, there is no support for copy on write! */
return false;
}
static inline bool xloop_file_fmt_qcow_data_file_is_raw(
- struct xloop_file_fmt *xlo_fmt)
+ struct xloop_file_fmt_qcow_data *qcow_data)
{
- struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
return !!(qcow_data->autoclear_features &
QCOW_AUTOCLEAR_DATA_FILE_RAW);
}
@@ -349,6 +479,12 @@ static inline s64 xloop_file_fmt_qcow_offset_into_cluster(
return offset & (qcow_data->cluster_size - 1);
}
+/*
+ * Returns @offset masked with (subcluster_size - 1), i.e. the position
+ * of @offset relative to the start of its subcluster.
+ */
+static inline s64 xloop_file_fmt_qcow_offset_into_subcluster(
+ struct xloop_file_fmt_qcow_data *qcow_data, s64 offset)
+{
+ const int subcluster_mask = qcow_data->subcluster_size - 1;
+
+ return offset & subcluster_mask;
+}
+
static inline s64 xloop_file_fmt_qcow_size_to_clusters(
struct xloop_file_fmt_qcow_data *qcow_data, u64 size)
{
@@ -382,6 +518,13 @@ static inline int xloop_file_fmt_qcow_offset_to_l2_slice_index(
(qcow_data->l2_slice_size - 1);
}
+/*
+ * Returns the index of the subcluster that contains @offset, counted
+ * from the start of the cluster the subcluster belongs to.
+ */
+static inline int xloop_file_fmt_qcow_offset_to_sc_index(
+ struct xloop_file_fmt_qcow_data *qcow_data, s64 offset)
+{
+ const s64 subcluster_nr = offset >> qcow_data->subcluster_bits;
+ const int index_mask = qcow_data->subclusters_per_cluster - 1;
+
+ return subcluster_nr & index_mask;
+}
+
static inline s64 xloop_file_fmt_qcow_vm_state_offset(
struct xloop_file_fmt_qcow_data *qcow_data)
{
@@ -390,22 +533,25 @@ static inline s64 xloop_file_fmt_qcow_vm_state_offset(
}
static inline enum xloop_file_fmt_qcow_cluster_type
-xloop_file_fmt_qcow_get_cluster_type(struct xloop_file_fmt *xlo_fmt, u64 l2_entry)
+xloop_file_fmt_qcow_get_cluster_type(struct xloop_file_fmt *xlo_fmt,
+ u64 l2_entry)
{
+ struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+
if (l2_entry & QCOW_OFLAG_COMPRESSED) {
return QCOW_CLUSTER_COMPRESSED;
} else if (l2_entry & QCOW_OFLAG_ZERO) {
- if (l2_entry & L2E_OFFSET_MASK) {
+ if (l2_entry & QCOW_L2E_OFFSET_MASK) {
return QCOW_CLUSTER_ZERO_ALLOC;
}
return QCOW_CLUSTER_ZERO_PLAIN;
- } else if (!(l2_entry & L2E_OFFSET_MASK)) {
+ } else if (!(l2_entry & QCOW_L2E_OFFSET_MASK)) {
/* Offset 0 generally means unallocated, but it is ambiguous
* with external data files because 0 is a valid offset there.
* However, all clusters in external data files always have
* refcount 1, so we can rely on QCOW_OFLAG_COPIED to
* disambiguate. */
- if (xloop_file_fmt_qcow_has_data_file(xlo_fmt) &&
+ if (xloop_file_fmt_qcow_has_data_file(qcow_data) &&
(l2_entry & QCOW_OFLAG_COPIED)) {
return QCOW_CLUSTER_NORMAL;
} else {
@@ -416,4 +562,85 @@ xloop_file_fmt_qcow_get_cluster_type(struct xloop_file_fmt *xlo_fmt, u64 l2_entr
}
}
+/*
+ * xloop_file_fmt_qcow_get_subcluster_type()
+ *
+ * Determine the type of subcluster @sc_index from an L2 entry and its
+ * subcluster bitmap.
+ *
+ * In an image without subclusters @l2_bitmap is ignored and
+ * @sc_index must be 0.
+ *
+ * @xlo_fmt - QCOW file format
+ * @l2_entry - L2 entry describing the cluster
+ * @l2_bitmap - subcluster bitmap belonging to @l2_entry
+ * @sc_index - index of the subcluster within the cluster
+ *
+ * Returns: the subcluster type
+ * QCOW_SUBCLUSTER_INVALID if an invalid L2 entry is detected
+ * (this checks the whole entry and bitmap, not only the bits
+ * related to subcluster @sc_index)
+ */
+static inline enum xloop_file_fmt_qcow_subcluster_type
+xloop_file_fmt_qcow_get_subcluster_type(struct xloop_file_fmt *xlo_fmt,
+ u64 l2_entry, u64 l2_bitmap, unsigned int sc_index)
+{
+ struct xloop_file_fmt_qcow_data *qcow_data = xlo_fmt->private_data;
+ enum xloop_file_fmt_qcow_cluster_type type =
+ xloop_file_fmt_qcow_get_cluster_type(xlo_fmt, l2_entry);
+ ASSERT(sc_index < qcow_data->subclusters_per_cluster);
+
+ if (xloop_file_fmt_qcow_has_subclusters(qcow_data)) {
+ switch (type) {
+ case QCOW_CLUSTER_COMPRESSED:
+ return QCOW_SUBCLUSTER_COMPRESSED;
+ case QCOW_CLUSTER_NORMAL:
+ if ((l2_bitmap >> 32) & l2_bitmap) {
+ /* a subcluster is flagged as zero and allocated at once */
+ return QCOW_SUBCLUSTER_INVALID;
+ } else if (l2_bitmap & QCOW_OFLAG_SUB_ZERO(sc_index)) {
+ return QCOW_SUBCLUSTER_ZERO_ALLOC;
+ } else if (l2_bitmap & QCOW_OFLAG_SUB_ALLOC(sc_index)) {
+ return QCOW_SUBCLUSTER_NORMAL;
+ } else {
+ return QCOW_SUBCLUSTER_UNALLOCATED_ALLOC;
+ }
+ case QCOW_CLUSTER_UNALLOCATED:
+ if (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC) {
+ /* allocation bits set although the cluster is unallocated */
+ return QCOW_SUBCLUSTER_INVALID;
+ } else if (l2_bitmap & QCOW_OFLAG_SUB_ZERO(sc_index)) {
+ return QCOW_SUBCLUSTER_ZERO_PLAIN;
+ } else {
+ return QCOW_SUBCLUSTER_UNALLOCATED_PLAIN;
+ }
+ case QCOW_CLUSTER_ZERO_PLAIN:
+ case QCOW_CLUSTER_ZERO_ALLOC:
+ /*
+ * xloop_file_fmt_qcow_get_cluster_type() reports these
+ * types whenever QCOW_OFLAG_ZERO is set in the L2 entry,
+ * so a damaged image can get here. With subclusters the
+ * zero state is expressed by the bitmap, hence the entry
+ * is reported as invalid instead of asserting.
+ */
+ return QCOW_SUBCLUSTER_INVALID;
+ default:
+ /* not reachable */
+ ASSERT(false);
+ return QCOW_SUBCLUSTER_INVALID;
+ }
+ } else {
+ switch (type) {
+ case QCOW_CLUSTER_COMPRESSED:
+ return QCOW_SUBCLUSTER_COMPRESSED;
+ case QCOW_CLUSTER_ZERO_PLAIN:
+ return QCOW_SUBCLUSTER_ZERO_PLAIN;
+ case QCOW_CLUSTER_ZERO_ALLOC:
+ return QCOW_SUBCLUSTER_ZERO_ALLOC;
+ case QCOW_CLUSTER_NORMAL:
+ return QCOW_SUBCLUSTER_NORMAL;
+ case QCOW_CLUSTER_UNALLOCATED:
+ return QCOW_SUBCLUSTER_UNALLOCATED_PLAIN;
+ default:
+ /* not reachable */
+ ASSERT(false);
+ return QCOW_SUBCLUSTER_INVALID;
+ }
+ }
+}
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * xloop_file_fmt_qcow_get_subcluster_name()
+ *
+ * Translate a subcluster type into its printable name.
+ *
+ * @type - subcluster type, used as index into the name table
+ *
+ * Returns: the name of @type
+ * the name of QCOW_SUBCLUSTER_INVALID if @type lies outside of
+ * the name table
+ */
+static inline const char *xloop_file_fmt_qcow_get_subcluster_name(
+ const enum xloop_file_fmt_qcow_subcluster_type type)
+{
+ static const char * const subcluster_names[] = {
+ "QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN",
+ "QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC",
+ "QCOW2_SUBCLUSTER_ZERO_PLAIN",
+ "QCOW2_SUBCLUSTER_ZERO_ALLOC",
+ "QCOW2_SUBCLUSTER_NORMAL",
+ "QCOW2_SUBCLUSTER_COMPRESSED",
+ "QCOW2_SUBCLUSTER_INVALID"
+ };
+ const size_t nr_names =
+ sizeof(subcluster_names) / sizeof(subcluster_names[0]);
+
+ /* never read past the table, whatever value @type carries */
+ if ((size_t)type >= nr_names) {
+ return subcluster_names[QCOW_SUBCLUSTER_INVALID];
+ }
+
+ return subcluster_names[type];
+}
+#endif
+
#endif