author    Manuel Bentele  2019-07-27 16:54:39 +0200
committer Manuel Bentele  2019-08-21 22:02:53 +0200
commit    d7de47156f3d05356d4c305574055a0e30dacc24 (patch)
tree      96508124a904223b307891c832e2bcbed713f216
parent    block: loop: fix wrong file names in header comments (diff)
block: loop: add QCOW2 file format (readonly)
The QCOW2 file format is added as a new file format module to the existing loop device file format subsystem. The implementation is based on the original QCOW2 code of the QEMU project and was ported to Linux kernel space. It supports reading regular QCOW2 disk images as well as sparse and/or compressed ones. Write support is not ported yet. Discard, flush and asynchronous (aio) reads and writes are missing as well, which is why the loop device cannot be configured to use direct IO with this implementation.

Signed-off-by: Manuel Bentele <development@manuel-bentele.de>
-rw-r--r--  drivers/block/loop/Kconfig                       |   2
-rw-r--r--  drivers/block/loop/Makefile                      |  12
-rw-r--r--  drivers/block/loop/loop_file_fmt.c               | 104
-rw-r--r--  drivers/block/loop/loop_file_fmt.h               |  27
-rw-r--r--  drivers/block/loop/loop_file_fmt_qcow.c          | 106
-rw-r--r--  drivers/block/loop/loop_file_fmt_qcow_cache.c    | 216
-rw-r--r--  drivers/block/loop/loop_file_fmt_qcow_cache.h    |  51
-rw-r--r--  drivers/block/loop/loop_file_fmt_qcow_cluster.c  | 270
-rw-r--r--  drivers/block/loop/loop_file_fmt_qcow_cluster.h  |  23
-rw-r--r--  drivers/block/loop/loop_file_fmt_qcow_main.c     | 785
-rw-r--r--  drivers/block/loop/loop_file_fmt_qcow_main.h     | 388
-rw-r--r--  drivers/block/loop/loop_file_fmt_raw.c           |  47
-rw-r--r--  drivers/block/loop/loop_main.c                   |  64
13 files changed, 1885 insertions(+), 210 deletions(-)
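For orientation, the following is a minimal, untested sketch of how a file format driver plugs into the subsystem touched by this patch. It uses only the registration API visible in loop_file_fmt.h/loop_file_fmt.c below (struct loop_file_fmt_driver, struct loop_file_fmt_ops, loop_file_fmt_register_driver()); the EXAMPLE name, the trivial callbacks and the LO_FILE_FMT_RAW type constant are placeholders assumed from the RAW driver option, not part of this patch.

/*
 * Minimal sketch of a loop file format driver for the subsystem added by
 * this series. Callback and struct names follow loop_file_fmt.h; the
 * format type and the callback bodies are illustrative only.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>

#include "loop_file_fmt.h"

static int example_fmt_init(struct loop_file_fmt *lo_fmt)
{
	/* allocate per-format state and store it in lo_fmt->private_data */
	return 0;
}

static int example_fmt_read(struct loop_file_fmt *lo_fmt, struct request *rq)
{
	/* translate the block request into reads on lo->lo_backing_file */
	return 0;
}

static struct loop_file_fmt_ops example_fmt_ops = {
	.init = example_fmt_init,
	.read = example_fmt_read,
	/* unset I/O callbacks make the subsystem return -EIO;
	 * unset init/flush/sector_size callbacks are treated as no-ops */
};

static struct loop_file_fmt_driver example_fmt_driver = {
	.name          = "EXAMPLE",
	.file_fmt_type = LO_FILE_FMT_RAW, /* placeholder type constant */
	.ops           = &example_fmt_ops,
	.owner         = THIS_MODULE,
};

static int __init example_fmt_module_init(void)
{
	return loop_file_fmt_register_driver(&example_fmt_driver);
}

static void __exit example_fmt_module_exit(void)
{
	loop_file_fmt_unregister_driver(&example_fmt_driver);
}

module_init(example_fmt_module_init);
module_exit(example_fmt_module_exit);
MODULE_LICENSE("GPL");
MODULE_SOFTDEP("pre: loop");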
diff --git a/drivers/block/loop/Kconfig b/drivers/block/loop/Kconfig
index 3aedc74e5f9d..238ae96189d7 100644
--- a/drivers/block/loop/Kconfig
+++ b/drivers/block/loop/Kconfig
@@ -86,6 +86,8 @@ config BLK_DEV_LOOP_FILE_FMT_RAW
config BLK_DEV_LOOP_FILE_FMT_QCOW
tristate "Loop device QCOW file format support"
depends on BLK_DEV_LOOP
+ select ZLIB_INFLATE
+ select ZLIB_DEFLATE
---help---
Say Y or M here if you want to enable the QEMU's copy on write (QCOW)
file format support of the loop device module.
diff --git a/drivers/block/loop/Makefile b/drivers/block/loop/Makefile
index 2a7eeca32a78..a82cd7a06226 100644
--- a/drivers/block/loop/Makefile
+++ b/drivers/block/loop/Makefile
@@ -1,9 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
-loop-y += loop_main.o loop_file_fmt.o
-obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
+loop-y += loop_main.o loop_file_fmt.o
+obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
-obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
+obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
-obj-$(CONFIG_BLK_DEV_LOOP_FILE_FMT_RAW) += loop_file_fmt_raw.o
-obj-$(CONFIG_BLK_DEV_LOOP_FILE_FMT_QCOW) += loop_file_fmt_qcow.o
\ No newline at end of file
+obj-$(CONFIG_BLK_DEV_LOOP_FILE_FMT_RAW) += loop_file_fmt_raw.o
+
+loop_file_fmt_qcow-y += loop_file_fmt_qcow_main.o loop_file_fmt_qcow_cluster.o loop_file_fmt_qcow_cache.o
+obj-$(CONFIG_BLK_DEV_LOOP_FILE_FMT_QCOW) += loop_file_fmt_qcow.o
diff --git a/drivers/block/loop/loop_file_fmt.c b/drivers/block/loop/loop_file_fmt.c
index c65de0e52889..26427b700233 100644
--- a/drivers/block/loop/loop_file_fmt.c
+++ b/drivers/block/loop/loop_file_fmt.c
@@ -4,7 +4,7 @@
*
* File format subsystem for the loop device module.
*
- * Copyright (C) 2019 Manuel Bentele
+ * Copyright (C) 2019 Manuel Bentele <development@manuel-bentele.de>
*/
#include <linux/kernel.h>
@@ -29,11 +29,11 @@ int loop_file_fmt_register_driver(struct loop_file_fmt_driver *drv)
if (loop_file_fmt_drivers[drv->file_fmt_type] == NULL) {
loop_file_fmt_drivers[drv->file_fmt_type] = drv;
- printk(KERN_INFO "loop: successfully registered file format "
- "driver %s\n", drv->name);
+ printk(KERN_INFO "loop_file_fmt: successfully registered file "
+ "format driver %s", drv->name);
} else {
- printk(KERN_WARNING "loop: driver for file format already "
- "registered\n");
+ printk(KERN_WARNING "loop_file_fmt: driver for file format "
+ "already registered");
ret = -EBUSY;
}
@@ -50,8 +50,8 @@ void loop_file_fmt_unregister_driver(struct loop_file_fmt_driver *drv)
return;
loop_file_fmt_drivers[drv->file_fmt_type] = NULL;
- printk(KERN_INFO "loop: successfully unregistered file format driver "
- "%s\n", drv->name);
+ printk(KERN_INFO "loop_file_fmt: successfully unregistered file "
+ "format driver %s", drv->name);
}
EXPORT_SYMBOL(loop_file_fmt_unregister_driver);
@@ -82,34 +82,37 @@ struct loop_device *loop_file_fmt_get_lo(struct loop_file_fmt *lo_fmt)
}
EXPORT_SYMBOL(loop_file_fmt_get_lo);
-int loop_file_fmt_init(struct loop_file_fmt *lo_fmt)
+int loop_file_fmt_init(struct loop_file_fmt *lo_fmt,
+ u32 file_fmt_type)
{
- struct loop_file_fmt_ops* ops;
+ struct loop_file_fmt_ops *ops;
struct module *drv;
- if (lo_fmt->file_fmt_type > MAX_LO_FILE_FMT)
+ if (file_fmt_type > MAX_LO_FILE_FMT)
return -EINVAL;
+ lo_fmt->file_fmt_type = file_fmt_type;
+
if (lo_fmt->file_fmt_state != file_fmt_uninitialized) {
printk(KERN_WARNING "loop_file_fmt: file format is "
- "initialized already\n");
+ "initialized already");
return -EINVAL;
}
/* check if new file format driver is registered */
if (loop_file_fmt_drivers[lo_fmt->file_fmt_type] == NULL) {
printk(KERN_ERR "loop_file_fmt: file format driver is not "
- "available\n");
+ "available");
return -ENODEV;
}
- printk(KERN_INFO "loop_file_fmt: use file format driver %s\n",
+ printk(KERN_INFO "loop_file_fmt: use file format driver %s",
loop_file_fmt_drivers[lo_fmt->file_fmt_type]->name);
drv = loop_file_fmt_drivers[lo_fmt->file_fmt_type]->owner;
if (!try_module_get(drv)) {
printk(KERN_ERR "loop_file_fmt: file format driver %s can not "
- "be accessed\n",
+ "be accessed",
loop_file_fmt_drivers[lo_fmt->file_fmt_type]->name);
return -ENODEV;
}
@@ -121,18 +124,18 @@ int loop_file_fmt_init(struct loop_file_fmt *lo_fmt)
ops = loop_file_fmt_drivers[lo_fmt->file_fmt_type]->ops;
if (likely(ops->init))
return ops->init(lo_fmt);
- else
- return -ENOSYS;
+
+ return 0;
}
void loop_file_fmt_exit(struct loop_file_fmt *lo_fmt)
{
- struct loop_file_fmt_ops* ops;
+ struct loop_file_fmt_ops *ops;
struct module *drv;
if (lo_fmt->file_fmt_state != file_fmt_initialized) {
printk(KERN_WARNING "loop_file_fmt: file format is "
- "uninitialized already\n");
+ "uninitialized already");
return;
}
@@ -151,11 +154,11 @@ void loop_file_fmt_exit(struct loop_file_fmt *lo_fmt)
int loop_file_fmt_read(struct loop_file_fmt *lo_fmt,
struct request *rq)
{
- struct loop_file_fmt_ops* ops;
+ struct loop_file_fmt_ops *ops;
if (unlikely(lo_fmt->file_fmt_state != file_fmt_initialized)) {
printk(KERN_ERR "loop_file_fmt: file format is "
- "not initialized, can not read\n");
+ "not initialized, can not read");
return -EINVAL;
}
@@ -163,17 +166,17 @@ int loop_file_fmt_read(struct loop_file_fmt *lo_fmt,
if (likely(ops->read))
return ops->read(lo_fmt, rq);
else
- return -ENOSYS;
+ return -EIO;
}
int loop_file_fmt_read_aio(struct loop_file_fmt *lo_fmt,
struct request *rq)
{
- struct loop_file_fmt_ops* ops;
+ struct loop_file_fmt_ops *ops;
if (unlikely(lo_fmt->file_fmt_state != file_fmt_initialized)) {
printk(KERN_ERR "loop_file_fmt: file format is "
- "not initialized, can not read aio\n");
+ "not initialized, can not read aio");
return -EINVAL;
}
@@ -181,17 +184,17 @@ int loop_file_fmt_read_aio(struct loop_file_fmt *lo_fmt,
if (likely(ops->read_aio))
return ops->read_aio(lo_fmt, rq);
else
- return -ENOSYS;
+ return -EIO;
}
int loop_file_fmt_write(struct loop_file_fmt *lo_fmt,
struct request *rq)
{
- struct loop_file_fmt_ops* ops;
+ struct loop_file_fmt_ops *ops;
if (unlikely(lo_fmt->file_fmt_state != file_fmt_initialized)) {
printk(KERN_ERR "loop_file_fmt: file format is "
- "not initialized, can not write\n");
+ "not initialized, can not write");
return -EINVAL;
}
@@ -199,17 +202,17 @@ int loop_file_fmt_write(struct loop_file_fmt *lo_fmt,
if (likely(ops->write))
return ops->write(lo_fmt, rq);
else
- return -ENOSYS;
+ return -EIO;
}
int loop_file_fmt_write_aio(struct loop_file_fmt *lo_fmt,
struct request *rq)
{
- struct loop_file_fmt_ops* ops;
+ struct loop_file_fmt_ops *ops;
if (unlikely(lo_fmt->file_fmt_state != file_fmt_initialized)) {
printk(KERN_ERR "loop_file_fmt: file format is "
- "not initialized, can not write aio\n");
+ "not initialized, can not write aio");
return -EINVAL;
}
@@ -217,17 +220,17 @@ int loop_file_fmt_write_aio(struct loop_file_fmt *lo_fmt,
if (likely(ops->write_aio))
return ops->write_aio(lo_fmt, rq);
else
- return -ENOSYS;
+ return -EIO;
}
int loop_file_fmt_discard(struct loop_file_fmt *lo_fmt,
struct request *rq)
{
- struct loop_file_fmt_ops* ops;
+ struct loop_file_fmt_ops *ops;
if (unlikely(lo_fmt->file_fmt_state != file_fmt_initialized)) {
printk(KERN_ERR "loop_file_fmt: file format is "
- "not initialized, can not discard\n");
+ "not initialized, can not discard");
return -EINVAL;
}
@@ -235,29 +238,45 @@ int loop_file_fmt_discard(struct loop_file_fmt *lo_fmt,
if (likely(ops->discard))
return ops->discard(lo_fmt, rq);
else
- return -ENOSYS;
+ return -EIO;
}
-int loop_file_fmt_flush(struct loop_file_fmt *lo_fmt,
- struct request *rq)
+int loop_file_fmt_flush(struct loop_file_fmt *lo_fmt)
{
- struct loop_file_fmt_ops* ops;
+ struct loop_file_fmt_ops *ops;
if (unlikely(lo_fmt->file_fmt_state != file_fmt_initialized)) {
printk(KERN_ERR "loop_file_fmt: file format is "
- "not initialized, can not flush\n");
+ "not initialized, can not flush");
return -EINVAL;
}
ops = loop_file_fmt_drivers[lo_fmt->file_fmt_type]->ops;
if (likely(ops->flush))
- return ops->flush(lo_fmt, rq);
+ return ops->flush(lo_fmt);
+
+ return 0;
+}
+
+loff_t loop_file_fmt_sector_size(struct loop_file_fmt *lo_fmt)
+{
+ struct loop_file_fmt_ops *ops;
+
+ if (unlikely(lo_fmt->file_fmt_state != file_fmt_initialized)) {
+ printk(KERN_ERR "loop_file_fmt: file format is "
+ "not initialized, can not read sector size");
+ return 0;
+ }
+
+ ops = loop_file_fmt_drivers[lo_fmt->file_fmt_type]->ops;
+ if (likely(ops->sector_size))
+ return ops->sector_size(lo_fmt);
else
- return -ENOSYS;
+ return 0;
}
int loop_file_fmt_change(struct loop_file_fmt *lo_fmt,
- __u32 file_fmt_type_new)
+ u32 file_fmt_type_new)
{
if (file_fmt_type_new > MAX_LO_FILE_FMT)
return -EINVAL;
@@ -269,11 +288,10 @@ int loop_file_fmt_change(struct loop_file_fmt *lo_fmt,
/* Load the new file format driver because the file format is
* uninitialized now */
- lo_fmt->file_fmt_type = file_fmt_type_new;
- return loop_file_fmt_init(lo_fmt);
+ return loop_file_fmt_init(lo_fmt, file_fmt_type_new);
}
-ssize_t loop_file_fmt_print_type(__u32 file_fmt_type, char *file_fmt_name)
+ssize_t loop_file_fmt_print_type(u32 file_fmt_type, char *file_fmt_name)
{
ssize_t len = 0;
diff --git a/drivers/block/loop/loop_file_fmt.h b/drivers/block/loop/loop_file_fmt.h
index 67719bcd1b6a..208c8f7cbc31 100644
--- a/drivers/block/loop/loop_file_fmt.h
+++ b/drivers/block/loop/loop_file_fmt.h
@@ -4,7 +4,7 @@
*
* File format subsystem for the loop device module.
*
- * Copyright (C) 2019 Manuel Bentele
+ * Copyright (C) 2019 Manuel Bentele <development@manuel-bentele.de>
*/
#ifndef _LINUX_LOOP_FILE_FMT_H
@@ -30,16 +30,15 @@ struct loop_file_fmt_ops {
int (*discard) (struct loop_file_fmt *lo_fmt,
struct request *rq);
- int (*flush) (struct loop_file_fmt *lo_fmt,
- struct request *rq);
-};
+ int (*flush) (struct loop_file_fmt *lo_fmt);
-/* states of the file format */
+ loff_t (*sector_size) (struct loop_file_fmt *lo_fmt);
+};
/* data structure for implementing file format drivers */
struct loop_file_fmt_driver {
const char *name;
- const __u32 file_fmt_type;
+ const u32 file_fmt_type;
struct loop_file_fmt_ops *ops;
struct module *owner;
};
@@ -64,6 +63,8 @@ struct loop_file_fmt_driver {
* initialized ------------------------------> initialized
* loop_file_fmt_flush(...)
* initialized ------------------------------> initialized
+ * loop_file_fmt_sector_size(...)
+ * initialized ------------------------------> initialized
*
* loop_file_fmt_change(...)
* +-----------------------------------------------------------+
@@ -78,7 +79,7 @@ enum {
/* data structure for using with the file format subsystem */
struct loop_file_fmt {
- __u32 file_fmt_type;
+ u32 file_fmt_type;
int file_fmt_state;
struct loop_device *lo;
void *private_data;
@@ -96,7 +97,8 @@ extern int loop_file_fmt_set_lo(struct loop_file_fmt *lo_fmt,
struct loop_device *lo);
extern struct loop_device *loop_file_fmt_get_lo(struct loop_file_fmt *lo_fmt);
-extern int loop_file_fmt_init(struct loop_file_fmt *lo_fmt);
+extern int loop_file_fmt_init(struct loop_file_fmt *lo_fmt,
+ u32 file_fmt_type);
extern void loop_file_fmt_exit(struct loop_file_fmt *lo_fmt);
extern int loop_file_fmt_read(struct loop_file_fmt *lo_fmt,
@@ -110,14 +112,15 @@ extern int loop_file_fmt_write_aio(struct loop_file_fmt *lo_fmt,
extern int loop_file_fmt_discard(struct loop_file_fmt *lo_fmt,
struct request *rq);
-extern int loop_file_fmt_flush(struct loop_file_fmt *lo_fmt,
- struct request *rq);
+extern int loop_file_fmt_flush(struct loop_file_fmt *lo_fmt);
+
+extern loff_t loop_file_fmt_sector_size(struct loop_file_fmt *lo_fmt);
extern int loop_file_fmt_change(struct loop_file_fmt *lo_fmt,
- __u32 file_fmt_type_new);
+ u32 file_fmt_type_new);
/* helper functions of the subsystem */
-extern ssize_t loop_file_fmt_print_type(__u32 file_fmt_type,
+extern ssize_t loop_file_fmt_print_type(u32 file_fmt_type,
char *file_fmt_name);
#endif
diff --git a/drivers/block/loop/loop_file_fmt_qcow.c b/drivers/block/loop/loop_file_fmt_qcow.c
deleted file mode 100644
index 18fb0565f44a..000000000000
--- a/drivers/block/loop/loop_file_fmt_qcow.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * loop_file_fmt_qcow.c
- *
- * QCOW file format driver for the loop device module.
- *
- * Copyright (C) 2019 Manuel Bentele
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-
-#include "loop_file_fmt.h"
-
-static int qcow_file_fmt_init(struct loop_file_fmt *lo_fmt)
-{
- printk(KERN_INFO "loop_file_fmt_qcow: init QCOW file format");
- return 0;
-}
-
-static void qcow_file_fmt_exit(struct loop_file_fmt *lo_fmt)
-{
- printk(KERN_INFO "loop_file_fmt_qcow: exit QCOW file format");
- return;
-}
-
-static int qcow_file_fmt_read(struct loop_file_fmt *lo_fmt,
- struct request *rq)
-{
- printk(KERN_INFO "loop_file_fmt_qcow: read QCOW file format");
- return 0;
-}
-
-static int qcow_file_fmt_read_aio(struct loop_file_fmt *lo_fmt,
- struct request *rq)
-{
- printk(KERN_INFO "loop_file_fmt_qcow: read (aio) QCOW file format");
- return 0;
-}
-
-static int qcow_file_fmt_write(struct loop_file_fmt *lo_fmt,
- struct request *rq)
-{
- printk(KERN_INFO "loop_file_fmt_qcow: write QCOW file format");
- return 0;
-}
-
-static int qcow_file_fmt_write_aio(struct loop_file_fmt *lo_fmt,
- struct request *rq)
-{
- printk(KERN_INFO "loop_file_fmt_qcow: write (aio) QCOW file format");
- return 0;
-}
-
-static int qcow_file_fmt_discard(struct loop_file_fmt *lo_fmt,
- struct request *rq)
-{
- printk(KERN_INFO "loop_file_fmt_qcow: discard QCOW file format");
- return 0;
-}
-
-static int qcow_file_fmt_flush(struct loop_file_fmt *lo_fmt,
- struct request *rq)
-{
- printk(KERN_INFO "loop_file_fmt_qcow: flush QCOW file format");
- return 0;
-}
-
-static struct loop_file_fmt_ops qcow_file_fmt_ops = {
- .init = qcow_file_fmt_init,
- .exit = qcow_file_fmt_exit,
- .read = qcow_file_fmt_read,
- .write = qcow_file_fmt_write,
- .read_aio = qcow_file_fmt_read_aio,
- .write_aio = qcow_file_fmt_write_aio,
- .discard = qcow_file_fmt_discard,
- .flush = qcow_file_fmt_flush
-};
-
-static struct loop_file_fmt_driver qcow_file_fmt_driver = {
- .name = "QCOW",
- .file_fmt_type = LO_FILE_FMT_QCOW,
- .ops = &qcow_file_fmt_ops,
- .owner = THIS_MODULE
-};
-
-static int __init loop_file_fmt_qcow_init(void)
-{
- printk(KERN_INFO "loop_file_fmt_qcow: init loop device QCOW file format driver");
- return loop_file_fmt_register_driver(&qcow_file_fmt_driver);
-}
-
-static void __exit loop_file_fmt_qcow_exit(void)
-{
- printk(KERN_INFO "loop_file_fmt_qcow: exit loop device QCOW file format driver");
- loop_file_fmt_unregister_driver(&qcow_file_fmt_driver);
-}
-
-module_init(loop_file_fmt_qcow_init);
-module_exit(loop_file_fmt_qcow_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Manuel Bentele <development@manuel-bentele.de>");
-MODULE_DESCRIPTION("Loop device QCOW file format driver");
-MODULE_SOFTDEP("pre: loop");
diff --git a/drivers/block/loop/loop_file_fmt_qcow_cache.c b/drivers/block/loop/loop_file_fmt_qcow_cache.c
new file mode 100644
index 000000000000..658c8582ef56
--- /dev/null
+++ b/drivers/block/loop/loop_file_fmt_qcow_cache.c
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * loop_file_fmt_qcow_cache.c
+ *
+ * QCOW file format driver for the loop device module.
+ *
+ * Ported QCOW2 implementation of the QEMU project (GPL-2.0):
+ * L2/refcount table cache for the QCOW2 format.
+ *
+ * The copyright (C) 2010 of the original code is owned by
+ * Kevin Wolf <kwolf@redhat.com>
+ *
+ * Copyright (C) 2019 Manuel Bentele <development@manuel-bentele.de>
+ */
+
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/types.h>
+#include <linux/limits.h>
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+
+#include "loop_file_fmt_qcow_main.h"
+#include "loop_file_fmt_qcow_cache.h"
+
+static inline void *__loop_file_fmt_qcow_cache_get_table_addr(
+ struct loop_file_fmt_qcow_cache *c, int table)
+{
+ return (u8 *) c->table_array + (size_t) table * c->table_size;
+}
+
+static inline int __loop_file_fmt_qcow_cache_get_table_idx(
+ struct loop_file_fmt_qcow_cache *c, void *table)
+{
+ ptrdiff_t table_offset = (u8 *) table - (u8 *) c->table_array;
+ int idx = table_offset / c->table_size;
+ ASSERT(idx >= 0 && idx < c->size && table_offset % c->table_size == 0);
+ return idx;
+}
+
+static inline const char *__loop_file_fmt_qcow_cache_get_name(
+ struct loop_file_fmt *lo_fmt, struct loop_file_fmt_qcow_cache *c)
+{
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+
+ if (c == qcow_data->refcount_block_cache) {
+ return "refcount block";
+ } else if (c == qcow_data->l2_table_cache) {
+ return "L2 table";
+ } else {
+ /* do not abort, because this is not critical */
+ return "unknown";
+ }
+}
+
+struct loop_file_fmt_qcow_cache *loop_file_fmt_qcow_cache_create(
+ struct loop_file_fmt *lo_fmt, int num_tables, unsigned table_size)
+{
+#ifdef CONFIG_DEBUG_DRIVER
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+#endif
+ struct loop_file_fmt_qcow_cache *c;
+
+ ASSERT(num_tables > 0);
+ ASSERT(is_power_of_2(table_size));
+ ASSERT(table_size >= (1 << QCOW_MIN_CLUSTER_BITS));
+ ASSERT(table_size <= qcow_data->cluster_size);
+
+ c = kzalloc(sizeof(*c), GFP_KERNEL);
+ if (!c) {
+ return NULL;
+ }
+
+ c->size = num_tables;
+ c->table_size = table_size;
+ c->entries = vzalloc(sizeof(struct loop_file_fmt_qcow_cache_table) *
+ num_tables);
+ c->table_array = vzalloc(num_tables * c->table_size);
+
+ if (!c->entries || !c->table_array) {
+ vfree(c->table_array);
+ vfree(c->entries);
+ kfree(c);
+ c = NULL;
+ }
+
+ return c;
+}
+
+void loop_file_fmt_qcow_cache_destroy(struct loop_file_fmt *lo_fmt)
+{
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+ struct loop_file_fmt_qcow_cache *c = qcow_data->l2_table_cache;
+ int i;
+
+ for (i = 0; i < c->size; i++) {
+ ASSERT(c->entries[i].ref == 0);
+ }
+
+ vfree(c->table_array);
+ vfree(c->entries);
+ kfree(c);
+}
+
+static int __loop_file_fmt_qcow_cache_entry_flush(
+ struct loop_file_fmt_qcow_cache *c, int i)
+{
+ if (!c->entries[i].dirty || !c->entries[i].offset) {
+ return 0;
+ } else {
+ printk(KERN_ERR "loop_file_fmt_qcow: Flush dirty cache tables "
+ "is not supported yet\n");
+ return -ENOSYS;
+ }
+}
+
+static int __loop_file_fmt_qcow_cache_do_get(struct loop_file_fmt *lo_fmt,
+ struct loop_file_fmt_qcow_cache *c, u64 offset, void **table,
+ bool read_from_disk)
+{
+ struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt);
+ int i;
+ int ret;
+ int lookup_index;
+ u64 min_lru_counter = U64_MAX;
+ int min_lru_index = -1;
+ ssize_t len;
+
+ ASSERT(offset != 0);
+
+ if (!IS_ALIGNED(offset, c->table_size)) {
+ printk_ratelimited(KERN_ERR "loop_file_fmt_qcow: Cannot get "
+ "entry from %s cache: offset %llx is unaligned\n",
+ __loop_file_fmt_qcow_cache_get_name(lo_fmt, c),
+ offset);
+ return -EIO;
+ }
+
+ /* Check if the table is already cached */
+ i = lookup_index = (offset / c->table_size * 4) % c->size;
+ do {
+ const struct loop_file_fmt_qcow_cache_table *t =
+ &c->entries[i];
+ if (t->offset == offset) {
+ goto found;
+ }
+ if (t->ref == 0 && t->lru_counter < min_lru_counter) {
+ min_lru_counter = t->lru_counter;
+ min_lru_index = i;
+ }
+ if (++i == c->size) {
+ i = 0;
+ }
+ } while (i != lookup_index);
+
+ if (min_lru_index == -1) {
+ BUG();
+ panic("Oops: This can't happen in current synchronous code, "
+ "but leave the check here as a reminder for whoever "
+ "starts using AIO with the QCOW cache");
+ }
+
+ /* Cache miss: write a table back and replace it */
+ i = min_lru_index;
+
+ ret = __loop_file_fmt_qcow_cache_entry_flush(c, i);
+ if (ret < 0) {
+ return ret;
+ }
+
+ c->entries[i].offset = 0;
+ if (read_from_disk) {
+ len = kernel_read(lo->lo_backing_file,
+ __loop_file_fmt_qcow_cache_get_table_addr(c, i),
+ c->table_size, &offset);
+ if (len < 0) {
+ ret = len;
+ return ret;
+ }
+ }
+
+ c->entries[i].offset = offset;
+
+ /* And return the right table */
+found:
+ c->entries[i].ref++;
+ *table = __loop_file_fmt_qcow_cache_get_table_addr(c, i);
+
+ return 0;
+}
+
+int loop_file_fmt_qcow_cache_get(struct loop_file_fmt *lo_fmt, u64 offset,
+ void **table)
+{
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+ struct loop_file_fmt_qcow_cache *c = qcow_data->l2_table_cache;
+
+ return __loop_file_fmt_qcow_cache_do_get(lo_fmt, c, offset, table,
+ true);
+}
+
+void loop_file_fmt_qcow_cache_put(struct loop_file_fmt *lo_fmt, void **table)
+{
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+ struct loop_file_fmt_qcow_cache *c = qcow_data->l2_table_cache;
+ int i = __loop_file_fmt_qcow_cache_get_table_idx(c, *table);
+
+ c->entries[i].ref--;
+ *table = NULL;
+
+ if (c->entries[i].ref == 0) {
+ c->entries[i].lru_counter = ++c->lru_counter;
+ }
+
+ ASSERT(c->entries[i].ref >= 0);
+}
diff --git a/drivers/block/loop/loop_file_fmt_qcow_cache.h b/drivers/block/loop/loop_file_fmt_qcow_cache.h
new file mode 100644
index 000000000000..1abf9b2b7c09
--- /dev/null
+++ b/drivers/block/loop/loop_file_fmt_qcow_cache.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * loop_file_fmt_qcow_cache.h
+ *
+ * Ported QCOW2 implementation of the QEMU project (GPL-2.0):
+ * L2/refcount table cache for the QCOW2 format.
+ *
+ * The copyright (C) 2010 of the original code is owned by
+ * Kevin Wolf <kwolf@redhat.com>
+ *
+ * Copyright (C) 2019 Manuel Bentele <development@manuel-bentele.de>
+ */
+
+#ifndef _LINUX_LOOP_FILE_FMT_QCOW_CACHE_H
+#define _LINUX_LOOP_FILE_FMT_QCOW_CACHE_H
+
+#include "loop_file_fmt.h"
+
+struct loop_file_fmt_qcow_cache_table {
+ s64 offset;
+ u64 lru_counter;
+ int ref;
+ bool dirty;
+};
+
+struct loop_file_fmt_qcow_cache {
+ struct loop_file_fmt_qcow_cache_table *entries;
+ struct loop_file_fmt_qcow_cache *depends;
+ int size;
+ int table_size;
+ bool depends_on_flush;
+ void *table_array;
+ u64 lru_counter;
+ u64 cache_clean_lru_counter;
+};
+
+extern struct loop_file_fmt_qcow_cache *loop_file_fmt_qcow_cache_create(
+ struct loop_file_fmt *lo_fmt,
+ int num_tables,
+ unsigned table_size);
+
+extern void loop_file_fmt_qcow_cache_destroy(struct loop_file_fmt *lo_fmt);
+
+extern int loop_file_fmt_qcow_cache_get(struct loop_file_fmt *lo_fmt,
+ u64 offset,
+ void **table);
+
+extern void loop_file_fmt_qcow_cache_put(struct loop_file_fmt *lo_fmt,
+ void **table);
+
+#endif
diff --git a/drivers/block/loop/loop_file_fmt_qcow_cluster.c b/drivers/block/loop/loop_file_fmt_qcow_cluster.c
new file mode 100644
index 000000000000..9c91a8b4aeb7
--- /dev/null
+++ b/drivers/block/loop/loop_file_fmt_qcow_cluster.c
@@ -0,0 +1,270 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * loop_file_fmt_qcow_cluster.c
+ *
+ * Ported QCOW2 implementation of the QEMU project (GPL-2.0):
+ * Cluster calculation and lookup for the QCOW2 format.
+ *
+ * The copyright (C) 2004-2006 of the original code is owned by Fabrice Bellard.
+ *
+ * Copyright (C) 2019 Manuel Bentele <development@manuel-bentele.de>
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#include "loop_file_fmt.h"
+#include "loop_file_fmt_qcow_main.h"
+#include "loop_file_fmt_qcow_cache.h"
+#include "loop_file_fmt_qcow_cluster.h"
+
+/*
+ * Loads a L2 slice into memory (L2 slices are the parts of L2 tables
+ * that are loaded by the qcow2 cache). If the slice is in the cache,
+ * the cache is used; otherwise the L2 slice is loaded from the image
+ * file.
+ */
+static int __loop_file_fmt_qcow_cluster_l2_load(struct loop_file_fmt *lo_fmt,
+ u64 offset, u64 l2_offset, u64 **l2_slice)
+{
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+
+ int start_of_slice = sizeof(u64) * (
+ loop_file_fmt_qcow_offset_to_l2_index(qcow_data, offset) -
+ loop_file_fmt_qcow_offset_to_l2_slice_index(qcow_data, offset)
+ );
+
+ ASSERT(qcow_data->l2_table_cache != NULL);
+ return loop_file_fmt_qcow_cache_get(lo_fmt, l2_offset + start_of_slice,
+ (void **) l2_slice);
+}
+
+/*
+ * Checks how many clusters in a given L2 slice are contiguous in the image
+ * file. As soon as one of the flags in the bitmask stop_flags changes compared
+ * to the first cluster, the search is stopped and the cluster is not counted
+ * as contiguous. (This allows it, for example, to stop at the first compressed
+ * cluster which may require a different handling)
+ */
+static int __loop_file_fmt_qcow_cluster_count_contiguous(
+ struct loop_file_fmt *lo_fmt, int nb_clusters, int cluster_size,
+ u64 *l2_slice, u64 stop_flags)
+{
+ int i;
+ enum loop_file_fmt_qcow_cluster_type first_cluster_type;
+ u64 mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
+ u64 first_entry = be64_to_cpu(l2_slice[0]);
+ u64 offset = first_entry & mask;
+
+ first_cluster_type = loop_file_fmt_qcow_get_cluster_type(lo_fmt,
+ first_entry);
+ if (first_cluster_type == QCOW_CLUSTER_UNALLOCATED) {
+ return 0;
+ }
+
+ /* must be allocated */
+ ASSERT(first_cluster_type == QCOW_CLUSTER_NORMAL ||
+ first_cluster_type == QCOW_CLUSTER_ZERO_ALLOC);
+
+ for (i = 0; i < nb_clusters; i++) {
+ u64 l2_entry = be64_to_cpu(l2_slice[i]) & mask;
+ if (offset + (u64) i * cluster_size != l2_entry) {
+ break;
+ }
+ }
+
+ return i;
+}
+
+/*
+ * Checks how many consecutive unallocated clusters in a given L2
+ * slice have the same cluster type.
+ */
+static int __loop_file_fmt_qcow_cluster_count_contiguous_unallocated(
+ struct loop_file_fmt *lo_fmt, int nb_clusters, u64 *l2_slice,
+ enum loop_file_fmt_qcow_cluster_type wanted_type)
+{
+ int i;
+
+ ASSERT(wanted_type == QCOW_CLUSTER_ZERO_PLAIN ||
+ wanted_type == QCOW_CLUSTER_UNALLOCATED);
+
+ for (i = 0; i < nb_clusters; i++) {
+ u64 entry = be64_to_cpu(l2_slice[i]);
+ enum loop_file_fmt_qcow_cluster_type type =
+ loop_file_fmt_qcow_get_cluster_type(lo_fmt, entry);
+
+ if (type != wanted_type) {
+ break;
+ }
+ }
+
+ return i;
+}
+
+/*
+ * For a given offset of the virtual disk, find the cluster type and offset in
+ * the qcow2 file. The offset is stored in *cluster_offset.
+ *
+ * On entry, *bytes is the maximum number of contiguous bytes starting at
+ * offset that we are interested in.
+ *
+ * On exit, *bytes is the number of bytes starting at offset that have the same
+ * cluster type and (if applicable) are stored contiguously in the image file.
+ * Compressed clusters are always returned one by one.
+ *
+ * Returns the cluster type (QCOW_CLUSTER_*) on success, -errno in error
+ * cases.
+ */
+int loop_file_fmt_qcow_cluster_get_offset(struct loop_file_fmt *lo_fmt,
+ u64 offset, unsigned int *bytes, u64 *cluster_offset)
+{
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+ unsigned int l2_index;
+ u64 l1_index, l2_offset, *l2_slice;
+ int c;
+ unsigned int offset_in_cluster;
+ u64 bytes_available, bytes_needed, nb_clusters;
+ enum loop_file_fmt_qcow_cluster_type type;
+ int ret;
+
+ offset_in_cluster = loop_file_fmt_qcow_offset_into_cluster(qcow_data,
+ offset);
+ bytes_needed = (u64) *bytes + offset_in_cluster;
+
+ /* compute how many bytes there are between the start of the cluster
+ * containing offset and the end of the l2 slice that contains
+ * the entry pointing to it */
+ bytes_available = ((u64)(
+ qcow_data->l2_slice_size -
+ loop_file_fmt_qcow_offset_to_l2_slice_index(qcow_data, offset))
+ ) << qcow_data->cluster_bits;
+
+ if (bytes_needed > bytes_available) {
+ bytes_needed = bytes_available;
+ }
+
+ *cluster_offset = 0;
+
+ /* seek to the l2 offset in the l1 table */
+ l1_index = loop_file_fmt_qcow_offset_to_l1_index(qcow_data, offset);
+ if (l1_index >= qcow_data->l1_size) {
+ type = QCOW_CLUSTER_UNALLOCATED;
+ goto out;
+ }
+
+ l2_offset = qcow_data->l1_table[l1_index] & L1E_OFFSET_MASK;
+ if (!l2_offset) {
+ type = QCOW_CLUSTER_UNALLOCATED;
+ goto out;
+ }
+
+ if (loop_file_fmt_qcow_offset_into_cluster(qcow_data, l2_offset)) {
+ printk_ratelimited(KERN_ERR "loop_file_fmt_qcow: L2 table "
+ "offset %llx unaligned (L1 index: %llx)", l2_offset,
+ l1_index);
+ return -EIO;
+ }
+
+ /* load the l2 slice in memory */
+ ret = __loop_file_fmt_qcow_cluster_l2_load(lo_fmt, offset, l2_offset,
+ &l2_slice);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* find the cluster offset for the given disk offset */
+ l2_index = loop_file_fmt_qcow_offset_to_l2_slice_index(qcow_data,
+ offset);
+ *cluster_offset = be64_to_cpu(l2_slice[l2_index]);
+
+ nb_clusters = loop_file_fmt_qcow_size_to_clusters(qcow_data,
+ bytes_needed);
+ /* bytes_needed <= *bytes + offset_in_cluster, both of which are
+ * unsigned integers; the minimum cluster size is 512, so this
+ * assertion is always true */
+ ASSERT(nb_clusters <= INT_MAX);
+
+ type = loop_file_fmt_qcow_get_cluster_type(lo_fmt, *cluster_offset);
+ if (qcow_data->qcow_version < 3 && (
+ type == QCOW_CLUSTER_ZERO_PLAIN ||
+ type == QCOW_CLUSTER_ZERO_ALLOC)) {
+ printk_ratelimited(KERN_ERR "loop_file_fmt_qcow: zero cluster "
+ "entry found in pre-v3 image (L2 offset: %llx, "
+ "L2 index: %x)\n", l2_offset, l2_index);
+ ret = -EIO;
+ goto fail;
+ }
+ switch (type) {
+ case QCOW_CLUSTER_COMPRESSED:
+ if (loop_file_fmt_qcow_has_data_file(lo_fmt)) {
+ printk_ratelimited(KERN_ERR "loop_file_fmt_qcow: "
+ "compressed cluster entry found in image with "
+ "external data file (L2 offset: %llx, "
+ "L2 index: %x)", l2_offset, l2_index);
+ ret = -EIO;
+ goto fail;
+ }
+ /* Compressed clusters can only be processed one by one */
+ c = 1;
+ *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
+ break;
+ case QCOW_CLUSTER_ZERO_PLAIN:
+ case QCOW_CLUSTER_UNALLOCATED:
+ /* how many empty clusters ? */
+ c = __loop_file_fmt_qcow_cluster_count_contiguous_unallocated(
+ lo_fmt, nb_clusters, &l2_slice[l2_index], type);
+ *cluster_offset = 0;
+ break;
+ case QCOW_CLUSTER_ZERO_ALLOC:
+ case QCOW_CLUSTER_NORMAL:
+ /* how many allocated clusters ? */
+ c = __loop_file_fmt_qcow_cluster_count_contiguous(lo_fmt,
+ nb_clusters, qcow_data->cluster_size,
+ &l2_slice[l2_index], QCOW_OFLAG_ZERO);
+ *cluster_offset &= L2E_OFFSET_MASK;
+ if (loop_file_fmt_qcow_offset_into_cluster(qcow_data,
+ *cluster_offset)) {
+ printk_ratelimited(KERN_ERR "loop_file_fmt_qcow: "
+ "cluster allocation offset %llx unaligned "
+ "(L2 offset: %llx, L2 index: %x)\n",
+ *cluster_offset, l2_offset, l2_index);
+ ret = -EIO;
+ goto fail;
+ }
+ if (loop_file_fmt_qcow_has_data_file(lo_fmt) &&
+ *cluster_offset != offset - offset_in_cluster) {
+ printk_ratelimited(KERN_ERR "loop_file_fmt_qcow: "
+ "external data file host cluster offset %llx "
+ "does not match guest cluster offset: %llx, "
+ "L2 index: %x)", *cluster_offset,
+ offset - offset_in_cluster, l2_index);
+ ret = -EIO;
+ goto fail;
+ }
+ break;
+ default:
+ BUG();
+ }
+
+ loop_file_fmt_qcow_cache_put(lo_fmt, (void **) &l2_slice);
+
+ bytes_available = (s64) c * qcow_data->cluster_size;
+
+out:
+ if (bytes_available > bytes_needed) {
+ bytes_available = bytes_needed;
+ }
+
+ /* bytes_available <= bytes_needed <= *bytes + offset_in_cluster;
+ * subtracting offset_in_cluster will therefore definitely yield
+ * something not exceeding UINT_MAX */
+ ASSERT(bytes_available - offset_in_cluster <= UINT_MAX);
+ *bytes = bytes_available - offset_in_cluster;
+
+ return type;
+
+fail:
+ loop_file_fmt_qcow_cache_put(lo_fmt, (void **) &l2_slice);
+ return ret;
+}
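As a side note to loop_file_fmt_qcow_cluster.c above: the index helpers it calls (loop_file_fmt_qcow_offset_to_l1_index() and friends) live in loop_file_fmt_qcow_main.h and are assumed to follow the standard QCOW2 layout, where one L2 table fills exactly one cluster (l2_bits = cluster_bits - 3, as set up later in qcow_file_fmt_init()). A small standalone userspace sketch of that arithmetic, with made-up values:

/* Worked example (sketch) of the QCOW2 index math used by
 * loop_file_fmt_qcow_cluster_get_offset(); not part of the patch. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned cluster_bits = 16;              /* 64 KiB clusters */
	unsigned l2_bits = cluster_bits - 3;     /* one L2 table per cluster */
	uint64_t cluster_size = 1ULL << cluster_bits;
	uint64_t l2_size = 1ULL << l2_bits;      /* entries per L2 table */

	uint64_t guest_offset = 0x12345678ULL;   /* arbitrary virtual disk offset */

	uint64_t l1_index = guest_offset >> (l2_bits + cluster_bits);
	uint64_t l2_index = (guest_offset >> cluster_bits) & (l2_size - 1);
	uint64_t in_cluster = guest_offset & (cluster_size - 1);

	printf("L1 index %llu, L2 index %llu, offset in cluster %llu\n",
	       (unsigned long long)l1_index,
	       (unsigned long long)l2_index,
	       (unsigned long long)in_cluster);
	return 0;
}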
diff --git a/drivers/block/loop/loop_file_fmt_qcow_cluster.h b/drivers/block/loop/loop_file_fmt_qcow_cluster.h
new file mode 100644
index 000000000000..d62e3318f6ce
--- /dev/null
+++ b/drivers/block/loop/loop_file_fmt_qcow_cluster.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * loop_file_fmt_qcow_cluster.h
+ *
+ * Ported QCOW2 implementation of the QEMU project (GPL-2.0):
+ * Cluster calculation and lookup for the QCOW2 format.
+ *
+ * The copyright (C) 2004-2006 of the original code is owned by Fabrice Bellard.
+ *
+ * Copyright (C) 2019 Manuel Bentele <development@manuel-bentele.de>
+ */
+
+#ifndef _LINUX_LOOP_FILE_FMT_QCOW_CLUSTER_H
+#define _LINUX_LOOP_FILE_FMT_QCOW_CLUSTER_H
+
+#include "loop_file_fmt.h"
+
+extern int loop_file_fmt_qcow_cluster_get_offset(struct loop_file_fmt *lo_fmt,
+ u64 offset,
+ unsigned int *bytes,
+ u64 *cluster_offset);
+
+#endif
diff --git a/drivers/block/loop/loop_file_fmt_qcow_main.c b/drivers/block/loop/loop_file_fmt_qcow_main.c
new file mode 100644
index 000000000000..b3d7b0bfc085
--- /dev/null
+++ b/drivers/block/loop/loop_file_fmt_qcow_main.c
@@ -0,0 +1,785 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * loop_file_fmt_qcow.c
+ *
+ * QCOW file format driver for the loop device module.
+ *
+ * Copyright (C) 2019 Manuel Bentele <development@manuel-bentele.de>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/limits.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <linux/bvec.h>
+#include <linux/uio.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <linux/zlib.h>
+
+#include "loop_file_fmt.h"
+#include "loop_file_fmt_qcow_main.h"
+#include "loop_file_fmt_qcow_cache.h"
+#include "loop_file_fmt_qcow_cluster.h"
+
+static int __qcow_file_fmt_header_read(struct loop_file_fmt *lo_fmt,
+ struct loop_file_fmt_qcow_header *header)
+{
+ struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt);
+ ssize_t len;
+ loff_t offset;
+ int ret = 0;
+
+ /* read QCOW header */
+ offset = 0;
+ len = kernel_read(lo->lo_backing_file, header, sizeof(*header),
+ &offset);
+ if (len < 0) {
+ printk(KERN_ERR "loop_file_fmt_qcow: could not read QCOW "
+ "header");
+ return len;
+ }
+
+ header->magic = be32_to_cpu(header->magic);
+ header->version = be32_to_cpu(header->version);
+ header->backing_file_offset = be64_to_cpu(header->backing_file_offset);
+ header->backing_file_size = be32_to_cpu(header->backing_file_size);
+ header->cluster_bits = be32_to_cpu(header->cluster_bits);
+ header->size = be64_to_cpu(header->size);
+ header->crypt_method = be32_to_cpu(header->crypt_method);
+ header->l1_size = be32_to_cpu(header->l1_size);
+ header->l1_table_offset = be64_to_cpu(header->l1_table_offset);
+ header->refcount_table_offset =
+ be64_to_cpu(header->refcount_table_offset);
+ header->refcount_table_clusters =
+ be32_to_cpu(header->refcount_table_clusters);
+ header->nb_snapshots = be32_to_cpu(header->nb_snapshots);
+ header->snapshots_offset = be64_to_cpu(header->snapshots_offset);
+
+ /* check QCOW file format and header version */
+ if (header->magic != QCOW_MAGIC) {
+ printk(KERN_ERR "loop_file_fmt_qcow: image is not in QCOW "
+ "format");
+ return -EINVAL;
+ }
+
+ if (header->version < 2 || header->version > 3) {
+ printk(KERN_ERR "loop_file_fmt_qcow: unsupported QCOW version "
+ "%d", header->version);
+ return -ENOTSUPP;
+ }
+
+ /* initialize version 3 header fields */
+ if (header->version == 2) {
+ header->incompatible_features = 0;
+ header->compatible_features = 0;
+ header->autoclear_features = 0;
+ header->refcount_order = 4;
+ header->header_length = 72;
+ } else {
+ header->incompatible_features =
+ be64_to_cpu(header->incompatible_features);
+ header->compatible_features =
+ be64_to_cpu(header->compatible_features);
+ header->autoclear_features =
+ be64_to_cpu(header->autoclear_features);
+ header->refcount_order = be32_to_cpu(header->refcount_order);
+ header->header_length = be32_to_cpu(header->header_length);
+
+ if (header->header_length < 104) {
+ printk(KERN_ERR "loop_file_fmt_qcow: QCOW header too "
+ "short");
+ return -EINVAL;
+ }
+ }
+
+ return ret;
+}
+
+#ifdef CONFIG_DEBUG_DRIVER
+static void __qcow_file_fmt_header_print(
+ struct loop_file_fmt_qcow_header *header)
+{
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.magic=%d",
+ header->magic);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.version=%d",
+ header->version);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.backing_file_offset=%lld",
+ header->backing_file_offset);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.backing_file_size=%d",
+ header->backing_file_size);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.cluster_bits=%d",
+ header->cluster_bits);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.size=%lld",
+ header->size);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.crypt_method=%d",
+ header->crypt_method);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.l1_size=%d",
+ header->l1_size);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.l1_table_offset=%lld",
+ header->l1_table_offset);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.refcount_table_offset=%lld",
+ header->refcount_table_offset);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.refcount_table_clusters=%d",
+ header->refcount_table_clusters);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.nb_snapshots=%d",
+ header->nb_snapshots);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.snapshots_offset=%lld",
+ header->snapshots_offset);
+
+ if (header->version == 3) {
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.incompatible_features=%lld",
+ header->incompatible_features);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.compatible_features=%lld",
+ header->compatible_features);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.autoclear_features=%lld",
+ header->autoclear_features);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.refcount_order=%d",
+ header->refcount_order);
+ printk(KERN_DEBUG "loop_file_fmt_qcow: "
+ "header.header_length=%d",
+ header->header_length);
+ }
+}
+#endif
+
+static int __qcow_file_fmt_validate_table(struct loop_file_fmt *lo_fmt,
+ u64 offset, u64 entries, size_t entry_len, s64 max_size_bytes,
+ const char *table_name)
+{
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+
+ if (entries > max_size_bytes / entry_len) {
+ printk(KERN_INFO "loop_file_fmt_qcow: %s too large",
+ table_name);
+ return -EFBIG;
+ }
+
+ /* Use signed S64_MAX as the maximum even for u64 header fields,
+ * because values will be passed to qemu functions taking s64. */
+ if ((S64_MAX - entries * entry_len < offset) || (
+ loop_file_fmt_qcow_offset_into_cluster(qcow_data, offset) != 0)
+ ) {
+ printk(KERN_INFO "loop_file_fmt_qcow: %s offset invalid",
+ table_name);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static inline loff_t __qcow_file_fmt_rq_get_pos(struct loop_file_fmt *lo_fmt,
+ struct request *rq)
+{
+ struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt);
+ return ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset;
+}
+
+static inline void __qcow_file_fmt_iov_iter_bvec(struct iov_iter *i,
+ unsigned int direction,
+ const struct bio_vec *bvec,
+ unsigned long nr_segs,
+ size_t count)
+{
+ iov_iter_bvec(i, direction, bvec, nr_segs, count);
+ i->type |= ITER_BVEC_FLAG_NO_REF;
+}
+
+static int __qcow_file_fmt_compression_init(struct loop_file_fmt *lo_fmt)
+{
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+ int ret = 0;
+
+ qcow_data->strm = kmalloc(sizeof(*qcow_data->strm), GFP_KERNEL);
+ if (!qcow_data->strm) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ qcow_data->strm->workspace = vmalloc(zlib_inflate_workspacesize());
+ if (!qcow_data->strm->workspace) {
+ ret = -ENOMEM;
+ goto out_free_strm;
+ }
+
+ return ret;
+
+out_free_strm:
+ kfree(qcow_data->strm);
+out:
+ return ret;
+}
+
+static void __qcow_file_fmt_compression_exit(struct loop_file_fmt *lo_fmt)
+{
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+
+ if (qcow_data->strm->workspace)
+ vfree(qcow_data->strm->workspace);
+
+ if (qcow_data->strm)
+ kfree(qcow_data->strm);
+}
+
+static int qcow_file_fmt_init(struct loop_file_fmt *lo_fmt)
+{
+ struct loop_file_fmt_qcow_data *qcow_data;
+ struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt);
+ struct loop_file_fmt_qcow_header header;
+ u64 l1_vm_state_index;
+ u64 l2_cache_size;
+ u64 l2_cache_entry_size;
+ ssize_t len;
+ unsigned int i;
+ int ret = 0;
+
+ /* allocate memory for saving QCOW file format data */
+ qcow_data = kzalloc(sizeof(*qcow_data), GFP_KERNEL);
+ if (!qcow_data)
+ return -ENOMEM;
+
+ lo_fmt->private_data = qcow_data;
+
+ /* read the QCOW file header */
+ ret = __qcow_file_fmt_header_read(lo_fmt, &header);
+ if (ret)
+ goto free_qcow_data;
+
+#ifdef CONFIG_DEBUG_DRIVER
+ __qcow_file_fmt_header_print(&header);
+#endif
+
+ qcow_data->qcow_version = header.version;
+
+ /* Initialise cluster size */
+ if (header.cluster_bits < QCOW_MIN_CLUSTER_BITS
+ || header.cluster_bits > QCOW_MAX_CLUSTER_BITS) {
+ printk(KERN_ERR "loop_file_fmt_qcow: unsupported cluster "
+ "size: 2^%d", header.cluster_bits);
+ ret = -EINVAL;
+ goto free_qcow_data;
+ }
+
+ qcow_data->cluster_bits = header.cluster_bits;
+ qcow_data->cluster_size = 1 << qcow_data->cluster_bits;
+ qcow_data->cluster_sectors = 1 <<
+ (qcow_data->cluster_bits - SECTOR_SHIFT);
+
+ if (header.header_length > qcow_data->cluster_size) {
+ printk(KERN_ERR "loop_file_fmt_qcow: QCOW header exceeds "
+ "cluster size");
+ ret = -EINVAL;
+ goto free_qcow_data;
+ }
+
+ if (header.backing_file_offset > qcow_data->cluster_size) {
+ printk(KERN_ERR "loop_file_fmt_qcow: invalid backing file "
+ "offset");
+ ret = -EINVAL;
+ goto free_qcow_data;
+ }
+
+ if (header.backing_file_offset) {
+ printk(KERN_ERR "loop_file_fmt_qcow: backing file support not "
+ "available");
+ ret = -ENOTSUPP;
+ goto free_qcow_data;
+ }
+
+ /* handle feature bits */
+ qcow_data->incompatible_features = header.incompatible_features;
+ qcow_data->compatible_features = header.compatible_features;
+ qcow_data->autoclear_features = header.autoclear_features;
+
+ if (qcow_data->incompatible_features & QCOW_INCOMPAT_DIRTY) {
+ printk(KERN_ERR "loop_file_fmt_qcow: image contains "
+ "inconsistent refcounts");
+ ret = -EACCES;
+ goto free_qcow_data;
+ }
+
+ if (qcow_data->incompatible_features & QCOW_INCOMPAT_CORRUPT) {
+ printk(KERN_ERR "loop_file_fmt_qcow: image is corrupt; cannot "
+ "be opened read/write");
+ ret = -EACCES;
+ goto free_qcow_data;
+ }
+
+ if (qcow_data->incompatible_features & QCOW_INCOMPAT_DATA_FILE) {
+ printk(KERN_ERR "loop_file_fmt_qcow: clusters in the external "
+ "data file are not refcounted");
+ ret = -EACCES;
+ goto free_qcow_data;
+ }
+
+ /* Check support for various header values */
+ if (header.refcount_order > 6) {
+ printk(KERN_ERR "loop_file_fmt_qcow: reference count entry "
+ "width too large; may not exceed 64 bits");
+ ret = -EINVAL;
+ goto free_qcow_data;
+ }
+ qcow_data->refcount_order = header.refcount_order;
+ qcow_data->refcount_bits = 1 << qcow_data->refcount_order;
+ qcow_data->refcount_max = U64_C(1) << (qcow_data->refcount_bits - 1);
+ qcow_data->refcount_max += qcow_data->refcount_max - 1;
+
+ qcow_data->crypt_method_header = header.crypt_method;
+ if (qcow_data->crypt_method_header) {
+ printk(KERN_ERR "loop_file_fmt_qcow: encryption support not "
+ "available");
+ ret = -ENOTSUPP;
+ goto free_qcow_data;
+ }
+
+ /* L2 is always one cluster */
+ qcow_data->l2_bits = qcow_data->cluster_bits - 3;
+ qcow_data->l2_size = 1 << qcow_data->l2_bits;
+ /* 2^(qcow_data->refcount_order - 3) is the refcount width in bytes */
+ qcow_data->refcount_block_bits = qcow_data->cluster_bits -
+ (qcow_data->refcount_order - 3);
+ qcow_data->refcount_block_size = 1 << qcow_data->refcount_block_bits;
+ qcow_data->size = header.size;
+ qcow_data->csize_shift = (62 - (qcow_data->cluster_bits - 8));
+ qcow_data->csize_mask = (1 << (qcow_data->cluster_bits - 8)) - 1;
+ qcow_data->cluster_offset_mask = (1LL << qcow_data->csize_shift) - 1;
+
+ qcow_data->refcount_table_offset = header.refcount_table_offset;
+ qcow_data->refcount_table_size = header.refcount_table_clusters <<
+ (qcow_data->cluster_bits - 3);
+
+ if (header.refcount_table_clusters == 0) {
+ printk(KERN_ERR "loop_file_fmt_qcow: image does not contain a "
+ "reference count table");
+ ret = -EINVAL;
+ goto free_qcow_data;
+ }
+
+ ret = __qcow_file_fmt_validate_table(lo_fmt,
+ qcow_data->refcount_table_offset,
+ header.refcount_table_clusters, qcow_data->cluster_size,
+ QCOW_MAX_REFTABLE_SIZE, "Reference count table");
+ if (ret < 0) {
+ goto free_qcow_data;
+ }
+
+ /* The total size in bytes of the snapshot table is checked in
+ * qcow2_read_snapshots() because the size of each snapshot is
+ * variable and we don't know it yet.
+ * Here we only check the offset and number of snapshots. */
+ ret = __qcow_file_fmt_validate_table(lo_fmt, header.snapshots_offset,
+ header.nb_snapshots,
+ sizeof(struct loop_file_fmt_qcow_snapshot_header),
+ sizeof(struct loop_file_fmt_qcow_snapshot_header) *
+ QCOW_MAX_SNAPSHOTS, "Snapshot table");
+ if (ret < 0) {
+ goto free_qcow_data;
+ }
+
+ /* read the level 1 table */
+ ret = __qcow_file_fmt_validate_table(lo_fmt, header.l1_table_offset,
+ header.l1_size, sizeof(u64), QCOW_MAX_L1_SIZE,
+ "Active L1 table");
+ if (ret < 0) {
+ goto free_qcow_data;
+ }
+ qcow_data->l1_size = header.l1_size;
+ qcow_data->l1_table_offset = header.l1_table_offset;
+
+ l1_vm_state_index = loop_file_fmt_qcow_size_to_l1(qcow_data,
+ header.size);
+ if (l1_vm_state_index > INT_MAX) {
+ printk(KERN_ERR "loop_file_fmt_qcow: image is too big");
+ ret = -EFBIG;
+ goto free_qcow_data;
+ }
+ qcow_data->l1_vm_state_index = l1_vm_state_index;
+
+ /* the L1 table must contain at least enough entries to put header.size
+ * bytes */
+ if (qcow_data->l1_size < qcow_data->l1_vm_state_index) {
+ printk(KERN_ERR "loop_file_fmt_qcow: L1 table is too small");
+ ret = -EINVAL;
+ goto free_qcow_data;
+ }
+
+ if (qcow_data->l1_size > 0) {
+ qcow_data->l1_table = vzalloc(round_up(qcow_data->l1_size *
+ sizeof(u64), 512));
+ if (qcow_data->l1_table == NULL) {
+ printk(KERN_ERR "loop_file_fmt_qcow: could not "
+ "allocate L1 table");
+ ret = -ENOMEM;
+ goto free_qcow_data;
+ }
+ len = kernel_read(lo->lo_backing_file, qcow_data->l1_table,
+ qcow_data->l1_size * sizeof(u64),
+ &qcow_data->l1_table_offset);
+ if (len < 0) {
+ printk(KERN_ERR "loop_file_fmt_qcow: could not read L1 "
+ "table");
+ ret = len;
+ goto free_l1_table;
+ }
+ for (i = 0; i < qcow_data->l1_size; i++) {
+ qcow_data->l1_table[i] =
+ be64_to_cpu(qcow_data->l1_table[i]);
+ }
+ }
+
+ /* Internal snapshots */
+ qcow_data->snapshots_offset = header.snapshots_offset;
+ qcow_data->nb_snapshots = header.nb_snapshots;
+
+ if (qcow_data->nb_snapshots > 0) {
+ printk(KERN_ERR "loop_file_fmt_qcow: snapshots support not "
+ "available");
+ ret = -ENOTSUPP;
+ goto free_l1_table;
+ }
+
+
+ /* create cache for L2 */
+ l2_cache_size = qcow_data->size / (qcow_data->cluster_size / 8);
+ l2_cache_entry_size = qcow_data->cluster_size;
+ qcow_data->l2_slice_size = l2_cache_entry_size / sizeof(u64);
+ qcow_data->l2_table_cache = loop_file_fmt_qcow_cache_create(lo_fmt,
+ l2_cache_size, l2_cache_entry_size);
+ if (!qcow_data->l2_table_cache) {
+ ret = -ENOMEM;
+ goto free_l1_table;
+ }
+
+ /* initialize compression support */
+ ret = __qcow_file_fmt_compression_init(lo_fmt);
+ if (ret < 0)
+ goto free_l2_cache;
+
+ return ret;
+
+free_l2_cache:
+ loop_file_fmt_qcow_cache_destroy(lo_fmt);
+free_l1_table:
+ vfree(qcow_data->l1_table);
+free_qcow_data:
+ kfree(qcow_data);
+ lo_fmt->private_data = NULL;
+ return ret;
+}
+
+static void qcow_file_fmt_exit(struct loop_file_fmt *lo_fmt)
+{
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+
+ __qcow_file_fmt_compression_exit(lo_fmt);
+
+ if (qcow_data->l1_table) {
+ vfree(qcow_data->l1_table);
+ }
+
+ if (qcow_data->l2_table_cache) {
+ loop_file_fmt_qcow_cache_destroy(lo_fmt);
+ }
+
+ if (qcow_data) {
+ kfree(qcow_data);
+ lo_fmt->private_data = NULL;
+ }
+}
+
+static ssize_t __qcow_file_fmt_buffer_decompress(struct loop_file_fmt *lo_fmt,
+ void *dest,
+ size_t dest_size,
+ const void *src,
+ size_t src_size)
+{
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+ int ret = 0;
+
+ qcow_data->strm->avail_in = src_size;
+ qcow_data->strm->next_in = (void *) src;
+ qcow_data->strm->avail_out = dest_size;
+ qcow_data->strm->next_out = dest;
+
+ ret = zlib_inflateInit2(qcow_data->strm, -12);
+ if (ret != Z_OK) {
+ return -1;
+ }
+
+ ret = zlib_inflate(qcow_data->strm, Z_FINISH);
+ if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR)
+ || qcow_data->strm->avail_out != 0) {
+ /* We approve Z_BUF_ERROR because we need @dest buffer to be
+ * filled, but @src buffer may be processed partly (because in
+ * qcow2 we know size of compressed data with precision of one
+ * sector) */
+ ret = -1;
+ }
+
+ zlib_inflateEnd(qcow_data->strm);
+
+ return ret;
+}
+
+static int __qcow_file_fmt_read_compressed(struct loop_file_fmt *lo_fmt,
+ struct bio_vec *bvec,
+ u64 file_cluster_offset,
+ u64 offset,
+ u64 bytes)
+{
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+ struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt);
+ int ret = 0, csize, nb_csectors;
+ u64 coffset;
+ u8 *in_buf, *out_buf;
+ ssize_t len;
+ int offset_in_cluster = loop_file_fmt_qcow_offset_into_cluster(
+ qcow_data, offset);
+ void *data;
+ unsigned long irq_flags;
+
+ coffset = file_cluster_offset & qcow_data->cluster_offset_mask;
+ nb_csectors = ((file_cluster_offset >> qcow_data->csize_shift) &
+ qcow_data->csize_mask) + 1;
+ csize = nb_csectors * 512 - (coffset & 511);
+
+ in_buf = vmalloc(csize);
+ if (!in_buf) {
+ return -ENOMEM;
+ }
+
+ out_buf = vmalloc(qcow_data->cluster_size);
+ if (!out_buf) {
+ ret = -ENOMEM;
+ goto out_free_in_buf;
+ }
+
+ len = kernel_read(lo->lo_backing_file, in_buf, csize, &coffset);
+ if (len < 0) {
+ ret = len;
+ goto out_free_out_buf;
+ }
+
+ if (__qcow_file_fmt_buffer_decompress(lo_fmt, out_buf,
+ qcow_data->cluster_size, in_buf, csize) < 0) {
+ ret = -EIO;
+ goto out_free_out_buf;
+ }
+
+ ASSERT(bytes <= bvec->bv_len);
+ data = bvec_kmap_irq(bvec, &irq_flags);
+ memcpy(data, out_buf + offset_in_cluster, bytes);
+ flush_dcache_page(bvec->bv_page);
+ bvec_kunmap_irq(data, &irq_flags);
+
+out_free_out_buf:
+ vfree(out_buf);
+out_free_in_buf:
+ vfree(in_buf);
+
+ return ret;
+}
+
+static int __qcow_file_fmt_read_bvec(struct loop_file_fmt *lo_fmt,
+ struct bio_vec *bvec,
+ loff_t pos)
+{
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+ struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt);
+ int offset_in_cluster;
+ int ret;
+ unsigned int cur_bytes; /* number of bytes in current iteration */
+ u64 offset;
+ u64 bytes;
+ u64 cluster_offset = 0;
+ u64 bytes_done = 0;
+ void *data;
+ unsigned long irq_flags;
+ ssize_t len;
+ loff_t pos_read;
+
+ offset = pos;
+ bytes = bvec->bv_len;
+
+ while (bytes > 0) {
+
+ /* prepare next request */
+ cur_bytes = bytes;
+
+ ret = loop_file_fmt_qcow_cluster_get_offset(lo_fmt, offset,
+ &cur_bytes, &cluster_offset);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ offset_in_cluster = loop_file_fmt_qcow_offset_into_cluster(
+ qcow_data, offset);
+
+ switch (ret) {
+ case QCOW_CLUSTER_UNALLOCATED:
+ case QCOW_CLUSTER_ZERO_PLAIN:
+ case QCOW_CLUSTER_ZERO_ALLOC:
+ data = bvec_kmap_irq(bvec, &irq_flags) + bytes_done;
+ memset(data, 0, cur_bytes);
+ flush_dcache_page(bvec->bv_page);
+ bvec_kunmap_irq(data, &irq_flags);
+ break;
+
+ case QCOW_CLUSTER_COMPRESSED:
+ ret = __qcow_file_fmt_read_compressed(lo_fmt, bvec,
+ cluster_offset, offset, cur_bytes);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ break;
+
+ case QCOW_CLUSTER_NORMAL:
+ if ((cluster_offset & 511) != 0) {
+ ret = -EIO;
+ goto fail;
+ }
+
+ pos_read = cluster_offset + offset_in_cluster;
+
+ data = bvec_kmap_irq(bvec, &irq_flags) + bytes_done;
+ len = kernel_read(lo->lo_backing_file, data, cur_bytes,
+ &pos_read);
+ flush_dcache_page(bvec->bv_page);
+ bvec_kunmap_irq(data, &irq_flags);
+
+ if (len < 0)
+ return len;
+
+ break;
+
+ default:
+ ret = -EIO;
+ goto fail;
+ }
+
+ bytes -= cur_bytes;
+ offset += cur_bytes;
+ bytes_done += cur_bytes;
+ }
+
+ ret = 0;
+
+fail:
+ return ret;
+}
+
+static int qcow_file_fmt_read(struct loop_file_fmt *lo_fmt,
+ struct request *rq)
+{
+ struct bio_vec bvec;
+ struct req_iterator iter;
+ loff_t pos;
+ int ret = 0;
+
+ u64 cluster_offset;
+ unsigned int cur_bytes = blk_rq_bytes(rq);
+
+ pos = __qcow_file_fmt_rq_get_pos(lo_fmt, rq);
+
+ ret = loop_file_fmt_qcow_cluster_get_offset(lo_fmt, pos, &cur_bytes,
+ &cluster_offset);
+
+ ret = 0;
+
+ rq_for_each_segment(bvec, rq, iter) {
+ ret = __qcow_file_fmt_read_bvec(lo_fmt, &bvec, pos);
+ if (ret)
+ return ret;
+
+ cond_resched();
+ }
+
+ return ret;
+}
+
+static loff_t qcow_file_fmt_sector_size(struct loop_file_fmt *lo_fmt)
+{
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+ struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt);
+ loff_t loopsize;
+
+ if (qcow_data->size > 0)
+ loopsize = qcow_data->size;
+ else
+ return 0;
+
+ if (lo->lo_offset > 0)
+ loopsize -= lo->lo_offset;
+
+ if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
+ loopsize = lo->lo_sizelimit;
+ printk(KERN_INFO "loop_file_fmt_qcow: sector_size=%lld", loopsize);
+ /*
+ * Unfortunately, if we want to do I/O on the device,
+ * the number of 512-byte sectors has to fit into a sector_t.
+ */
+ return loopsize >> 9;
+}
+
+static struct loop_file_fmt_ops qcow_file_fmt_ops = {
+ .init = qcow_file_fmt_init,
+ .exit = qcow_file_fmt_exit,
+ .read = qcow_file_fmt_read,
+ .write = NULL,
+ .read_aio = NULL,
+ .write_aio = NULL,
+ .discard = NULL,
+ .flush = NULL,
+ .sector_size = qcow_file_fmt_sector_size
+};
+
+static struct loop_file_fmt_driver qcow_file_fmt_driver = {
+ .name = "QCOW",
+ .file_fmt_type = LO_FILE_FMT_QCOW,
+ .ops = &qcow_file_fmt_ops,
+ .owner = THIS_MODULE
+};
+
+static int __init loop_file_fmt_qcow_init(void)
+{
+ printk(KERN_INFO "loop_file_fmt_qcow: init loop device QCOW file "
+ "format driver");
+ return loop_file_fmt_register_driver(&qcow_file_fmt_driver);
+}
+
+static void __exit loop_file_fmt_qcow_exit(void)
+{
+ printk(KERN_INFO "loop_file_fmt_qcow: exit loop device QCOW file "
+ "format driver");
+ loop_file_fmt_unregister_driver(&qcow_file_fmt_driver);
+}
+
+module_init(loop_file_fmt_qcow_init);
+module_exit(loop_file_fmt_qcow_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Manuel Bentele <development@manuel-bentele.de>");
+MODULE_DESCRIPTION("Loop device QCOW file format driver");
+MODULE_SOFTDEP("pre: loop");
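To illustrate the compressed-cluster path in __qcow_file_fmt_read_compressed() above, here is a standalone userspace sketch of the descriptor decoding, using the csize_shift/csize_mask/cluster_offset_mask setup from qcow_file_fmt_init() for 64 KiB clusters; the entry value is invented for the example:

/* Sketch of decoding a compressed cluster descriptor as done in
 * __qcow_file_fmt_read_compressed(); not part of the patch. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned cluster_bits = 16;
	unsigned csize_shift = 62 - (cluster_bits - 8);          /* 54 */
	uint64_t csize_mask = (1ULL << (cluster_bits - 8)) - 1;  /* 0xff */
	uint64_t cluster_offset_mask = (1ULL << csize_shift) - 1;

	/* hypothetical L2 entry with QCOW_OFLAG_COMPRESSED already masked off */
	uint64_t file_cluster_offset = (3ULL << csize_shift) | 0x20200ULL;

	uint64_t coffset = file_cluster_offset & cluster_offset_mask;
	unsigned nb_csectors =
		((file_cluster_offset >> csize_shift) & csize_mask) + 1;
	unsigned csize = nb_csectors * 512 - (coffset & 511);

	printf("compressed data at %llu, %u sectors, %u bytes to read\n",
	       (unsigned long long)coffset, nb_csectors, csize);
	return 0;
}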
diff --git a/drivers/block/loop/loop_file_fmt_qcow_main.h b/drivers/block/loop/loop_file_fmt_qcow_main.h
new file mode 100644
index 000000000000..c04aa4547799
--- /dev/null
+++ b/drivers/block/loop/loop_file_fmt_qcow_main.h
@@ -0,0 +1,388 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * loop_file_fmt_qcow.h
+ *
+ * QCOW file format driver for the loop device module.
+ *
+ * Ported QCOW2 implementation of the QEMU project (GPL-2.0):
+ * Declarations for the QCOW2 file format.
+ *
+ * The copyright (C) 2004-2006 of the original code is owned by Fabrice Bellard.
+ *
+ * Copyright (C) 2019 Manuel Bentele <development@manuel-bentele.de>
+ */
+
+#ifndef _LINUX_LOOP_FILE_FMT_QCOW_H
+#define _LINUX_LOOP_FILE_FMT_QCOW_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/zlib.h>
+
+#include "loop_file_fmt.h"
+
+#ifdef CONFIG_DEBUG_DRIVER
+#define ASSERT(x) \
+do { \
+ if (!(x)) { \
+ printk(KERN_EMERG "assertion failed %s: %d: %s\n", \
+ __FILE__, __LINE__, #x); \
+ BUG(); \
+ } \
+} while (0)
+#else
+#define ASSERT(x) do { } while (0)
+#endif
+
+#define KiB (1024)
+#define MiB (1024 * 1024)
+
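+/* image magic: the ASCII characters 'Q', 'F', 'I' followed by the byte 0xfb */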
+#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+
+#define QCOW_CRYPT_NONE 0
+#define QCOW_CRYPT_AES 1
+#define QCOW_CRYPT_LUKS 2
+
+#define QCOW_MAX_CRYPT_CLUSTERS 32
+#define QCOW_MAX_SNAPSHOTS 65536
+
+/* Field widths in QCOW mean normal cluster offsets cannot reach
+ * 64PB; depending on cluster size, compressed clusters can have a
+ * smaller limit (64PB for up to 16k clusters, then ramps down to
+ * 512TB for 2M clusters). */
+#define QCOW_MAX_CLUSTER_OFFSET ((1ULL << 56) - 1)
+
+/* 8 MB refcount table is enough for 2 PB images at 64k cluster size
+ * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
+#define QCOW_MAX_REFTABLE_SIZE (8 * MiB)
+
+/* 32 MB L1 table is enough for 2 PB images at 64k cluster size
+ * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
+#define QCOW_MAX_L1_SIZE (32 * MiB)
+
+/* Allow for an average of 1k per snapshot table entry, should be plenty of
+ * space for snapshot names and IDs */
+#define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)
+
+/* Bitmap header extension constraints */
+#define QCOW_MAX_BITMAPS 65535
+#define QCOW_MAX_BITMAP_DIRECTORY_SIZE (1024 * QCOW_MAX_BITMAPS)
+
+/* indicate that the refcount of the referenced cluster is exactly one. */
+#define QCOW_OFLAG_COPIED (1ULL << 63)
+/* indicate that the cluster is compressed (they never have the copied flag) */
+#define QCOW_OFLAG_COMPRESSED (1ULL << 62)
+/* The cluster reads as all zeros */
+#define QCOW_OFLAG_ZERO (1ULL << 0)
+
+#define QCOW_MIN_CLUSTER_BITS 9
+#define QCOW_MAX_CLUSTER_BITS 21
+
+/* Must be at least 2 to cover COW */
+#define QCOW_MIN_L2_CACHE_SIZE 2 /* cache entries */
+
+/* Must be at least 4 to cover all cases of refcount table growth */
+#define QCOW_MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */
+
+#define QCOW_DEFAULT_L2_CACHE_MAX_SIZE (32 * MiB)
+#define QCOW_DEFAULT_CACHE_CLEAN_INTERVAL 600 /* seconds */
+
+#define QCOW_DEFAULT_CLUSTER_SIZE 65536
+
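+/* on-disk QCOW2 image header; all multi-byte fields are stored in
+ * big-endian byte order in the image file */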
+struct loop_file_fmt_qcow_header {
+ u32 magic;
+ u32 version;
+ u64 backing_file_offset;
+ u32 backing_file_size;
+ u32 cluster_bits;
+ u64 size; /* in bytes */
+ u32 crypt_method;
+ u32 l1_size;
+ u64 l1_table_offset;
+ u64 refcount_table_offset;
+ u32 refcount_table_clusters;
+ u32 nb_snapshots;
+ u64 snapshots_offset;
+
+ /* The following fields are only valid for version >= 3 */
+ u64 incompatible_features;
+ u64 compatible_features;
+ u64 autoclear_features;
+
+ u32 refcount_order;
+ u32 header_length;
+} __attribute__((packed));
+
+struct loop_file_fmt_qcow_snapshot_header {
+ /* header is 8 byte aligned */
+ u64 l1_table_offset;
+
+ u32 l1_size;
+ u16 id_str_size;
+ u16 name_size;
+
+ u32 date_sec;
+ u32 date_nsec;
+
+ u64 vm_clock_nsec;
+
+ u32 vm_state_size;
+ /* for extension */
+ u32 extra_data_size;
+ /* extra data follows */
+ /* id_str follows */
+ /* name follows */
+} __attribute__((packed));
+
+enum {
+ QCOW_FEAT_TYPE_INCOMPATIBLE = 0,
+ QCOW_FEAT_TYPE_COMPATIBLE = 1,
+ QCOW_FEAT_TYPE_AUTOCLEAR = 2,
+};
+
+/* incompatible feature bits */
+enum {
+ QCOW_INCOMPAT_DIRTY_BITNR = 0,
+ QCOW_INCOMPAT_CORRUPT_BITNR = 1,
+ QCOW_INCOMPAT_DATA_FILE_BITNR = 2,
+ QCOW_INCOMPAT_DIRTY = 1 << QCOW_INCOMPAT_DIRTY_BITNR,
+ QCOW_INCOMPAT_CORRUPT = 1 << QCOW_INCOMPAT_CORRUPT_BITNR,
+ QCOW_INCOMPAT_DATA_FILE = 1 << QCOW_INCOMPAT_DATA_FILE_BITNR,
+
+ QCOW_INCOMPAT_MASK = QCOW_INCOMPAT_DIRTY
+ | QCOW_INCOMPAT_CORRUPT
+ | QCOW_INCOMPAT_DATA_FILE,
+};
+
+/* compatible feature bits */
+enum {
+ QCOW_COMPAT_LAZY_REFCOUNTS_BITNR = 0,
+ QCOW_COMPAT_LAZY_REFCOUNTS = 1 << QCOW_COMPAT_LAZY_REFCOUNTS_BITNR,
+
+ QCOW_COMPAT_FEAT_MASK = QCOW_COMPAT_LAZY_REFCOUNTS,
+};
+
+/* autoclear feature bits */
+enum {
+ QCOW_AUTOCLEAR_BITMAPS_BITNR = 0,
+ QCOW_AUTOCLEAR_DATA_FILE_RAW_BITNR = 1,
+ QCOW_AUTOCLEAR_BITMAPS = 1 << QCOW_AUTOCLEAR_BITMAPS_BITNR,
+ QCOW_AUTOCLEAR_DATA_FILE_RAW = 1 << QCOW_AUTOCLEAR_DATA_FILE_RAW_BITNR,
+
+ QCOW_AUTOCLEAR_MASK = QCOW_AUTOCLEAR_BITMAPS |
+ QCOW_AUTOCLEAR_DATA_FILE_RAW,
+};
+
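+/* runtime state of an opened QCOW image: geometry derived from the header,
+ * the in-memory L1 and refcount tables, the metadata caches and the zlib
+ * stream state used for compressed clusters */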
+struct loop_file_fmt_qcow_data {
+ u64 size;
+ int cluster_bits;
+ int cluster_size;
+ int cluster_sectors;
+ int l2_slice_size;
+ int l2_bits;
+ int l2_size;
+ int l1_size;
+ int l1_vm_state_index;
+ int refcount_block_bits;
+ int refcount_block_size;
+ int csize_shift;
+ int csize_mask;
+ u64 cluster_offset_mask;
+ u64 l1_table_offset;
+ u64 *l1_table;
+
+ struct loop_file_fmt_qcow_cache *l2_table_cache;
+ struct loop_file_fmt_qcow_cache *refcount_block_cache;
+
+ u64 *refcount_table;
+ u64 refcount_table_offset;
+ u32 refcount_table_size;
+ u32 max_refcount_table_index; /* Last used entry in refcount_table */
+ u64 free_cluster_index;
+ u64 free_byte_offset;
+
+ u32 crypt_method_header;
+ u64 snapshots_offset;
+ int snapshots_size;
+ unsigned int nb_snapshots;
+
+ u32 nb_bitmaps;
+ u64 bitmap_directory_size;
+ u64 bitmap_directory_offset;
+
+ int qcow_version;
+ bool use_lazy_refcounts;
+ int refcount_order;
+ int refcount_bits;
+ u64 refcount_max;
+
+ u64 incompatible_features;
+ u64 compatible_features;
+ u64 autoclear_features;
+
+ struct z_stream_s *strm;
+};
+
+struct loop_file_fmt_qcow_cow_region {
+ /**
+ * Offset of the COW region in bytes from the start of the first
+ * cluster touched by the request.
+ */
+ unsigned offset;
+
+ /** Number of bytes to copy */
+ unsigned nb_bytes;
+};
+
+enum loop_file_fmt_qcow_cluster_type {
+ QCOW_CLUSTER_UNALLOCATED,
+ QCOW_CLUSTER_ZERO_PLAIN,
+ QCOW_CLUSTER_ZERO_ALLOC,
+ QCOW_CLUSTER_NORMAL,
+ QCOW_CLUSTER_COMPRESSED,
+};
+
+enum loop_file_fmt_qcow_metadata_overlap {
+ QCOW_OL_MAIN_HEADER_BITNR = 0,
+ QCOW_OL_ACTIVE_L1_BITNR = 1,
+ QCOW_OL_ACTIVE_L2_BITNR = 2,
+ QCOW_OL_REFCOUNT_TABLE_BITNR = 3,
+ QCOW_OL_REFCOUNT_BLOCK_BITNR = 4,
+ QCOW_OL_SNAPSHOT_TABLE_BITNR = 5,
+ QCOW_OL_INACTIVE_L1_BITNR = 6,
+ QCOW_OL_INACTIVE_L2_BITNR = 7,
+ QCOW_OL_BITMAP_DIRECTORY_BITNR = 8,
+
+ QCOW_OL_MAX_BITNR = 9,
+
+ QCOW_OL_NONE = 0,
+ QCOW_OL_MAIN_HEADER = (1 << QCOW_OL_MAIN_HEADER_BITNR),
+ QCOW_OL_ACTIVE_L1 = (1 << QCOW_OL_ACTIVE_L1_BITNR),
+ QCOW_OL_ACTIVE_L2 = (1 << QCOW_OL_ACTIVE_L2_BITNR),
+ QCOW_OL_REFCOUNT_TABLE = (1 << QCOW_OL_REFCOUNT_TABLE_BITNR),
+ QCOW_OL_REFCOUNT_BLOCK = (1 << QCOW_OL_REFCOUNT_BLOCK_BITNR),
+ QCOW_OL_SNAPSHOT_TABLE = (1 << QCOW_OL_SNAPSHOT_TABLE_BITNR),
+ QCOW_OL_INACTIVE_L1 = (1 << QCOW_OL_INACTIVE_L1_BITNR),
+	/* NOTE: Checking overlaps with inactive L2 tables will result in
+	 * reads from the image file. */
+ QCOW_OL_INACTIVE_L2 = (1 << QCOW_OL_INACTIVE_L2_BITNR),
+ QCOW_OL_BITMAP_DIRECTORY = (1 << QCOW_OL_BITMAP_DIRECTORY_BITNR),
+};
+
+/* Perform all overlap checks which can be done in constant time */
+#define QCOW_OL_CONSTANT \
+ (QCOW_OL_MAIN_HEADER | QCOW_OL_ACTIVE_L1 | QCOW_OL_REFCOUNT_TABLE | \
+ QCOW_OL_SNAPSHOT_TABLE | QCOW_OL_BITMAP_DIRECTORY)
+
+/* Perform all overlap checks which don't require disk access */
+#define QCOW_OL_CACHED \
+ (QCOW_OL_CONSTANT | QCOW_OL_ACTIVE_L2 | QCOW_OL_REFCOUNT_BLOCK | \
+ QCOW_OL_INACTIVE_L1)
+
+/* Perform all overlap checks */
+#define QCOW_OL_ALL \
+ (QCOW_OL_CACHED | QCOW_OL_INACTIVE_L2)
+
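+/* masks for extracting the host offset from L1, L2 and refcount table
+ * entries; cluster offsets are at least 512-byte aligned, so the low nine
+ * bits are reserved or hold flags, while the topmost bits of L1/L2 entries
+ * carry QCOW_OFLAG_COPIED and QCOW_OFLAG_COMPRESSED */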
+#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
+#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
+#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
+
+#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
+
+#define INV_OFFSET (-1ULL)
+
+static inline bool loop_file_fmt_qcow_has_data_file(
+ struct loop_file_fmt *lo_fmt)
+{
+	/* external data files are currently not supported */
+ return false;
+}
+
+static inline bool loop_file_fmt_qcow_data_file_is_raw(
+ struct loop_file_fmt *lo_fmt)
+{
+ struct loop_file_fmt_qcow_data *qcow_data = lo_fmt->private_data;
+ return !!(qcow_data->autoclear_features &
+ QCOW_AUTOCLEAR_DATA_FILE_RAW);
+}
+
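+/*
+ * Helpers for QCOW cluster addressing: a guest offset is split into an L1
+ * table index, an L2 table index and an offset within the cluster. With
+ * the default 64 KiB clusters, for example, an L2 table occupies one
+ * cluster and holds 8192 entries of 8 bytes each, so a single L2 table
+ * maps 512 MiB of the virtual disk.
+ */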
+static inline s64 loop_file_fmt_qcow_start_of_cluster(
+ struct loop_file_fmt_qcow_data *qcow_data, s64 offset)
+{
+ return offset & ~(qcow_data->cluster_size - 1);
+}
+
+static inline s64 loop_file_fmt_qcow_offset_into_cluster(
+ struct loop_file_fmt_qcow_data *qcow_data, s64 offset)
+{
+ return offset & (qcow_data->cluster_size - 1);
+}
+
+static inline s64 loop_file_fmt_qcow_size_to_clusters(
+ struct loop_file_fmt_qcow_data *qcow_data, u64 size)
+{
+ return (size + (qcow_data->cluster_size - 1)) >>
+ qcow_data->cluster_bits;
+}
+
+static inline s64 loop_file_fmt_qcow_size_to_l1(
+ struct loop_file_fmt_qcow_data *qcow_data, s64 size)
+{
+ int shift = qcow_data->cluster_bits + qcow_data->l2_bits;
+ return (size + (1ULL << shift) - 1) >> shift;
+}
+
+static inline int loop_file_fmt_qcow_offset_to_l1_index(
+ struct loop_file_fmt_qcow_data *qcow_data, u64 offset)
+{
+ return offset >> (qcow_data->l2_bits + qcow_data->cluster_bits);
+}
+
+static inline int loop_file_fmt_qcow_offset_to_l2_index(
+ struct loop_file_fmt_qcow_data *qcow_data, s64 offset)
+{
+ return (offset >> qcow_data->cluster_bits) & (qcow_data->l2_size - 1);
+}
+
+static inline int loop_file_fmt_qcow_offset_to_l2_slice_index(
+ struct loop_file_fmt_qcow_data *qcow_data, s64 offset)
+{
+ return (offset >> qcow_data->cluster_bits) &
+ (qcow_data->l2_slice_size - 1);
+}
+
+static inline s64 loop_file_fmt_qcow_vm_state_offset(
+ struct loop_file_fmt_qcow_data *qcow_data)
+{
+ return (s64)qcow_data->l1_vm_state_index <<
+ (qcow_data->cluster_bits + qcow_data->l2_bits);
+}
+
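+/*
+ * Classify an L2 table entry as compressed, zero (plain or allocated),
+ * unallocated or a normal data cluster, based on its flag bits and
+ * offset field.
+ */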
+static inline enum loop_file_fmt_qcow_cluster_type
+loop_file_fmt_qcow_get_cluster_type(struct loop_file_fmt *lo_fmt, u64 l2_entry)
+{
+ if (l2_entry & QCOW_OFLAG_COMPRESSED) {
+ return QCOW_CLUSTER_COMPRESSED;
+ } else if (l2_entry & QCOW_OFLAG_ZERO) {
+ if (l2_entry & L2E_OFFSET_MASK) {
+ return QCOW_CLUSTER_ZERO_ALLOC;
+ }
+ return QCOW_CLUSTER_ZERO_PLAIN;
+ } else if (!(l2_entry & L2E_OFFSET_MASK)) {
+ /* Offset 0 generally means unallocated, but it is ambiguous
+ * with external data files because 0 is a valid offset there.
+ * However, all clusters in external data files always have
+ * refcount 1, so we can rely on QCOW_OFLAG_COPIED to
+ * disambiguate. */
+ if (loop_file_fmt_qcow_has_data_file(lo_fmt) &&
+ (l2_entry & QCOW_OFLAG_COPIED)) {
+ return QCOW_CLUSTER_NORMAL;
+ } else {
+ return QCOW_CLUSTER_UNALLOCATED;
+ }
+ } else {
+ return QCOW_CLUSTER_NORMAL;
+ }
+}
+
+#endif
diff --git a/drivers/block/loop/loop_file_fmt_raw.c b/drivers/block/loop/loop_file_fmt_raw.c
index 01e003be83c1..d3c8743de5d2 100644
--- a/drivers/block/loop/loop_file_fmt_raw.c
+++ b/drivers/block/loop/loop_file_fmt_raw.c
@@ -4,7 +4,7 @@
*
* RAW file format driver for the loop device module.
*
- * Copyright (C) 2019 Manuel Bentele
+ * Copyright (C) 2019 Manuel Bentele <development@manuel-bentele.de>
*/
#include <linux/module.h>
@@ -22,16 +22,6 @@
#include "loop_file_fmt.h"
-static int raw_file_fmt_init(struct loop_file_fmt *lo_fmt)
-{
- return 0;
-}
-
-static void raw_file_fmt_exit(struct loop_file_fmt *lo_fmt)
-{
- return;
-}
-
static inline loff_t __raw_file_fmt_rq_get_pos(struct loop_file_fmt *lo_fmt,
struct request *rq)
{
@@ -84,8 +74,6 @@ static int raw_file_fmt_read_transfer(struct loop_file_fmt *lo_fmt,
int ret = 0;
loff_t pos;
- printk(KERN_INFO "loop_file_fmt_raw: raw_file_fmt_read()");
-
page = alloc_page(GFP_NOIO);
if (unlikely(!page))
return -ENOMEM;
@@ -395,8 +383,7 @@ static int raw_file_fmt_discard(struct loop_file_fmt *lo_fmt,
return ret;
}
-static int raw_file_fmt_flush(struct loop_file_fmt *lo_fmt,
- struct request *rq)
+static int raw_file_fmt_flush(struct loop_file_fmt *lo_fmt)
{
struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt);
struct file *file = lo->lo_backing_file;
@@ -407,15 +394,39 @@ static int raw_file_fmt_flush(struct loop_file_fmt *lo_fmt,
return ret;
}
+static loff_t raw_file_fmt_sector_size(struct loop_file_fmt *lo_fmt)
+{
+ struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt);
+ loff_t loopsize;
+
+ /* Compute loopsize in bytes */
+ loopsize = i_size_read(lo->lo_backing_file->f_mapping->host);
+ if (lo->lo_offset > 0)
+ loopsize -= lo->lo_offset;
+ /* offset is beyond i_size, weird but possible */
+ if (loopsize < 0)
+ return 0;
+
+ if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
+ loopsize = lo->lo_sizelimit;
+
+ /*
+ * Unfortunately, if we want to do I/O on the device,
+ * the number of 512-byte sectors has to fit into a sector_t.
+ */
+ return loopsize >> 9;
+}
+
static struct loop_file_fmt_ops raw_file_fmt_ops = {
- .init = raw_file_fmt_init,
- .exit = raw_file_fmt_exit,
+ .init = NULL,
+ .exit = NULL,
.read = raw_file_fmt_read,
.write = raw_file_fmt_write,
.read_aio = raw_file_fmt_read_aio,
.write_aio = raw_file_fmt_write_aio,
.discard = raw_file_fmt_discard,
- .flush = raw_file_fmt_flush
+ .flush = raw_file_fmt_flush,
+ .sector_size = raw_file_fmt_sector_size
};
static struct loop_file_fmt_driver raw_file_fmt_driver = {
diff --git a/drivers/block/loop/loop_main.c b/drivers/block/loop/loop_main.c
index 35ce8f7cd2a5..09f001f0690c 100644
--- a/drivers/block/loop/loop_main.c
+++ b/drivers/block/loop/loop_main.c
@@ -43,6 +43,9 @@
* operations write_begin is not available on the backing filesystem.
* Anton Altaparmakov, 16 Feb 2005
*
+ * Support for using file formats.
+ * Manuel Bentele <development@manuel-bentele.de>, 2019
+ *
* Still To Fix:
* - Advisory locking is ignored here.
* - Should use an own CAP_* category instead of CAP_SYS_ADMIN
@@ -207,7 +210,7 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
return;
/* flush dirty pages before changing direct IO */
- vfs_fsync(file, 0);
+ loop_file_fmt_flush(lo->lo_fmt);
/*
* The flag of LO_FLAGS_DIRECT_IO is handled similarly with
@@ -229,7 +232,7 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
static int
figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
{
- loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
+ loff_t size = loop_file_fmt_sector_size(lo->lo_fmt);
sector_t x = (sector_t)size;
struct block_device *bdev = lo->lo_device;
@@ -239,6 +242,7 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
lo->lo_offset = offset;
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
+
set_capacity(lo->lo_disk, x);
bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
/* let user-space know about the new size */
@@ -287,7 +291,7 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
switch (req_op(rq)) {
case REQ_OP_FLUSH:
- return loop_file_fmt_flush(lo->lo_fmt, rq);
+ return loop_file_fmt_flush(lo->lo_fmt);
case REQ_OP_DISCARD:
case REQ_OP_WRITE_ZEROES:
return loop_file_fmt_discard(lo->lo_fmt, rq);
@@ -656,16 +660,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
!file->f_op->write_iter)
lo_flags |= LO_FLAGS_READ_ONLY;
- error = -EFBIG;
- size = get_loop_size(lo, file);
- if ((loff_t)(sector_t)size != size)
- goto out_unlock;
- error = loop_prepare_queue(lo);
- if (error)
- goto out_unlock;
-
- error = 0;
-
set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
lo->use_dio = false;
@@ -682,6 +676,20 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
blk_queue_write_cache(lo->lo_queue, true, false);
loop_update_dio(lo);
+
+ error = loop_file_fmt_init(lo->lo_fmt, LO_FILE_FMT_RAW);
+ if (error)
+ goto out_unlock;
+
+ size = loop_file_fmt_sector_size(lo->lo_fmt);
+
+ error = -EFBIG;
+ if ((loff_t)(sector_t)size != size)
+ goto out_unlock;
+ error = loop_prepare_queue(lo);
+ if (error)
+ goto out_unlock;
+
set_capacity(lo->lo_disk, size);
bd_set_size(bdev, size << 9);
loop_sysfs_init(lo);
@@ -773,11 +781,11 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
goto out_unlock;
}
- loop_file_fmt_exit(lo->lo_fmt);
-
/* freeze request queue during the transition */
blk_mq_freeze_queue(lo->lo_queue);
+ loop_file_fmt_exit(lo->lo_fmt);
+
spin_lock_irq(&lo->lo_lock);
lo->lo_backing_file = NULL;
spin_unlock_irq(&lo->lo_lock);
@@ -915,11 +923,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
err = -EPERM;
goto out_unlock;
}
- if (lo->lo_state == Lo_bound) {
- err = loop_file_fmt_init(lo->lo_fmt);
- if (err)
- goto out_unlock;
- }
if (lo->lo_state != Lo_bound) {
err = -ENXIO;
goto out_unlock;
@@ -961,12 +964,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (err)
goto out_unfreeze;
- if (lo->lo_fmt->file_fmt_type != info->lo_file_fmt_type) {
- err = loop_file_fmt_change(lo->lo_fmt, info->lo_file_fmt_type);
- if (err)
- goto out_unfreeze;
- }
-
if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
/* kill_bdev should have truncated all the pages */
@@ -983,6 +980,21 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
}
}
+ if (lo->lo_fmt->file_fmt_type != info->lo_file_fmt_type) {
+ err = loop_file_fmt_change(lo->lo_fmt, info->lo_file_fmt_type);
+ if (err)
+ goto out_unfreeze;
+
+		/* After changing the file format, recalculate the capacity of
+		 * the loop device. figure_loop_size() calls the sector_size
+		 * callback of the newly selected file format driver to
+		 * determine the new capacity. */
+ if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
+ err = -EFBIG;
+ goto out_unfreeze;
+ }
+ }
+
loop_config_discard(lo);
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);