diff options
-rw-r--r-- | drivers/block/loop/Kconfig | 15 | ||||
-rw-r--r-- | drivers/block/loop/Makefile | 5 | ||||
-rw-r--r-- | drivers/block/loop/loop_file_fmt.c | 244 | ||||
-rw-r--r-- | drivers/block/loop/loop_file_fmt.h | 88 | ||||
-rw-r--r-- | drivers/block/loop/loop_file_fmt_qcow.c | 106 | ||||
-rw-r--r-- | drivers/block/loop/loop_file_fmt_raw.c | 447 | ||||
-rw-r--r-- | drivers/block/loop/loop_main.c | 361 | ||||
-rw-r--r-- | drivers/block/loop/loop_main.h | 5 | ||||
-rw-r--r-- | include/uapi/linux/loop.h | 1 |
9 files changed, 944 insertions, 328 deletions
diff --git a/drivers/block/loop/Kconfig b/drivers/block/loop/Kconfig index a595e6a9f20f..3aedc74e5f9d 100644 --- a/drivers/block/loop/Kconfig +++ b/drivers/block/loop/Kconfig @@ -75,4 +75,17 @@ config BLK_DEV_CRYPTOLOOP ext3 or Reiserfs. Please use the Device Mapper crypto module instead, which can be configured to be on-disk compatible with the cryptoloop device. -
\ No newline at end of file + +config BLK_DEV_LOOP_FILE_FMT_RAW + tristate "Loop device binary file format support" + depends on BLK_DEV_LOOP + ---help--- + Say Y or M here if you want to enable the binary (RAW) file format + support of the loop device module. + +config BLK_DEV_LOOP_FILE_FMT_QCOW + tristate "Loop device QCOW file format support" + depends on BLK_DEV_LOOP + ---help--- + Say Y or M here if you want to enable the QEMU's copy on write (QCOW) + file format support of the loop device module. diff --git a/drivers/block/loop/Makefile b/drivers/block/loop/Makefile index 5dffb318797e..2a7eeca32a78 100644 --- a/drivers/block/loop/Makefile +++ b/drivers/block/loop/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 -loop-y += loop_main.o +loop-y += loop_main.o loop_file_fmt.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o + +obj-$(CONFIG_BLK_DEV_LOOP_FILE_FMT_RAW) += loop_file_fmt_raw.o +obj-$(CONFIG_BLK_DEV_LOOP_FILE_FMT_QCOW) += loop_file_fmt_qcow.o
\ No newline at end of file diff --git a/drivers/block/loop/loop_file_fmt.c b/drivers/block/loop/loop_file_fmt.c new file mode 100644 index 000000000000..f29c15ed044b --- /dev/null +++ b/drivers/block/loop/loop_file_fmt.c @@ -0,0 +1,244 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * loop_file_fmt.c + * + * File format subsystem for the loop device module. + * + * Copyright (C) 2019 Manuel Bentele + */ + +#include <linux/kernel.h> +#include <linux/module.h> + +#include "loop_file_fmt.h" + +/* storage for all registered file format drivers */ +static struct loop_file_fmt_driver *loop_file_fmt_drivers[MAX_LO_FILE_FMT] = { + NULL +}; + +int loop_file_fmt_register_driver(struct loop_file_fmt_driver *drv) +{ + int ret = 0; + + if (drv == NULL) + return -EFAULT; + + if (drv->file_fmt_type > MAX_LO_FILE_FMT) + return -EINVAL; + + if (loop_file_fmt_drivers[drv->file_fmt_type] == NULL) { + loop_file_fmt_drivers[drv->file_fmt_type] = drv; + printk(KERN_INFO "loop: successfully registered file format " + "driver %s\n", drv->name); + } else { + printk(KERN_WARNING "loop: driver for file format already " + "registered\n"); + ret = -EBUSY; + } + + return ret; +} +EXPORT_SYMBOL(loop_file_fmt_register_driver); + +void loop_file_fmt_unregister_driver(struct loop_file_fmt_driver *drv) +{ + if (drv == NULL) + return; + + if (drv->file_fmt_type > MAX_LO_FILE_FMT) + return; + + loop_file_fmt_drivers[drv->file_fmt_type] = NULL; + printk(KERN_INFO "loop: successfully unregistered file format driver " + "%s\n", drv->name); +} +EXPORT_SYMBOL(loop_file_fmt_unregister_driver); + +struct loop_file_fmt *loop_file_fmt_alloc(void) +{ + return kzalloc(sizeof(struct loop_file_fmt), GFP_KERNEL); +} + +void loop_file_fmt_free(struct loop_file_fmt *lo_fmt) +{ + kfree(lo_fmt); +} + +int loop_file_fmt_set_lo(struct loop_file_fmt *lo_fmt, struct loop_device *lo) +{ + if (lo_fmt == NULL) + return -EINVAL; + + lo_fmt->lo = lo; + + return 0; +} +EXPORT_SYMBOL(loop_file_fmt_set_lo); + +struct loop_device 
*loop_file_fmt_get_lo(struct loop_file_fmt *lo_fmt) +{ + return lo_fmt->lo; +} +EXPORT_SYMBOL(loop_file_fmt_get_lo); + +int loop_file_fmt_init(struct loop_file_fmt *lo_fmt) +{ + struct loop_file_fmt_ops* ops; + struct module *drv; + + if (lo_fmt->file_fmt_type > MAX_LO_FILE_FMT) + return -EINVAL; + + /* check if new file format driver is registered */ + if (loop_file_fmt_drivers[lo_fmt->file_fmt_type] == NULL) { + printk(KERN_ERR "loop_file_fmt: file format driver is not " + "available\n"); + return -ENODEV; + } + + printk(KERN_INFO "loop_file_fmt: use file format driver %s\n", + loop_file_fmt_drivers[lo_fmt->file_fmt_type]->name); + + drv = loop_file_fmt_drivers[lo_fmt->file_fmt_type]->owner; + if (!try_module_get(drv)) { + printk(KERN_ERR "loop_file_fmt: file format driver %s can not " + "be accessed\n", + loop_file_fmt_drivers[lo_fmt->file_fmt_type]->name); + return -ENODEV; + } + + ops = loop_file_fmt_drivers[lo_fmt->file_fmt_type]->ops; + if (likely(ops->init)) + return ops->init(lo_fmt); + else + return -ENOSYS; +} + +void loop_file_fmt_exit(struct loop_file_fmt *lo_fmt) +{ + struct loop_file_fmt_ops* ops; + struct module *drv; + + ops = loop_file_fmt_drivers[lo_fmt->file_fmt_type]->ops; + if (likely(ops->exit)) + ops->exit(lo_fmt); + + drv = loop_file_fmt_drivers[lo_fmt->file_fmt_type]->owner; + module_put(drv); +} + +int loop_file_fmt_read(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + struct loop_file_fmt_ops* ops = + loop_file_fmt_drivers[lo_fmt->file_fmt_type]->ops; + + if (likely(ops->read)) + return ops->read(lo_fmt, rq); + else + return -ENOSYS; +} + +int loop_file_fmt_read_aio(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + struct loop_file_fmt_ops* ops = + loop_file_fmt_drivers[lo_fmt->file_fmt_type]->ops; + + if (likely(ops->read_aio)) + return ops->read_aio(lo_fmt, rq); + else + return -ENOSYS; +} + +int loop_file_fmt_write(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + struct loop_file_fmt_ops* ops = + 
loop_file_fmt_drivers[lo_fmt->file_fmt_type]->ops; + + if (likely(ops->write)) + return ops->write(lo_fmt, rq); + else + return -ENOSYS; +} + +int loop_file_fmt_write_aio(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + struct loop_file_fmt_ops* ops = + loop_file_fmt_drivers[lo_fmt->file_fmt_type]->ops; + + if (likely(ops->write_aio)) + return ops->write_aio(lo_fmt, rq); + else + return -ENOSYS; +} + +int loop_file_fmt_discard(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + struct loop_file_fmt_ops* ops = + loop_file_fmt_drivers[lo_fmt->file_fmt_type]->ops; + + if (likely(ops->discard)) + return ops->discard(lo_fmt, rq); + else + return -ENOSYS; +} + +int loop_file_fmt_flush(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + struct loop_file_fmt_ops* ops = + loop_file_fmt_drivers[lo_fmt->file_fmt_type]->ops; + + if (likely(ops->flush)) + return ops->flush(lo_fmt, rq); + else + return -ENOSYS; +} + +int loop_file_fmt_change(struct loop_file_fmt *lo_fmt, + __u32 file_fmt_type_new) +{ + if (file_fmt_type_new > MAX_LO_FILE_FMT) + return -EINVAL; + + /* + * Unload the old file format driver and after that, + * load the new driver + */ + loop_file_fmt_exit(lo_fmt); + lo_fmt->file_fmt_type = file_fmt_type_new; + printk(KERN_ERR "loop: change file format driver"); + return loop_file_fmt_init(lo_fmt); +} + +ssize_t loop_file_fmt_print_type(__u32 file_fmt_type, char *file_fmt_name) +{ + ssize_t len = 0; + + switch(file_fmt_type) { + case LO_FILE_FMT_RAW: + len = sprintf(file_fmt_name, "%s", "RAW"); + break; + case LO_FILE_FMT_QCOW: + len = sprintf(file_fmt_name, "%s", "QCOW"); + break; + case LO_FILE_FMT_VDI: + len = sprintf(file_fmt_name, "%s", "VDI"); + break; + case LO_FILE_FMT_VMDK: + len = sprintf(file_fmt_name, "%s", "VMDK"); + break; + default: + len = sprintf(file_fmt_name, "%s", "ERROR: Unsupported loop " + "file format!"); + break; + } + + return len; +} +EXPORT_SYMBOL(loop_file_fmt_print_type); diff --git a/drivers/block/loop/loop_file_fmt.h 
b/drivers/block/loop/loop_file_fmt.h new file mode 100644 index 000000000000..f5989b95cfac --- /dev/null +++ b/drivers/block/loop/loop_file_fmt.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * loop_file_fmt.h + * + * File format subsystem for the loop device module. + * + * Copyright (C) 2019 Manuel Bentele + */ + +#ifndef _LINUX_LOOP_FILE_FMT_H +#define _LINUX_LOOP_FILE_FMT_H + +#include "loop_main.h" + +struct loop_file_fmt; /* forward declaration, defined further down */ + +/* operations implemented by a file format driver; the subsystem returns -ENOSYS for a missing operation, only a missing exit is skipped silently */ +struct loop_file_fmt_ops { + int (*init) (struct loop_file_fmt *lo_fmt); + void (*exit) (struct loop_file_fmt *lo_fmt); + + int (*read) (struct loop_file_fmt *lo_fmt, + struct request *rq); + int (*write) (struct loop_file_fmt *lo_fmt, + struct request *rq); + int (*read_aio) (struct loop_file_fmt *lo_fmt, + struct request *rq); + int (*write_aio) (struct loop_file_fmt *lo_fmt, + struct request *rq); + int (*discard) (struct loop_file_fmt *lo_fmt, + struct request *rq); + + int (*flush) (struct loop_file_fmt *lo_fmt, + struct request *rq); +}; + +/* description of a file format driver as passed to loop_file_fmt_register_driver() */ +struct loop_file_fmt_driver { + const char *name; /* name printed in the subsystem's log messages */ + const __u32 file_fmt_type; /* file format type, used as slot in the driver table */ + struct loop_file_fmt_ops *ops; /* operations of the driver */ + struct module *owner; /* module pinned by loop_file_fmt_init() and released by loop_file_fmt_exit() */ +}; + +/* per-use state handed to every subsystem function */ +struct loop_file_fmt { + __u32 file_fmt_type; /* selects the driver that handles this object */ + struct loop_device *lo; /* loop device, see loop_file_fmt_set_lo() */ + void *private_data; /* presumably driver-specific state; not used by the subsystem code itself - TODO confirm */ +}; + +/* subsystem functions for the driver implementation */ +extern int loop_file_fmt_register_driver(struct loop_file_fmt_driver *drv); +extern void loop_file_fmt_unregister_driver(struct loop_file_fmt_driver *drv); + +/* subsystem functions for subsystem usage */ +extern struct loop_file_fmt *loop_file_fmt_alloc(void); +extern void loop_file_fmt_free(struct loop_file_fmt *lo_fmt); + +extern int loop_file_fmt_set_lo(struct loop_file_fmt *lo_fmt, + struct loop_device *lo); +extern struct loop_device *loop_file_fmt_get_lo(struct loop_file_fmt *lo_fmt); + 
+extern int loop_file_fmt_init(struct loop_file_fmt *lo_fmt); +extern void loop_file_fmt_exit(struct loop_file_fmt *lo_fmt); /* init/exit bind to and release the driver selected by lo_fmt->file_fmt_type */ + +extern int loop_file_fmt_read(struct loop_file_fmt *lo_fmt, + struct request *rq); +extern int loop_file_fmt_read_aio(struct loop_file_fmt *lo_fmt, + struct request *rq); +extern int loop_file_fmt_write(struct loop_file_fmt *lo_fmt, + struct request *rq); +extern int loop_file_fmt_write_aio(struct loop_file_fmt *lo_fmt, + struct request *rq); +extern int loop_file_fmt_discard(struct loop_file_fmt *lo_fmt, + struct request *rq); + +extern int loop_file_fmt_flush(struct loop_file_fmt *lo_fmt, + struct request *rq); + +extern int loop_file_fmt_change(struct loop_file_fmt *lo_fmt, + __u32 file_fmt_type_new); + +/* helper functions of the subsystem */ +extern ssize_t loop_file_fmt_print_type(__u32 file_fmt_type, + char *file_fmt_name); + +#endif /* _LINUX_LOOP_FILE_FMT_H */ diff --git a/drivers/block/loop/loop_file_fmt_qcow.c b/drivers/block/loop/loop_file_fmt_qcow.c new file mode 100644 index 000000000000..a122fd9c077e --- /dev/null +++ b/drivers/block/loop/loop_file_fmt_qcow.c @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * loop_file_fmt_qcow.c + * + * QCOW file format driver for the loop device module. 
+ * + * Copyright (C) 2019 Manuel Bentele + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> + +#include "loop_file_fmt.h" + +static int qcow_file_fmt_init(struct loop_file_fmt *lo_fmt) +{ + printk(KERN_INFO "loop_file_fmt_qcow: init QCOW file format"); + return 0; +} + +static void qcow_file_fmt_exit(struct loop_file_fmt *lo_fmt) +{ + printk(KERN_INFO "loop_file_fmt_qcow: exit QCOW file format"); + return; +} + +static int qcow_file_fmt_read(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + printk(KERN_INFO "loop_file_fmt_qcow: read QCOW file format"); + return 0; +} + +static int qcow_file_fmt_read_aio(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + printk(KERN_INFO "loop_file_fmt_qcow: read (aio) QCOW file format"); + return 0; +} + +static int qcow_file_fmt_write(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + printk(KERN_INFO "loop_file_fmt_qcow: write QCOW file format"); + return 0; +} + +static int qcow_file_fmt_write_aio(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + printk(KERN_INFO "loop_file_fmt_qcow: write (aio) QCOW file format"); + return 0; +} + +static int qcow_file_fmt_discard(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + printk(KERN_INFO "loop_file_fmt_qcow: discard QCOW file format"); + return 0; +} + +static int qcow_file_fmt_flush(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + printk(KERN_INFO "loop_file_fmt_qcow: flush QCOW file format"); + return 0; +} + +static struct loop_file_fmt_ops qcow_file_fmt_ops = { + .init = qcow_file_fmt_init, + .exit = qcow_file_fmt_exit, + .read = qcow_file_fmt_read, + .write = qcow_file_fmt_write, + .read_aio = qcow_file_fmt_read_aio, + .write_aio = qcow_file_fmt_write_aio, + .discard = qcow_file_fmt_discard, + .flush = qcow_file_fmt_flush +}; + +static struct loop_file_fmt_driver qcow_file_fmt_driver = { + .name = "QCOW", + .file_fmt_type = LO_FILE_FMT_QCOW, + .ops = &qcow_file_fmt_ops, + .owner = THIS_MODULE +}; + 
+static int __init loop_file_fmt_qcow_init(void) +{ + printk(KERN_INFO "loop_file_fmt_qcow: init loop device QCOW file format driver"); + return loop_file_fmt_register_driver(&qcow_file_fmt_driver); +} + +static void __exit loop_file_fmt_qcow_exit(void) +{ + printk(KERN_INFO "loop_file_fmt_qcow: exit loop device QCOW file format driver"); + loop_file_fmt_unregister_driver(&qcow_file_fmt_driver); +} + +module_init(loop_file_fmt_qcow_init); +module_exit(loop_file_fmt_qcow_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Manuel Bentele <development@manuel-bentele.de>"); +MODULE_DESCRIPTION("Loop device QCOW file format driver"); +MODULE_SOFTDEP("pre: loop"); diff --git a/drivers/block/loop/loop_file_fmt_raw.c b/drivers/block/loop/loop_file_fmt_raw.c new file mode 100644 index 000000000000..524e8e306157 --- /dev/null +++ b/drivers/block/loop/loop_file_fmt_raw.c @@ -0,0 +1,447 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * loop_file_fmt_raw.h + * + * RAW file format driver for the loop device module. 
+ * + * Copyright (C) 2019 Manuel Bentele + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/blkdev.h> +#include <linux/compiler.h> +#include <linux/fs.h> +#include <linux/falloc.h> +#include <linux/printk.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <linux/uio.h> + +#include "loop_file_fmt.h" + +static int raw_file_fmt_init(struct loop_file_fmt *lo_fmt) +{ + return 0; +} + +static void raw_file_fmt_exit(struct loop_file_fmt *lo_fmt) +{ + return; +} + +static inline loff_t __raw_file_fmt_rq_get_pos(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt); + return ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset; +} + +static inline void __raw_file_fmt_iov_iter_bvec(struct iov_iter *i, + unsigned int direction, + const struct bio_vec *bvec, + unsigned long nr_segs, + size_t count) +{ + iov_iter_bvec(i, direction, bvec, nr_segs, count); + i->type |= ITER_BVEC_FLAG_NO_REF; +} + +/* transfer function for DEPRECATED cryptoloop support */ +static inline int __raw_file_fmt_do_transfer(struct loop_file_fmt *lo_fmt, + int cmd, + struct page *rpage, + unsigned roffs, + struct page *lpage, + unsigned loffs, + int size, + sector_t rblock) +{ + struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt); + int ret; + + ret = lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); + if (likely(!ret)) + return 0; + + printk_ratelimited(KERN_ERR + "loop: Transfer error at byte offset %llu, length %i.\n", + (unsigned long long)rblock << 9, size); + return ret; +} + +static int raw_file_fmt_read_transfer(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + struct bio_vec bvec, b; + struct req_iterator iter; + struct iov_iter i; + struct page *page; + struct loop_device *lo; + ssize_t len; + int ret = 0; + loff_t pos; + + printk(KERN_INFO "loop_file_fmt_raw: raw_file_fmt_read()"); + + page = alloc_page(GFP_NOIO); + if (unlikely(!page)) + return -ENOMEM; 
+ + lo = loop_file_fmt_get_lo(lo_fmt); + pos = __raw_file_fmt_rq_get_pos(lo_fmt, rq); + + rq_for_each_segment(bvec, rq, iter) { + loff_t offset = pos; + + b.bv_page = page; + b.bv_offset = 0; + b.bv_len = bvec.bv_len; + + __raw_file_fmt_iov_iter_bvec(&i, READ, &b, 1, b.bv_len); + len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); + if (len < 0) { + ret = len; + goto out_free_page; + } + + ret = __raw_file_fmt_do_transfer(lo_fmt, READ, page, 0, + bvec.bv_page, bvec.bv_offset, len, offset >> 9); + if (ret) + goto out_free_page; + + flush_dcache_page(bvec.bv_page); + + if (len != bvec.bv_len) { + struct bio *bio; + + __rq_for_each_bio(bio, rq) + zero_fill_bio(bio); + break; + } + } + + ret = 0; +out_free_page: + __free_page(page); + return ret; +} + +static int raw_file_fmt_read(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + struct bio_vec bvec; + struct req_iterator iter; + struct iov_iter i; + struct loop_device *lo; + ssize_t len; + loff_t pos; + + lo = loop_file_fmt_get_lo(lo_fmt); + + if (lo->transfer) + return raw_file_fmt_read_transfer(lo_fmt, rq); + + pos = __raw_file_fmt_rq_get_pos(lo_fmt, rq); + + rq_for_each_segment(bvec, rq, iter) { + __raw_file_fmt_iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len); + len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); + if (len < 0) + return len; + + flush_dcache_page(bvec.bv_page); + + if (len != bvec.bv_len) { + struct bio *bio; + + __rq_for_each_bio(bio, rq) + zero_fill_bio(bio); + break; + } + cond_resched(); + } + + return 0; +} + +static void __raw_file_fmt_rw_aio_do_completion(struct loop_cmd *cmd) +{ + struct request *rq = blk_mq_rq_from_pdu(cmd); + + if (!atomic_dec_and_test(&cmd->ref)) + return; + kfree(cmd->bvec); + cmd->bvec = NULL; + blk_mq_complete_request(rq); +} + +static void __raw_file_fmt_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) +{ + struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb); + + if (cmd->css) + css_put(cmd->css); + cmd->ret = ret; + 
__raw_file_fmt_rw_aio_do_completion(cmd); +} + +static int __raw_file_fmt_rw_aio(struct loop_file_fmt *lo_fmt, + struct request *rq, + bool rw) +{ + struct iov_iter iter; + struct req_iterator rq_iter; + struct bio_vec *bvec; + struct bio *bio = rq->bio; + struct file *file; + struct bio_vec tmp; + struct loop_device *lo; + struct loop_cmd *cmd; + unsigned int offset; + int nr_bvec = 0; + int ret; + loff_t pos; + + lo = loop_file_fmt_get_lo(lo_fmt); + file = lo->lo_backing_file; + cmd = blk_mq_rq_to_pdu(rq); + pos = __raw_file_fmt_rq_get_pos(lo_fmt, rq); + + rq_for_each_bvec(tmp, rq, rq_iter) + nr_bvec++; + + if (rq->bio != rq->biotail) { + + bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), + GFP_NOIO); + if (!bvec) + return -EIO; + cmd->bvec = bvec; + + /* + * The bios of the request may be started from the middle of + * the 'bvec' because of bio splitting, so we can't directly + * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec + * API will take care of all details for us. 
+ */ + rq_for_each_bvec(tmp, rq, rq_iter) { + *bvec = tmp; + bvec++; + } + bvec = cmd->bvec; + offset = 0; + } else { + /* + * Same here, this bio may be started from the middle of the + * 'bvec' because of bio splitting, so offset from the bvec + * must be passed to iov iterator + */ + offset = bio->bi_iter.bi_bvec_done; + bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); + } + atomic_set(&cmd->ref, 2); + + __raw_file_fmt_iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq)); + iter.iov_offset = offset; + + cmd->iocb.ki_pos = pos; + cmd->iocb.ki_filp = file; + cmd->iocb.ki_complete = __raw_file_fmt_rw_aio_complete; + cmd->iocb.ki_flags = IOCB_DIRECT; + cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); + if (cmd->css) + kthread_associate_blkcg(cmd->css); + + if (rw == WRITE) + ret = call_write_iter(file, &cmd->iocb, &iter); + else + ret = call_read_iter(file, &cmd->iocb, &iter); + + __raw_file_fmt_rw_aio_do_completion(cmd); + kthread_associate_blkcg(NULL); + + if (ret != -EIOCBQUEUED) + cmd->iocb.ki_complete(&cmd->iocb, ret, 0); + return 0; +} + +static int raw_file_fmt_read_aio(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + return __raw_file_fmt_rw_aio(lo_fmt, rq, READ); +} + +static int __raw_file_fmt_write_bvec(struct file *file, + struct bio_vec *bvec, + loff_t *ppos) +{ + struct iov_iter i; + ssize_t bw; + + __raw_file_fmt_iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len); + + file_start_write(file); + bw = vfs_iter_write(file, &i, ppos, 0); + file_end_write(file); + + if (likely(bw == bvec->bv_len)) + return 0; + + printk_ratelimited(KERN_ERR + "loop_file_fmt_raw: Write error at byte offset %llu, length " + "%i.\n", (unsigned long long)*ppos, bvec->bv_len); + if (bw >= 0) + bw = -EIO; + return bw; +} + +static int raw_file_fmt_write_transfer(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + struct bio_vec bvec, b; + struct req_iterator iter; + struct page *page; + struct loop_device *lo; + int ret = 0; + loff_t pos; + + lo 
= loop_file_fmt_get_lo(lo_fmt); + pos = __raw_file_fmt_rq_get_pos(lo_fmt, rq); + + page = alloc_page(GFP_NOIO); + if (unlikely(!page)) + return -ENOMEM; + + rq_for_each_segment(bvec, rq, iter) { + ret = __raw_file_fmt_do_transfer(lo_fmt, WRITE, page, 0, + bvec.bv_page, bvec.bv_offset, bvec.bv_len, pos >> 9); + if (unlikely(ret)) + break; + + b.bv_page = page; + b.bv_offset = 0; + b.bv_len = bvec.bv_len; + ret = __raw_file_fmt_write_bvec(lo->lo_backing_file, &b, + &pos); + if (ret < 0) + break; + } + + __free_page(page); + return ret; +} + +static int raw_file_fmt_write(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + struct bio_vec bvec; + struct req_iterator iter; + struct loop_device *lo; + int ret = 0; + loff_t pos; + + lo = loop_file_fmt_get_lo(lo_fmt); + + if (lo->transfer) + return raw_file_fmt_write_transfer(lo_fmt, rq); + + pos = __raw_file_fmt_rq_get_pos(lo_fmt, rq); + + rq_for_each_segment(bvec, rq, iter) { + ret = __raw_file_fmt_write_bvec(lo->lo_backing_file, &bvec, + &pos); + if (ret < 0) + break; + cond_resched(); + } + + return ret; +} + +static int raw_file_fmt_write_aio(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + return __raw_file_fmt_rw_aio(lo_fmt, rq, WRITE); +} + +static int raw_file_fmt_discard(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + loff_t pos = __raw_file_fmt_rq_get_pos(lo_fmt, rq); + struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt); + + /* + * We use punch hole to reclaim the free space used by the + * image a.k.a. discard. However we do not support discard if + * encryption is enabled, because it may give an attacker + * useful information. 
+ */ + struct file *file = lo->lo_backing_file; + int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; + int ret; + + if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) { + ret = -EOPNOTSUPP; + goto out; + } + + ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq)); + if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP)) + ret = -EIO; + out: + return ret; +} + +static int raw_file_fmt_flush(struct loop_file_fmt *lo_fmt, + struct request *rq) +{ + struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt); + struct file *file = lo->lo_backing_file; + int ret = vfs_fsync(file, 0); + if (unlikely(ret && ret != -EINVAL)) + ret = -EIO; + + return ret; +} + +static struct loop_file_fmt_ops raw_file_fmt_ops = { + .init = raw_file_fmt_init, + .exit = raw_file_fmt_exit, + .read = raw_file_fmt_read, + .write = raw_file_fmt_write, + .read_aio = raw_file_fmt_read_aio, + .write_aio = raw_file_fmt_write_aio, + .discard = raw_file_fmt_discard, + .flush = raw_file_fmt_flush +}; + +static struct loop_file_fmt_driver raw_file_fmt_driver = { + .name = "RAW", + .file_fmt_type = LO_FILE_FMT_RAW, + .ops = &raw_file_fmt_ops, + .owner = THIS_MODULE +}; + +static int __init loop_file_fmt_raw_init(void) +{ + printk(KERN_INFO "loop_file_fmt_raw: init loop device RAW file format " + "driver"); + return loop_file_fmt_register_driver(&raw_file_fmt_driver); +} + +static void __exit loop_file_fmt_raw_exit(void) +{ + printk(KERN_INFO "loop_file_fmt_raw: exit loop device RAW file format " + "driver"); + loop_file_fmt_unregister_driver(&raw_file_fmt_driver); +} + +module_init(loop_file_fmt_raw_init); +module_exit(loop_file_fmt_raw_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Manuel Bentele <development@manuel-bentele.de>"); +MODULE_DESCRIPTION("Loop device RAW file format driver"); +MODULE_SOFTDEP("pre: loop"); diff --git a/drivers/block/loop/loop_main.c b/drivers/block/loop/loop_main.c index 6f851c13c13a..4d95b698f883 100644 --- a/drivers/block/loop/loop_main.c +++ 
b/drivers/block/loop/loop_main.c @@ -78,6 +78,7 @@ #include <linux/uio.h> #include <linux/ioprio.h> +#include "loop_file_fmt.h" #include "loop_main.h" #include <linux/uaccess.h> @@ -245,211 +246,6 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) return 0; } -static inline int -lo_do_transfer(struct loop_device *lo, int cmd, - struct page *rpage, unsigned roffs, - struct page *lpage, unsigned loffs, - int size, sector_t rblock) -{ - int ret; - - ret = lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); - if (likely(!ret)) - return 0; - - printk_ratelimited(KERN_ERR - "loop: Transfer error at byte offset %llu, length %i.\n", - (unsigned long long)rblock << 9, size); - return ret; -} - -static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) -{ - struct iov_iter i; - ssize_t bw; - - iov_iter_bvec(&i, ITER_BVEC | WRITE, bvec, 1, bvec->bv_len); - - file_start_write(file); - bw = vfs_iter_write(file, &i, ppos, 0); - file_end_write(file); - - if (likely(bw == bvec->bv_len)) - return 0; - - printk_ratelimited(KERN_ERR - "loop: Write error at byte offset %llu, length %i.\n", - (unsigned long long)*ppos, bvec->bv_len); - if (bw >= 0) - bw = -EIO; - return bw; -} - -static int lo_write_simple(struct loop_device *lo, struct request *rq, - loff_t pos) -{ - struct bio_vec bvec; - struct req_iterator iter; - int ret = 0; - - rq_for_each_segment(bvec, rq, iter) { - ret = lo_write_bvec(lo->lo_backing_file, &bvec, &pos); - if (ret < 0) - break; - cond_resched(); - } - - return ret; -} - -/* - * This is the slow, transforming version that needs to double buffer the - * data as it cannot do the transformations in place without having direct - * access to the destination pages of the backing file. 
- */ -static int lo_write_transfer(struct loop_device *lo, struct request *rq, - loff_t pos) -{ - struct bio_vec bvec, b; - struct req_iterator iter; - struct page *page; - int ret = 0; - - page = alloc_page(GFP_NOIO); - if (unlikely(!page)) - return -ENOMEM; - - rq_for_each_segment(bvec, rq, iter) { - ret = lo_do_transfer(lo, WRITE, page, 0, bvec.bv_page, - bvec.bv_offset, bvec.bv_len, pos >> 9); - if (unlikely(ret)) - break; - - b.bv_page = page; - b.bv_offset = 0; - b.bv_len = bvec.bv_len; - ret = lo_write_bvec(lo->lo_backing_file, &b, &pos); - if (ret < 0) - break; - } - - __free_page(page); - return ret; -} - -static int lo_read_simple(struct loop_device *lo, struct request *rq, - loff_t pos) -{ - struct bio_vec bvec; - struct req_iterator iter; - struct iov_iter i; - ssize_t len; - - rq_for_each_segment(bvec, rq, iter) { - iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len); - len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); - if (len < 0) - return len; - - flush_dcache_page(bvec.bv_page); - - if (len != bvec.bv_len) { - struct bio *bio; - - __rq_for_each_bio(bio, rq) - zero_fill_bio(bio); - break; - } - cond_resched(); - } - - return 0; -} - -static int lo_read_transfer(struct loop_device *lo, struct request *rq, - loff_t pos) -{ - struct bio_vec bvec, b; - struct req_iterator iter; - struct iov_iter i; - struct page *page; - ssize_t len; - int ret = 0; - - page = alloc_page(GFP_NOIO); - if (unlikely(!page)) - return -ENOMEM; - - rq_for_each_segment(bvec, rq, iter) { - loff_t offset = pos; - - b.bv_page = page; - b.bv_offset = 0; - b.bv_len = bvec.bv_len; - - iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len); - len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); - if (len < 0) { - ret = len; - goto out_free_page; - } - - ret = lo_do_transfer(lo, READ, page, 0, bvec.bv_page, - bvec.bv_offset, len, offset >> 9); - if (ret) - goto out_free_page; - - flush_dcache_page(bvec.bv_page); - - if (len != bvec.bv_len) { - struct bio *bio; - - __rq_for_each_bio(bio, 
rq) - zero_fill_bio(bio); - break; - } - } - - ret = 0; -out_free_page: - __free_page(page); - return ret; -} - -static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos) -{ - /* - * We use punch hole to reclaim the free space used by the - * image a.k.a. discard. However we do not support discard if - * encryption is enabled, because it may give an attacker - * useful information. - */ - struct file *file = lo->lo_backing_file; - int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; - int ret; - - if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) { - ret = -EOPNOTSUPP; - goto out; - } - - ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq)); - if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP)) - ret = -EIO; - out: - return ret; -} - -static int lo_req_flush(struct loop_device *lo, struct request *rq) -{ - struct file *file = lo->lo_backing_file; - int ret = vfs_fsync(file, 0); - if (unlikely(ret && ret != -EINVAL)) - ret = -EIO; - - return ret; -} - static void lo_complete_rq(struct request *rq) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); @@ -485,134 +281,26 @@ end_io: } } -static void lo_rw_aio_do_completion(struct loop_cmd *cmd) -{ - struct request *rq = blk_mq_rq_from_pdu(cmd); - - if (!atomic_dec_and_test(&cmd->ref)) - return; - kfree(cmd->bvec); - cmd->bvec = NULL; - blk_mq_complete_request(rq); -} - -static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) -{ - struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb); - - if (cmd->css) - css_put(cmd->css); - cmd->ret = ret; - lo_rw_aio_do_completion(cmd); -} - -static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, - loff_t pos, bool rw) -{ - struct iov_iter iter; - struct bio_vec *bvec; - struct request *rq = blk_mq_rq_from_pdu(cmd); - struct bio *bio = rq->bio; - struct file *file = lo->lo_backing_file; - unsigned int offset; - int segments = 0; - int ret; - - if (rq->bio != rq->biotail) { - struct req_iterator iter; - 
struct bio_vec tmp; - - __rq_for_each_bio(bio, rq) - segments += bio_segments(bio); - bvec = kmalloc_array(segments, sizeof(struct bio_vec), - GFP_NOIO); - if (!bvec) - return -EIO; - cmd->bvec = bvec; - - /* - * The bios of the request may be started from the middle of - * the 'bvec' because of bio splitting, so we can't directly - * copy bio->bi_iov_vec to new bvec. The rq_for_each_segment - * API will take care of all details for us. - */ - rq_for_each_segment(tmp, rq, iter) { - *bvec = tmp; - bvec++; - } - bvec = cmd->bvec; - offset = 0; - } else { - /* - * Same here, this bio may be started from the middle of the - * 'bvec' because of bio splitting, so offset from the bvec - * must be passed to iov iterator - */ - offset = bio->bi_iter.bi_bvec_done; - bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); - segments = bio_segments(bio); - } - atomic_set(&cmd->ref, 2); - - iov_iter_bvec(&iter, ITER_BVEC | rw, bvec, - segments, blk_rq_bytes(rq)); - iter.iov_offset = offset; - - cmd->iocb.ki_pos = pos; - cmd->iocb.ki_filp = file; - cmd->iocb.ki_complete = lo_rw_aio_complete; - cmd->iocb.ki_flags = IOCB_DIRECT; - cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); - if (cmd->css) - kthread_associate_blkcg(cmd->css); - - if (rw == WRITE) - ret = call_write_iter(file, &cmd->iocb, &iter); - else - ret = call_read_iter(file, &cmd->iocb, &iter); - - lo_rw_aio_do_completion(cmd); - kthread_associate_blkcg(NULL); - - if (ret != -EIOCBQUEUED) - cmd->iocb.ki_complete(&cmd->iocb, ret, 0); - return 0; -} - static int do_req_filebacked(struct loop_device *lo, struct request *rq) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); - loff_t pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset; - /* - * lo_write_simple and lo_read_simple should have been covered - * by io submit style function like lo_rw_aio(), one blocker - * is that lo_read_simple() need to call flush_dcache_page after - * the page is written from kernel, and it isn't easy to handle - * this in io submit 
style function which submits all segments - * of the req at one time. And direct read IO doesn't need to - * run flush_dcache_page(). - */ switch (req_op(rq)) { case REQ_OP_FLUSH: - return lo_req_flush(lo, rq); + return loop_file_fmt_flush(lo->lo_fmt, rq); case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: - return lo_discard(lo, rq, pos); + return loop_file_fmt_discard(lo->lo_fmt, rq); case REQ_OP_WRITE: - if (lo->transfer) - return lo_write_transfer(lo, rq, pos); - else if (cmd->use_aio) - return lo_rw_aio(lo, cmd, pos, WRITE); + if (cmd->use_aio) + return loop_file_fmt_write_aio(lo->lo_fmt, rq); else - return lo_write_simple(lo, rq, pos); + return loop_file_fmt_write(lo->lo_fmt, rq); case REQ_OP_READ: - if (lo->transfer) - return lo_read_transfer(lo, rq, pos); - else if (cmd->use_aio) - return lo_rw_aio(lo, cmd, pos, READ); + if (cmd->use_aio) + return loop_file_fmt_read_aio(lo->lo_fmt, rq); else - return lo_read_simple(lo, rq, pos); + return loop_file_fmt_read(lo->lo_fmt, rq); default: WARN_ON_ONCE(1); return -EIO; @@ -806,7 +494,7 @@ static ssize_t __print_file_fmt_type(__u32 file_fmt_type, char* buf) { static ssize_t loop_attr_file_fmt_type_show(struct loop_device *lo, char *buf) { - return __print_file_fmt_type(lo->lo_file_fmt_type, buf); + return __print_file_fmt_type(lo->lo_fmt->file_fmt_type, buf); } static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf) @@ -1085,6 +773,8 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) goto out_unlock; } + loop_file_fmt_exit(lo->lo_fmt); + /* freeze request queue during the transition */ blk_mq_freeze_queue(lo->lo_queue); @@ -1225,6 +915,11 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) err = -EPERM; goto out_unlock; } + if (lo->lo_state == Lo_bound) { + err = loop_file_fmt_init(lo->lo_fmt); + if (err) + goto out_unlock; + } if (lo->lo_state != Lo_bound) { err = -ENXIO; goto out_unlock; @@ -1266,6 +961,12 @@ loop_set_status(struct loop_device *lo, const struct 
loop_info64 *info) if (err) goto out_unfreeze; + if (lo->lo_fmt->file_fmt_type != info->lo_file_fmt_type) { + err = loop_file_fmt_change(lo->lo_fmt, info->lo_file_fmt_type); + if (err) + goto out_unfreeze; + } + if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { /* kill_bdev should have truncated all the pages */ @@ -1301,7 +1002,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) lo->lo_encrypt_key_size = info->lo_encrypt_key_size; lo->lo_init[0] = info->lo_init[0]; lo->lo_init[1] = info->lo_init[1]; - lo->lo_file_fmt_type = info->lo_file_fmt_type; + lo->lo_fmt->file_fmt_type = info->lo_file_fmt_type; if (info->lo_encrypt_key_size) { memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, info->lo_encrypt_key_size); @@ -1358,7 +1059,7 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, lo->lo_encrypt_key_size); } - info->lo_file_fmt_type = lo->lo_file_fmt_type; + info->lo_file_fmt_type = lo->lo_fmt->file_fmt_type; /* Drop loop_ctl_mutex while we call into the filesystem. */ path = lo->lo_backing_file->f_path; @@ -2010,9 +1711,16 @@ static int loop_add(struct loop_device **l, int i) blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue); err = -ENOMEM; + lo->lo_fmt = loop_file_fmt_alloc(); + if (!lo->lo_fmt) + goto out_free_queue; + + loop_file_fmt_set_lo(lo->lo_fmt, lo); + + err = -ENOMEM; disk = lo->lo_disk = alloc_disk(1 << part_shift); if (!disk) - goto out_free_queue; + goto out_free_file_fmt; /* * Disable partition scanning by default. 
The in-kernel partition @@ -2048,6 +1756,8 @@ static int loop_add(struct loop_device **l, int i) *l = lo; return lo->lo_number; +out_free_file_fmt: + loop_file_fmt_free(lo->lo_fmt); out_free_queue: blk_cleanup_queue(lo->lo_queue); out_cleanup_tags: @@ -2062,6 +1772,7 @@ out: static void loop_remove(struct loop_device *lo) { + loop_file_fmt_free(lo->lo_fmt); del_gendisk(lo->lo_disk); blk_cleanup_queue(lo->lo_queue); blk_mq_free_tag_set(&lo->tag_set); diff --git a/drivers/block/loop/loop_main.h b/drivers/block/loop/loop_main.h index 62616f48e754..8e1eff264b3c 100644 --- a/drivers/block/loop/loop_main.h +++ b/drivers/block/loop/loop_main.h @@ -17,6 +17,8 @@ #include <linux/kthread.h> #include <uapi/linux/loop.h> +#include "loop_file_fmt.h" + /* Possible states of device */ enum { Lo_unbound, @@ -42,11 +44,12 @@ struct loop_device { int lo_encrypt_key_size; struct loop_func_table *lo_encryption; __u32 lo_init[2]; - __u32 lo_file_fmt_type; kuid_t lo_key_owner; /* Who set the key */ int (*ioctl)(struct loop_device *, int cmd, unsigned long arg); + struct loop_file_fmt *lo_fmt; + struct file * lo_backing_file; struct block_device *lo_device; void *key_data; diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index ce16bfaeab2c..f2cf1ff2ec1b 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -85,6 +85,7 @@ struct loop_info64 { #define LO_FILE_FMT_QCOW 1 #define LO_FILE_FMT_VDI 2 #define LO_FILE_FMT_VMDK 3 +#define MAX_LO_FILE_FMT 5 /* * IOCTL commands --- we will commandeer 0x4C ('L') |