From dbb41ce2b7f309d394054a6bd1e33afd578798a5 Mon Sep 17 00:00:00 2001
From: Manuel Bentele
Date: Fri, 23 Oct 2020 15:18:01 +0200
Subject: Move the source code of all xloop components to the common 'src'
 directory

---
 src/kernel/xloop_file_fmt_raw.c | 476 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 476 insertions(+)
 create mode 100644 src/kernel/xloop_file_fmt_raw.c

diff --git a/src/kernel/xloop_file_fmt_raw.c b/src/kernel/xloop_file_fmt_raw.c
new file mode 100644
index 0000000..76ab39e
--- /dev/null
+++ b/src/kernel/xloop_file_fmt_raw.c
@@ -0,0 +1,476 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * xloop_file_fmt_raw.c
+ *
+ * RAW file format driver for the xloop device module.
+ *
+ * Copyright (C) 2019 Manuel Bentele
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "xloop_file_fmt.h"
+
+static inline loff_t __raw_file_fmt_rq_get_pos(struct xloop_file_fmt *xlo_fmt,
+					       struct request *rq)
+{
+	struct xloop_device *xlo = xloop_file_fmt_get_xlo(xlo_fmt);
+	return ((loff_t) blk_rq_pos(rq) << 9) + xlo->xlo_offset;
+}
+
+/* transfer function for DEPRECATED cryptoxloop support */
+static inline int __raw_file_fmt_do_transfer(struct xloop_device *xlo, int cmd,
+					     struct page *rpage, unsigned roffs,
+					     struct page *lpage, unsigned loffs,
+					     int size, sector_t rblock)
+{
+	int ret;
+
+	ret = xlo->transfer(xlo, cmd, rpage, roffs, lpage, loffs, size, rblock);
+	if (likely(!ret))
+		return 0;
+
+	pr_err_ratelimited("transfer error at byte offset %llu, length %i.\n",
+			   (unsigned long long)rblock << 9, size);
+	return ret;
+}
+
+static int __raw_file_fmt_read_transfer(struct xloop_device *xlo,
+					struct request *rq, loff_t pos)
+{
+	struct bio_vec bvec, b;
+	struct req_iterator iter;
+	struct iov_iter i;
+	struct page *page;
+	ssize_t len;
+	int ret = 0;
+
+	page = alloc_page(GFP_NOIO);
+	if (unlikely(!page))
+		return -ENOMEM;
+
+	rq_for_each_segment(bvec, rq, iter) {
+		loff_t offset = pos;
+
+		b.bv_page = page;
+		b.bv_offset = 0;
+		b.bv_len = bvec.bv_len;
+
+		iov_iter_bvec(&i, READ, &b, 1, b.bv_len);
+		len = vfs_iter_read(xlo->xlo_backing_file, &i, &pos, 0);
+		if (len < 0) {
+			ret = len;
+			goto out_free_page;
+		}
+
+		ret = __raw_file_fmt_do_transfer(xlo, READ, page, 0, bvec.bv_page,
+						 bvec.bv_offset, len, offset >> 9);
+		if (ret)
+			goto out_free_page;
+
+		flush_dcache_page(bvec.bv_page);
+
+		if (len != bvec.bv_len) {
+			struct bio *bio;
+
+			__rq_for_each_bio(bio, rq)
+				zero_fill_bio(bio);
+			break;
+		}
+	}
+
+	ret = 0;
+out_free_page:
+	__free_page(page);
+	return ret;
+}
+
+static int raw_file_fmt_read(struct xloop_file_fmt *xlo_fmt,
+			     struct request *rq)
+{
+	struct bio_vec bvec;
+	struct req_iterator iter;
+	struct iov_iter i;
+	ssize_t len;
+	struct xloop_device *xlo;
+	loff_t pos;
+
+	xlo = xloop_file_fmt_get_xlo(xlo_fmt);
+	pos = __raw_file_fmt_rq_get_pos(xlo_fmt, rq);
+
+	if (xlo->transfer)
+		return __raw_file_fmt_read_transfer(xlo, rq, pos);
+
+	rq_for_each_segment(bvec, rq, iter) {
+		iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
+		len = vfs_iter_read(xlo->xlo_backing_file, &i, &pos, 0);
+		if (len < 0)
+			return len;
+
+		flush_dcache_page(bvec.bv_page);
+
+		if (len != bvec.bv_len) {
+			struct bio *bio;
+
+			__rq_for_each_bio(bio, rq)
+				zero_fill_bio(bio);
+			break;
+		}
+		cond_resched();
+	}
+
+	return 0;
+}
+
+static void __raw_file_fmt_rw_aio_do_completion(struct xloop_cmd *cmd)
+{
+	struct request *rq = blk_mq_rq_from_pdu(cmd);
+
+	if (!atomic_dec_and_test(&cmd->ref))
+		return;
+	kfree(cmd->bvec);
+	cmd->bvec = NULL;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 9, 0)
+	if (likely(!blk_should_fake_timeout(rq->q)))
+		blk_mq_complete_request(rq);
+#else
+	blk_mq_complete_request(rq);
+#endif
+}
+
+static void __raw_file_fmt_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
+{
+	struct xloop_cmd *cmd = container_of(iocb, struct xloop_cmd, iocb);
+
+	if (cmd->css)
+		css_put(cmd->css);
+	cmd->ret = ret;
+	__raw_file_fmt_rw_aio_do_completion(cmd);
+}
+
+static int __raw_file_fmt_rw_aio(struct xloop_device *xlo,
+				 struct xloop_cmd *cmd, loff_t pos, bool rw)
+{
+	struct iov_iter iter;
+	struct req_iterator rq_iter;
+	struct bio_vec *bvec;
+	struct request *rq = blk_mq_rq_from_pdu(cmd);
+	struct bio *bio = rq->bio;
+	struct file *file = xlo->xlo_backing_file;
+	struct bio_vec tmp;
+	unsigned int offset;
+	int nr_bvec = 0;
+	int ret;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0)
+	rq_for_each_bvec(tmp, rq, rq_iter)
+		nr_bvec++;
+#endif
+
+	if (rq->bio != rq->biotail) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0)
+		__rq_for_each_bio(bio, rq)
+			nr_bvec += bio_segments(bio);
+#endif
+		bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
+				     GFP_NOIO);
+		if (!bvec)
+			return -EIO;
+		cmd->bvec = bvec;
+
+		/*
+		 * The bios of the request may be started from the middle of
+		 * the 'bvec' because of bio splitting, so we can't directly
+		 * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec
+		 * API will take care of all details for us.
+		 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0)
+		rq_for_each_bvec(tmp, rq, rq_iter) {
+#else
+		rq_for_each_segment(tmp, rq, rq_iter) {
+#endif
+			*bvec = tmp;
+			bvec++;
+		}
+		bvec = cmd->bvec;
+		offset = 0;
+	} else {
+		/*
+		 * Same here, this bio may be started from the middle of the
+		 * 'bvec' because of bio splitting, so offset from the bvec
+		 * must be passed to iov iterator
+		 */
+		offset = bio->bi_iter.bi_bvec_done;
+		bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0)
+		nr_bvec = bio_segments(bio);
+#endif
+	}
+	atomic_set(&cmd->ref, 2);
+
+	iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq));
+	iter.iov_offset = offset;
+
+	cmd->iocb.ki_pos = pos;
+	cmd->iocb.ki_filp = file;
+	cmd->iocb.ki_complete = __raw_file_fmt_rw_aio_complete;
+	cmd->iocb.ki_flags = IOCB_DIRECT;
+	cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+	if (cmd->css)
+		kthread_associate_blkcg(cmd->css);
+
+	if (rw == WRITE)
+		ret = call_write_iter(file, &cmd->iocb, &iter);
+	else
+		ret = call_read_iter(file, &cmd->iocb, &iter);
+
+	__raw_file_fmt_rw_aio_do_completion(cmd);
+	kthread_associate_blkcg(NULL);
+
+	if (ret != -EIOCBQUEUED)
+		cmd->iocb.ki_complete(&cmd->iocb, ret, 0);
+	return 0;
+}
+
+static int raw_file_fmt_read_aio(struct xloop_file_fmt *xlo_fmt,
+				 struct request *rq)
+{
+	struct xloop_device *xlo = xloop_file_fmt_get_xlo(xlo_fmt);
+	struct xloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
+	loff_t pos = __raw_file_fmt_rq_get_pos(xlo_fmt, rq);
+
+	return __raw_file_fmt_rw_aio(xlo, cmd, pos, READ);
+}
+
+static int __raw_file_fmt_write_bvec(struct file *file,
+				     struct bio_vec *bvec,
+				     loff_t *ppos)
+{
+	struct iov_iter i;
+	ssize_t bw;
+
+	iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);
+
+	file_start_write(file);
+	bw = vfs_iter_write(file, &i, ppos, 0);
+	file_end_write(file);
+
+	if (likely(bw == bvec->bv_len))
+		return 0;
+
+	pr_err_ratelimited("write error at byte offset %llu, length %i.\n",
+			   (unsigned long long)*ppos, bvec->bv_len);
+	if (bw >= 0)
+		bw = -EIO;
+	return bw;
+}
+
+/*
+ * This is the slow, transforming version that needs to double buffer the
+ * data as it cannot do the transformations in place without having direct
+ * access to the destination pages of the backing file.
+ */
+static int __raw_file_fmt_write_transfer(struct xloop_device *xlo,
+					 struct request *rq, loff_t pos)
+{
+	struct bio_vec bvec, b;
+	struct req_iterator iter;
+	struct page *page;
+	int ret = 0;
+
+	page = alloc_page(GFP_NOIO);
+	if (unlikely(!page))
+		return -ENOMEM;
+
+	rq_for_each_segment(bvec, rq, iter) {
+		ret = __raw_file_fmt_do_transfer(xlo, WRITE, page, 0, bvec.bv_page,
+						 bvec.bv_offset, bvec.bv_len, pos >> 9);
+		if (unlikely(ret))
+			break;
+
+		b.bv_page = page;
+		b.bv_offset = 0;
+		b.bv_len = bvec.bv_len;
+		ret = __raw_file_fmt_write_bvec(xlo->xlo_backing_file, &b, &pos);
+		if (ret < 0)
+			break;
+	}
+
+	__free_page(page);
+	return ret;
+}
+
+static int raw_file_fmt_write(struct xloop_file_fmt *xlo_fmt,
+			      struct request *rq)
+{
+	struct bio_vec bvec;
+	struct req_iterator iter;
+	int ret = 0;
+	struct xloop_device *xlo;
+	loff_t pos;
+
+	xlo = xloop_file_fmt_get_xlo(xlo_fmt);
+	pos = __raw_file_fmt_rq_get_pos(xlo_fmt, rq);
+
+	if (xlo->transfer)
+		return __raw_file_fmt_write_transfer(xlo, rq, pos);
+
+	rq_for_each_segment(bvec, rq, iter) {
+		ret = __raw_file_fmt_write_bvec(xlo->xlo_backing_file, &bvec, &pos);
+		if (ret < 0)
+			break;
+		cond_resched();
+	}
+
+	return ret;
+}
+
+static int raw_file_fmt_write_aio(struct xloop_file_fmt *xlo_fmt,
+				  struct request *rq)
+{
+	struct xloop_device *xlo = xloop_file_fmt_get_xlo(xlo_fmt);
+	struct xloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
+	loff_t pos = __raw_file_fmt_rq_get_pos(xlo_fmt, rq);
+
+	return __raw_file_fmt_rw_aio(xlo, cmd, pos, WRITE);
+}
+
+static int __raw_file_fmt_fallocate(struct xloop_device *xlo,
+				    struct request *rq, loff_t pos, int mode)
+{
+	/*
+	 * We use fallocate to manipulate the space mappings used by the image
+	 * a.k.a. discard/zerorange. However we do not support this if
+	 * encryption is enabled, because it may give an attacker useful
+	 * information.
+	 */
+	struct file *file = xlo->xlo_backing_file;
+	struct request_queue *q = xlo->xlo_queue;
+	int ret;
+
+	mode |= FALLOC_FL_KEEP_SIZE;
+
+	if (!blk_queue_discard(q)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq));
+	if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP))
+		ret = -EIO;
+out:
+	return ret;
+}
+
+static int raw_file_fmt_write_zeros(struct xloop_file_fmt *xlo_fmt,
+				    struct request *rq)
+{
+	loff_t pos = __raw_file_fmt_rq_get_pos(xlo_fmt, rq);
+	struct xloop_device *xlo = xloop_file_fmt_get_xlo(xlo_fmt);
+
+	/*
+	 * If the caller doesn't want deallocation, call zeroout to
+	 * write zeroes the range. Otherwise, punch them out.
+	 */
+	return __raw_file_fmt_fallocate(xlo, rq, pos,
+					(rq->cmd_flags & REQ_NOUNMAP) ?
+					FALLOC_FL_ZERO_RANGE :
+					FALLOC_FL_PUNCH_HOLE);
+}
+
+static int raw_file_fmt_discard(struct xloop_file_fmt *xlo_fmt,
+				struct request *rq)
+{
+	loff_t pos = __raw_file_fmt_rq_get_pos(xlo_fmt, rq);
+	struct xloop_device *xlo = xloop_file_fmt_get_xlo(xlo_fmt);
+
+	return __raw_file_fmt_fallocate(xlo, rq, pos, FALLOC_FL_PUNCH_HOLE);
+}
+
+static int raw_file_fmt_flush(struct xloop_file_fmt *xlo_fmt)
+{
+	struct xloop_device *xlo = xloop_file_fmt_get_xlo(xlo_fmt);
+	struct file *file = xlo->xlo_backing_file;
+	int ret = vfs_fsync(file, 0);
+
+	if (unlikely(ret && ret != -EINVAL))
+		ret = -EIO;
+
+	return ret;
+}
+
+static loff_t raw_file_fmt_sector_size(struct xloop_file_fmt *xlo_fmt,
+				       struct file *file, loff_t offset, loff_t sizelimit)
+{
+	loff_t xloopsize;
+
+	/* Compute xloopsize in bytes */
+	xloopsize = i_size_read(file->f_mapping->host);
+	if (offset > 0)
+		xloopsize -= offset;
+	/* offset is beyond i_size, weird but possible */
+	if (xloopsize < 0)
+		return 0;
+
+	if (sizelimit > 0 && sizelimit < xloopsize)
+		xloopsize = sizelimit;
+	/*
+	 * Unfortunately, if we want to do I/O on the device,
+	 * the number of 512-byte sectors has to fit into a sector_t.
+	 */
+	return xloopsize >> 9;
+}
+
+static struct xloop_file_fmt_ops raw_file_fmt_ops = {
+	.init = NULL,
+	.exit = NULL,
+	.read = raw_file_fmt_read,
+	.write = raw_file_fmt_write,
+	.read_aio = raw_file_fmt_read_aio,
+	.write_aio = raw_file_fmt_write_aio,
+	.write_zeros = raw_file_fmt_write_zeros,
+	.discard = raw_file_fmt_discard,
+	.flush = raw_file_fmt_flush,
+	.sector_size = raw_file_fmt_sector_size,
+};
+
+static struct xloop_file_fmt_driver raw_file_fmt_driver = {
+	.name = "RAW",
+	.file_fmt_type = XLO_FILE_FMT_RAW,
+	.ops = &raw_file_fmt_ops,
+	.owner = THIS_MODULE,
+};
+
+static int __init xloop_file_fmt_raw_init(void)
+{
+	pr_info("init xloop device RAW file format driver\n");
+	return xloop_file_fmt_register_driver(&raw_file_fmt_driver);
+}
+
+static void __exit xloop_file_fmt_raw_exit(void)
+{
+	pr_info("exit xloop device RAW file format driver\n");
+	xloop_file_fmt_unregister_driver(&raw_file_fmt_driver);
+}
+
+module_init(xloop_file_fmt_raw_init);
+module_exit(xloop_file_fmt_raw_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Manuel Bentele");
+MODULE_DESCRIPTION("xloop device RAW file format driver");
+MODULE_SOFTDEP("pre: xloop");
+MODULE_VERSION(__stringify(VERSION));
--