/* SPDX-License-Identifier: GPL-2.0 */
/*
 * loop_file_fmt_raw.c
 *
 * RAW file format driver for the loop device module.
 *
 * Copyright (C) 2019 Manuel Bentele
 */

/* header targets were lost in extraction; list inferred from the symbols used below */
#include <linux/blkdev.h>
#include <linux/blk-cgroup.h>
#include <linux/blk-mq.h>
#include <linux/falloc.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/ioprio.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/sched.h>
#include <linux/uio.h>

#include "loop_file_fmt.h"

static inline loff_t __raw_file_fmt_rq_get_pos(struct loop_file_fmt *lo_fmt,
					       struct request *rq)
{
	struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt);

	/* convert the request's 512-byte sector position into a byte offset */
	return ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset;
}

/* transfer function for DEPRECATED cryptoloop support */
static inline int __raw_file_fmt_do_transfer(struct loop_file_fmt *lo_fmt,
					     int cmd,
					     struct page *rpage, unsigned roffs,
					     struct page *lpage, unsigned loffs,
					     int size, sector_t rblock)
{
	struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt);
	int ret;

	ret = lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
	if (likely(!ret))
		return 0;

	printk_ratelimited(KERN_ERR
		"loop: Transfer error at byte offset %llu, length %i.\n",
		(unsigned long long)rblock << 9, size);
	return ret;
}

static int raw_file_fmt_read_transfer(struct loop_file_fmt *lo_fmt,
				      struct request *rq)
{
	struct bio_vec bvec, b;
	struct req_iterator iter;
	struct iov_iter i;
	struct page *page;
	struct loop_device *lo;
	ssize_t len;
	int ret = 0;
	loff_t pos;

	page = alloc_page(GFP_NOIO);
	if (unlikely(!page))
		return -ENOMEM;

	lo = loop_file_fmt_get_lo(lo_fmt);
	pos = __raw_file_fmt_rq_get_pos(lo_fmt, rq);

	rq_for_each_segment(bvec, rq, iter) {
		loff_t offset = pos;

		/* read the raw data into the scratch page ... */
		b.bv_page = page;
		b.bv_offset = 0;
		b.bv_len = bvec.bv_len;

		iov_iter_bvec(&i, READ, &b, 1, b.bv_len);
		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
		if (len < 0) {
			ret = len;
			goto out_free_page;
		}

		/* ... and run the transfer function into the request's page */
		ret = __raw_file_fmt_do_transfer(lo_fmt, READ, page, 0,
			bvec.bv_page, bvec.bv_offset, len, offset >> 9);
		if (ret)
			goto out_free_page;

		flush_dcache_page(bvec.bv_page);

		if (len != bvec.bv_len) {
			struct bio *bio;

			/* short read: zero-fill the remainder of the request */
			__rq_for_each_bio(bio, rq)
				zero_fill_bio(bio);
			break;
		}
	}

	ret = 0;
out_free_page:
	__free_page(page);
	return ret;
}

static int raw_file_fmt_read(struct loop_file_fmt *lo_fmt,
			     struct request *rq)
{
	struct bio_vec bvec;
	struct req_iterator iter;
	struct iov_iter i;
	struct loop_device *lo;
	ssize_t len;
	loff_t pos;

	lo = loop_file_fmt_get_lo(lo_fmt);

	/* take the slow path if a (deprecated) cryptoloop transfer is set */
	if (lo->transfer)
		return raw_file_fmt_read_transfer(lo_fmt, rq);

	pos = __raw_file_fmt_rq_get_pos(lo_fmt, rq);

	rq_for_each_segment(bvec, rq, iter) {
		iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
		if (len < 0)
			return len;

		flush_dcache_page(bvec.bv_page);

		if (len != bvec.bv_len) {
			struct bio *bio;

			__rq_for_each_bio(bio, rq)
				zero_fill_bio(bio);
			break;
		}
		cond_resched();
	}

	return 0;
}

static void __raw_file_fmt_rw_aio_do_completion(struct loop_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);

	if (!atomic_dec_and_test(&cmd->ref))
		return;
	kfree(cmd->bvec);
	cmd->bvec = NULL;
	blk_mq_complete_request(rq);
}

static void __raw_file_fmt_rw_aio_complete(struct kiocb *iocb, long ret,
					   long ret2)
{
	struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);

	if (cmd->css)
		css_put(cmd->css);
	cmd->ret = ret;
	__raw_file_fmt_rw_aio_do_completion(cmd);
}
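
/*
 * Asynchronous direct I/O path. Completion is reference counted:
 * cmd->ref is set to 2 in __raw_file_fmt_rw_aio() below, one reference
 * for the submission context and one for the in-flight kiocb, so the
 * request is only completed (and cmd->bvec freed) once both the
 * submitter and the ->ki_complete() callback have dropped their
 * reference, regardless of which of the two runs last.
 */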
static int __raw_file_fmt_rw_aio(struct loop_file_fmt *lo_fmt,
				 struct request *rq, bool rw)
{
	struct iov_iter iter;
	struct req_iterator rq_iter;
	struct bio_vec *bvec;
	struct bio *bio = rq->bio;
	struct file *file;
	struct bio_vec tmp;
	struct loop_device *lo;
	struct loop_cmd *cmd;
	unsigned int offset;
	int nr_bvec = 0;
	int ret;
	loff_t pos;

	lo = loop_file_fmt_get_lo(lo_fmt);
	file = lo->lo_backing_file;
	cmd = blk_mq_rq_to_pdu(rq);
	pos = __raw_file_fmt_rq_get_pos(lo_fmt, rq);

	rq_for_each_bvec(tmp, rq, rq_iter)
		nr_bvec++;

	if (rq->bio != rq->biotail) {
		bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
				     GFP_NOIO);
		if (!bvec)
			return -EIO;
		cmd->bvec = bvec;

		/*
		 * The bios of the request may be started from the middle of
		 * the 'bvec' because of bio splitting, so we can't directly
		 * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec
		 * API will take care of all details for us.
		 */
		rq_for_each_bvec(tmp, rq, rq_iter) {
			*bvec = tmp;
			bvec++;
		}
		bvec = cmd->bvec;
		offset = 0;
	} else {
		/*
		 * Same here, this bio may be started from the middle of the
		 * 'bvec' because of bio splitting, so offset from the bvec
		 * must be passed to iov iterator
		 */
		offset = bio->bi_iter.bi_bvec_done;
		bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
	}
	atomic_set(&cmd->ref, 2);

	iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq));
	iter.iov_offset = offset;

	cmd->iocb.ki_pos = pos;
	cmd->iocb.ki_filp = file;
	cmd->iocb.ki_complete = __raw_file_fmt_rw_aio_complete;
	cmd->iocb.ki_flags = IOCB_DIRECT;
	cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
	if (cmd->css)
		kthread_associate_blkcg(cmd->css);

	if (rw == WRITE)
		ret = call_write_iter(file, &cmd->iocb, &iter);
	else
		ret = call_read_iter(file, &cmd->iocb, &iter);

	__raw_file_fmt_rw_aio_do_completion(cmd);
	kthread_associate_blkcg(NULL);

	if (ret != -EIOCBQUEUED)
		cmd->iocb.ki_complete(&cmd->iocb, ret, 0);
	return 0;
}

static int raw_file_fmt_read_aio(struct loop_file_fmt *lo_fmt,
				 struct request *rq)
{
	return __raw_file_fmt_rw_aio(lo_fmt, rq, READ);
}

static int __raw_file_fmt_write_bvec(struct file *file,
				     struct bio_vec *bvec,
				     loff_t *ppos)
{
	struct iov_iter i;
	ssize_t bw;

	iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);

	file_start_write(file);
	bw = vfs_iter_write(file, &i, ppos, 0);
	file_end_write(file);

	if (likely(bw == bvec->bv_len))
		return 0;

	printk_ratelimited(KERN_ERR
		"loop_file_fmt_raw: Write error at byte offset %llu, length %i.\n",
		(unsigned long long)*ppos, bvec->bv_len);
	if (bw >= 0)
		bw = -EIO;
	return bw;
}
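
/*
 * Write-side counterpart of raw_file_fmt_read_transfer() for the
 * DEPRECATED cryptoloop support: each segment is first run through
 * lo->transfer into the scratch page, and the scratch page is then
 * written to the backing file at the current position.
 */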
static int raw_file_fmt_write_transfer(struct loop_file_fmt *lo_fmt,
				       struct request *rq)
{
	struct bio_vec bvec, b;
	struct req_iterator iter;
	struct page *page;
	struct loop_device *lo;
	int ret = 0;
	loff_t pos;

	lo = loop_file_fmt_get_lo(lo_fmt);
	pos = __raw_file_fmt_rq_get_pos(lo_fmt, rq);

	page = alloc_page(GFP_NOIO);
	if (unlikely(!page))
		return -ENOMEM;

	rq_for_each_segment(bvec, rq, iter) {
		ret = __raw_file_fmt_do_transfer(lo_fmt, WRITE, page, 0,
			bvec.bv_page, bvec.bv_offset, bvec.bv_len, pos >> 9);
		if (unlikely(ret))
			break;

		b.bv_page = page;
		b.bv_offset = 0;
		b.bv_len = bvec.bv_len;
		ret = __raw_file_fmt_write_bvec(lo->lo_backing_file, &b,
						&pos);
		if (ret < 0)
			break;
	}

	__free_page(page);
	return ret;
}

static int raw_file_fmt_write(struct loop_file_fmt *lo_fmt,
			      struct request *rq)
{
	struct bio_vec bvec;
	struct req_iterator iter;
	struct loop_device *lo;
	int ret = 0;
	loff_t pos;

	lo = loop_file_fmt_get_lo(lo_fmt);

	/* take the slow path if a (deprecated) cryptoloop transfer is set */
	if (lo->transfer)
		return raw_file_fmt_write_transfer(lo_fmt, rq);

	pos = __raw_file_fmt_rq_get_pos(lo_fmt, rq);

	rq_for_each_segment(bvec, rq, iter) {
		ret = __raw_file_fmt_write_bvec(lo->lo_backing_file, &bvec,
						&pos);
		if (ret < 0)
			break;
		cond_resched();
	}

	return ret;
}

static int raw_file_fmt_write_aio(struct loop_file_fmt *lo_fmt,
				  struct request *rq)
{
	return __raw_file_fmt_rw_aio(lo_fmt, rq, WRITE);
}

static int raw_file_fmt_discard(struct loop_file_fmt *lo_fmt,
				struct request *rq)
{
	loff_t pos = __raw_file_fmt_rq_get_pos(lo_fmt, rq);
	struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt);

	/*
	 * We use punch hole to reclaim the free space used by the
	 * image a.k.a. discard. However we do not support discard if
	 * encryption is enabled, because it may give an attacker
	 * useful information.
	 */
	struct file *file = lo->lo_backing_file;
	int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
	int ret;

	if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq));
	if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP))
		ret = -EIO;
out:
	return ret;
}

static int raw_file_fmt_flush(struct loop_file_fmt *lo_fmt)
{
	struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt);
	struct file *file = lo->lo_backing_file;
	int ret = vfs_fsync(file, 0);

	if (unlikely(ret && ret != -EINVAL))
		ret = -EIO;

	return ret;
}

static loff_t raw_file_fmt_sector_size(struct loop_file_fmt *lo_fmt)
{
	struct loop_device *lo = loop_file_fmt_get_lo(lo_fmt);
	loff_t loopsize;

	/* Compute loopsize in bytes */
	loopsize = i_size_read(lo->lo_backing_file->f_mapping->host);
	if (lo->lo_offset > 0)
		loopsize -= lo->lo_offset;

	/* offset is beyond i_size, weird but possible */
	if (loopsize < 0)
		return 0;

	if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
		loopsize = lo->lo_sizelimit;

	/*
	 * Unfortunately, if we want to do I/O on the device,
	 * the number of 512-byte sectors has to fit into a sector_t.
	 */
	return loopsize >> 9;
}

static struct loop_file_fmt_ops raw_file_fmt_ops = {
	.init = NULL,
	.exit = NULL,
	.read = raw_file_fmt_read,
	.write = raw_file_fmt_write,
	.read_aio = raw_file_fmt_read_aio,
	.write_aio = raw_file_fmt_write_aio,
	.discard = raw_file_fmt_discard,
	.flush = raw_file_fmt_flush,
	.sector_size = raw_file_fmt_sector_size
};

static struct loop_file_fmt_driver raw_file_fmt_driver = {
	.name = "RAW",
	.file_fmt_type = LO_FILE_FMT_RAW,
	.ops = &raw_file_fmt_ops,
	.owner = THIS_MODULE
};

static int __init loop_file_fmt_raw_init(void)
{
	printk(KERN_INFO
		"loop_file_fmt_raw: init loop device RAW file format driver\n");
	return loop_file_fmt_register_driver(&raw_file_fmt_driver);
}

static void __exit loop_file_fmt_raw_exit(void)
{
	printk(KERN_INFO
		"loop_file_fmt_raw: exit loop device RAW file format driver\n");
	loop_file_fmt_unregister_driver(&raw_file_fmt_driver);
}

module_init(loop_file_fmt_raw_init);
module_exit(loop_file_fmt_raw_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Manuel Bentele");
MODULE_DESCRIPTION("Loop device RAW file format driver");
/* make sure the core loop module is loaded before this driver */
MODULE_SOFTDEP("pre: loop");