diff options
Diffstat (limited to 'drivers/md')
51 files changed, 664 insertions, 432 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index ca4abe1ccd8d..cacbe2dbd5c3 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -68,6 +68,8 @@ #include <linux/random.h> #include <trace/events/bcache.h> +#define MAX_OPEN_BUCKETS 128 + /* Bucket heap / gen */ uint8_t bch_inc_gen(struct cache *ca, struct bucket *b) @@ -671,7 +673,7 @@ int bch_open_buckets_alloc(struct cache_set *c) spin_lock_init(&c->data_bucket_lock); - for (i = 0; i < 6; i++) { + for (i = 0; i < MAX_OPEN_BUCKETS; i++) { struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL); if (!b) return -ENOMEM; diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index dee542fff68e..2ed9bd231d84 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -333,6 +333,7 @@ struct cached_dev { /* Limit number of writeback bios in flight */ struct semaphore in_flight; struct task_struct *writeback_thread; + struct workqueue_struct *writeback_write_wq; struct keybuf writeback_keys; diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index 864e673aec39..7d5286b05036 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -70,21 +70,10 @@ void __closure_wake_up(struct closure_waitlist *wait_list) list = llist_del_all(&wait_list->list); /* We first reverse the list to preserve FIFO ordering and fairness */ - - while (list) { - struct llist_node *t = list; - list = llist_next(list); - - t->next = reverse; - reverse = t; - } + reverse = llist_reverse_order(list); /* Then do the wakeups */ - - while (reverse) { - cl = container_of(reverse, struct closure, list); - reverse = llist_next(reverse); - + llist_for_each_entry(cl, reverse, list) { closure_set_waiting(cl, 0); closure_sub(cl, CLOSURE_WAITING + 1); } diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 1ec84ca81146..295b7e43f92c 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -312,8 +312,6 @@ static inline void closure_wake_up(struct closure_waitlist *list) * been dropped with closure_put()), it will resume execution at @fn running out * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). * - * NOTE: This macro expands to a return in the calling function! - * * This is because after calling continue_at() you no longer have a ref on @cl, * and whatever @cl owns may be freed out from under you - a running closure fn * has a ref on its own closure which continue_at() drops. @@ -340,8 +338,6 @@ do { \ * Causes @fn to be executed out of @cl, in @wq context (or called directly if * @wq is NULL). * - * NOTE: like continue_at(), this macro expands to a return in the caller! - * * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, * thus it's not safe to touch anything protected by @cl after a * continue_at_nobarrier(). diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 35a5a7210e51..61076eda2e6d 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -49,7 +49,7 @@ void bch_btree_verify(struct btree *b) v->keys.ops = b->keys.ops; bio = bch_bbio_alloc(b->c); - bio->bi_bdev = PTR_CACHE(b->c, &b->key, 0)->bdev; + bio_set_dev(bio, PTR_CACHE(b->c, &b->key, 0)->bdev); bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0); bio->bi_iter.bi_size = KEY_SIZE(&v->key) << 9; bio->bi_opf = REQ_OP_READ | REQ_META; diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 6a9b85095e7b..7e871bdc0097 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -34,7 +34,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c) struct bbio *b = container_of(bio, struct bbio, bio); bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0); - bio->bi_bdev = PTR_CACHE(c, &b->key, 0)->bdev; + bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev); b->submit_time_us = local_clock_us(); closure_bio_submit(bio, bio->bi_private); diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 0352d05e495c..7e1d1c3ba33a 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -53,7 +53,7 @@ reread: left = ca->sb.bucket_size - offset; bio_reset(bio); bio->bi_iter.bi_sector = bucket + offset; - bio->bi_bdev = ca->bdev; + bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = len << 9; bio->bi_end_io = journal_read_endio; @@ -452,7 +452,7 @@ static void do_journal_discard(struct cache *ca) bio_set_op_attrs(bio, REQ_OP_DISCARD, 0); bio->bi_iter.bi_sector = bucket_to_sector(ca->set, ca->sb.d[ja->discard_idx]); - bio->bi_bdev = ca->bdev; + bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = bucket_bytes(ca); bio->bi_end_io = journal_discard_endio; @@ -623,7 +623,7 @@ static void journal_write_unlocked(struct closure *cl) bio_reset(bio); bio->bi_iter.bi_sector = PTR_OFFSET(k, i); - bio->bi_bdev = ca->bdev; + bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = sectors << 9; bio->bi_end_io = journal_write_endio; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 019b3df9f1c6..681b4f12b05a 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -196,12 +196,12 @@ static void bch_data_insert_start(struct closure *cl) struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); struct bio *bio = op->bio, *n; - if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) - wake_up_gc(op->c); - if (op->bypass) return bch_data_invalidate(cl); + if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) + wake_up_gc(op->c); + /* * Journal writes are marked REQ_PREFLUSH; if the original write was a * flush, it'll wait on the journal write. @@ -400,12 +400,6 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) if (!congested && !dc->sequential_cutoff) goto rescale; - if (!congested && - mode == CACHE_MODE_WRITEBACK && - op_is_write(bio->bi_opf) && - op_is_sync(bio->bi_opf)) - goto rescale; - spin_lock(&dc->io_lock); hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash) @@ -607,7 +601,8 @@ static void request_endio(struct bio *bio) static void bio_complete(struct search *s) { if (s->orig_bio) { - generic_end_io_acct(bio_data_dir(s->orig_bio), + struct request_queue *q = s->orig_bio->bi_disk->queue; + generic_end_io_acct(q, bio_data_dir(s->orig_bio), &s->d->disk->part0, s->start_time); trace_bcache_request_end(s->d, s->orig_bio); @@ -734,7 +729,7 @@ static void cached_dev_read_done(struct closure *cl) if (s->iop.bio) { bio_reset(s->iop.bio); s->iop.bio->bi_iter.bi_sector = s->cache_miss->bi_iter.bi_sector; - s->iop.bio->bi_bdev = s->cache_miss->bi_bdev; + bio_copy_dev(s->iop.bio, s->cache_miss); s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9; bch_bio_map(s->iop.bio, NULL); @@ -793,7 +788,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, !(bio->bi_opf & REQ_META) && s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA) reada = min_t(sector_t, dc->readahead >> 9, - bdev_sectors(bio->bi_bdev) - bio_end_sector(bio)); + get_capacity(bio->bi_disk) - bio_end_sector(bio)); s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada); @@ -819,7 +814,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, goto out_submit; cache_bio->bi_iter.bi_sector = miss->bi_iter.bi_sector; - cache_bio->bi_bdev = miss->bi_bdev; + bio_copy_dev(cache_bio, miss); cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9; cache_bio->bi_end_io = request_endio; @@ -918,7 +913,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0, dc->disk.bio_split); - flush->bi_bdev = bio->bi_bdev; + bio_copy_dev(flush, bio); flush->bi_end_io = request_endio; flush->bi_private = cl; flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; @@ -955,13 +950,13 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio) { struct search *s; - struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; + struct bcache_device *d = bio->bi_disk->private_data; struct cached_dev *dc = container_of(d, struct cached_dev, disk); int rw = bio_data_dir(bio); - generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0); + generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); - bio->bi_bdev = dc->bdev; + bio_set_dev(bio, dc->bdev); bio->bi_iter.bi_sector += dc->sb.data_offset; if (cached_dev_get(dc)) { @@ -1071,10 +1066,10 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q, { struct search *s; struct closure *cl; - struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; + struct bcache_device *d = bio->bi_disk->private_data; int rw = bio_data_dir(bio); - generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0); + generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); s = search_alloc(bio, d); cl = &s->cl; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 8352fad765f6..fc0a31b13ac4 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -257,7 +257,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) closure_init(cl, parent); bio_reset(bio); - bio->bi_bdev = dc->bdev; + bio_set_dev(bio, dc->bdev); bio->bi_end_io = write_bdev_super_endio; bio->bi_private = dc; @@ -303,7 +303,7 @@ void bcache_write_super(struct cache_set *c) SET_CACHE_SYNC(&ca->sb, CACHE_SYNC(&c->sb)); bio_reset(bio); - bio->bi_bdev = ca->bdev; + bio_set_dev(bio, ca->bdev); bio->bi_end_io = write_super_endio; bio->bi_private = ca; @@ -508,7 +508,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op, closure_init_stack(cl); bio->bi_iter.bi_sector = bucket * ca->sb.bucket_size; - bio->bi_bdev = ca->bdev; + bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = bucket_bytes(ca); bio->bi_end_io = prio_endio; @@ -1026,7 +1026,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) } if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - bch_sectors_dirty_init(dc); + bch_sectors_dirty_init(&dc->disk); atomic_set(&dc->has_dirty, 1); atomic_inc(&dc->count); bch_writeback_queue(dc); @@ -1059,6 +1059,8 @@ static void cached_dev_free(struct closure *cl) cancel_delayed_work_sync(&dc->writeback_rate_update); if (!IS_ERR_OR_NULL(dc->writeback_thread)) kthread_stop(dc->writeback_thread); + if (dc->writeback_write_wq) + destroy_workqueue(dc->writeback_write_wq); mutex_lock(&bch_register_lock); @@ -1228,6 +1230,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) goto err; bcache_device_attach(d, c, u - c->uuids); + bch_sectors_dirty_init(d); bch_flash_dev_request_init(d); add_disk(d->disk); @@ -1374,9 +1377,6 @@ static void cache_set_flush(struct closure *cl) struct btree *b; unsigned i; - if (!c) - closure_return(cl); - bch_cache_accounting_destroy(&c->accounting); kobject_put(&c->internal); @@ -1964,6 +1964,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, else err = "device busy"; mutex_unlock(&bch_register_lock); + if (!IS_ERR(bdev)) + bdput(bdev); if (attr == &ksysfs_register_quiet) goto out; } diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index f90f13616980..104c57cd666c 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -192,7 +192,7 @@ STORE(__cached_dev) { struct cached_dev *dc = container_of(kobj, struct cached_dev, disk.kobj); - unsigned v = size; + ssize_t v = size; struct cache_set *c; struct kobj_uevent_env *env; @@ -227,7 +227,7 @@ STORE(__cached_dev) bch_cached_dev_run(dc); if (attr == &sysfs_cache_mode) { - ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1); + v = bch_read_string_list(buf, bch_cache_modes + 1); if (v < 0) return v; @@ -615,8 +615,21 @@ STORE(__bch_cache_set) bch_cache_accounting_clear(&c->accounting); } - if (attr == &sysfs_trigger_gc) + if (attr == &sysfs_trigger_gc) { + /* + * Garbage collection thread only works when sectors_to_gc < 0, + * when users write to sysfs entry trigger_gc, most of time + * they want to forcibly triger gargage collection. Here -1 is + * set to c->sectors_to_gc, to make gc_should_run() give a + * chance to permit gc thread to run. "give a chance" means + * before going into gc_should_run(), there is still chance + * that c->sectors_to_gc being set to other positive value. So + * writing sysfs entry trigger_gc won't always make sure gc + * thread takes effect. + */ + atomic_set(&c->sectors_to_gc, -1); wake_up_gc(c); + } if (attr == &sysfs_prune_cache) { struct shrink_control sc; diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index 8c3a938f4bf0..176d3c2ef5f5 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -74,24 +74,44 @@ STRTO_H(strtouint, unsigned int) STRTO_H(strtoll, long long) STRTO_H(strtoull, unsigned long long) +/** + * bch_hprint() - formats @v to human readable string for sysfs. + * + * @v - signed 64 bit integer + * @buf - the (at least 8 byte) buffer to format the result into. + * + * Returns the number of bytes used by format. + */ ssize_t bch_hprint(char *buf, int64_t v) { static const char units[] = "?kMGTPEZY"; - char dec[4] = ""; - int u, t = 0; - - for (u = 0; v >= 1024 || v <= -1024; u++) { - t = v & ~(~0 << 10); - v >>= 10; - } - - if (!u) - return sprintf(buf, "%llu", v); - - if (v < 100 && v > -100) - snprintf(dec, sizeof(dec), ".%i", t / 100); - - return sprintf(buf, "%lli%s%c", v, dec, units[u]); + int u = 0, t; + + uint64_t q; + + if (v < 0) + q = -v; + else + q = v; + + /* For as long as the number is more than 3 digits, but at least + * once, shift right / divide by 1024. Keep the remainder for + * a digit after the decimal point. + */ + do { + u++; + + t = q & ~(~0 << 10); + q >>= 10; + } while (q >= 1000); + + if (v < 0) + /* '-', up to 3 digits, '.', 1 digit, 1 character, null; + * yields 8 bytes. + */ + return sprintf(buf, "-%llu.%i%c", q, t * 10 / 1024, units[u]); + else + return sprintf(buf, "%llu.%i%c", q, t * 10 / 1024, units[u]); } ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[], diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 42c66e76f05e..e663ca082183 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -21,7 +21,8 @@ static void __update_writeback_rate(struct cached_dev *dc) { struct cache_set *c = dc->disk.c; - uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size; + uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size - + bcache_flash_devs_sectors_dirty(c); uint64_t cache_dirty_target = div_u64(cache_sectors * dc->writeback_percent, 100); @@ -181,12 +182,12 @@ static void write_dirty(struct closure *cl) dirty_init(w); bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0); io->bio.bi_iter.bi_sector = KEY_START(&w->key); - io->bio.bi_bdev = io->dc->bdev; + bio_set_dev(&io->bio, io->dc->bdev); io->bio.bi_end_io = dirty_endio; closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty_finish, system_wq); + continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq); } static void read_dirty_endio(struct bio *bio) @@ -206,7 +207,7 @@ static void read_dirty_submit(struct closure *cl) closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty, system_wq); + continue_at(cl, write_dirty, io->dc->writeback_write_wq); } static void read_dirty(struct cached_dev *dc) @@ -250,8 +251,7 @@ static void read_dirty(struct cached_dev *dc) dirty_init(w); bio_set_op_attrs(&io->bio, REQ_OP_READ, 0); io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0); - io->bio.bi_bdev = PTR_CACHE(dc->disk.c, - &w->key, 0)->bdev; + bio_set_dev(&io->bio, PTR_CACHE(dc->disk.c, &w->key, 0)->bdev); io->bio.bi_end_io = read_dirty_endio; if (bio_alloc_pages(&io->bio, GFP_KERNEL)) @@ -482,17 +482,17 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, return MAP_CONTINUE; } -void bch_sectors_dirty_init(struct cached_dev *dc) +void bch_sectors_dirty_init(struct bcache_device *d) { struct sectors_dirty_init op; bch_btree_op_init(&op.op, -1); - op.inode = dc->disk.id; + op.inode = d->id; - bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0), + bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0), sectors_dirty_init_fn, 0); - dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk); + d->sectors_dirty_last = bcache_dev_sectors_dirty(d); } void bch_cached_dev_writeback_init(struct cached_dev *dc) @@ -516,6 +516,11 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) int bch_cached_dev_writeback_start(struct cached_dev *dc) { + dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq", + WQ_MEM_RECLAIM, 0); + if (!dc->writeback_write_wq) + return -ENOMEM; + dc->writeback_thread = kthread_create(bch_writeback_thread, dc, "bcache_writeback"); if (IS_ERR(dc->writeback_thread)) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 629bd1a502fd..e35421d20d2e 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -14,6 +14,25 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) return ret; } +static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c) +{ + uint64_t i, ret = 0; + + mutex_lock(&bch_register_lock); + + for (i = 0; i < c->nr_uuids; i++) { + struct bcache_device *d = c->devices[i]; + + if (!d || !UUID_FLASH_ONLY(&c->uuids[i])) + continue; + ret += bcache_dev_sectors_dirty(d); + } + + mutex_unlock(&bch_register_lock); + + return ret; +} + static inline unsigned offset_to_stripe(struct bcache_device *d, uint64_t offset) { @@ -84,7 +103,7 @@ static inline void bch_writeback_add(struct cached_dev *dc) void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); -void bch_sectors_dirty_init(struct cached_dev *dc); +void bch_sectors_dirty_init(struct bcache_device *); void bch_cached_dev_writeback_init(struct cached_dev *); int bch_cached_dev_writeback_start(struct cached_dev *); diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 40f3cd7eab0f..d2121637b4ab 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -625,7 +625,7 @@ re_read: err = read_sb_page(bitmap->mddev, offset, sb_page, - 0, sizeof(bitmap_super_t)); + 0, PAGE_SIZE); } if (err) return err; @@ -2058,6 +2058,11 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, long pages; struct bitmap_page *new_bp; + if (bitmap->storage.file && !init) { + pr_info("md: cannot resize file-based bitmap\n"); + return -EINVAL; + } + if (chunksize == 0) { /* If there is enough space, leave the chunk size unchanged, * else increase by factor of two until there is enough space. @@ -2118,7 +2123,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, if (store.sb_page && bitmap->storage.sb_page) memcpy(page_address(store.sb_page), page_address(bitmap->storage.sb_page), - sizeof(bitmap_super_t)); + PAGE_SIZE); bitmap_file_unmap(&bitmap->storage); bitmap->storage = store; diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h index dd3646111561..c82578af56a5 100644 --- a/drivers/md/dm-bio-record.h +++ b/drivers/md/dm-bio-record.h @@ -18,21 +18,24 @@ */ struct dm_bio_details { - struct block_device *bi_bdev; + struct gendisk *bi_disk; + u8 bi_partno; unsigned long bi_flags; struct bvec_iter bi_iter; }; static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio) { - bd->bi_bdev = bio->bi_bdev; + bd->bi_disk = bio->bi_disk; + bd->bi_partno = bio->bi_partno; bd->bi_flags = bio->bi_flags; bd->bi_iter = bio->bi_iter; } static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio) { - bio->bi_bdev = bd->bi_bdev; + bio->bi_disk = bd->bi_disk; + bio->bi_partno = bd->bi_partno; bio->bi_flags = bd->bi_flags; bio->bi_iter = bd->bi_iter; } diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 44f4a8ac95bd..d216a8f7bc22 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -64,6 +64,12 @@ #define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT (PAGE_SIZE << (MAX_ORDER - 1)) /* + * Align buffer writes to this boundary. + * Tests show that SSDs have the highest IOPS when using 4k writes. + */ +#define DM_BUFIO_WRITE_ALIGN 4096 + +/* * dm_buffer->list_mode */ #define LIST_CLEAN 0 @@ -149,6 +155,10 @@ struct dm_buffer { blk_status_t write_error; unsigned long state; unsigned long last_accessed; + unsigned dirty_start; + unsigned dirty_end; + unsigned write_start; + unsigned write_end; struct dm_bufio_client *c; struct list_head write_list; struct bio bio; @@ -560,7 +570,7 @@ static void dmio_complete(unsigned long error, void *context) } static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, - unsigned n_sectors, bio_end_io_t *end_io) + unsigned n_sectors, unsigned offset, bio_end_io_t *end_io) { int r; struct dm_io_request io_req = { @@ -578,10 +588,10 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, if (b->data_mode != DATA_MODE_VMALLOC) { io_req.mem.type = DM_IO_KMEM; - io_req.mem.ptr.addr = b->data; + io_req.mem.ptr.addr = (char *)b->data + offset; } else { io_req.mem.type = DM_IO_VMA; - io_req.mem.ptr.vma = b->data; + io_req.mem.ptr.vma = (char *)b->data + offset; } b->bio.bi_end_io = end_io; @@ -609,14 +619,14 @@ static void inline_endio(struct bio *bio) } static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector, - unsigned n_sectors, bio_end_io_t *end_io) + unsigned n_sectors, unsigned offset, bio_end_io_t *end_io) { char *ptr; - int len; + unsigned len; bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS); b->bio.bi_iter.bi_sector = sector; - b->bio.bi_bdev = b->c->bdev; + bio_set_dev(&b->bio, b->c->bdev); b->bio.bi_end_io = inline_endio; /* * Use of .bi_private isn't a problem here because @@ -625,29 +635,20 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector, b->bio.bi_private = end_io; bio_set_op_attrs(&b->bio, rw, 0); - /* - * We assume that if len >= PAGE_SIZE ptr is page-aligned. - * If len < PAGE_SIZE the buffer doesn't cross page boundary. - */ - ptr = b->data; + ptr = (char *)b->data + offset; len = n_sectors << SECTOR_SHIFT; - if (len >= PAGE_SIZE) - BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1)); - else - BUG_ON((unsigned long)ptr & (len - 1)); - do { - if (!bio_add_page(&b->bio, virt_to_page(ptr), - len < PAGE_SIZE ? len : PAGE_SIZE, + unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len); + if (!bio_add_page(&b->bio, virt_to_page(ptr), this_step, offset_in_page(ptr))) { BUG_ON(b->c->block_size <= PAGE_SIZE); - use_dmio(b, rw, sector, n_sectors, end_io); + use_dmio(b, rw, sector, n_sectors, offset, end_io); return; } - len -= PAGE_SIZE; - ptr += PAGE_SIZE; + len -= this_step; + ptr += this_step; } while (len > 0); submit_bio(&b->bio); @@ -657,18 +658,33 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io) { unsigned n_sectors; sector_t sector; - - if (rw == WRITE && b->c->write_callback) - b->c->write_callback(b); + unsigned offset, end; sector = (b->block << b->c->sectors_per_block_bits) + b->c->start; - n_sectors = 1 << b->c->sectors_per_block_bits; + + if (rw != WRITE) { + n_sectors = 1 << b->c->sectors_per_block_bits; + offset = 0; + } else { + if (b->c->write_callback) + b->c->write_callback(b); + offset = b->write_start; + end = b->write_end; + offset &= -DM_BUFIO_WRITE_ALIGN; + end += DM_BUFIO_WRITE_ALIGN - 1; + end &= -DM_BUFIO_WRITE_ALIGN; + if (unlikely(end > b->c->block_size)) + end = b->c->block_size; + + sector += offset >> SECTOR_SHIFT; + n_sectors = (end - offset) >> SECTOR_SHIFT; + } if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) && b->data_mode != DATA_MODE_VMALLOC) - use_inline_bio(b, rw, sector, n_sectors, end_io); + use_inline_bio(b, rw, sector, n_sectors, offset, end_io); else - use_dmio(b, rw, sector, n_sectors, end_io); + use_dmio(b, rw, sector, n_sectors, offset, end_io); } /*---------------------------------------------------------------- @@ -720,6 +736,9 @@ static void __write_dirty_buffer(struct dm_buffer *b, clear_bit(B_DIRTY, &b->state); wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE); + b->write_start = b->dirty_start; + b->write_end = b->dirty_end; + if (!write_list) submit_io(b, WRITE, write_endio); else @@ -1221,19 +1240,37 @@ void dm_bufio_release(struct dm_buffer *b) } EXPORT_SYMBOL_GPL(dm_bufio_release); -void dm_bufio_mark_buffer_dirty(struct dm_buffer *b) +void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b, + unsigned start, unsigned end) { struct dm_bufio_client *c = b->c; + BUG_ON(start >= end); + BUG_ON(end > b->c->block_size); + dm_bufio_lock(c); BUG_ON(test_bit(B_READING, &b->state)); - if (!test_and_set_bit(B_DIRTY, &b->state)) + if (!test_and_set_bit(B_DIRTY, &b->state)) { + b->dirty_start = start; + b->dirty_end = end; __relink_lru(b, LIST_DIRTY); + } else { + if (start < b->dirty_start) + b->dirty_start = start; + if (end > b->dirty_end) + b->dirty_end = end; + } dm_bufio_unlock(c); } +EXPORT_SYMBOL_GPL(dm_bufio_mark_partial_buffer_dirty); + +void dm_bufio_mark_buffer_dirty(struct dm_buffer *b) +{ + dm_bufio_mark_partial_buffer_dirty(b, 0, b->c->block_size); +} EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty); void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c) @@ -1398,6 +1435,8 @@ retry: wait_on_bit_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE); set_bit(B_DIRTY, &b->state); + b->dirty_start = 0; + b->dirty_end = c->block_size; __unlink_buffer(b); __link_buffer(b, new_block, LIST_DIRTY); } else { diff --git a/drivers/md/dm-bufio.h b/drivers/md/dm-bufio.h index b6d8f53ec15b..be732d3f8611 100644 --- a/drivers/md/dm-bufio.h +++ b/drivers/md/dm-bufio.h @@ -94,6 +94,15 @@ void dm_bufio_release(struct dm_buffer *b); void dm_bufio_mark_buffer_dirty(struct dm_buffer *b); /* + * Mark a part of the buffer dirty. + * + * The specified part of the buffer is scheduled to be written. dm-bufio may + * write the specified part of the buffer or it may write a larger superset. + */ +void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b, + unsigned start, unsigned end); + +/* * Initiate writing of dirty buffers, without waiting for completion. */ void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index c5ea03fc7ee1..8785134c9f1f 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -833,7 +833,7 @@ static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b) *--------------------------------------------------------------*/ static void remap_to_origin(struct cache *cache, struct bio *bio) { - bio->bi_bdev = cache->origin_dev->bdev; + bio_set_dev(bio, cache->origin_dev->bdev); } static void remap_to_cache(struct cache *cache, struct bio *bio, @@ -842,7 +842,7 @@ static void remap_to_cache(struct cache *cache, struct bio *bio, sector_t bi_sector = bio->bi_iter.bi_sector; sector_t block = from_cblock(cblock); - bio->bi_bdev = cache->cache_dev->bdev; + bio_set_dev(bio, cache->cache_dev->bdev); if (!block_size_is_power_of_two(cache)) bio->bi_iter.bi_sector = (block * cache->sectors_per_block) + @@ -2306,7 +2306,7 @@ static void init_features(struct cache_features *cf) static int parse_features(struct cache_args *ca, struct dm_arg_set *as, char **error) { - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 2, "Invalid number of cache feature arguments"}, }; @@ -2348,7 +2348,7 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, static int parse_policy(struct cache_args *ca, struct dm_arg_set *as, char **error) { - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 1024, "Invalid number of policy arguments"}, }; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index cdf6b1e12460..a55ffd4f5933 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -758,9 +758,8 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc, int i, r; /* xor whitening with sector number */ - memcpy(buf, tcw->whitening, TCW_WHITENING_SIZE); - crypto_xor(buf, (u8 *)§or, 8); - crypto_xor(&buf[8], (u8 *)§or, 8); + crypto_xor_cpy(buf, tcw->whitening, (u8 *)§or, 8); + crypto_xor_cpy(&buf[8], tcw->whitening + 8, (u8 *)§or, 8); /* calculate crc32 for every 32bit part and xor it */ desc->tfm = tcw->crc32_tfm; @@ -805,10 +804,10 @@ static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv, } /* Calculate IV */ - memcpy(iv, tcw->iv_seed, cc->iv_size); - crypto_xor(iv, (u8 *)§or, 8); + crypto_xor_cpy(iv, tcw->iv_seed, (u8 *)§or, 8); if (cc->iv_size > 8) - crypto_xor(&iv[8], (u8 *)§or, cc->iv_size - 8); + crypto_xor_cpy(&iv[8], tcw->iv_seed + 8, (u8 *)§or, + cc->iv_size - 8); return r; } @@ -933,9 +932,6 @@ static int dm_crypt_integrity_io_alloc(struct dm_crypt_io *io, struct bio *bio) bip->bip_iter.bi_size = tag_len; bip->bip_iter.bi_sector = io->cc->start + io->sector; - /* We own the metadata, do not let bio_free to release it */ - bip->bip_flags &= ~BIP_BLOCK_INTEGRITY; - ret = bio_integrity_add_page(bio, virt_to_page(io->integrity_metadata), tag_len, offset_in_page(io->integrity_metadata)); if (unlikely(ret != tag_len)) @@ -1547,7 +1543,7 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone) clone->bi_private = io; clone->bi_end_io = crypt_endio; - clone->bi_bdev = cc->dev->bdev; + bio_set_dev(clone, cc->dev->bdev); clone->bi_opf = io->base_bio->bi_opf; } @@ -2533,7 +2529,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar { struct crypt_config *cc = ti->private; struct dm_arg_set as; - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 6, "Invalid number of feature args"}, }; unsigned int opt_params, val; @@ -2796,7 +2792,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) */ if (unlikely(bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)) { - bio->bi_bdev = cc->dev->bdev; + bio_set_dev(bio, cc->dev->bdev); if (bio_sectors(bio)) bio->bi_iter.bi_sector = cc->start + dm_target_offset(ti, bio->bi_iter.bi_sector); diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index ae3158795d26..2209a9700acd 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -282,7 +282,7 @@ static int delay_map(struct dm_target *ti, struct bio *bio) struct delay_c *dc = ti->private; if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) { - bio->bi_bdev = dc->dev_write->bdev; + bio_set_dev(bio, dc->dev_write->bdev); if (bio_sectors(bio)) bio->bi_iter.bi_sector = dc->start_write + dm_target_offset(ti, bio->bi_iter.bi_sector); @@ -290,7 +290,7 @@ static int delay_map(struct dm_target *ti, struct bio *bio) return delay_bio(dc, dc->write_delay, bio); } - bio->bi_bdev = dc->dev_read->bdev; + bio_set_dev(bio, dc->dev_read->bdev); bio->bi_iter.bi_sector = dc->start_read + dm_target_offset(ti, bio->bi_iter.bi_sector); diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index e7ba89f98d8d..ba84b8d62cd0 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1192,7 +1192,7 @@ static dm_block_t get_block(struct era *era, struct bio *bio) static void remap_to_origin(struct era *era, struct bio *bio) { - bio->bi_bdev = era->origin_dev->bdev; + bio_set_dev(bio, era->origin_dev->bdev); } /*---------------------------------------------------------------- diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index e2c7234931bc..b82cb1ab1eaa 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -51,7 +51,7 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, unsigned argc; const char *arg_name; - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 6, "Invalid number of feature args"}, {1, UINT_MAX, "Invalid corrupt bio byte"}, {0, 255, "Invalid corrupt value to write into bio byte (0-255)"}, @@ -178,7 +178,7 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, */ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) { - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, UINT_MAX, "Invalid up interval"}, {0, UINT_MAX, "Invalid down interval"}, }; @@ -274,7 +274,7 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) { struct flakey_c *fc = ti->private; - bio->bi_bdev = fc->dev->bdev; + bio_set_dev(bio, fc->dev->bdev); if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) bio->bi_iter.bi_sector = flakey_map_sector(ti, bio->bi_iter.bi_sector); diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 3acce09bba35..096fe9b66c50 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -225,6 +225,8 @@ struct dm_integrity_c { struct alg_spec internal_hash_alg; struct alg_spec journal_crypt_alg; struct alg_spec journal_mac_alg; + + atomic64_t number_of_mismatches; }; struct dm_integrity_range { @@ -250,7 +252,8 @@ struct dm_integrity_io { struct completion *completion; - struct block_device *orig_bi_bdev; + struct gendisk *orig_bi_disk; + u8 orig_bi_partno; bio_end_io_t *orig_bi_end_io; struct bio_integrity_payload *orig_bi_integrity; struct bvec_iter orig_bi_iter; @@ -297,7 +300,7 @@ static void __DEBUG_bytes(__u8 *bytes, size_t len, const char *msg, ...) /* * DM Integrity profile, protection is performed layer above (dm-crypt) */ -static struct blk_integrity_profile dm_integrity_profile = { +static const struct blk_integrity_profile dm_integrity_profile = { .name = "DM-DIF-EXT-TAG", .generate_fn = NULL, .verify_fn = NULL, @@ -309,6 +312,8 @@ static void dm_integrity_dtr(struct dm_target *ti); static void dm_integrity_io_error(struct dm_integrity_c *ic, const char *msg, int err) { + if (err == -EILSEQ) + atomic64_inc(&ic->number_of_mismatches); if (!cmpxchg(&ic->failed, 0, err)) DMERR("Error on %s: %d", msg, err); } @@ -769,13 +774,13 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi unsigned i; io_comp.ic = ic; - io_comp.comp = COMPLETION_INITIALIZER_ONSTACK(io_comp.comp); + init_completion(&io_comp.comp); if (commit_start + commit_sections <= ic->journal_sections) { io_comp.in_flight = (atomic_t)ATOMIC_INIT(1); if (ic->journal_io) { crypt_comp_1.ic = ic; - crypt_comp_1.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp_1.comp); + init_completion(&crypt_comp_1.comp); crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, true, commit_start, commit_sections, &crypt_comp_1); wait_for_completion_io(&crypt_comp_1.comp); @@ -791,18 +796,18 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi to_end = ic->journal_sections - commit_start; if (ic->journal_io) { crypt_comp_1.ic = ic; - crypt_comp_1.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp_1.comp); + init_completion(&crypt_comp_1.comp); crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1); if (try_wait_for_completion(&crypt_comp_1.comp)) { rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp); - crypt_comp_1.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp_1.comp); + reinit_completion(&crypt_comp_1.comp); crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_1); wait_for_completion_io(&crypt_comp_1.comp); } else { crypt_comp_2.ic = ic; - crypt_comp_2.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp_2.comp); + init_completion(&crypt_comp_2.comp); crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2); wait_for_completion_io(&crypt_comp_1.comp); @@ -1040,7 +1045,7 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se memcpy(tag, dp, to_copy); } else if (op == TAG_WRITE) { memcpy(dp, tag, to_copy); - dm_bufio_mark_buffer_dirty(b); + dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy); } else { /* e.g.: op == TAG_CMP */ if (unlikely(memcmp(dp, tag, to_copy))) { @@ -1164,7 +1169,8 @@ static void integrity_end_io(struct bio *bio) struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); bio->bi_iter = dio->orig_bi_iter; - bio->bi_bdev = dio->orig_bi_bdev; + bio->bi_disk = dio->orig_bi_disk; + bio->bi_partno = dio->orig_bi_partno; if (dio->orig_bi_integrity) { bio->bi_integrity = dio->orig_bi_integrity; bio->bi_opf |= REQ_INTEGRITY; @@ -1273,6 +1279,7 @@ again: DMERR("Checksum failed at sector 0x%llx", (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size))); r = -EILSEQ; + atomic64_inc(&ic->number_of_mismatches); } if (likely(checksums != checksums_onstack)) kfree(checksums); @@ -1674,15 +1681,16 @@ sleep: dio->in_flight = (atomic_t)ATOMIC_INIT(2); if (need_sync_io) { - read_comp = COMPLETION_INITIALIZER_ONSTACK(read_comp); + init_completion(&read_comp); dio->completion = &read_comp; } else dio->completion = NULL; dio->orig_bi_iter = bio->bi_iter; - dio->orig_bi_bdev = bio->bi_bdev; - bio->bi_bdev = ic->dev->bdev; + dio->orig_bi_disk = bio->bi_disk; + dio->orig_bi_partno = bio->bi_partno; + bio_set_dev(bio, ic->dev->bdev); dio->orig_bi_integrity = bio_integrity(bio); bio->bi_integrity = NULL; @@ -1697,7 +1705,11 @@ sleep: if (need_sync_io) { wait_for_completion_io(&read_comp); - integrity_metadata(&dio->work); + if (likely(!bio->bi_status)) + integrity_metadata(&dio->work); + else + dec_in_flight(dio); + } else { INIT_WORK(&dio->work, integrity_metadata); queue_work(ic->metadata_wq, &dio->work); @@ -1831,7 +1843,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, comp.ic = ic; comp.in_flight = (atomic_t)ATOMIC_INIT(1); - comp.comp = COMPLETION_INITIALIZER_ONSTACK(comp.comp); + init_completion(&comp.comp); i = write_start; for (n = 0; n < write_sections; n++, i++, wraparound_section(ic, &i)) { @@ -2058,7 +2070,7 @@ static void replay_journal(struct dm_integrity_c *ic) if (ic->journal_io) { struct journal_completion crypt_comp; crypt_comp.ic = ic; - crypt_comp.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp.comp); + init_completion(&crypt_comp.comp); crypt_comp.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, false, 0, ic->journal_sections, &crypt_comp); wait_for_completion(&crypt_comp.comp); @@ -2230,7 +2242,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: - result[0] = '\0'; + DMEMIT("%llu", (unsigned long long)atomic64_read(&ic->number_of_mismatches)); break; case STATUSTYPE_TABLE: { @@ -2631,7 +2643,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error) memset(iv, 0x00, ivsize); skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, iv); - comp.comp = COMPLETION_INITIALIZER_ONSTACK(comp.comp); + init_completion(&comp.comp); comp.in_flight = (atomic_t)ATOMIC_INIT(1); if (do_crypt(true, req, &comp)) wait_for_completion(&comp.comp); @@ -2688,7 +2700,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error) sg_init_one(&sg, crypt_data, crypt_len); skcipher_request_set_crypt(req, &sg, &sg, crypt_len, iv); - comp.comp = COMPLETION_INITIALIZER_ONSTACK(comp.comp); + init_completion(&comp.comp); comp.in_flight = (atomic_t)ATOMIC_INIT(1); if (do_crypt(true, req, &comp)) wait_for_completion(&comp.comp); @@ -2775,7 +2787,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) int r; unsigned extra_args; struct dm_arg_set as; - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 9, "Invalid number of feature args"}, }; unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; @@ -2803,6 +2815,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) bio_list_init(&ic->flush_bio_list); init_waitqueue_head(&ic->copy_to_journal_wait); init_completion(&ic->crypto_backoff); + atomic64_set(&ic->number_of_mismatches, 0); r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev); if (r) { @@ -3199,7 +3212,7 @@ static void dm_integrity_dtr(struct dm_target *ti) static struct target_type integrity_target = { .name = "integrity", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, .ctr = dm_integrity_ctr, diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 25039607f3cb..b4357ed4d541 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -347,7 +347,7 @@ static void do_region(int op, int op_flags, unsigned region, bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios); bio->bi_iter.bi_sector = where->sector + (where->count - remaining); - bio->bi_bdev = where->bdev; + bio_set_dev(bio, where->bdev); bio->bi_end_io = endio; bio_set_op_attrs(bio, op, op_flags); store_io_and_region_in_bio(bio, io, region); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index e06f0ef7d2ec..8756a6850431 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1629,7 +1629,7 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para *---------------------------------------------------------------*/ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) { - static struct { + static const struct { int cmd; int flags; ioctl_fn fn; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 41971a090e34..d5f8eff7c11d 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -88,7 +88,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio) { struct linear_c *lc = ti->private; - bio->bi_bdev = lc->dev->bdev; + bio_set_dev(bio, lc->dev->bdev); if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) bio->bi_iter.bi_sector = linear_map_sector(ti, bio->bi_iter.bi_sector); @@ -184,20 +184,6 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } -static void linear_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr, - size_t size) -{ - struct linear_c *lc = ti->private; - struct block_device *bdev = lc->dev->bdev; - struct dax_device *dax_dev = lc->dev->dax_dev; - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; - - dev_sector = linear_map_sector(ti, sector); - if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(size, PAGE_SIZE), &pgoff)) - return; - dax_flush(dax_dev, pgoff, addr, size); -} - static struct target_type linear_target = { .name = "linear", .version = {1, 4, 0}, @@ -212,7 +198,6 @@ static struct target_type linear_target = { .iterate_devices = linear_iterate_devices, .direct_access = linear_dax_direct_access, .dax_copy_from_iter = linear_dax_copy_from_iter, - .dax_flush = linear_dax_flush, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index a1da0eb58a93..8b80a9ce9ea9 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -100,6 +100,7 @@ struct log_writes_c { struct dm_dev *logdev; u64 logged_entries; u32 sectorsize; + u32 sectorshift; atomic_t io_blocks; atomic_t pending_blocks; sector_t next_sector; @@ -128,6 +129,18 @@ struct per_bio_data { struct pending_block *block; }; +static inline sector_t bio_to_dev_sectors(struct log_writes_c *lc, + sector_t sectors) +{ + return sectors >> (lc->sectorshift - SECTOR_SHIFT); +} + +static inline sector_t dev_to_bio_sectors(struct log_writes_c *lc, + sector_t sectors) +{ + return sectors << (lc->sectorshift - SECTOR_SHIFT); +} + static void put_pending_block(struct log_writes_c *lc) { if (atomic_dec_and_test(&lc->pending_blocks)) { @@ -198,7 +211,7 @@ static int write_metadata(struct log_writes_c *lc, void *entry, } bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; - bio->bi_bdev = lc->logdev->bdev; + bio_set_dev(bio, lc->logdev->bdev); bio->bi_end_io = log_end_io; bio->bi_private = lc; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -253,7 +266,7 @@ static int log_one_block(struct log_writes_c *lc, if (!block->vec_cnt) goto out; - sector++; + sector += dev_to_bio_sectors(lc, 1); atomic_inc(&lc->io_blocks); bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES)); @@ -263,7 +276,7 @@ static int log_one_block(struct log_writes_c *lc, } bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; - bio->bi_bdev = lc->logdev->bdev; + bio_set_dev(bio, lc->logdev->bdev); bio->bi_end_io = log_end_io; bio->bi_private = lc; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -285,7 +298,7 @@ static int log_one_block(struct log_writes_c *lc, } bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; - bio->bi_bdev = lc->logdev->bdev; + bio_set_dev(bio, lc->logdev->bdev); bio->bi_end_io = log_end_io; bio->bi_private = lc; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -354,10 +367,9 @@ static int log_writes_kthread(void *arg) goto next; sector = lc->next_sector; - if (block->flags & LOG_DISCARD_FLAG) - lc->next_sector++; - else - lc->next_sector += block->nr_sectors + 1; + if (!(block->flags & LOG_DISCARD_FLAG)) + lc->next_sector += dev_to_bio_sectors(lc, block->nr_sectors); + lc->next_sector += dev_to_bio_sectors(lc, 1); /* * Apparently the size of the device may not be known @@ -399,7 +411,7 @@ next: if (!try_to_freeze()) { set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop() && - !atomic_read(&lc->pending_blocks)) + list_empty(&lc->logging_blocks)) schedule(); __set_current_state(TASK_RUNNING); } @@ -435,7 +447,6 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) INIT_LIST_HEAD(&lc->unflushed_blocks); INIT_LIST_HEAD(&lc->logging_blocks); init_waitqueue_head(&lc->wait); - lc->sectorsize = 1 << SECTOR_SHIFT; atomic_set(&lc->io_blocks, 0); atomic_set(&lc->pending_blocks, 0); @@ -455,6 +466,8 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } + lc->sectorsize = bdev_logical_block_size(lc->dev->bdev); + lc->sectorshift = ilog2(lc->sectorsize); lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write"); if (IS_ERR(lc->log_kthread)) { ret = PTR_ERR(lc->log_kthread); @@ -464,8 +477,12 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - /* We put the super at sector 0, start logging at sector 1 */ - lc->next_sector = 1; + /* + * next_sector is in 512b sectors to correspond to what bi_sector expects. + * The super starts at sector 0, and the next_sector is the next logical + * one based on the sectorsize of the device. + */ + lc->next_sector = lc->sectorsize >> SECTOR_SHIFT; lc->logging_enabled = true; lc->end_sector = logdev_last_sector(lc); lc->device_supports_discard = true; @@ -539,7 +556,7 @@ static void normal_map_bio(struct dm_target *ti, struct bio *bio) { struct log_writes_c *lc = ti->private; - bio->bi_bdev = lc->dev->bdev; + bio_set_dev(bio, lc->dev->bdev); } static int log_writes_map(struct dm_target *ti, struct bio *bio) @@ -599,8 +616,8 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) if (discard_bio) block->flags |= LOG_DISCARD_FLAG; - block->sector = bio->bi_iter.bi_sector; - block->nr_sectors = bio_sectors(bio); + block->sector = bio_to_dev_sectors(lc, bio->bi_iter.bi_sector); + block->nr_sectors = bio_to_dev_sectors(lc, bio_sectors(bio)); /* We don't need the data, just submit */ if (discard_bio) { @@ -767,9 +784,12 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit if (!q || !blk_queue_discard(q)) { lc->device_supports_discard = false; - limits->discard_granularity = 1 << SECTOR_SHIFT; + limits->discard_granularity = lc->sectorsize; limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT); } + limits->logical_block_size = bdev_logical_block_size(lc->dev->bdev); + limits->physical_block_size = bdev_physical_block_size(lc->dev->bdev); + limits->io_min = limits->physical_block_size; } static struct target_type log_writes_target = { diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d24e4b05f5da..11f273d2f018 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -565,7 +565,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m mpio->nr_bytes = nr_bytes; bio->bi_status = 0; - bio->bi_bdev = pgpath->path.dev->bdev; + bio_set_dev(bio, pgpath->path.dev->bdev); bio->bi_opf |= REQ_FAILFAST_TRANSPORT; if (pgpath->pg->ps.type->start_io) @@ -632,6 +632,10 @@ static void process_queued_bios(struct work_struct *work) case DM_MAPIO_REMAPPED: generic_make_request(bio); break; + case 0: + break; + default: + WARN_ONCE(true, "__multipath_map_bio() returned %d\n", r); } } blk_finish_plug(&plug); @@ -698,7 +702,7 @@ static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, struct path_selector_type *pst; unsigned ps_argc; - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 1024, "invalid number of path selector args"}, }; @@ -822,7 +826,7 @@ retain: static struct priority_group *parse_priority_group(struct dm_arg_set *as, struct multipath *m) { - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {1, 1024, "invalid number of paths"}, {0, 1024, "invalid number of selector args"} }; @@ -898,7 +902,7 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) int ret; struct dm_target *ti = m->ti; - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 1024, "invalid number of hardware handler args"}, }; @@ -950,7 +954,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) struct dm_target *ti = m->ti; const char *arg_name; - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 8, "invalid number of feature args"}, {1, 50, "pg_init_retries must be between 1 and 50"}, {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, @@ -1019,7 +1023,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) { /* target arguments */ - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 1024, "invalid number of priority groups"}, {0, 1024, "invalid initial priority group number"}, }; @@ -1379,6 +1383,7 @@ static void pg_init_done(void *data, int errors) case SCSI_DH_RETRY: /* Wait before retrying. */ delay_retry = 1; + /* fall through */ case SCSI_DH_IMM_RETRY: case SCSI_DH_RES_TEMP_UNAVAIL: if (pg_init_limit_reached(m, pgpath)) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index a4fbd911d566..c0b82136b2d1 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -145,7 +145,7 @@ static void dispatch_bios(void *context, struct bio_list *bio_list) struct dm_raid1_bio_record { struct mirror *m; - /* if details->bi_bdev == NULL, details were not saved */ + /* if details->bi_disk == NULL, details were not saved */ struct dm_bio_details details; region_t write_region; }; @@ -464,7 +464,7 @@ static sector_t map_sector(struct mirror *m, struct bio *bio) static void map_bio(struct mirror *m, struct bio *bio) { - bio->bi_bdev = m->dev->bdev; + bio_set_dev(bio, m->dev->bdev); bio->bi_iter.bi_sector = map_sector(m, bio); } @@ -1199,7 +1199,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio) struct dm_raid1_bio_record *bio_record = dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); - bio_record->details.bi_bdev = NULL; + bio_record->details.bi_disk = NULL; if (rw == WRITE) { /* Save region for mirror_end_io() handler */ @@ -1266,7 +1266,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, goto out; if (unlikely(*error)) { - if (!bio_record->details.bi_bdev) { + if (!bio_record->details.bi_disk) { /* * There wasn't enough memory to record necessary * information for a retry or there was no other @@ -1291,7 +1291,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, bd = &bio_record->details; dm_bio_restore(bd, bio); - bio_record->details.bi_bdev = NULL; + bio_record->details.bi_disk = NULL; bio->bi_status = 0; queue_bio(ms, bio, rw); @@ -1301,7 +1301,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, } out: - bio_record->details.bi_bdev = NULL; + bio_record->details.bi_disk = NULL; return DM_ENDIO_DONE; } diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index c6ebc5b1e00e..eadfcfd106ff 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -117,9 +117,9 @@ static void end_clone_bio(struct bio *clone) struct dm_rq_clone_bio_info *info = container_of(clone, struct dm_rq_clone_bio_info, clone); struct dm_rq_target_io *tio = info->tio; - struct bio *bio = info->orig; unsigned int nr_bytes = info->orig->bi_iter.bi_size; blk_status_t error = clone->bi_status; + bool is_last = !clone->bi_next; bio_put(clone); @@ -137,28 +137,23 @@ static void end_clone_bio(struct bio *clone) * when the request is completed. */ tio->error = error; - return; + goto exit; } /* * I/O for the bio successfully completed. * Notice the data completion to the upper layer. */ - - /* - * bios are processed from the head of the list. - * So the completing bio should always be rq->bio. - * If it's not, something wrong is happening. - */ - if (tio->orig->bio != bio) - DMERR("bio completion is going in the middle of the request"); + tio->completed += nr_bytes; /* * Update the original request. * Do not use blk_end_request() here, because it may complete * the original request before the clone, and break the ordering. */ - blk_update_request(tio->orig, BLK_STS_OK, nr_bytes); + if (is_last) + exit: + blk_update_request(tio->orig, BLK_STS_OK, tio->completed); } static struct dm_rq_target_io *tio_from_request(struct request *rq) @@ -237,14 +232,14 @@ static void dm_end_request(struct request *clone, blk_status_t error) /* * Requeue the original request of a clone. */ -static void dm_old_requeue_request(struct request *rq) +static void dm_old_requeue_request(struct request *rq, unsigned long delay_ms) { struct request_queue *q = rq->q; unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); blk_requeue_request(q, rq); - blk_run_queue_async(q); + blk_delay_queue(q, delay_ms); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -270,6 +265,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_ struct mapped_device *md = tio->md; struct request *rq = tio->orig; int rw = rq_data_dir(rq); + unsigned long delay_ms = delay_requeue ? 100 : 0; rq_end_stats(md, rq); if (tio->clone) { @@ -278,9 +274,9 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_ } if (!rq->q->mq_ops) - dm_old_requeue_request(rq); + dm_old_requeue_request(rq, delay_ms); else - dm_mq_delay_requeue_request(rq, delay_requeue ? 100/*ms*/ : 0); + dm_mq_delay_requeue_request(rq, delay_ms); rq_completed(md, rw, false); } @@ -455,6 +451,7 @@ static void init_tio(struct dm_rq_target_io *tio, struct request *rq, tio->clone = NULL; tio->orig = rq; tio->error = 0; + tio->completed = 0; /* * Avoid initializing info for blk-mq; it passes * target-specific data through info.ptr diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h index 9813922e4fe5..f43c45460aac 100644 --- a/drivers/md/dm-rq.h +++ b/drivers/md/dm-rq.h @@ -29,6 +29,7 @@ struct dm_rq_target_io { struct dm_stats_aux stats_aux; unsigned long duration_jiffies; unsigned n_sectors; + unsigned completed; }; /* diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 1ba41048b438..1113b42e1eda 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1663,7 +1663,7 @@ __find_pending_exception(struct dm_snapshot *s, static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, struct bio *bio, chunk_t chunk) { - bio->bi_bdev = s->cow->bdev; + bio_set_dev(bio, s->cow->bdev); bio->bi_iter.bi_sector = chunk_to_sector(s->store, dm_chunk_number(e->new_chunk) + (chunk - e->old_chunk)) + @@ -1681,7 +1681,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) init_tracked_chunk(bio); if (bio->bi_opf & REQ_PREFLUSH) { - bio->bi_bdev = s->cow->bdev; + bio_set_dev(bio, s->cow->bdev); return DM_MAPIO_REMAPPED; } @@ -1769,7 +1769,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) goto out; } } else { - bio->bi_bdev = s->origin->bdev; + bio_set_dev(bio, s->origin->bdev); track_chunk(s, bio, chunk); } @@ -1802,9 +1802,9 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) if (bio->bi_opf & REQ_PREFLUSH) { if (!dm_bio_get_target_bio_nr(bio)) - bio->bi_bdev = s->origin->bdev; + bio_set_dev(bio, s->origin->bdev); else - bio->bi_bdev = s->cow->bdev; + bio_set_dev(bio, s->cow->bdev); return DM_MAPIO_REMAPPED; } @@ -1824,7 +1824,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) chunk >= s->first_merging_chunk && chunk < (s->first_merging_chunk + s->num_merging_chunks)) { - bio->bi_bdev = s->origin->bdev; + bio_set_dev(bio, s->origin->bdev); bio_list_add(&s->bios_queued_during_merge, bio); r = DM_MAPIO_SUBMITTED; goto out_unlock; @@ -1838,7 +1838,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) } redirect_to_origin: - bio->bi_bdev = s->origin->bdev; + bio_set_dev(bio, s->origin->bdev); if (bio_data_dir(bio) == WRITE) { up_write(&s->lock); @@ -2285,7 +2285,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio) struct dm_origin *o = ti->private; unsigned available_sectors; - bio->bi_bdev = o->dev->bdev; + bio_set_dev(bio, o->dev->bdev); if (unlikely(bio->bi_opf & REQ_PREFLUSH)) return DM_MAPIO_REMAPPED; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index a0375530b07f..b5e892149c54 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -270,7 +270,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio, stripe_map_range_sector(sc, bio_end_sector(bio), target_stripe, &end); if (begin < end) { - bio->bi_bdev = sc->stripe[target_stripe].dev->bdev; + bio_set_dev(bio, sc->stripe[target_stripe].dev->bdev); bio->bi_iter.bi_sector = begin + sc->stripe[target_stripe].physical_start; bio->bi_iter.bi_size = to_bytes(end - begin); @@ -291,7 +291,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) if (bio->bi_opf & REQ_PREFLUSH) { target_bio_nr = dm_bio_get_target_bio_nr(bio); BUG_ON(target_bio_nr >= sc->stripes); - bio->bi_bdev = sc->stripe[target_bio_nr].dev->bdev; + bio_set_dev(bio, sc->stripe[target_bio_nr].dev->bdev); return DM_MAPIO_REMAPPED; } if (unlikely(bio_op(bio) == REQ_OP_DISCARD) || @@ -306,7 +306,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) &stripe, &bio->bi_iter.bi_sector); bio->bi_iter.bi_sector += sc->stripe[stripe].physical_start; - bio->bi_bdev = sc->stripe[stripe].dev->bdev; + bio_set_dev(bio, sc->stripe[stripe].dev->bdev); return DM_MAPIO_REMAPPED; } @@ -351,25 +351,6 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } -static void stripe_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr, - size_t size) -{ - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; - struct stripe_c *sc = ti->private; - struct dax_device *dax_dev; - struct block_device *bdev; - uint32_t stripe; - - stripe_map_sector(sc, sector, &stripe, &dev_sector); - dev_sector += sc->stripe[stripe].physical_start; - dax_dev = sc->stripe[stripe].dev->dax_dev; - bdev = sc->stripe[stripe].dev->bdev; - - if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(size, PAGE_SIZE), &pgoff)) - return; - dax_flush(dax_dev, pgoff, addr, size); -} - /* * Stripe status: * @@ -430,9 +411,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, return DM_ENDIO_DONE; memset(major_minor, 0, sizeof(major_minor)); - sprintf(major_minor, "%d:%d", - MAJOR(disk_devt(bio->bi_bdev->bd_disk)), - MINOR(disk_devt(bio->bi_bdev->bd_disk))); + sprintf(major_minor, "%d:%d", MAJOR(bio_dev(bio)), MINOR(bio_dev(bio))); /* * Test to see which stripe drive triggered the event @@ -491,7 +470,6 @@ static struct target_type stripe_target = { .io_hints = stripe_io_hints, .direct_access = stripe_dax_direct_access, .dax_copy_from_iter = stripe_dax_copy_from_iter, - .dax_flush = stripe_dax_flush, }; int __init dm_stripe_init(void) diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index 871c18fe000d..4c8de1ff78ca 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -251,7 +251,7 @@ static void switch_dtr(struct dm_target *ti) */ static int switch_ctr(struct dm_target *ti, unsigned argc, char **argv) { - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {1, (KMALLOC_MAX_SIZE - sizeof(struct switch_ctx)) / sizeof(struct switch_path), "Invalid number of paths"}, {1, UINT_MAX, "Invalid region size"}, {0, 0, "Invalid number of optional args"}, @@ -322,7 +322,7 @@ static int switch_map(struct dm_target *ti, struct bio *bio) sector_t offset = dm_target_offset(ti, bio->bi_iter.bi_sector); unsigned path_nr = switch_get_path_nr(sctx, offset); - bio->bi_bdev = sctx->path_list[path_nr].dmdev->bdev; + bio_set_dev(bio, sctx->path_list[path_nr].dmdev->bdev); bio->bi_iter.bi_sector = sctx->path_list[path_nr].start + offset; return DM_MAPIO_REMAPPED; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 28a4071cdf85..ef7b8f201f73 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -806,7 +806,8 @@ int dm_table_add_target(struct dm_table *t, const char *type, /* * Target argument parsing helpers. */ -static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, +static int validate_next_arg(const struct dm_arg *arg, + struct dm_arg_set *arg_set, unsigned *value, char **error, unsigned grouped) { const char *arg_str = dm_shift_arg(arg_set); @@ -824,14 +825,14 @@ static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, return 0; } -int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, +int dm_read_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set, unsigned *value, char **error) { return validate_next_arg(arg, arg_set, value, error, 0); } EXPORT_SYMBOL(dm_read_arg); -int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, +int dm_read_arg_group(const struct dm_arg *arg, struct dm_arg_set *arg_set, unsigned *value, char **error) { return validate_next_arg(arg, arg_set, value, error, 1); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 9dec2f8cc739..1e25705209c2 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -679,7 +679,7 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) struct pool *pool = tc->pool; sector_t bi_sector = bio->bi_iter.bi_sector; - bio->bi_bdev = tc->pool_dev->bdev; + bio_set_dev(bio, tc->pool_dev->bdev); if (block_size_is_power_of_two(pool)) bio->bi_iter.bi_sector = (block << pool->sectors_per_block_shift) | @@ -691,7 +691,7 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) static void remap_to_origin(struct thin_c *tc, struct bio *bio) { - bio->bi_bdev = tc->origin_dev->bdev; + bio_set_dev(bio, tc->origin_dev->bdev); } static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) @@ -3041,7 +3041,7 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, unsigned argc; const char *arg_name; - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 4, "Invalid number of pool feature arguments"}, }; @@ -3313,7 +3313,7 @@ static int pool_map(struct dm_target *ti, struct bio *bio) * As this is a singleton target, ti->begin is always zero. */ spin_lock_irqsave(&pool->lock, flags); - bio->bi_bdev = pt->data_dev->bdev; + bio_set_dev(bio, pt->data_dev->bdev); r = DM_MAPIO_REMAPPED; spin_unlock_irqrestore(&pool->lock, flags); diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index b46705ebf01f..bda3caca23ca 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -637,7 +637,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio) struct dm_verity *v = ti->private; struct dm_verity_io *io; - bio->bi_bdev = v->data_dev->bdev; + bio_set_dev(bio, v->data_dev->bdev); bio->bi_iter.bi_sector = verity_map_sector(v, bio->bi_iter.bi_sector); if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) & @@ -839,7 +839,7 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) struct dm_target *ti = v->ti; const char *arg_name; - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, DM_VERITY_OPTS_MAX, "Invalid number of feature args"}, }; diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index a4fa2ada6883..70485de37b66 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -409,7 +409,7 @@ static struct dmz_mblock *dmz_fetch_mblock(struct dmz_metadata *zmd, } bio->bi_iter.bi_sector = dmz_blk2sect(block); - bio->bi_bdev = zmd->dev->bdev; + bio_set_dev(bio, zmd->dev->bdev); bio->bi_private = mblk; bio->bi_end_io = dmz_mblock_bio_end_io; bio_set_op_attrs(bio, REQ_OP_READ, REQ_META | REQ_PRIO); @@ -564,7 +564,7 @@ static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, set_bit(DMZ_META_WRITING, &mblk->state); bio->bi_iter.bi_sector = dmz_blk2sect(block); - bio->bi_bdev = zmd->dev->bdev; + bio_set_dev(bio, zmd->dev->bdev); bio->bi_private = mblk; bio->bi_end_io = dmz_mblock_bio_end_io; bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO); @@ -586,7 +586,7 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block, return -ENOMEM; bio->bi_iter.bi_sector = dmz_blk2sect(block); - bio->bi_bdev = zmd->dev->bdev; + bio_set_dev(bio, zmd->dev->bdev); bio_set_op_attrs(bio, op, REQ_SYNC | REQ_META | REQ_PRIO); bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0); ret = submit_bio_wait(bio); diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index b08bbbd4d902..b87c1741da4b 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -238,7 +238,7 @@ static void dmz_submit_write_bio(struct dmz_target *dmz, struct dm_zone *zone, struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); /* Setup and submit the BIO */ - bio->bi_bdev = dmz->dev->bdev; + bio_set_dev(bio, dmz->dev->bdev); bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block); atomic_inc(&bioctx->ref); generic_make_request(bio); @@ -586,7 +586,7 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) (unsigned long long)dmz_chunk_block(dmz->dev, dmz_bio_block(bio)), (unsigned int)dmz_bio_blocks(bio)); - bio->bi_bdev = dev->bdev; + bio_set_dev(bio, dev->bdev); if (!nr_sectors && bio_op(bio) != REQ_OP_WRITE) return DM_MAPIO_REMAPPED; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d669fddd9290..6e54145969c5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -510,7 +510,7 @@ static void start_io_acct(struct dm_io *io) io->start_time = jiffies; cpu = part_stat_lock(); - part_round_stats(cpu, &dm_disk(md)->part0); + part_round_stats(md->queue, cpu, &dm_disk(md)->part0); part_stat_unlock(); atomic_set(&dm_disk(md)->part0.in_flight[rw], atomic_inc_return(&md->pending[rw])); @@ -529,7 +529,7 @@ static void end_io_acct(struct dm_io *io) int pending; int rw = bio_data_dir(bio); - generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time); + generic_end_io_acct(md->queue, rw, &dm_disk(md)->part0, io->start_time); if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), @@ -841,10 +841,10 @@ static void clone_endio(struct bio *bio) if (unlikely(error == BLK_STS_TARGET)) { if (bio_op(bio) == REQ_OP_WRITE_SAME && - !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors) + !bio->bi_disk->queue->limits.max_write_same_sectors) disable_write_same(md); if (bio_op(bio) == REQ_OP_WRITE_ZEROES && - !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors) + !bio->bi_disk->queue->limits.max_write_zeroes_sectors) disable_write_zeroes(md); } @@ -987,24 +987,6 @@ static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, return ret; } -static void dm_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, - size_t size) -{ - struct mapped_device *md = dax_get_private(dax_dev); - sector_t sector = pgoff * PAGE_SECTORS; - struct dm_target *ti; - int srcu_idx; - - ti = dm_dax_get_live_target(md, sector, &srcu_idx); - - if (!ti) - goto out; - if (ti->type->dax_flush) - ti->type->dax_flush(ti, pgoff, addr, size); - out: - dm_put_live_table(md, srcu_idx); -} - /* * A target may call dm_accept_partial_bio only from the map routine. It is * allowed for all bio types except REQ_PREFLUSH. @@ -1205,8 +1187,8 @@ static void __map_bio(struct dm_target_io *tio) break; case DM_MAPIO_REMAPPED: /* the bio has been remapped so dispatch it */ - trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone, - tio->io->bio->bi_bdev->bd_dev, sector); + trace_block_bio_remap(clone->bi_disk->queue, clone, + bio_dev(tio->io->bio), sector); generic_make_request(clone); break; case DM_MAPIO_KILL: @@ -1532,7 +1514,7 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) map = dm_get_live_table(md, &srcu_idx); - generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0); + generic_start_io_acct(q, rw, bio_sectors(bio), &dm_disk(md)->part0); /* if we're suspended, we have to queue this io for later */ if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { @@ -1786,7 +1768,7 @@ static struct mapped_device *alloc_dev(int minor) goto bad; bio_init(&md->flush_bio, NULL, 0); - md->flush_bio.bi_bdev = md->bdev; + bio_set_dev(&md->flush_bio, md->bdev); md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; dm_stats_init(&md->stats); @@ -2992,7 +2974,6 @@ static const struct block_device_operations dm_blk_dops = { static const struct dax_operations dm_dax_ops = { .direct_access = dm_dax_direct_access, .copy_from_iter = dm_dax_copy_from_iter, - .flush = dm_dax_flush, }; /* diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 06a64d5d8c6c..38264b38420f 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -216,12 +216,12 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio) if (failit) { struct bio *b = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); - b->bi_bdev = conf->rdev->bdev; + bio_set_dev(b, conf->rdev->bdev); b->bi_private = bio; b->bi_end_io = faulty_fail; bio = b; } else - bio->bi_bdev = conf->rdev->bdev; + bio_set_dev(bio, conf->rdev->bdev); generic_make_request(bio); return true; diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 5f1eb9189542..c464fb48039a 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -275,17 +275,17 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio) bio = split; } - bio->bi_bdev = tmp_dev->rdev->bdev; + bio_set_dev(bio, tmp_dev->rdev->bdev); bio->bi_iter.bi_sector = bio->bi_iter.bi_sector - start_sector + data_offset; if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) { + !blk_queue_discard(bio->bi_disk->queue))) { /* Just ignore it */ bio_endio(bio); } else { if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), + trace_block_bio_remap(bio->bi_disk->queue, bio, disk_devt(mddev->gendisk), bio_sector); mddev_check_writesame(mddev, bio); diff --git a/drivers/md/md.c b/drivers/md/md.c index b01e458d31e9..08fcaebc61bd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -422,7 +422,7 @@ static void submit_flushes(struct work_struct *ws) bi = bio_alloc_mddev(GFP_NOIO, 0, mddev); bi->bi_end_io = md_end_flush; bi->bi_private = rdev; - bi->bi_bdev = rdev->bdev; + bio_set_dev(bi, rdev->bdev); bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; atomic_inc(&mddev->flush_pending); submit_bio(bi); @@ -772,7 +772,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, atomic_inc(&rdev->nr_pending); - bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev; + bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev); bio->bi_iter.bi_sector = sector; bio_add_page(bio, page, size, 0); bio->bi_private = rdev; @@ -803,8 +803,10 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, struct bio *bio = md_bio_alloc_sync(rdev->mddev); int ret; - bio->bi_bdev = (metadata_op && rdev->meta_bdev) ? - rdev->meta_bdev : rdev->bdev; + if (metadata_op && rdev->meta_bdev) + bio_set_dev(bio, rdev->meta_bdev); + else + bio_set_dev(bio, rdev->bdev); bio_set_op_attrs(bio, op, op_flags); if (metadata_op) bio->bi_iter.bi_sector = sector + rdev->sb_start; @@ -1536,7 +1538,8 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ } else if (sb->bblog_offset != 0) rdev->badblocks.shift = 0; - if (le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) { + if ((le32_to_cpu(sb->feature_map) & + (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) { rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset); rdev->ppl.size = le16_to_cpu(sb->ppl.size); rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset; @@ -1655,10 +1658,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) set_bit(MD_HAS_JOURNAL, &mddev->flags); - if (le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) { + if (le32_to_cpu(sb->feature_map) & + (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) { if (le32_to_cpu(sb->feature_map) & (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL)) return -EINVAL; + if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) && + (le32_to_cpu(sb->feature_map) & + MD_FEATURE_MULTIPLE_PPLS)) + return -EINVAL; set_bit(MD_HAS_PPL, &mddev->flags); } } else if (mddev->pers == NULL) { @@ -1875,7 +1883,11 @@ retry: sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL); if (test_bit(MD_HAS_PPL, &mddev->flags)) { - sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL); + if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags)) + sb->feature_map |= + cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS); + else + sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL); sb->ppl.offset = cpu_to_le16(rdev->ppl.offset); sb->ppl.size = cpu_to_le16(rdev->ppl.size); } @@ -4283,6 +4295,8 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) if (err) export_rdev(rdev); mddev_unlock(mddev); + if (!err) + md_new_event(mddev); return err ? err : len; } @@ -7836,7 +7850,7 @@ static const struct file_operations md_seq_fops = { .open = md_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release, .poll = mdstat_poll, }; diff --git a/drivers/md/md.h b/drivers/md/md.h index 09db03455801..561d22b9a9a8 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -236,6 +236,7 @@ enum mddev_flags { * never cause the array to become failed. */ MD_HAS_PPL, /* The raid array has PPL feature set */ + MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */ }; enum mddev_sb_flags { @@ -509,6 +510,11 @@ static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sect atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); } +static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors) +{ + atomic_add(nr_sectors, &bio->bi_disk->sync_io); +} + struct md_personality { char *name; @@ -721,14 +727,14 @@ static inline void mddev_clear_unsupported_flags(struct mddev *mddev, static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio) { if (bio_op(bio) == REQ_OP_WRITE_SAME && - !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors) + !bio->bi_disk->queue->limits.max_write_same_sectors) mddev->queue->limits.max_write_same_sectors = 0; } static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio) { if (bio_op(bio) == REQ_OP_WRITE_ZEROES && - !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors) + !bio->bi_disk->queue->limits.max_write_zeroes_sectors) mddev->queue->limits.max_write_zeroes_sectors = 0; } #endif /* _MD_MD_H */ diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 23a162ba6c56..b68e0666b9b0 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -134,7 +134,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio) __bio_clone_fast(&mp_bh->bio, bio); mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset; - mp_bh->bio.bi_bdev = multipath->rdev->bdev; + bio_set_dev(&mp_bh->bio, multipath->rdev->bdev); mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT; mp_bh->bio.bi_end_io = multipath_end_request; mp_bh->bio.bi_private = mp_bh; @@ -345,17 +345,17 @@ static void multipathd(struct md_thread *thread) if ((mp_bh->path = multipath_map (conf))<0) { pr_err("multipath: %s: unrecoverable IO read error for block %llu\n", - bdevname(bio->bi_bdev,b), + bio_devname(bio, b), (unsigned long long)bio->bi_iter.bi_sector); multipath_end_bh_io(mp_bh, BLK_STS_IOERR); } else { pr_err("multipath: %s: redirecting sector %llu to another IO path\n", - bdevname(bio->bi_bdev,b), + bio_devname(bio, b), (unsigned long long)bio->bi_iter.bi_sector); *bio = *(mp_bh->master_bio); bio->bi_iter.bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset; - bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev; + bio_set_dev(bio, conf->multipaths[mp_bh->path].rdev->bdev); bio->bi_opf |= REQ_FAILFAST_TRANSPORT; bio->bi_end_io = multipath_end_request; bio->bi_private = mp_bh; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 94d9ae9b0fd0..5a00fc118470 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -30,7 +30,8 @@ ((1L << MD_HAS_JOURNAL) | \ (1L << MD_JOURNAL_CLEAN) | \ (1L << MD_FAILFAST_SUPPORTED) |\ - (1L << MD_HAS_PPL)) + (1L << MD_HAS_PPL) | \ + (1L << MD_HAS_MULTIPLE_PPLS)) static int raid0_congested(struct mddev *mddev, int bits) { @@ -539,6 +540,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) !discard_bio) continue; bio_chain(discard_bio, bio); + bio_clone_blkcg_association(discard_bio, bio); if (mddev->gendisk) trace_block_bio_remap(bdev_get_queue(rdev->bdev), discard_bio, disk_devt(mddev->gendisk), @@ -588,14 +590,13 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio) zone = find_zone(mddev->private, §or); tmp_dev = map_sector(mddev, zone, sector, §or); - bio->bi_bdev = tmp_dev->bdev; + bio_set_dev(bio, tmp_dev->bdev); bio->bi_iter.bi_sector = sector + zone->dev_start + tmp_dev->data_offset; if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), - bio, disk_devt(mddev->gendisk), - bio_sector); + trace_block_bio_remap(bio->bi_disk->queue, bio, + disk_devt(mddev->gendisk), bio_sector); mddev_check_writesame(mddev, bio); mddev_check_write_zeroes(mddev, bio); generic_make_request(bio); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f50958ded9f0..f3f3e40dc9d8 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -48,7 +48,8 @@ #define UNSUPPORTED_MDDEV_FLAGS \ ((1L << MD_HAS_JOURNAL) | \ (1L << MD_JOURNAL_CLEAN) | \ - (1L << MD_HAS_PPL)) + (1L << MD_HAS_PPL) | \ + (1L << MD_HAS_MULTIPLE_PPLS)) /* * Number of guaranteed r1bios in case of extreme VM load: @@ -786,13 +787,13 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio) while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; - struct md_rdev *rdev = (void*)bio->bi_bdev; + struct md_rdev *rdev = (void *)bio->bi_disk; bio->bi_next = NULL; - bio->bi_bdev = rdev->bdev; + bio_set_dev(bio, rdev->bdev); if (test_bit(Faulty, &rdev->flags)) { bio_io_error(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) + !blk_queue_discard(bio->bi_disk->queue))) /* Just ignore it */ bio_endio(bio); else @@ -1273,7 +1274,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_iter.bi_sector = r1_bio->sector + mirror->rdev->data_offset; - read_bio->bi_bdev = mirror->rdev->bdev; + bio_set_dev(read_bio, mirror->rdev->bdev); read_bio->bi_end_io = raid1_end_read_request; bio_set_op_attrs(read_bio, op, do_sync); if (test_bit(FailFast, &mirror->rdev->flags) && @@ -1282,9 +1283,8 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_private = r1_bio; if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev), - read_bio, disk_devt(mddev->gendisk), - r1_bio->sector); + trace_block_bio_remap(read_bio->bi_disk->queue, read_bio, + disk_devt(mddev->gendisk), r1_bio->sector); generic_make_request(read_bio); } @@ -1496,7 +1496,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, mbio->bi_iter.bi_sector = (r1_bio->sector + conf->mirrors[i].rdev->data_offset); - mbio->bi_bdev = conf->mirrors[i].rdev->bdev; + bio_set_dev(mbio, conf->mirrors[i].rdev->bdev); mbio->bi_end_io = raid1_end_write_request; mbio->bi_opf = bio_op(bio) | (bio->bi_opf & (REQ_SYNC | REQ_FUA)); if (test_bit(FailFast, &conf->mirrors[i].rdev->flags) && @@ -1508,11 +1508,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, atomic_inc(&r1_bio->remaining); if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev), + trace_block_bio_remap(mbio->bi_disk->queue, mbio, disk_devt(mddev->gendisk), r1_bio->sector); /* flush_pending_writes() needs access to the rdev so...*/ - mbio->bi_bdev = (void*)conf->mirrors[i].rdev; + mbio->bi_disk = (void *)conf->mirrors[i].rdev; cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug)); if (cb) @@ -1990,8 +1990,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio) * Don't fail devices as that won't really help. */ pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n", - mdname(mddev), - bdevname(bio->bi_bdev, b), + mdname(mddev), bio_devname(bio, b), (unsigned long long)r1_bio->sector); for (d = 0; d < conf->raid_disks * 2; d++) { rdev = conf->mirrors[d].rdev; @@ -2082,7 +2081,7 @@ static void process_checks(struct r1bio *r1_bio) b->bi_status = status; b->bi_iter.bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; - b->bi_bdev = conf->mirrors[i].rdev->bdev; + bio_set_dev(b, conf->mirrors[i].rdev->bdev); b->bi_end_io = end_sync_read; rp->raid_bio = r1_bio; b->bi_private = rp; @@ -2350,7 +2349,7 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) bio_trim(wbio, sector - r1_bio->sector, sectors); wbio->bi_iter.bi_sector += rdev->data_offset; - wbio->bi_bdev = rdev->bdev; + bio_set_dev(wbio, rdev->bdev); if (submit_bio_wait(wbio) < 0) /* failure! */ @@ -2440,7 +2439,6 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) struct mddev *mddev = conf->mddev; struct bio *bio; struct md_rdev *rdev; - dev_t bio_dev; sector_t bio_sector; clear_bit(R1BIO_ReadError, &r1_bio->state); @@ -2454,7 +2452,6 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) */ bio = r1_bio->bios[r1_bio->read_disk]; - bio_dev = bio->bi_bdev->bd_dev; bio_sector = conf->mirrors[r1_bio->read_disk].rdev->data_offset + r1_bio->sector; bio_put(bio); r1_bio->bios[r1_bio->read_disk] = NULL; @@ -2564,6 +2561,23 @@ static int init_resync(struct r1conf *conf) return 0; } +static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf) +{ + struct r1bio *r1bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); + struct resync_pages *rps; + struct bio *bio; + int i; + + for (i = conf->poolinfo->raid_disks; i--; ) { + bio = r1bio->bios[i]; + rps = bio->bi_private; + bio_reset(bio); + bio->bi_private = rps; + } + r1bio->master_bio = NULL; + return r1bio; +} + /* * perform a "sync" on one "block" * @@ -2649,7 +2663,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, bitmap_cond_end_sync(mddev->bitmap, sector_nr, mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high)); - r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); + r1_bio = raid1_alloc_init_r1buf(conf); raise_barrier(conf, sector_nr); @@ -2727,7 +2741,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, if (bio->bi_end_io) { atomic_inc(&rdev->nr_pending); bio->bi_iter.bi_sector = sector_nr + rdev->data_offset; - bio->bi_bdev = rdev->bdev; + bio_set_dev(bio, rdev->bdev); if (test_bit(FailFast, &rdev->flags)) bio->bi_opf |= MD_FAILFAST; } @@ -2853,7 +2867,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, bio = r1_bio->bios[i]; if (bio->bi_end_io == end_sync_read) { read_targets--; - md_sync_acct(bio->bi_bdev, nr_sectors); + md_sync_acct_bio(bio, nr_sectors); if (read_targets == 1) bio->bi_opf &= ~MD_FAILFAST; generic_make_request(bio); @@ -2862,7 +2876,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, } else { atomic_set(&r1_bio->remaining, 1); bio = r1_bio->bios[r1_bio->read_disk]; - md_sync_acct(bio->bi_bdev, nr_sectors); + md_sync_acct_bio(bio, nr_sectors); if (read_targets == 1) bio->bi_opf &= ~MD_FAILFAST; generic_make_request(bio); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f55d4cc085f6..374df5796649 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -901,13 +901,13 @@ static void flush_pending_writes(struct r10conf *conf) while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; - struct md_rdev *rdev = (void*)bio->bi_bdev; + struct md_rdev *rdev = (void*)bio->bi_disk; bio->bi_next = NULL; - bio->bi_bdev = rdev->bdev; + bio_set_dev(bio, rdev->bdev); if (test_bit(Faulty, &rdev->flags)) { bio_io_error(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) + !blk_queue_discard(bio->bi_disk->queue))) /* Just ignore it */ bio_endio(bio); else @@ -1085,13 +1085,13 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; - struct md_rdev *rdev = (void*)bio->bi_bdev; + struct md_rdev *rdev = (void*)bio->bi_disk; bio->bi_next = NULL; - bio->bi_bdev = rdev->bdev; + bio_set_dev(bio, rdev->bdev); if (test_bit(Faulty, &rdev->flags)) { bio_io_error(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) + !blk_queue_discard(bio->bi_disk->queue))) /* Just ignore it */ bio_endio(bio); else @@ -1200,7 +1200,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr + choose_data_offset(r10_bio, rdev); - read_bio->bi_bdev = rdev->bdev; + bio_set_dev(read_bio, rdev->bdev); read_bio->bi_end_io = raid10_end_read_request; bio_set_op_attrs(read_bio, op, do_sync); if (test_bit(FailFast, &rdev->flags) && @@ -1209,7 +1209,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_private = r10_bio; if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev), + trace_block_bio_remap(read_bio->bi_disk->queue, read_bio, disk_devt(mddev->gendisk), r10_bio->sector); generic_make_request(read_bio); @@ -1249,7 +1249,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr + choose_data_offset(r10_bio, rdev)); - mbio->bi_bdev = rdev->bdev; + bio_set_dev(mbio, rdev->bdev); mbio->bi_end_io = raid10_end_write_request; bio_set_op_attrs(mbio, op, do_sync | do_fua); if (!replacement && test_bit(FailFast, @@ -1259,11 +1259,11 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, mbio->bi_private = r10_bio; if (conf->mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev), + trace_block_bio_remap(mbio->bi_disk->queue, mbio, disk_devt(conf->mddev->gendisk), r10_bio->sector); /* flush_pending_writes() needs access to the rdev so...*/ - mbio->bi_bdev = (void *)rdev; + mbio->bi_disk = (void *)rdev; atomic_inc(&r10_bio->remaining); @@ -2094,7 +2094,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) if (test_bit(FailFast, &conf->mirrors[d].rdev->flags)) tbio->bi_opf |= MD_FAILFAST; tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset; - tbio->bi_bdev = conf->mirrors[d].rdev->bdev; + bio_set_dev(tbio, conf->mirrors[d].rdev->bdev); generic_make_request(tbio); } @@ -2552,7 +2552,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector); wbio->bi_iter.bi_sector = wsector + choose_data_offset(r10_bio, rdev); - wbio->bi_bdev = rdev->bdev; + bio_set_dev(wbio, rdev->bdev); bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); if (submit_bio_wait(wbio) < 0) @@ -2575,7 +2575,6 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) struct bio *bio; struct r10conf *conf = mddev->private; struct md_rdev *rdev = r10_bio->devs[slot].rdev; - dev_t bio_dev; sector_t bio_last_sector; /* we got a read error. Maybe the drive is bad. Maybe just @@ -2587,7 +2586,6 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) * frozen. */ bio = r10_bio->devs[slot].bio; - bio_dev = bio->bi_bdev->bd_dev; bio_last_sector = r10_bio->devs[slot].addr + rdev->data_offset + r10_bio->sectors; bio_put(bio); r10_bio->devs[slot].bio = NULL; @@ -2798,6 +2796,35 @@ static int init_resync(struct r10conf *conf) return 0; } +static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf) +{ + struct r10bio *r10bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); + struct rsync_pages *rp; + struct bio *bio; + int nalloc; + int i; + + if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery) || + test_bit(MD_RECOVERY_RESHAPE, &conf->mddev->recovery)) + nalloc = conf->copies; /* resync */ + else + nalloc = 2; /* recovery */ + + for (i = 0; i < nalloc; i++) { + bio = r10bio->devs[i].bio; + rp = bio->bi_private; + bio_reset(bio); + bio->bi_private = rp; + bio = r10bio->devs[i].repl_bio; + if (bio) { + rp = bio->bi_private; + bio_reset(bio); + bio->bi_private = rp; + } + } + return r10bio; +} + /* * perform a "sync" on one "block" * @@ -2950,7 +2977,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, /* Again, very different code for resync and recovery. * Both must result in an r10bio with a list of bios that - * have bi_end_io, bi_sector, bi_bdev set, + * have bi_end_io, bi_sector, bi_disk set, * and bi_private set to the r10bio. * For recovery, we may actually create several r10bios * with 2 bios in each, that correspond to the bios in the main one. @@ -3027,7 +3054,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, atomic_inc(&mreplace->nr_pending); rcu_read_unlock(); - r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); + r10_bio = raid10_alloc_init_r10buf(conf); r10_bio->state = 0; raise_barrier(conf, rb2 != NULL); atomic_set(&r10_bio->remaining, 0); @@ -3095,7 +3122,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, from_addr = r10_bio->devs[j].addr; bio->bi_iter.bi_sector = from_addr + rdev->data_offset; - bio->bi_bdev = rdev->bdev; + bio_set_dev(bio, rdev->bdev); atomic_inc(&rdev->nr_pending); /* and we write to 'i' (if not in_sync) */ @@ -3117,7 +3144,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_iter.bi_sector = to_addr + mrdev->data_offset; - bio->bi_bdev = mrdev->bdev; + bio_set_dev(bio, mrdev->bdev); atomic_inc(&r10_bio->remaining); } else r10_bio->devs[1].bio->bi_end_io = NULL; @@ -3143,7 +3170,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_iter.bi_sector = to_addr + mreplace->data_offset; - bio->bi_bdev = mreplace->bdev; + bio_set_dev(bio, mreplace->bdev); atomic_inc(&r10_bio->remaining); break; } @@ -3236,7 +3263,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, } if (sync_blocks < max_sync) max_sync = sync_blocks; - r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); + r10_bio = raid10_alloc_init_r10buf(conf); r10_bio->state = 0; r10_bio->mddev = mddev; @@ -3289,7 +3316,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, if (test_bit(FailFast, &rdev->flags)) bio->bi_opf |= MD_FAILFAST; bio->bi_iter.bi_sector = sector + rdev->data_offset; - bio->bi_bdev = rdev->bdev; + bio_set_dev(bio, rdev->bdev); count++; rdev = rcu_dereference(conf->mirrors[d].replacement); @@ -3311,7 +3338,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, if (test_bit(FailFast, &rdev->flags)) bio->bi_opf |= MD_FAILFAST; bio->bi_iter.bi_sector = sector + rdev->data_offset; - bio->bi_bdev = rdev->bdev; + bio_set_dev(bio, rdev->bdev); count++; rcu_read_unlock(); } @@ -3367,7 +3394,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, r10_bio->sectors = nr_sectors; if (bio->bi_end_io == end_sync_read) { - md_sync_acct(bio->bi_bdev, nr_sectors); + md_sync_acct_bio(bio, nr_sectors); bio->bi_status = 0; generic_make_request(bio); } @@ -4360,7 +4387,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, read_more: /* Now schedule reads for blocks from sector_nr to last */ - r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); + r10_bio = raid10_alloc_init_r10buf(conf); r10_bio->state = 0; raise_barrier(conf, sectors_done != 0); atomic_set(&r10_bio->remaining, 0); @@ -4383,7 +4410,7 @@ read_more: read_bio = bio_alloc_mddev(GFP_KERNEL, RESYNC_PAGES, mddev); - read_bio->bi_bdev = rdev->bdev; + bio_set_dev(read_bio, rdev->bdev); read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr + rdev->data_offset); read_bio->bi_private = r10_bio; @@ -4417,7 +4444,7 @@ read_more: if (!rdev2 || test_bit(Faulty, &rdev2->flags)) continue; - b->bi_bdev = rdev2->bdev; + bio_set_dev(b, rdev2->bdev); b->bi_iter.bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset; b->bi_end_io = end_reshape_write; @@ -4449,7 +4476,7 @@ read_more: r10_bio->sectors = nr_sectors; /* Now submit the read */ - md_sync_acct(read_bio->bi_bdev, r10_bio->sectors); + md_sync_acct_bio(read_bio, r10_bio->sectors); atomic_inc(&r10_bio->remaining); read_bio->bi_next = NULL; generic_make_request(read_bio); @@ -4511,7 +4538,7 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio) } atomic_inc(&rdev->nr_pending); rcu_read_unlock(); - md_sync_acct(b->bi_bdev, r10_bio->sectors); + md_sync_acct_bio(b, r10_bio->sectors); atomic_inc(&r10_bio->remaining); b->bi_next = NULL; generic_make_request(b); diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 2dcbafa8e66c..0b7406ac8ce1 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -745,7 +745,7 @@ static struct bio *r5l_bio_alloc(struct r5l_log *log) struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, log->bs); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - bio->bi_bdev = log->rdev->bdev; + bio_set_dev(bio, log->rdev->bdev); bio->bi_iter.bi_sector = log->rdev->data_offset + log->log_start; return bio; @@ -1313,7 +1313,7 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log) if (!do_flush) return; bio_reset(&log->flush_bio); - log->flush_bio.bi_bdev = log->rdev->bdev; + bio_set_dev(&log->flush_bio, log->rdev->bdev); log->flush_bio.bi_end_io = r5l_log_flush_endio; log->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; submit_bio(&log->flush_bio); @@ -1691,7 +1691,7 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log, sector_t offset) { bio_reset(ctx->ra_bio); - ctx->ra_bio->bi_bdev = log->rdev->bdev; + bio_set_dev(ctx->ra_bio, log->rdev->bdev); bio_set_op_attrs(ctx->ra_bio, REQ_OP_READ, 0); ctx->ra_bio->bi_iter.bi_sector = log->rdev->data_offset + offset; @@ -2529,11 +2529,18 @@ static void r5l_write_super(struct r5l_log *log, sector_t cp) static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page) { - struct r5conf *conf = mddev->private; + struct r5conf *conf; int ret; - if (!conf->log) + ret = mddev_lock(mddev); + if (ret) + return ret; + + conf = mddev->private; + if (!conf || !conf->log) { + mddev_unlock(mddev); return 0; + } switch (conf->log->r5c_journal_mode) { case R5C_JOURNAL_MODE_WRITE_THROUGH: @@ -2551,6 +2558,7 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page) default: ret = 0; } + mddev_unlock(mddev); return ret; } diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 44ad5baf3206..cd026c88f7ef 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -87,6 +87,8 @@ * The current io_unit accepting new stripes is always at the end of the list. */ +#define PPL_SPACE_SIZE (128 * 1024) + struct ppl_conf { struct mddev *mddev; @@ -122,6 +124,10 @@ struct ppl_log { * always at the end of io_list */ spinlock_t io_list_lock; struct list_head io_list; /* all io_units of this log */ + + sector_t next_io_sector; + unsigned int entry_space; + bool use_multippl; }; #define PPL_IO_INLINE_BVECS 32 @@ -264,13 +270,12 @@ static int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh) int i; sector_t data_sector = 0; int data_disks = 0; - unsigned int entry_space = (log->rdev->ppl.size << 9) - PPL_HEADER_SIZE; struct r5conf *conf = sh->raid_conf; pr_debug("%s: stripe: %llu\n", __func__, (unsigned long long)sh->sector); /* check if current io_unit is full */ - if (io && (io->pp_size == entry_space || + if (io && (io->pp_size == log->entry_space || io->entries_count == PPL_HDR_MAX_ENTRIES)) { pr_debug("%s: add io_unit blocked by seq: %llu\n", __func__, io->seq); @@ -415,7 +420,7 @@ static void ppl_submit_iounit_bio(struct ppl_io_unit *io, struct bio *bio) pr_debug("%s: seq: %llu size: %u sector: %llu dev: %s\n", __func__, io->seq, bio->bi_iter.bi_size, (unsigned long long)bio->bi_iter.bi_sector, - bdevname(bio->bi_bdev, b)); + bio_devname(bio, b)); submit_bio(bio); } @@ -451,12 +456,25 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) pplhdr->entries_count = cpu_to_le32(io->entries_count); pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE)); + /* Rewind the buffer if current PPL is larger then remaining space */ + if (log->use_multippl && + log->rdev->ppl.sector + log->rdev->ppl.size - log->next_io_sector < + (PPL_HEADER_SIZE + io->pp_size) >> 9) + log->next_io_sector = log->rdev->ppl.sector; + + bio->bi_end_io = ppl_log_endio; bio->bi_opf = REQ_OP_WRITE | REQ_FUA; - bio->bi_bdev = log->rdev->bdev; - bio->bi_iter.bi_sector = log->rdev->ppl.sector; + bio_set_dev(bio, log->rdev->bdev); + bio->bi_iter.bi_sector = log->next_io_sector; bio_add_page(bio, io->header_page, PAGE_SIZE, 0); + pr_debug("%s: log->current_io_sector: %llu\n", __func__, + (unsigned long long)log->next_io_sector); + + if (log->use_multippl) + log->next_io_sector += (PPL_HEADER_SIZE + io->pp_size) >> 9; + list_for_each_entry(sh, &io->stripe_list, log_list) { /* entries for full stripe writes have no partial parity */ if (test_bit(STRIPE_FULL_WRITE, &sh->state)) @@ -468,7 +486,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, ppl_conf->bs); bio->bi_opf = prev->bi_opf; - bio->bi_bdev = prev->bi_bdev; + bio_copy_dev(bio, prev); bio->bi_iter.bi_sector = bio_end_sector(prev); bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0); @@ -813,12 +831,14 @@ out: return ret; } -static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr) +static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr, + sector_t offset) { struct ppl_conf *ppl_conf = log->ppl_conf; struct md_rdev *rdev = log->rdev; struct mddev *mddev = rdev->mddev; - sector_t ppl_sector = rdev->ppl.sector + (PPL_HEADER_SIZE >> 9); + sector_t ppl_sector = rdev->ppl.sector + offset + + (PPL_HEADER_SIZE >> 9); struct page *page; int i; int ret = 0; @@ -902,6 +922,9 @@ static int ppl_write_empty_header(struct ppl_log *log) return -ENOMEM; pplhdr = page_address(page); + /* zero out PPL space to avoid collision with old PPLs */ + blkdev_issue_zeroout(rdev->bdev, rdev->ppl.sector, + log->rdev->ppl.size, GFP_NOIO, 0); memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED); pplhdr->signature = cpu_to_le32(log->ppl_conf->signature); pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE)); @@ -922,63 +945,110 @@ static int ppl_load_distributed(struct ppl_log *log) struct ppl_conf *ppl_conf = log->ppl_conf; struct md_rdev *rdev = log->rdev; struct mddev *mddev = rdev->mddev; - struct page *page; - struct ppl_header *pplhdr; + struct page *page, *page2, *tmp; + struct ppl_header *pplhdr = NULL, *prev_pplhdr = NULL; u32 crc, crc_stored; u32 signature; - int ret = 0; + int ret = 0, i; + sector_t pplhdr_offset = 0, prev_pplhdr_offset = 0; pr_debug("%s: disk: %d\n", __func__, rdev->raid_disk); - - /* read PPL header */ + /* read PPL headers, find the recent one */ page = alloc_page(GFP_KERNEL); if (!page) return -ENOMEM; - if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset, - PAGE_SIZE, page, REQ_OP_READ, 0, false)) { - md_error(mddev, rdev); - ret = -EIO; - goto out; + page2 = alloc_page(GFP_KERNEL); + if (!page2) { + __free_page(page); + return -ENOMEM; } - pplhdr = page_address(page); - /* check header validity */ - crc_stored = le32_to_cpu(pplhdr->checksum); - pplhdr->checksum = 0; - crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE); + /* searching ppl area for latest ppl */ + while (pplhdr_offset < rdev->ppl.size - (PPL_HEADER_SIZE >> 9)) { + if (!sync_page_io(rdev, + rdev->ppl.sector - rdev->data_offset + + pplhdr_offset, PAGE_SIZE, page, REQ_OP_READ, + 0, false)) { + md_error(mddev, rdev); + ret = -EIO; + /* if not able to read - don't recover any PPL */ + pplhdr = NULL; + break; + } + pplhdr = page_address(page); + + /* check header validity */ + crc_stored = le32_to_cpu(pplhdr->checksum); + pplhdr->checksum = 0; + crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE); + + if (crc_stored != crc) { + pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x (offset: %llu)\n", + __func__, crc_stored, crc, + (unsigned long long)pplhdr_offset); + pplhdr = prev_pplhdr; + pplhdr_offset = prev_pplhdr_offset; + break; + } - if (crc_stored != crc) { - pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x\n", - __func__, crc_stored, crc); - ppl_conf->mismatch_count++; - goto out; - } + signature = le32_to_cpu(pplhdr->signature); - signature = le32_to_cpu(pplhdr->signature); + if (mddev->external) { + /* + * For external metadata the header signature is set and + * validated in userspace. + */ + ppl_conf->signature = signature; + } else if (ppl_conf->signature != signature) { + pr_debug("%s: ppl header signature does not match: stored: 0x%x configured: 0x%x (offset: %llu)\n", + __func__, signature, ppl_conf->signature, + (unsigned long long)pplhdr_offset); + pplhdr = prev_pplhdr; + pplhdr_offset = prev_pplhdr_offset; + break; + } - if (mddev->external) { - /* - * For external metadata the header signature is set and - * validated in userspace. - */ - ppl_conf->signature = signature; - } else if (ppl_conf->signature != signature) { - pr_debug("%s: ppl header signature does not match: stored: 0x%x configured: 0x%x\n", - __func__, signature, ppl_conf->signature); - ppl_conf->mismatch_count++; - goto out; + if (prev_pplhdr && le64_to_cpu(prev_pplhdr->generation) > + le64_to_cpu(pplhdr->generation)) { + /* previous was newest */ + pplhdr = prev_pplhdr; + pplhdr_offset = prev_pplhdr_offset; + break; + } + + prev_pplhdr_offset = pplhdr_offset; + prev_pplhdr = pplhdr; + + tmp = page; + page = page2; + page2 = tmp; + + /* calculate next potential ppl offset */ + for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++) + pplhdr_offset += + le32_to_cpu(pplhdr->entries[i].pp_size) >> 9; + pplhdr_offset += PPL_HEADER_SIZE >> 9; } + /* no valid ppl found */ + if (!pplhdr) + ppl_conf->mismatch_count++; + else + pr_debug("%s: latest PPL found at offset: %llu, with generation: %llu\n", + __func__, (unsigned long long)pplhdr_offset, + le64_to_cpu(pplhdr->generation)); + /* attempt to recover from log if we are starting a dirty array */ - if (!mddev->pers && mddev->recovery_cp != MaxSector) - ret = ppl_recover(log, pplhdr); -out: + if (pplhdr && !mddev->pers && mddev->recovery_cp != MaxSector) + ret = ppl_recover(log, pplhdr, pplhdr_offset); + /* write empty header if we are starting the array */ if (!ret && !mddev->pers) ret = ppl_write_empty_header(log); __free_page(page); + __free_page(page2); pr_debug("%s: return: %d mismatch_count: %d recovered_entries: %d\n", __func__, ret, ppl_conf->mismatch_count, @@ -1031,6 +1101,7 @@ static int ppl_load(struct ppl_conf *ppl_conf) static void __ppl_exit_log(struct ppl_conf *ppl_conf) { clear_bit(MD_HAS_PPL, &ppl_conf->mddev->flags); + clear_bit(MD_HAS_MULTIPLE_PPLS, &ppl_conf->mddev->flags); kfree(ppl_conf->child_logs); @@ -1099,6 +1170,22 @@ static int ppl_validate_rdev(struct md_rdev *rdev) return 0; } +static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev) +{ + if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE + + PPL_HEADER_SIZE) * 2) { + log->use_multippl = true; + set_bit(MD_HAS_MULTIPLE_PPLS, + &log->ppl_conf->mddev->flags); + log->entry_space = PPL_SPACE_SIZE; + } else { + log->use_multippl = false; + log->entry_space = (log->rdev->ppl.size << 9) - + PPL_HEADER_SIZE; + } + log->next_io_sector = rdev->ppl.sector; +} + int ppl_init_log(struct r5conf *conf) { struct ppl_conf *ppl_conf; @@ -1196,6 +1283,7 @@ int ppl_init_log(struct r5conf *conf) q = bdev_get_queue(rdev->bdev); if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) need_cache_flush = true; + ppl_init_child_log(log, rdev); } } @@ -1261,6 +1349,7 @@ int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add) if (!ret) { log->rdev = rdev; ret = ppl_write_empty_header(log); + ppl_init_child_log(log, rdev); } } else { log->rdev = NULL; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0fc2748aaf95..4188a4881148 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -494,7 +494,6 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp) return 0; } -static void raid5_build_block(struct stripe_head *sh, int i, int previous); static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous, struct stripe_head *sh); @@ -530,7 +529,7 @@ retry: WARN_ON(1); } dev->flags = 0; - raid5_build_block(sh, i, previous); + dev->sector = raid5_compute_blocknr(sh, i, previous); } if (read_seqcount_retry(&conf->gen_lock, seq)) goto retry; @@ -1096,7 +1095,7 @@ again: set_bit(STRIPE_IO_STARTED, &sh->state); - bi->bi_bdev = rdev->bdev; + bio_set_dev(bi, rdev->bdev); bio_set_op_attrs(bi, op, op_flags); bi->bi_end_io = op_is_write(op) ? raid5_end_write_request @@ -1145,7 +1144,7 @@ again: set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); if (conf->mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), + trace_block_bio_remap(bi->bi_disk->queue, bi, disk_devt(conf->mddev->gendisk), sh->dev[i].sector); if (should_defer && op_is_write(op)) @@ -1160,7 +1159,7 @@ again: set_bit(STRIPE_IO_STARTED, &sh->state); - rbi->bi_bdev = rrdev->bdev; + bio_set_dev(rbi, rrdev->bdev); bio_set_op_attrs(rbi, op, op_flags); BUG_ON(!op_is_write(op)); rbi->bi_end_io = raid5_end_write_request; @@ -1193,7 +1192,7 @@ again: if (op == REQ_OP_DISCARD) rbi->bi_vcnt = 0; if (conf->mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), + trace_block_bio_remap(rbi->bi_disk->queue, rbi, disk_devt(conf->mddev->gendisk), sh->dev[i].sector); if (should_defer && op_is_write(op)) @@ -2662,14 +2661,6 @@ static void raid5_end_write_request(struct bio *bi) raid5_release_stripe(sh->batch_head); } -static void raid5_build_block(struct stripe_head *sh, int i, int previous) -{ - struct r5dev *dev = &sh->dev[i]; - - dev->flags = 0; - dev->sector = raid5_compute_blocknr(sh, i, previous); -} - static void raid5_error(struct mddev *mddev, struct md_rdev *rdev) { char b[BDEVNAME_SIZE]; @@ -5092,10 +5083,12 @@ static int raid5_congested(struct mddev *mddev, int bits) static int in_chunk_boundary(struct mddev *mddev, struct bio *bio) { struct r5conf *conf = mddev->private; - sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev); + sector_t sector = bio->bi_iter.bi_sector; unsigned int chunk_sectors; unsigned int bio_sectors = bio_sectors(bio); + WARN_ON_ONCE(bio->bi_partno); + chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors); return chunk_sectors >= ((sector & (chunk_sectors - 1)) + bio_sectors); @@ -5231,7 +5224,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) atomic_inc(&rdev->nr_pending); rcu_read_unlock(); raid_bio->bi_next = (void*)rdev; - align_bi->bi_bdev = rdev->bdev; + bio_set_dev(align_bi, rdev->bdev); bio_clear_flag(align_bi, BIO_SEG_VALID); if (is_badblock(rdev, align_bi->bi_iter.bi_sector, @@ -5253,7 +5246,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) spin_unlock_irq(&conf->device_lock); if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), + trace_block_bio_remap(align_bi->bi_disk->queue, align_bi, disk_devt(mddev->gendisk), raid_bio->bi_iter.bi_sector); generic_make_request(align_bi); @@ -6235,6 +6228,10 @@ static void raid5_do_work(struct work_struct *work) spin_unlock_irq(&conf->device_lock); + flush_deferred_bios(conf); + + r5l_flush_stripe_to_raid(conf->log); + async_tx_issue_pending_all(); blk_finish_plug(&plug); @@ -7241,6 +7238,7 @@ static int raid5_run(struct mddev *mddev) pr_warn("md/raid:%s: using journal device and PPL not allowed - disabling PPL\n", mdname(mddev)); clear_bit(MD_HAS_PPL, &mddev->flags); + clear_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags); } if (mddev->private == NULL) |