diff options
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- | drivers/md/dm.c | 134 |
1 files changed, 112 insertions, 22 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index fc4f743f3b53..fe7c56e10435 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -68,10 +68,12 @@ union map_info *dm_get_mapinfo(struct bio *bio) #define DMF_FROZEN 2 #define DMF_FREEING 3 #define DMF_DELETING 4 +#define DMF_NOFLUSH_SUSPENDING 5 struct mapped_device { struct rw_semaphore io_lock; struct semaphore suspend_lock; + spinlock_t pushback_lock; rwlock_t map_lock; atomic_t holders; atomic_t open_count; @@ -89,7 +91,8 @@ struct mapped_device { */ atomic_t pending; wait_queue_head_t wait; - struct bio_list deferred; + struct bio_list deferred; + struct bio_list pushback; /* * The current mapping. @@ -121,8 +124,8 @@ struct mapped_device { }; #define MIN_IOS 256 -static kmem_cache_t *_io_cache; -static kmem_cache_t *_tio_cache; +static struct kmem_cache *_io_cache; +static struct kmem_cache *_tio_cache; static int __init local_init(void) { @@ -444,23 +447,50 @@ int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo) * you this clearly demarcated crap. *---------------------------------------------------------------*/ +static int __noflush_suspending(struct mapped_device *md) +{ + return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); +} + /* * Decrements the number of outstanding ios that a bio has been * cloned into, completing the original io if necc. */ static void dec_pending(struct dm_io *io, int error) { - if (error) + unsigned long flags; + + /* Push-back supersedes any I/O errors */ + if (error && !(io->error > 0 && __noflush_suspending(io->md))) io->error = error; if (atomic_dec_and_test(&io->io_count)) { + if (io->error == DM_ENDIO_REQUEUE) { + /* + * Target requested pushing back the I/O. + * This must be handled before the sleeper on + * suspend queue merges the pushback list. + */ + spin_lock_irqsave(&io->md->pushback_lock, flags); + if (__noflush_suspending(io->md)) + bio_list_add(&io->md->pushback, io->bio); + else + /* noflush suspend was interrupted. */ + io->error = -EIO; + spin_unlock_irqrestore(&io->md->pushback_lock, flags); + } + if (end_io_acct(io)) /* nudge anyone waiting on suspend queue */ wake_up(&io->md->wait); - blk_add_trace_bio(io->md->queue, io->bio, BLK_TA_COMPLETE); + if (io->error != DM_ENDIO_REQUEUE) { + blk_add_trace_bio(io->md->queue, io->bio, + BLK_TA_COMPLETE); + + bio_endio(io->bio, io->bio->bi_size, io->error); + } - bio_endio(io->bio, io->bio->bi_size, io->error); free_io(io->md, io); } } @@ -480,12 +510,19 @@ static int clone_endio(struct bio *bio, unsigned int done, int error) if (endio) { r = endio(tio->ti, bio, error, &tio->info); - if (r < 0) + if (r < 0 || r == DM_ENDIO_REQUEUE) + /* + * error and requeue request are handled + * in dec_pending(). + */ error = r; - - else if (r > 0) - /* the target wants another shot at the io */ + else if (r == DM_ENDIO_INCOMPLETE) + /* The target will handle the io */ return 1; + else if (r) { + DMWARN("unimplemented target endio return value: %d", r); + BUG(); + } } dec_pending(tio->io, error); @@ -543,7 +580,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, atomic_inc(&tio->io->io_count); sector = clone->bi_sector; r = ti->type->map(ti, clone, &tio->info); - if (r > 0) { + if (r == DM_MAPIO_REMAPPED) { /* the bio has been remapped so dispatch it */ blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone, @@ -551,10 +588,8 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, clone->bi_sector); generic_make_request(clone); - } - - else if (r < 0) { - /* error the io and bail out */ + } else if (r < 0 || r == DM_MAPIO_REQUEUE) { + /* error the io and bail out, or requeue it if needed */ md = tio->io->md; dec_pending(tio->io, r); /* @@ -563,6 +598,9 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, clone->bi_private = md->bs; bio_put(clone); free_tio(md, tio); + } else if (r) { + DMWARN("unimplemented target map return value: %d", r); + BUG(); } } @@ -948,6 +986,7 @@ static struct mapped_device *alloc_dev(int minor) memset(md, 0, sizeof(*md)); init_rwsem(&md->io_lock); init_MUTEX(&md->suspend_lock); + spin_lock_init(&md->pushback_lock); rwlock_init(&md->map_lock); atomic_set(&md->holders, 1); atomic_set(&md->open_count, 0); @@ -966,8 +1005,8 @@ static struct mapped_device *alloc_dev(int minor) md->queue->issue_flush_fn = dm_flush_all; md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache); - if (!md->io_pool) - goto bad2; + if (!md->io_pool) + goto bad2; md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache); if (!md->tio_pool) @@ -1275,12 +1314,15 @@ static void unlock_fs(struct mapped_device *md) * dm_bind_table, dm_suspend must be called to flush any in * flight bios and ensure that any further io gets deferred. */ -int dm_suspend(struct mapped_device *md, int do_lockfs) +int dm_suspend(struct mapped_device *md, unsigned suspend_flags) { struct dm_table *map = NULL; + unsigned long flags; DECLARE_WAITQUEUE(wait, current); struct bio *def; int r = -EINVAL; + int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0; + int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0; down(&md->suspend_lock); @@ -1289,6 +1331,13 @@ int dm_suspend(struct mapped_device *md, int do_lockfs) map = dm_get_table(md); + /* + * DMF_NOFLUSH_SUSPENDING must be set before presuspend. + * This flag is cleared before dm_suspend returns. + */ + if (noflush) + set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); + /* This does not get reverted if there's an error later. */ dm_table_presuspend_targets(map); @@ -1296,11 +1345,14 @@ int dm_suspend(struct mapped_device *md, int do_lockfs) if (!md->suspended_bdev) { DMWARN("bdget failed in dm_suspend"); r = -ENOMEM; - goto out; + goto flush_and_out; } - /* Flush I/O to the device. */ - if (do_lockfs) { + /* + * Flush I/O to the device. + * noflush supersedes do_lockfs, because lock_fs() needs to flush I/Os. + */ + if (do_lockfs && !noflush) { r = lock_fs(md); if (r) goto out; @@ -1336,6 +1388,14 @@ int dm_suspend(struct mapped_device *md, int do_lockfs) down_write(&md->io_lock); remove_wait_queue(&md->wait, &wait); + if (noflush) { + spin_lock_irqsave(&md->pushback_lock, flags); + clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); + bio_list_merge_head(&md->deferred, &md->pushback); + bio_list_init(&md->pushback); + spin_unlock_irqrestore(&md->pushback_lock, flags); + } + /* were we interrupted ? */ r = -EINTR; if (atomic_read(&md->pending)) { @@ -1344,7 +1404,7 @@ int dm_suspend(struct mapped_device *md, int do_lockfs) __flush_deferred_io(md, def); up_write(&md->io_lock); unlock_fs(md); - goto out; + goto out; /* pushback list is already flushed, so skip flush */ } up_write(&md->io_lock); @@ -1354,6 +1414,25 @@ int dm_suspend(struct mapped_device *md, int do_lockfs) r = 0; +flush_and_out: + if (r && noflush) { + /* + * Because there may be already I/Os in the pushback list, + * flush them before return. + */ + down_write(&md->io_lock); + + spin_lock_irqsave(&md->pushback_lock, flags); + clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); + bio_list_merge_head(&md->deferred, &md->pushback); + bio_list_init(&md->pushback); + spin_unlock_irqrestore(&md->pushback_lock, flags); + + def = bio_list_get(&md->deferred); + __flush_deferred_io(md, def); + up_write(&md->io_lock); + } + out: if (r && md->suspended_bdev) { bdput(md->suspended_bdev); @@ -1440,6 +1519,17 @@ int dm_suspended(struct mapped_device *md) return test_bit(DMF_SUSPENDED, &md->flags); } +int dm_noflush_suspending(struct dm_target *ti) +{ + struct mapped_device *md = dm_table_get_md(ti->table); + int r = __noflush_suspending(md); + + dm_put(md); + + return r; +} +EXPORT_SYMBOL_GPL(dm_noflush_suspending); + static struct block_device_operations dm_blk_dops = { .open = dm_blk_open, .release = dm_blk_close, |