summaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bitmap.c11
-rw-r--r--drivers/md/dm-crypt.c9
-rw-r--r--drivers/md/dm-io.c2
-rw-r--r--drivers/md/dm-kcopyd.c55
-rw-r--r--drivers/md/dm-log-userspace-transfer.c2
-rw-r--r--drivers/md/dm-log.c8
-rw-r--r--drivers/md/dm-mpath.c22
-rw-r--r--drivers/md/dm-raid.c2
-rw-r--r--drivers/md/dm-raid1.c2
-rw-r--r--drivers/md/dm-table.c31
-rw-r--r--drivers/md/dm.c52
-rw-r--r--drivers/md/dm.h2
-rw-r--r--drivers/md/linear.c21
-rw-r--r--drivers/md/md.c100
-rw-r--r--drivers/md/md.h4
-rw-r--r--drivers/md/multipath.c39
-rw-r--r--drivers/md/raid0.c61
-rw-r--r--drivers/md/raid1.c89
-rw-r--r--drivers/md/raid10.c102
-rw-r--r--drivers/md/raid5.c122
-rw-r--r--drivers/md/raid5.h2
21 files changed, 256 insertions, 482 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 9a35320fb59f..5c9362792f1d 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -347,7 +347,7 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait)
atomic_inc(&bitmap->pending_writes);
set_buffer_locked(bh);
set_buffer_mapped(bh);
- submit_bh(WRITE | REQ_UNPLUG | REQ_SYNC, bh);
+ submit_bh(WRITE | REQ_SYNC, bh);
bh = bh->b_this_page;
}
@@ -854,7 +854,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
if (bitmap->flags & BITMAP_HOSTENDIAN)
set_bit(bit, kaddr);
else
- ext2_set_bit(bit, kaddr);
+ __test_and_set_bit_le(bit, kaddr);
kunmap_atomic(kaddr, KM_USER0);
PRINTK("set file bit %lu page %lu\n", bit, page->index);
}
@@ -1050,7 +1050,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
if (bitmap->flags & BITMAP_HOSTENDIAN)
b = test_bit(bit, paddr);
else
- b = ext2_test_bit(bit, paddr);
+ b = test_bit_le(bit, paddr);
kunmap_atomic(paddr, KM_USER0);
if (b) {
/* if the disk bit is set, set the memory bit */
@@ -1226,7 +1226,7 @@ void bitmap_daemon_work(mddev_t *mddev)
clear_bit(file_page_offset(bitmap, j),
paddr);
else
- ext2_clear_bit(file_page_offset(bitmap, j),
+ __test_and_clear_bit_le(file_page_offset(bitmap, j),
paddr);
kunmap_atomic(paddr, KM_USER0);
} else
@@ -1339,8 +1339,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
prepare_to_wait(&bitmap->overflow_wait, &__wait,
TASK_UNINTERRUPTIBLE);
spin_unlock_irq(&bitmap->lock);
- md_unplug(bitmap->mddev);
- schedule();
+ io_schedule();
finish_wait(&bitmap->overflow_wait, &__wait);
continue;
}
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 4e054bd91664..2c62c1169f78 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -991,11 +991,6 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone)
clone->bi_destructor = dm_crypt_bio_destructor;
}
-static void kcryptd_unplug(struct crypt_config *cc)
-{
- blk_unplug(bdev_get_queue(cc->dev->bdev));
-}
-
static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
{
struct crypt_config *cc = io->target->private;
@@ -1008,10 +1003,8 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
* one in order to decrypt the whole bio data *afterwards*.
*/
clone = bio_alloc_bioset(gfp, bio_segments(base_bio), cc->bs);
- if (!clone) {
- kcryptd_unplug(cc);
+ if (!clone)
return 1;
- }
crypt_inc_pending(io);
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 136d4f71a116..76a5af00a26b 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -352,7 +352,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
BUG_ON(num_regions > DM_IO_MAX_REGIONS);
if (sync)
- rw |= REQ_SYNC | REQ_UNPLUG;
+ rw |= REQ_SYNC;
/*
* For multiple regions we need to be careful to rewind
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 924f5f0084c2..1bb73a13ca40 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -37,13 +37,6 @@ struct dm_kcopyd_client {
unsigned int nr_pages;
unsigned int nr_free_pages;
- /*
- * Block devices to unplug.
- * Non-NULL pointer means that a block device has some pending requests
- * and needs to be unplugged.
- */
- struct block_device *unplug[2];
-
struct dm_io_client *io_client;
wait_queue_head_t destroyq;
@@ -315,31 +308,6 @@ static int run_complete_job(struct kcopyd_job *job)
return 0;
}
-/*
- * Unplug the block device at the specified index.
- */
-static void unplug(struct dm_kcopyd_client *kc, int rw)
-{
- if (kc->unplug[rw] != NULL) {
- blk_unplug(bdev_get_queue(kc->unplug[rw]));
- kc->unplug[rw] = NULL;
- }
-}
-
-/*
- * Prepare block device unplug. If there's another device
- * to be unplugged at the same array index, we unplug that
- * device first.
- */
-static void prepare_unplug(struct dm_kcopyd_client *kc, int rw,
- struct block_device *bdev)
-{
- if (likely(kc->unplug[rw] == bdev))
- return;
- unplug(kc, rw);
- kc->unplug[rw] = bdev;
-}
-
static void complete_io(unsigned long error, void *context)
{
struct kcopyd_job *job = (struct kcopyd_job *) context;
@@ -386,16 +354,10 @@ static int run_io_job(struct kcopyd_job *job)
.client = job->kc->io_client,
};
- if (job->rw == READ) {
+ if (job->rw == READ)
r = dm_io(&io_req, 1, &job->source, NULL);
- prepare_unplug(job->kc, READ, job->source.bdev);
- } else {
- if (job->num_dests > 1)
- io_req.bi_rw |= REQ_UNPLUG;
+ else
r = dm_io(&io_req, job->num_dests, job->dests, NULL);
- if (!(io_req.bi_rw & REQ_UNPLUG))
- prepare_unplug(job->kc, WRITE, job->dests[0].bdev);
- }
return r;
}
@@ -466,6 +428,7 @@ static void do_work(struct work_struct *work)
{
struct dm_kcopyd_client *kc = container_of(work,
struct dm_kcopyd_client, kcopyd_work);
+ struct blk_plug plug;
/*
* The order that these are called is *very* important.
@@ -473,18 +436,12 @@ static void do_work(struct work_struct *work)
* Pages jobs when successful will jump onto the io jobs
* list. io jobs call wake when they complete and it all
* starts again.
- *
- * Note that io_jobs add block devices to the unplug array,
- * this array is cleared with "unplug" calls. It is thus
- * forbidden to run complete_jobs after io_jobs and before
- * unplug because the block device could be destroyed in
- * job completion callback.
*/
+ blk_start_plug(&plug);
process_jobs(&kc->complete_jobs, kc, run_complete_job);
process_jobs(&kc->pages_jobs, kc, run_pages_job);
process_jobs(&kc->io_jobs, kc, run_io_job);
- unplug(kc, READ);
- unplug(kc, WRITE);
+ blk_finish_plug(&plug);
}
/*
@@ -665,8 +622,6 @@ int dm_kcopyd_client_create(unsigned int nr_pages,
INIT_LIST_HEAD(&kc->io_jobs);
INIT_LIST_HEAD(&kc->pages_jobs);
- memset(kc->unplug, 0, sizeof(kc->unplug));
-
kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
if (!kc->job_pool)
goto bad_slab;
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
index 049eaf12aaab..1f23e048f077 100644
--- a/drivers/md/dm-log-userspace-transfer.c
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -134,7 +134,7 @@ static void cn_ulog_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
{
struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1);
- if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN))
+ if (!cap_raised(current_cap(), CAP_SYS_ADMIN))
return;
spin_lock(&receiving_list_lock);
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 6951536ea29c..57968eb382c1 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -251,20 +251,20 @@ struct log_c {
*/
static inline int log_test_bit(uint32_t *bs, unsigned bit)
{
- return ext2_test_bit(bit, (unsigned long *) bs) ? 1 : 0;
+ return test_bit_le(bit, (unsigned long *) bs) ? 1 : 0;
}
static inline void log_set_bit(struct log_c *l,
uint32_t *bs, unsigned bit)
{
- ext2_set_bit(bit, (unsigned long *) bs);
+ __test_and_set_bit_le(bit, (unsigned long *) bs);
l->touched_cleaned = 1;
}
static inline void log_clear_bit(struct log_c *l,
uint32_t *bs, unsigned bit)
{
- ext2_clear_bit(bit, (unsigned long *) bs);
+ __test_and_clear_bit_le(bit, (unsigned long *) bs);
l->touched_dirtied = 1;
}
@@ -740,7 +740,7 @@ static int core_get_resync_work(struct dm_dirty_log *log, region_t *region)
return 0;
do {
- *region = ext2_find_next_zero_bit(
+ *region = find_next_zero_bit_le(
(unsigned long *) lc->sync_bits,
lc->region_count,
lc->sync_search);
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index b82d28819e2a..4b0b63c290a6 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1283,24 +1283,22 @@ static int do_end_io(struct multipath *m, struct request *clone,
if (!error && !clone->errors)
return 0; /* I/O complete */
- if (error == -EOPNOTSUPP)
- return error;
-
- if (clone->cmd_flags & REQ_DISCARD)
- /*
- * Pass all discard request failures up.
- * FIXME: only fail_path if the discard failed due to a
- * transport problem. This requires precise understanding
- * of the underlying failure (e.g. the SCSI sense).
- */
+ if (error == -EOPNOTSUPP || error == -EREMOTEIO)
return error;
if (mpio->pgpath)
fail_path(mpio->pgpath);
spin_lock_irqsave(&m->lock, flags);
- if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
- r = -EIO;
+ if (!m->nr_valid_paths) {
+ if (!m->queue_if_no_path) {
+ if (!__must_push_back(m))
+ r = -EIO;
+ } else {
+ if (error == -EBADE)
+ r = error;
+ }
+ }
spin_unlock_irqrestore(&m->lock, flags);
return r;
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index b9e1e15ef11c..5ef136cdba91 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -394,7 +394,7 @@ static void raid_unplug(struct dm_target_callbacks *cb)
{
struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
- md_raid5_unplug_device(rs->md.private);
+ md_raid5_kick_device(rs->md.private);
}
/*
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index dee326775c60..976ad4688afc 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -842,8 +842,6 @@ static void do_mirror(struct work_struct *work)
do_reads(ms, &reads);
do_writes(ms, &writes);
do_failures(ms, &failures);
-
- dm_table_unplug_all(ms->ti->table);
}
/*-----------------------------------------------------------------
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 38e4eb1bb965..416d4e258df6 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -55,6 +55,7 @@ struct dm_table {
struct dm_target *targets;
unsigned discards_supported:1;
+ unsigned integrity_supported:1;
/*
* Indicates the rw permissions for the new logical
@@ -859,7 +860,7 @@ int dm_table_alloc_md_mempools(struct dm_table *t)
return -EINVAL;
}
- t->mempools = dm_alloc_md_mempools(type);
+ t->mempools = dm_alloc_md_mempools(type, t->integrity_supported);
if (!t->mempools)
return -ENOMEM;
@@ -935,8 +936,10 @@ static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device
struct dm_dev_internal *dd;
list_for_each_entry(dd, devices, list)
- if (bdev_get_integrity(dd->dm_dev.bdev))
+ if (bdev_get_integrity(dd->dm_dev.bdev)) {
+ t->integrity_supported = 1;
return blk_integrity_register(dm_disk(md), NULL);
+ }
return 0;
}
@@ -1275,29 +1278,6 @@ int dm_table_any_busy_target(struct dm_table *t)
return 0;
}
-void dm_table_unplug_all(struct dm_table *t)
-{
- struct dm_dev_internal *dd;
- struct list_head *devices = dm_table_get_devices(t);
- struct dm_target_callbacks *cb;
-
- list_for_each_entry(dd, devices, list) {
- struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev);
- char b[BDEVNAME_SIZE];
-
- if (likely(q))
- blk_unplug(q);
- else
- DMWARN_LIMIT("%s: Cannot unplug nonexistent device %s",
- dm_device_name(t->md),
- bdevname(dd->dm_dev.bdev, b));
- }
-
- list_for_each_entry(cb, &t->target_callbacks, list)
- if (cb->unplug_fn)
- cb->unplug_fn(cb);
-}
-
struct mapped_device *dm_table_get_md(struct dm_table *t)
{
return t->md;
@@ -1345,4 +1325,3 @@ EXPORT_SYMBOL(dm_table_get_mode);
EXPORT_SYMBOL(dm_table_get_md);
EXPORT_SYMBOL(dm_table_put);
EXPORT_SYMBOL(dm_table_get);
-EXPORT_SYMBOL(dm_table_unplug_all);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index eaa3af0e0632..0cf68b478878 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -477,7 +477,8 @@ static void start_io_acct(struct dm_io *io)
cpu = part_stat_lock();
part_round_stats(cpu, &dm_disk(md)->part0);
part_stat_unlock();
- dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]);
+ atomic_set(&dm_disk(md)->part0.in_flight[rw],
+ atomic_inc_return(&md->pending[rw]));
}
static void end_io_acct(struct dm_io *io)
@@ -497,8 +498,8 @@ static void end_io_acct(struct dm_io *io)
* After this is decremented the bio must not be touched if it is
* a flush.
*/
- dm_disk(md)->part0.in_flight[rw] = pending =
- atomic_dec_return(&md->pending[rw]);
+ pending = atomic_dec_return(&md->pending[rw]);
+ atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
pending += atomic_read(&md->pending[rw^0x1]);
/* nudge anyone waiting on suspend queue */
@@ -807,8 +808,6 @@ void dm_requeue_unmapped_request(struct request *clone)
dm_unprep_request(rq);
spin_lock_irqsave(q->queue_lock, flags);
- if (elv_queue_empty(q))
- blk_plug_device(q);
blk_requeue_request(q, rq);
spin_unlock_irqrestore(q->queue_lock, flags);
@@ -1613,10 +1612,10 @@ static void dm_request_fn(struct request_queue *q)
* number of in-flight I/Os after the queue is stopped in
* dm_suspend().
*/
- while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) {
+ while (!blk_queue_stopped(q)) {
rq = blk_peek_request(q);
if (!rq)
- goto plug_and_out;
+ goto delay_and_out;
/* always use block 0 to find the target for flushes for now */
pos = 0;
@@ -1627,7 +1626,7 @@ static void dm_request_fn(struct request_queue *q)
BUG_ON(!dm_target_is_valid(ti));
if (ti->type->busy && ti->type->busy(ti))
- goto plug_and_out;
+ goto delay_and_out;
blk_start_request(rq);
clone = rq->special;
@@ -1647,11 +1646,8 @@ requeued:
BUG_ON(!irqs_disabled());
spin_lock(q->queue_lock);
-plug_and_out:
- if (!elv_queue_empty(q))
- /* Some requests still remain, retry later */
- blk_plug_device(q);
-
+delay_and_out:
+ blk_delay_queue(q, HZ / 10);
out:
dm_table_put(map);
@@ -1680,20 +1676,6 @@ static int dm_lld_busy(struct request_queue *q)
return r;
}
-static void dm_unplug_all(struct request_queue *q)
-{
- struct mapped_device *md = q->queuedata;
- struct dm_table *map = dm_get_live_table(md);
-
- if (map) {
- if (dm_request_based(md))
- generic_unplug_device(q);
-
- dm_table_unplug_all(map);
- dm_table_put(map);
- }
-}
-
static int dm_any_congested(void *congested_data, int bdi_bits)
{
int r = bdi_bits;
@@ -1817,7 +1799,6 @@ static void dm_init_md_queue(struct mapped_device *md)
md->queue->backing_dev_info.congested_data = md;
blk_queue_make_request(md->queue, dm_request);
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
- md->queue->unplug_fn = dm_unplug_all;
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
}
@@ -2263,8 +2244,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
int r = 0;
DECLARE_WAITQUEUE(wait, current);
- dm_unplug_all(md->queue);
-
add_wait_queue(&md->wait, &wait);
while (1) {
@@ -2539,7 +2518,6 @@ int dm_resume(struct mapped_device *md)
clear_bit(DMF_SUSPENDED, &md->flags);
- dm_table_unplug_all(map);
r = 0;
out:
dm_table_put(map);
@@ -2643,9 +2621,10 @@ int dm_noflush_suspending(struct dm_target *ti)
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);
-struct dm_md_mempools *dm_alloc_md_mempools(unsigned type)
+struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
{
struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
+ unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS;
if (!pools)
return NULL;
@@ -2662,13 +2641,18 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type)
if (!pools->tio_pool)
goto free_io_pool_and_out;
- pools->bs = (type == DM_TYPE_BIO_BASED) ?
- bioset_create(16, 0) : bioset_create(MIN_IOS, 0);
+ pools->bs = bioset_create(pool_size, 0);
if (!pools->bs)
goto free_tio_pool_and_out;
+ if (integrity && bioset_integrity_create(pools->bs, pool_size))
+ goto free_bioset_and_out;
+
return pools;
+free_bioset_and_out:
+ bioset_free(pools->bs);
+
free_tio_pool_and_out:
mempool_destroy(pools->tio_pool);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 0c2dd5f4af76..1aaf16746da8 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -149,7 +149,7 @@ void dm_kcopyd_exit(void);
/*
* Mempool operations
*/
-struct dm_md_mempools *dm_alloc_md_mempools(unsigned type);
+struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity);
void dm_free_md_mempools(struct dm_md_mempools *pools);
#endif
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 8a2f767f26d8..abfb59a61ede 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -87,22 +87,6 @@ static int linear_mergeable_bvec(struct request_queue *q,
return maxsectors << 9;
}
-static void linear_unplug(struct request_queue *q)
-{
- mddev_t *mddev = q->queuedata;
- linear_conf_t *conf;
- int i;
-
- rcu_read_lock();
- conf = rcu_dereference(mddev->private);
-
- for (i=0; i < mddev->raid_disks; i++) {
- struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev);
- blk_unplug(r_queue);
- }
- rcu_read_unlock();
-}
-
static int linear_congested(void *data, int bits)
{
mddev_t *mddev = data;
@@ -216,7 +200,6 @@ static int linear_run (mddev_t *mddev)
if (md_check_no_bitmap(mddev))
return -EINVAL;
- mddev->queue->queue_lock = &mddev->queue->__queue_lock;
conf = linear_conf(mddev, mddev->raid_disks);
if (!conf)
@@ -225,11 +208,9 @@ static int linear_run (mddev_t *mddev)
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
- mddev->queue->unplug_fn = linear_unplug;
mddev->queue->backing_dev_info.congested_fn = linear_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
- md_integrity_register(mddev);
- return 0;
+ return md_integrity_register(mddev);
}
static void free_conf(struct rcu_head *head)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b76cfc89e1b5..06ecea751a39 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -287,6 +287,7 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
mddev_t *mddev = q->queuedata;
int rv;
int cpu;
+ unsigned int sectors;
if (mddev == NULL || mddev->pers == NULL
|| !mddev->ready) {
@@ -311,12 +312,16 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
atomic_inc(&mddev->active_io);
rcu_read_unlock();
+ /*
+ * save the sectors now since our bio can
+ * go away inside make_request
+ */
+ sectors = bio_sectors(bio);
rv = mddev->pers->make_request(mddev, bio);
cpu = part_stat_lock();
part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
- part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
- bio_sectors(bio));
+ part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
part_stat_unlock();
if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
@@ -548,6 +553,9 @@ static mddev_t * mddev_find(dev_t unit)
{
mddev_t *mddev, *new = NULL;
+ if (unit && MAJOR(unit) != MD_MAJOR)
+ unit &= ~((1<<MdpMinorShift)-1);
+
retry:
spin_lock(&all_mddevs_lock);
@@ -772,8 +780,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
bio->bi_end_io = super_written;
atomic_inc(&mddev->pending_writes);
- submit_bio(REQ_WRITE | REQ_SYNC | REQ_UNPLUG | REQ_FLUSH | REQ_FUA,
- bio);
+ submit_bio(REQ_WRITE | REQ_SYNC | REQ_FLUSH | REQ_FUA, bio);
}
void md_super_wait(mddev_t *mddev)
@@ -801,7 +808,7 @@ int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
struct completion event;
int ret;
- rw |= REQ_SYNC | REQ_UNPLUG;
+ rw |= REQ_SYNC;
bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
rdev->meta_bdev : rdev->bdev;
@@ -1796,8 +1803,12 @@ int md_integrity_register(mddev_t *mddev)
mdname(mddev));
return -EINVAL;
}
- printk(KERN_NOTICE "md: data integrity on %s enabled\n",
- mdname(mddev));
+ printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
+ if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
+ printk(KERN_ERR "md: failed to create integrity pool for %s\n",
+ mdname(mddev));
+ return -EINVAL;
+ }
return 0;
}
EXPORT_SYMBOL(md_integrity_register);
@@ -1947,8 +1958,6 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
__bdevname(dev, b));
return PTR_ERR(bdev);
}
- if (!shared)
- set_bit(AllReserved, &rdev->flags);
rdev->bdev = bdev;
return err;
}
@@ -2465,6 +2474,9 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
if (rdev->raid_disk != -1)
return -EBUSY;
+ if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
+ return -EBUSY;
+
if (rdev->mddev->pers->hot_add_disk == NULL)
return -EINVAL;
@@ -2610,12 +2622,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
mddev_lock(mddev);
list_for_each_entry(rdev2, &mddev->disks, same_set)
- if (test_bit(AllReserved, &rdev2->flags) ||
- (rdev->bdev == rdev2->bdev &&
- rdev != rdev2 &&
- overlaps(rdev->data_offset, rdev->sectors,
- rdev2->data_offset,
- rdev2->sectors))) {
+ if (rdev->bdev == rdev2->bdev &&
+ rdev != rdev2 &&
+ overlaps(rdev->data_offset, rdev->sectors,
+ rdev2->data_offset,
+ rdev2->sectors)) {
overlap = 1;
break;
}
@@ -4133,10 +4144,10 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)
}
mddev->array_sectors = sectors;
- set_capacity(mddev->gendisk, mddev->array_sectors);
- if (mddev->pers)
+ if (mddev->pers) {
+ set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
-
+ }
return len;
}
@@ -4619,6 +4630,7 @@ static int do_md_run(mddev_t *mddev)
}
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
+ mddev->changed = 1;
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
out:
return err;
@@ -4707,6 +4719,7 @@ static void md_clean(mddev_t *mddev)
mddev->sync_speed_min = mddev->sync_speed_max = 0;
mddev->recovery = 0;
mddev->in_sync = 0;
+ mddev->changed = 0;
mddev->degraded = 0;
mddev->safemode = 0;
mddev->bitmap_info.offset = 0;
@@ -4807,7 +4820,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
__md_stop_writes(mddev);
md_stop(mddev);
mddev->queue->merge_bvec_fn = NULL;
- mddev->queue->unplug_fn = NULL;
mddev->queue->backing_dev_info.congested_fn = NULL;
/* tell userspace to handle 'inactive' */
@@ -4822,6 +4834,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
set_capacity(disk, 0);
mutex_unlock(&mddev->open_mutex);
+ mddev->changed = 1;
revalidate_disk(disk);
if (mddev->ro)
@@ -5578,6 +5591,8 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks)
mddev->delta_disks = raid_disks - mddev->raid_disks;
rv = mddev->pers->check_reshape(mddev);
+ if (rv < 0)
+ mddev->delta_disks = 0;
return rv;
}
@@ -6004,7 +6019,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
atomic_inc(&mddev->openers);
mutex_unlock(&mddev->open_mutex);
- check_disk_size_change(mddev->gendisk, bdev);
+ check_disk_change(bdev);
out:
return err;
}
@@ -6019,6 +6034,21 @@ static int md_release(struct gendisk *disk, fmode_t mode)
return 0;
}
+
+static int md_media_changed(struct gendisk *disk)
+{
+ mddev_t *mddev = disk->private_data;
+
+ return mddev->changed;
+}
+
+static int md_revalidate(struct gendisk *disk)
+{
+ mddev_t *mddev = disk->private_data;
+
+ mddev->changed = 0;
+ return 0;
+}
static const struct block_device_operations md_fops =
{
.owner = THIS_MODULE,
@@ -6029,6 +6059,8 @@ static const struct block_device_operations md_fops =
.compat_ioctl = md_compat_ioctl,
#endif
.getgeo = md_getgeo,
+ .media_changed = md_media_changed,
+ .revalidate_disk= md_revalidate,
};
static int md_thread(void * arg)
@@ -6662,8 +6694,6 @@ EXPORT_SYMBOL_GPL(md_allow_write);
void md_unplug(mddev_t *mddev)
{
- if (mddev->queue)
- blk_unplug(mddev->queue);
if (mddev->plug)
mddev->plug->unplug_fn(mddev->plug);
}
@@ -6846,7 +6876,6 @@ void md_do_sync(mddev_t *mddev)
>= mddev->resync_max - mddev->curr_resync_completed
)) {
/* time to update curr_resync_completed */
- md_unplug(mddev);
wait_event(mddev->recovery_wait,
atomic_read(&mddev->recovery_active) == 0);
mddev->curr_resync_completed = j;
@@ -6922,7 +6951,6 @@ void md_do_sync(mddev_t *mddev)
* about not overloading the IO subsystem. (things like an
* e2fsck being done on the RAID array should execute fast)
*/
- md_unplug(mddev);
cond_resched();
currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
@@ -6941,8 +6969,6 @@ void md_do_sync(mddev_t *mddev)
* this also signals 'finished resyncing' to md_stop
*/
out:
- md_unplug(mddev);
-
wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
/* tell personality that we are finished */
@@ -6985,9 +7011,6 @@ void md_do_sync(mddev_t *mddev)
} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
mddev->resync_min = mddev->curr_resync_completed;
mddev->curr_resync = 0;
- if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
- mddev->curr_resync_completed = 0;
- sysfs_notify(&mddev->kobj, NULL, "sync_completed");
wake_up(&resync_wait);
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
md_wakeup_thread(mddev->thread);
@@ -7028,7 +7051,7 @@ static int remove_and_add_spares(mddev_t *mddev)
}
}
- if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) {
+ if (mddev->degraded && !mddev->recovery_disabled) {
list_for_each_entry(rdev, &mddev->disks, same_set) {
if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) &&
@@ -7151,7 +7174,20 @@ void md_check_recovery(mddev_t *mddev)
/* Only thing we do on a ro array is remove
* failed devices.
*/
- remove_and_add_spares(mddev);
+ mdk_rdev_t *rdev;
+ list_for_each_entry(rdev, &mddev->disks, same_set)
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(Blocked, &rdev->flags) &&
+ test_bit(Faulty, &rdev->flags) &&
+ atomic_read(&rdev->nr_pending)==0) {
+ if (mddev->pers->hot_remove_disk(
+ mddev, rdev->raid_disk)==0) {
+ char nm[20];
+ sprintf(nm,"rd%d", rdev->raid_disk);
+ sysfs_remove_link(&mddev->kobj, nm);
+ rdev->raid_disk = -1;
+ }
+ }
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
goto unlock;
}
@@ -7321,7 +7357,7 @@ static int __init md_init(void)
{
int ret = -ENOMEM;
- md_wq = alloc_workqueue("md", WQ_RESCUER, 0);
+ md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
if (!md_wq)
goto err_wq;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index eec517ced31a..12215d437fcc 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -93,8 +93,6 @@ struct mdk_rdev_s
#define Faulty 1 /* device is known to have a fault */
#define In_sync 2 /* device is in_sync with rest of array */
#define WriteMostly 4 /* Avoid reading if at all possible */
-#define AllReserved 6 /* If whole device is reserved for
- * one array */
#define AutoDetected 7 /* added by auto-detect */
#define Blocked 8 /* An error occured on an externally
* managed array, don't allow writes
@@ -276,6 +274,8 @@ struct mddev_s
atomic_t active; /* general refcount */
atomic_t openers; /* number of active opens */
+ int changed; /* True if we might need to
+ * reread partition info */
int degraded; /* whether md should consider
* adding a spare
*/
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 6d7ddf32ef2e..c35890990985 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -106,36 +106,6 @@ static void multipath_end_request(struct bio *bio, int error)
rdev_dec_pending(rdev, conf->mddev);
}
-static void unplug_slaves(mddev_t *mddev)
-{
- multipath_conf_t *conf = mddev->private;
- int i;
-
- rcu_read_lock();
- for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
- if (rdev && !test_bit(Faulty, &rdev->flags)
- && atomic_read(&rdev->nr_pending)) {
- struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
-
- atomic_inc(&rdev->nr_pending);
- rcu_read_unlock();
-
- blk_unplug(r_queue);
-
- rdev_dec_pending(rdev, mddev);
- rcu_read_lock();
- }
- }
- rcu_read_unlock();
-}
-
-static void multipath_unplug(struct request_queue *q)
-{
- unplug_slaves(q->queuedata);
-}
-
-
static int multipath_make_request(mddev_t *mddev, struct bio * bio)
{
multipath_conf_t *conf = mddev->private;
@@ -345,7 +315,7 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
p->rdev = rdev;
goto abort;
}
- md_integrity_register(mddev);
+ err = md_integrity_register(mddev);
}
abort:
@@ -435,7 +405,6 @@ static int multipath_run (mddev_t *mddev)
* bookkeeping area. [whatever we allocate in multipath_run(),
* should be freed in multipath_stop()]
*/
- mddev->queue->queue_lock = &mddev->queue->__queue_lock;
conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL);
mddev->private = conf;
@@ -518,10 +487,12 @@ static int multipath_run (mddev_t *mddev)
*/
md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
- mddev->queue->unplug_fn = multipath_unplug;
mddev->queue->backing_dev_info.congested_fn = multipath_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
- md_integrity_register(mddev);
+
+ if (md_integrity_register(mddev))
+ goto out_free_conf;
+
return 0;
out_free_conf:
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index a39f4c355e55..e86bf3682e1e 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -25,21 +25,6 @@
#include "raid0.h"
#include "raid5.h"
-static void raid0_unplug(struct request_queue *q)
-{
- mddev_t *mddev = q->queuedata;
- raid0_conf_t *conf = mddev->private;
- mdk_rdev_t **devlist = conf->devlist;
- int raid_disks = conf->strip_zone[0].nb_dev;
- int i;
-
- for (i=0; i < raid_disks; i++) {
- struct request_queue *r_queue = bdev_get_queue(devlist[i]->bdev);
-
- blk_unplug(r_queue);
- }
-}
-
static int raid0_congested(void *data, int bits)
{
mddev_t *mddev = data;
@@ -179,6 +164,14 @@ static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf)
rdev1->new_raid_disk = j;
}
+ if (mddev->level == 1) {
+ /* taiking over a raid1 array-
+ * we have only one active disk
+ */
+ j = 0;
+ rdev1->new_raid_disk = j;
+ }
+
if (j < 0 || j >= mddev->raid_disks) {
printk(KERN_ERR "md/raid0:%s: bad disk number %d - "
"aborting!\n", mdname(mddev), j);
@@ -264,7 +257,6 @@ static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf)
mdname(mddev),
(unsigned long long)smallest->sectors);
}
- mddev->queue->unplug_fn = raid0_unplug;
mddev->queue->backing_dev_info.congested_fn = raid0_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
@@ -353,7 +345,6 @@ static int raid0_run(mddev_t *mddev)
if (md_check_no_bitmap(mddev))
return -EINVAL;
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
- mddev->queue->queue_lock = &mddev->queue->__queue_lock;
/* if private is not null, we are here after takeover */
if (mddev->private == NULL) {
@@ -388,8 +379,7 @@ static int raid0_run(mddev_t *mddev)
blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
dump_zones(mddev);
- md_integrity_register(mddev);
- return 0;
+ return md_integrity_register(mddev);
}
static int raid0_stop(mddev_t *mddev)
@@ -644,12 +634,39 @@ static void *raid0_takeover_raid10(mddev_t *mddev)
return priv_conf;
}
+static void *raid0_takeover_raid1(mddev_t *mddev)
+{
+ raid0_conf_t *priv_conf;
+
+ /* Check layout:
+ * - (N - 1) mirror drives must be already faulty
+ */
+ if ((mddev->raid_disks - 1) != mddev->degraded) {
+ printk(KERN_ERR "md/raid0:%s: (N - 1) mirrors drives must be already faulty!\n",
+ mdname(mddev));
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* Set new parameters */
+ mddev->new_level = 0;
+ mddev->new_layout = 0;
+ mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */
+ mddev->delta_disks = 1 - mddev->raid_disks;
+ mddev->raid_disks = 1;
+ /* make sure it will be not marked as dirty */
+ mddev->recovery_cp = MaxSector;
+
+ create_strip_zones(mddev, &priv_conf);
+ return priv_conf;
+}
+
static void *raid0_takeover(mddev_t *mddev)
{
/* raid0 can take over:
* raid4 - if all data disks are active.
* raid5 - providing it is Raid4 layout and one disk is faulty
* raid10 - assuming we have all necessary active disks
+ * raid1 - with (N -1) mirror drives faulty
*/
if (mddev->level == 4)
return raid0_takeover_raid45(mddev);
@@ -665,6 +682,12 @@ static void *raid0_takeover(mddev_t *mddev)
if (mddev->level == 10)
return raid0_takeover_raid10(mddev);
+ if (mddev->level == 1)
+ return raid0_takeover_raid1(mddev);
+
+ printk(KERN_ERR "Takeover from raid%i to raid0 not supported\n",
+ mddev->level);
+
return ERR_PTR(-EINVAL);
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a23ffa397ba9..c2a21ae56d97 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -52,23 +52,16 @@
#define NR_RAID1_BIOS 256
-static void unplug_slaves(mddev_t *mddev);
-
static void allow_barrier(conf_t *conf);
static void lower_barrier(conf_t *conf);
static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
{
struct pool_info *pi = data;
- r1bio_t *r1_bio;
int size = offsetof(r1bio_t, bios[pi->raid_disks]);
/* allocate a r1bio with room for raid_disks entries in the bios array */
- r1_bio = kzalloc(size, gfp_flags);
- if (!r1_bio && pi->mddev)
- unplug_slaves(pi->mddev);
-
- return r1_bio;
+ return kzalloc(size, gfp_flags);
}
static void r1bio_pool_free(void *r1_bio, void *data)
@@ -91,10 +84,8 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
int i, j;
r1_bio = r1bio_pool_alloc(gfp_flags, pi);
- if (!r1_bio) {
- unplug_slaves(pi->mddev);
+ if (!r1_bio)
return NULL;
- }
/*
* Allocate bios : 1 for reading, n-1 for writing
@@ -520,37 +511,6 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
return new_disk;
}
-static void unplug_slaves(mddev_t *mddev)
-{
- conf_t *conf = mddev->private;
- int i;
-
- rcu_read_lock();
- for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
- if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
- struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
-
- atomic_inc(&rdev->nr_pending);
- rcu_read_unlock();
-
- blk_unplug(r_queue);
-
- rdev_dec_pending(rdev, mddev);
- rcu_read_lock();
- }
- }
- rcu_read_unlock();
-}
-
-static void raid1_unplug(struct request_queue *q)
-{
- mddev_t *mddev = q->queuedata;
-
- unplug_slaves(mddev);
- md_wakeup_thread(mddev->thread);
-}
-
static int raid1_congested(void *data, int bits)
{
mddev_t *mddev = data;
@@ -580,20 +540,16 @@ static int raid1_congested(void *data, int bits)
}
-static int flush_pending_writes(conf_t *conf)
+static void flush_pending_writes(conf_t *conf)
{
/* Any writes that have been queued but are awaiting
* bitmap updates get flushed here.
- * We return 1 if any requests were actually submitted.
*/
- int rv = 0;
-
spin_lock_irq(&conf->device_lock);
if (conf->pending_bio_list.head) {
struct bio *bio;
bio = bio_list_get(&conf->pending_bio_list);
- blk_remove_plug(conf->mddev->queue);
spin_unlock_irq(&conf->device_lock);
/* flush any pending bitmap writes to
* disk before proceeding w/ I/O */
@@ -605,10 +561,14 @@ static int flush_pending_writes(conf_t *conf)
generic_make_request(bio);
bio = next;
}
- rv = 1;
} else
spin_unlock_irq(&conf->device_lock);
- return rv;
+}
+
+static void md_kick_device(mddev_t *mddev)
+{
+ blk_flush_plug(current);
+ md_wakeup_thread(mddev->thread);
}
/* Barriers....
@@ -640,8 +600,7 @@ static void raise_barrier(conf_t *conf)
/* Wait until no block IO is waiting */
wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
- conf->resync_lock,
- raid1_unplug(conf->mddev->queue));
+ conf->resync_lock, md_kick_device(conf->mddev));
/* block any new IO from starting */
conf->barrier++;
@@ -649,8 +608,7 @@ static void raise_barrier(conf_t *conf)
/* Now wait for all pending IO to complete */
wait_event_lock_irq(conf->wait_barrier,
!conf->nr_pending && conf->barrier < RESYNC_DEPTH,
- conf->resync_lock,
- raid1_unplug(conf->mddev->queue));
+ conf->resync_lock, md_kick_device(conf->mddev));
spin_unlock_irq(&conf->resync_lock);
}
@@ -672,7 +630,7 @@ static void wait_barrier(conf_t *conf)
conf->nr_waiting++;
wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
conf->resync_lock,
- raid1_unplug(conf->mddev->queue));
+ md_kick_device(conf->mddev));
conf->nr_waiting--;
}
conf->nr_pending++;
@@ -709,7 +667,7 @@ static void freeze_array(conf_t *conf)
conf->nr_pending == conf->nr_queued+1,
conf->resync_lock,
({ flush_pending_writes(conf);
- raid1_unplug(conf->mddev->queue); }));
+ md_kick_device(conf->mddev); }));
spin_unlock_irq(&conf->resync_lock);
}
static void unfreeze_array(conf_t *conf)
@@ -959,7 +917,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
atomic_inc(&r1_bio->remaining);
spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
- blk_plug_device(mddev->queue);
spin_unlock_irqrestore(&conf->device_lock, flags);
}
r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
@@ -968,7 +925,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
/* In case raid1d snuck in to freeze_array */
wake_up(&conf->wait_barrier);
- if (do_sync)
+ if (do_sync || !bitmap)
md_wakeup_thread(mddev->thread);
return 0;
@@ -1175,7 +1132,7 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
p->rdev = rdev;
goto abort;
}
- md_integrity_register(mddev);
+ err = md_integrity_register(mddev);
}
abort:
@@ -1558,7 +1515,6 @@ static void raid1d(mddev_t *mddev)
unsigned long flags;
conf_t *conf = mddev->private;
struct list_head *head = &conf->retry_list;
- int unplug=0;
mdk_rdev_t *rdev;
md_check_recovery(mddev);
@@ -1566,7 +1522,7 @@ static void raid1d(mddev_t *mddev)
for (;;) {
char b[BDEVNAME_SIZE];
- unplug += flush_pending_writes(conf);
+ flush_pending_writes(conf);
spin_lock_irqsave(&conf->device_lock, flags);
if (list_empty(head)) {
@@ -1580,10 +1536,9 @@ static void raid1d(mddev_t *mddev)
mddev = r1_bio->mddev;
conf = mddev->private;
- if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
+ if (test_bit(R1BIO_IsSync, &r1_bio->state))
sync_request_write(mddev, r1_bio);
- unplug = 1;
- } else {
+ else {
int disk;
/* we got a read error. Maybe the drive is bad. Maybe just
@@ -1633,14 +1588,11 @@ static void raid1d(mddev_t *mddev)
bio->bi_end_io = raid1_end_read_request;
bio->bi_rw = READ | do_sync;
bio->bi_private = r1_bio;
- unplug = 1;
generic_make_request(bio);
}
}
cond_resched();
}
- if (unplug)
- unplug_slaves(mddev);
}
@@ -2021,7 +1973,6 @@ static int run(mddev_t *mddev)
if (IS_ERR(conf))
return PTR_ERR(conf);
- mddev->queue->queue_lock = &conf->device_lock;
list_for_each_entry(rdev, &mddev->disks, same_set) {
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
@@ -2064,11 +2015,9 @@ static int run(mddev_t *mddev)
md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
- mddev->queue->unplug_fn = raid1_unplug;
mddev->queue->backing_dev_info.congested_fn = raid1_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
- md_integrity_register(mddev);
- return 0;
+ return md_integrity_register(mddev);
}
static int stop(mddev_t *mddev)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 69b659544390..f7b62370b374 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -57,23 +57,16 @@
*/
#define NR_RAID10_BIOS 256
-static void unplug_slaves(mddev_t *mddev);
-
static void allow_barrier(conf_t *conf);
static void lower_barrier(conf_t *conf);
static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
{
conf_t *conf = data;
- r10bio_t *r10_bio;
int size = offsetof(struct r10bio_s, devs[conf->copies]);
/* allocate a r10bio with room for raid_disks entries in the bios array */
- r10_bio = kzalloc(size, gfp_flags);
- if (!r10_bio && conf->mddev)
- unplug_slaves(conf->mddev);
-
- return r10_bio;
+ return kzalloc(size, gfp_flags);
}
static void r10bio_pool_free(void *r10_bio, void *data)
@@ -106,10 +99,8 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
int nalloc;
r10_bio = r10bio_pool_alloc(gfp_flags, conf);
- if (!r10_bio) {
- unplug_slaves(conf->mddev);
+ if (!r10_bio)
return NULL;
- }
if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
nalloc = conf->copies; /* resync */
@@ -597,37 +588,6 @@ rb_out:
return disk;
}
-static void unplug_slaves(mddev_t *mddev)
-{
- conf_t *conf = mddev->private;
- int i;
-
- rcu_read_lock();
- for (i=0; i < conf->raid_disks; i++) {
- mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
- if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
- struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
-
- atomic_inc(&rdev->nr_pending);
- rcu_read_unlock();
-
- blk_unplug(r_queue);
-
- rdev_dec_pending(rdev, mddev);
- rcu_read_lock();
- }
- }
- rcu_read_unlock();
-}
-
-static void raid10_unplug(struct request_queue *q)
-{
- mddev_t *mddev = q->queuedata;
-
- unplug_slaves(q->queuedata);
- md_wakeup_thread(mddev->thread);
-}
-
static int raid10_congested(void *data, int bits)
{
mddev_t *mddev = data;
@@ -649,20 +609,16 @@ static int raid10_congested(void *data, int bits)
return ret;
}
-static int flush_pending_writes(conf_t *conf)
+static void flush_pending_writes(conf_t *conf)
{
/* Any writes that have been queued but are awaiting
* bitmap updates get flushed here.
- * We return 1 if any requests were actually submitted.
*/
- int rv = 0;
-
spin_lock_irq(&conf->device_lock);
if (conf->pending_bio_list.head) {
struct bio *bio;
bio = bio_list_get(&conf->pending_bio_list);
- blk_remove_plug(conf->mddev->queue);
spin_unlock_irq(&conf->device_lock);
/* flush any pending bitmap writes to disk
* before proceeding w/ I/O */
@@ -674,11 +630,16 @@ static int flush_pending_writes(conf_t *conf)
generic_make_request(bio);
bio = next;
}
- rv = 1;
} else
spin_unlock_irq(&conf->device_lock);
- return rv;
}
+
+static void md_kick_device(mddev_t *mddev)
+{
+ blk_flush_plug(current);
+ md_wakeup_thread(mddev->thread);
+}
+
/* Barriers....
* Sometimes we need to suspend IO while we do something else,
* either some resync/recovery, or reconfigure the array.
@@ -708,8 +669,7 @@ static void raise_barrier(conf_t *conf, int force)
/* Wait until no block IO is waiting (unless 'force') */
wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
- conf->resync_lock,
- raid10_unplug(conf->mddev->queue));
+ conf->resync_lock, md_kick_device(conf->mddev));
/* block any new IO from starting */
conf->barrier++;
@@ -717,8 +677,7 @@ static void raise_barrier(conf_t *conf, int force)
/* No wait for all pending IO to complete */
wait_event_lock_irq(conf->wait_barrier,
!conf->nr_pending && conf->barrier < RESYNC_DEPTH,
- conf->resync_lock,
- raid10_unplug(conf->mddev->queue));
+ conf->resync_lock, md_kick_device(conf->mddev));
spin_unlock_irq(&conf->resync_lock);
}
@@ -739,7 +698,7 @@ static void wait_barrier(conf_t *conf)
conf->nr_waiting++;
wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
conf->resync_lock,
- raid10_unplug(conf->mddev->queue));
+ md_kick_device(conf->mddev));
conf->nr_waiting--;
}
conf->nr_pending++;
@@ -776,7 +735,7 @@ static void freeze_array(conf_t *conf)
conf->nr_pending == conf->nr_queued+1,
conf->resync_lock,
({ flush_pending_writes(conf);
- raid10_unplug(conf->mddev->queue); }));
+ md_kick_device(conf->mddev); }));
spin_unlock_irq(&conf->resync_lock);
}
@@ -971,7 +930,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
atomic_inc(&r10_bio->remaining);
spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
- blk_plug_device(mddev->queue);
spin_unlock_irqrestore(&conf->device_lock, flags);
}
@@ -988,7 +946,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
/* In case raid10d snuck in to freeze_array */
wake_up(&conf->wait_barrier);
- if (do_sync)
+ if (do_sync || !mddev->bitmap)
md_wakeup_thread(mddev->thread);
return 0;
@@ -1230,7 +1188,7 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
p->rdev = rdev;
goto abort;
}
- md_integrity_register(mddev);
+ err = md_integrity_register(mddev);
}
abort:
@@ -1681,7 +1639,6 @@ static void raid10d(mddev_t *mddev)
unsigned long flags;
conf_t *conf = mddev->private;
struct list_head *head = &conf->retry_list;
- int unplug=0;
mdk_rdev_t *rdev;
md_check_recovery(mddev);
@@ -1689,7 +1646,7 @@ static void raid10d(mddev_t *mddev)
for (;;) {
char b[BDEVNAME_SIZE];
- unplug += flush_pending_writes(conf);
+ flush_pending_writes(conf);
spin_lock_irqsave(&conf->device_lock, flags);
if (list_empty(head)) {
@@ -1703,13 +1660,11 @@ static void raid10d(mddev_t *mddev)
mddev = r10_bio->mddev;
conf = mddev->private;
- if (test_bit(R10BIO_IsSync, &r10_bio->state)) {
+ if (test_bit(R10BIO_IsSync, &r10_bio->state))
sync_request_write(mddev, r10_bio);
- unplug = 1;
- } else if (test_bit(R10BIO_IsRecover, &r10_bio->state)) {
+ else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
recovery_request_write(mddev, r10_bio);
- unplug = 1;
- } else {
+ else {
int mirror;
/* we got a read error. Maybe the drive is bad. Maybe just
* the block and we can fix it.
@@ -1756,14 +1711,11 @@ static void raid10d(mddev_t *mddev)
bio->bi_rw = READ | do_sync;
bio->bi_private = r10_bio;
bio->bi_end_io = raid10_end_read_request;
- unplug = 1;
generic_make_request(bio);
}
}
cond_resched();
}
- if (unplug)
- unplug_slaves(mddev);
}
@@ -2304,8 +2256,6 @@ static int run(mddev_t *mddev)
if (!conf)
goto out;
- mddev->queue->queue_lock = &conf->device_lock;
-
mddev->thread = conf->thread;
conf->thread = NULL;
@@ -2376,7 +2326,6 @@ static int run(mddev_t *mddev)
md_set_array_sectors(mddev, size);
mddev->resync_max_sectors = size;
- mddev->queue->unplug_fn = raid10_unplug;
mddev->queue->backing_dev_info.congested_fn = raid10_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
@@ -2394,7 +2343,10 @@ static int run(mddev_t *mddev)
if (conf->near_copies < conf->raid_disks)
blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
- md_integrity_register(mddev);
+
+ if (md_integrity_register(mddev))
+ goto out_free_conf;
+
return 0;
out_free_conf:
@@ -2463,11 +2415,13 @@ static void *raid10_takeover_raid0(mddev_t *mddev)
mddev->recovery_cp = MaxSector;
conf = setup_conf(mddev);
- if (!IS_ERR(conf))
+ if (!IS_ERR(conf)) {
list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0)
rdev->new_raid_disk = rdev->raid_disk * 2;
-
+ conf->barrier = 1;
+ }
+
return conf;
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 5044babfcda0..e867ee42b152 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -433,8 +433,6 @@ static int has_failed(raid5_conf_t *conf)
return 0;
}
-static void unplug_slaves(mddev_t *mddev);
-
static struct stripe_head *
get_active_stripe(raid5_conf_t *conf, sector_t sector,
int previous, int noblock, int noquiesce)
@@ -463,8 +461,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
< (conf->max_nr_stripes *3/4)
|| !conf->inactive_blocked),
conf->device_lock,
- md_raid5_unplug_device(conf)
- );
+ md_raid5_kick_device(conf));
conf->inactive_blocked = 0;
} else
init_stripe(sh, sector, previous);
@@ -1473,8 +1470,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
wait_event_lock_irq(conf->wait_for_stripe,
!list_empty(&conf->inactive_list),
conf->device_lock,
- unplug_slaves(conf->mddev)
- );
+ blk_flush_plug(current));
osh = get_free_stripe(conf);
spin_unlock_irq(&conf->device_lock);
atomic_set(&nsh->count, 1);
@@ -3645,58 +3641,19 @@ static void activate_bit_delay(raid5_conf_t *conf)
}
}
-static void unplug_slaves(mddev_t *mddev)
-{
- raid5_conf_t *conf = mddev->private;
- int i;
- int devs = max(conf->raid_disks, conf->previous_raid_disks);
-
- rcu_read_lock();
- for (i = 0; i < devs; i++) {
- mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
- if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
- struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
-
- atomic_inc(&rdev->nr_pending);
- rcu_read_unlock();
-
- blk_unplug(r_queue);
-
- rdev_dec_pending(rdev, mddev);
- rcu_read_lock();
- }
- }
- rcu_read_unlock();
-}
-
-void md_raid5_unplug_device(raid5_conf_t *conf)
+void md_raid5_kick_device(raid5_conf_t *conf)
{
- unsigned long flags;
-
- spin_lock_irqsave(&conf->device_lock, flags);
-
- if (plugger_remove_plug(&conf->plug)) {
- conf->seq_flush++;
- raid5_activate_delayed(conf);
- }
+ blk_flush_plug(current);
+ raid5_activate_delayed(conf);
md_wakeup_thread(conf->mddev->thread);
-
- spin_unlock_irqrestore(&conf->device_lock, flags);
-
- unplug_slaves(conf->mddev);
}
-EXPORT_SYMBOL_GPL(md_raid5_unplug_device);
+EXPORT_SYMBOL_GPL(md_raid5_kick_device);
static void raid5_unplug(struct plug_handle *plug)
{
raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug);
- md_raid5_unplug_device(conf);
-}
-static void raid5_unplug_queue(struct request_queue *q)
-{
- mddev_t *mddev = q->queuedata;
- md_raid5_unplug_device(mddev->private);
+ md_raid5_kick_device(conf);
}
int md_raid5_congested(mddev_t *mddev, int bits)
@@ -4100,7 +4057,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
* add failed due to overlap. Flush everything
* and wait a while
*/
- md_raid5_unplug_device(conf);
+ md_raid5_kick_device(conf);
release_stripe(sh);
schedule();
goto retry;
@@ -4365,7 +4322,6 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
if (sector_nr >= max_sector) {
/* just being told to finish up .. nothing much to do */
- unplug_slaves(mddev);
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
end_reshape(conf);
@@ -4569,7 +4525,6 @@ static void raid5d(mddev_t *mddev)
spin_unlock_irq(&conf->device_lock);
async_tx_issue_pending_all();
- unplug_slaves(mddev);
pr_debug("--- raid5d inactive\n");
}
@@ -5205,7 +5160,6 @@ static int run(mddev_t *mddev)
mddev->queue->backing_dev_info.congested_data = mddev;
mddev->queue->backing_dev_info.congested_fn = raid5_congested;
mddev->queue->queue_lock = &conf->device_lock;
- mddev->queue->unplug_fn = raid5_unplug_queue;
chunk_size = mddev->chunk_sectors << 9;
blk_queue_io_min(mddev->queue, chunk_size);
@@ -5517,7 +5471,6 @@ static int raid5_start_reshape(mddev_t *mddev)
raid5_conf_t *conf = mddev->private;
mdk_rdev_t *rdev;
int spares = 0;
- int added_devices = 0;
unsigned long flags;
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
@@ -5527,8 +5480,8 @@ static int raid5_start_reshape(mddev_t *mddev)
return -ENOSPC;
list_for_each_entry(rdev, &mddev->disks, same_set)
- if ((rdev->raid_disk < 0 || rdev->raid_disk >= conf->raid_disks)
- && !test_bit(Faulty, &rdev->flags))
+ if (!test_bit(In_sync, &rdev->flags)
+ && !test_bit(Faulty, &rdev->flags))
spares++;
if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded)
@@ -5571,34 +5524,35 @@ static int raid5_start_reshape(mddev_t *mddev)
* to correctly record the "partially reconstructed" state of
* such devices during the reshape and confusion could result.
*/
- if (mddev->delta_disks >= 0)
- list_for_each_entry(rdev, &mddev->disks, same_set)
- if (rdev->raid_disk < 0 &&
- !test_bit(Faulty, &rdev->flags)) {
- if (raid5_add_disk(mddev, rdev) == 0) {
- char nm[20];
- if (rdev->raid_disk >= conf->previous_raid_disks) {
- set_bit(In_sync, &rdev->flags);
- added_devices++;
- } else
- rdev->recovery_offset = 0;
- sprintf(nm, "rd%d", rdev->raid_disk);
- if (sysfs_create_link(&mddev->kobj,
- &rdev->kobj, nm))
- /* Failure here is OK */;
- } else
- break;
- } else if (rdev->raid_disk >= conf->previous_raid_disks
- && !test_bit(Faulty, &rdev->flags)) {
- /* This is a spare that was manually added */
- set_bit(In_sync, &rdev->flags);
- added_devices++;
- }
+ if (mddev->delta_disks >= 0) {
+ int added_devices = 0;
+ list_for_each_entry(rdev, &mddev->disks, same_set)
+ if (rdev->raid_disk < 0 &&
+ !test_bit(Faulty, &rdev->flags)) {
+ if (raid5_add_disk(mddev, rdev) == 0) {
+ char nm[20];
+ if (rdev->raid_disk
+ >= conf->previous_raid_disks) {
+ set_bit(In_sync, &rdev->flags);
+ added_devices++;
+ } else
+ rdev->recovery_offset = 0;
+ sprintf(nm, "rd%d", rdev->raid_disk);
+ if (sysfs_create_link(&mddev->kobj,
+ &rdev->kobj, nm))
+ /* Failure here is OK */;
+ }
+ } else if (rdev->raid_disk >= conf->previous_raid_disks
+ && !test_bit(Faulty, &rdev->flags)) {
+ /* This is a spare that was manually added */
+ set_bit(In_sync, &rdev->flags);
+ added_devices++;
+ }
- /* When a reshape changes the number of devices, ->degraded
- * is measured against the larger of the pre and post number of
- * devices.*/
- if (mddev->delta_disks > 0) {
+ /* When a reshape changes the number of devices,
+ * ->degraded is measured against the larger of the
+ * pre and post number of devices.
+ */
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded += (conf->raid_disks - conf->previous_raid_disks)
- added_devices;
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 2ace0582b409..8d563a4f022a 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -503,6 +503,6 @@ static inline int algorithm_is_DDF(int layout)
}
extern int md_raid5_congested(mddev_t *mddev, int bits);
-extern void md_raid5_unplug_device(raid5_conf_t *conf);
+extern void md_raid5_kick_device(raid5_conf_t *conf);
extern int raid5_set_cache_size(mddev_t *mddev, int size);
#endif