summaryrefslogtreecommitdiffstats
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--drivers/md/raid5.c230
1 files changed, 134 insertions, 96 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 96c690279fc6..dc574f303f8b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -201,11 +201,11 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
if (test_bit(STRIPE_HANDLE, &sh->state)) {
if (test_bit(STRIPE_DELAYED, &sh->state)) {
list_add_tail(&sh->lru, &conf->delayed_list);
- blk_plug_device(conf->mddev->queue);
+ plugger_set_plug(&conf->plug);
} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
sh->bm_seq - conf->seq_write > 0) {
list_add_tail(&sh->lru, &conf->bitmap_list);
- blk_plug_device(conf->mddev->queue);
+ plugger_set_plug(&conf->plug);
} else {
clear_bit(STRIPE_BIT_DELAY, &sh->state);
list_add_tail(&sh->lru, &conf->handle_list);
@@ -434,7 +434,6 @@ static int has_failed(raid5_conf_t *conf)
}
static void unplug_slaves(mddev_t *mddev);
-static void raid5_unplug_device(struct request_queue *q);
static struct stripe_head *
get_active_stripe(raid5_conf_t *conf, sector_t sector,
@@ -464,7 +463,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
< (conf->max_nr_stripes *3/4)
|| !conf->inactive_blocked),
conf->device_lock,
- raid5_unplug_device(conf->mddev->queue)
+ md_raid5_unplug_device(conf)
);
conf->inactive_blocked = 0;
} else
@@ -507,9 +506,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
int rw;
struct bio *bi;
mdk_rdev_t *rdev;
- if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags))
- rw = WRITE;
- else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
+ if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
+ if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
+ rw = WRITE_FUA;
+ else
+ rw = WRITE;
+ } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
rw = READ;
else
continue;
@@ -1032,6 +1034,8 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
while (wbi && wbi->bi_sector <
dev->sector + STRIPE_SECTORS) {
+ if (wbi->bi_rw & REQ_FUA)
+ set_bit(R5_WantFUA, &dev->flags);
tx = async_copy_data(1, wbi, dev->page,
dev->sector, tx);
wbi = r5_next_bio(wbi, dev->sector);
@@ -1049,15 +1053,22 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
int pd_idx = sh->pd_idx;
int qd_idx = sh->qd_idx;
int i;
+ bool fua = false;
pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
+ for (i = disks; i--; )
+ fua |= test_bit(R5_WantFUA, &sh->dev[i].flags);
+
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
- if (dev->written || i == pd_idx || i == qd_idx)
+ if (dev->written || i == pd_idx || i == qd_idx) {
set_bit(R5_UPTODATE, &dev->flags);
+ if (fua)
+ set_bit(R5_WantFUA, &dev->flags);
+ }
}
if (sh->reconstruct_state == reconstruct_state_drain_run)
@@ -1337,10 +1348,14 @@ static int grow_stripes(raid5_conf_t *conf, int num)
struct kmem_cache *sc;
int devs = max(conf->raid_disks, conf->previous_raid_disks);
- sprintf(conf->cache_name[0],
- "raid%d-%s", conf->level, mdname(conf->mddev));
- sprintf(conf->cache_name[1],
- "raid%d-%s-alt", conf->level, mdname(conf->mddev));
+ if (conf->mddev->gendisk)
+ sprintf(conf->cache_name[0],
+ "raid%d-%s", conf->level, mdname(conf->mddev));
+ else
+ sprintf(conf->cache_name[0],
+ "raid%d-%p", conf->level, conf->mddev);
+ sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
+
conf->active_name = 0;
sc = kmem_cache_create(conf->cache_name[conf->active_name],
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
@@ -3278,7 +3293,7 @@ static void handle_stripe5(struct stripe_head *sh)
if (dec_preread_active) {
/* We delay this until after ops_run_io so that if make_request
- * is waiting on a barrier, it won't continue until the writes
+ * is waiting on a flush, it won't continue until the writes
* have actually been submitted.
*/
atomic_dec(&conf->preread_active_stripes);
@@ -3580,7 +3595,7 @@ static void handle_stripe6(struct stripe_head *sh)
if (dec_preread_active) {
/* We delay this until after ops_run_io so that if make_request
- * is waiting on a barrier, it won't continue until the writes
+ * is waiting on a flush, it won't continue until the writes
* have actually been submitted.
*/
atomic_dec(&conf->preread_active_stripes);
@@ -3614,7 +3629,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf)
list_add_tail(&sh->lru, &conf->hold_list);
}
} else
- blk_plug_device(conf->mddev->queue);
+ plugger_set_plug(&conf->plug);
}
static void activate_bit_delay(raid5_conf_t *conf)
@@ -3655,36 +3670,44 @@ static void unplug_slaves(mddev_t *mddev)
rcu_read_unlock();
}
-static void raid5_unplug_device(struct request_queue *q)
+void md_raid5_unplug_device(raid5_conf_t *conf)
{
- mddev_t *mddev = q->queuedata;
- raid5_conf_t *conf = mddev->private;
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
- if (blk_remove_plug(q)) {
+ if (plugger_remove_plug(&conf->plug)) {
conf->seq_flush++;
raid5_activate_delayed(conf);
}
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(conf->mddev->thread);
spin_unlock_irqrestore(&conf->device_lock, flags);
- unplug_slaves(mddev);
+ unplug_slaves(conf->mddev);
}
+EXPORT_SYMBOL_GPL(md_raid5_unplug_device);
-static int raid5_congested(void *data, int bits)
+static void raid5_unplug(struct plug_handle *plug)
+{
+ raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug);
+ md_raid5_unplug_device(conf);
+}
+
+static void raid5_unplug_queue(struct request_queue *q)
+{
+ mddev_t *mddev = q->queuedata;
+ md_raid5_unplug_device(mddev->private);
+}
+
+int md_raid5_congested(mddev_t *mddev, int bits)
{
- mddev_t *mddev = data;
raid5_conf_t *conf = mddev->private;
/* No difference between reads and writes. Just check
* how busy the stripe_cache is
*/
- if (mddev_congested(mddev, bits))
- return 1;
if (conf->inactive_blocked)
return 1;
if (conf->quiesce)
@@ -3694,6 +3717,15 @@ static int raid5_congested(void *data, int bits)
return 0;
}
+EXPORT_SYMBOL_GPL(md_raid5_congested);
+
+static int raid5_congested(void *data, int bits)
+{
+ mddev_t *mddev = data;
+
+ return mddev_congested(mddev, bits) ||
+ md_raid5_congested(mddev, bits);
+}
/* We want read requests to align with chunks where possible,
* but write requests don't need to.
@@ -3844,9 +3876,9 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio)
return 0;
}
/*
- * use bio_clone to make a copy of the bio
+ * use bio_clone_mddev to make a copy of the bio
*/
- align_bi = bio_clone(raid_bio, GFP_NOIO);
+ align_bi = bio_clone_mddev(raid_bio, GFP_NOIO, mddev);
if (!align_bi)
return 0;
/*
@@ -3958,14 +3990,8 @@ static int make_request(mddev_t *mddev, struct bio * bi)
const int rw = bio_data_dir(bi);
int remaining;
- if (unlikely(bio_rw_flagged(bi, BIO_RW_BARRIER))) {
- /* Drain all pending writes. We only really need
- * to ensure they have been submitted, but this is
- * easier.
- */
- mddev->pers->quiesce(mddev, 1);
- mddev->pers->quiesce(mddev, 0);
- md_barrier_request(mddev, bi);
+ if (unlikely(bi->bi_rw & REQ_FLUSH)) {
+ md_flush_request(mddev, bi);
return 0;
}
@@ -4075,7 +4101,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
* add failed due to overlap. Flush everything
* and wait a while
*/
- raid5_unplug_device(mddev->queue);
+ md_raid5_unplug_device(conf);
release_stripe(sh);
schedule();
goto retry;
@@ -4083,7 +4109,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
finish_wait(&conf->wait_for_overlap, &w);
set_bit(STRIPE_HANDLE, &sh->state);
clear_bit(STRIPE_DELAYED, &sh->state);
- if (mddev->barrier &&
+ if ((bi->bi_rw & REQ_SYNC) &&
!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
atomic_inc(&conf->preread_active_stripes);
release_stripe(sh);
@@ -4106,13 +4132,6 @@ static int make_request(mddev_t *mddev, struct bio * bi)
bio_endio(bi, 0);
}
- if (mddev->barrier) {
- /* We need to wait for the stripes to all be handled.
- * So: wait for preread_active_stripes to drop to 0.
- */
- wait_event(mddev->thread->wqueue,
- atomic_read(&conf->preread_active_stripes) == 0);
- }
return 0;
}
@@ -4341,7 +4360,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
raid5_conf_t *conf = mddev->private;
struct stripe_head *sh;
sector_t max_sector = mddev->dev_sectors;
- int sync_blocks;
+ sector_t sync_blocks;
int still_degraded = 0;
int i;
@@ -4566,23 +4585,15 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page)
return 0;
}
-static ssize_t
-raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
+int
+raid5_set_cache_size(mddev_t *mddev, int size)
{
raid5_conf_t *conf = mddev->private;
- unsigned long new;
int err;
- if (len >= PAGE_SIZE)
- return -EINVAL;
- if (!conf)
- return -ENODEV;
-
- if (strict_strtoul(page, 10, &new))
- return -EINVAL;
- if (new <= 16 || new > 32768)
+ if (size <= 16 || size > 32768)
return -EINVAL;
- while (new < conf->max_nr_stripes) {
+ while (size < conf->max_nr_stripes) {
if (drop_one_stripe(conf))
conf->max_nr_stripes--;
else
@@ -4591,11 +4602,32 @@ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
err = md_allow_write(mddev);
if (err)
return err;
- while (new > conf->max_nr_stripes) {
+ while (size > conf->max_nr_stripes) {
if (grow_one_stripe(conf))
conf->max_nr_stripes++;
else break;
}
+ return 0;
+}
+EXPORT_SYMBOL(raid5_set_cache_size);
+
+static ssize_t
+raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
+{
+ raid5_conf_t *conf = mddev->private;
+ unsigned long new;
+ int err;
+
+ if (len >= PAGE_SIZE)
+ return -EINVAL;
+ if (!conf)
+ return -ENODEV;
+
+ if (strict_strtoul(page, 10, &new))
+ return -EINVAL;
+ err = raid5_set_cache_size(mddev, new);
+ if (err)
+ return err;
return len;
}
@@ -4958,7 +4990,7 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
static int run(mddev_t *mddev)
{
raid5_conf_t *conf;
- int working_disks = 0, chunk_size;
+ int working_disks = 0;
int dirty_parity_disks = 0;
mdk_rdev_t *rdev;
sector_t reshape_offset = 0;
@@ -5144,42 +5176,47 @@ static int run(mddev_t *mddev)
"reshape");
}
- /* read-ahead size must cover two whole stripes, which is
- * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
- */
- {
- int data_disks = conf->previous_raid_disks - conf->max_degraded;
- int stripe = data_disks *
- ((mddev->chunk_sectors << 9) / PAGE_SIZE);
- if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
- mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
- }
/* Ok, everything is just fine now */
if (mddev->to_remove == &raid5_attrs_group)
mddev->to_remove = NULL;
- else if (sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
+ else if (mddev->kobj.sd &&
+ sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
printk(KERN_WARNING
- "md/raid:%s: failed to create sysfs attributes.\n",
+ "raid5: failed to create sysfs attributes for %s\n",
mdname(mddev));
+ md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
- mddev->queue->queue_lock = &conf->device_lock;
+ plugger_init(&conf->plug, raid5_unplug);
+ mddev->plug = &conf->plug;
+ if (mddev->queue) {
+ int chunk_size;
+ /* read-ahead size must cover two whole stripes, which
+ * is 2 * (datadisks) * chunksize where 'n' is the
+ * number of raid devices
+ */
+ int data_disks = conf->previous_raid_disks - conf->max_degraded;
+ int stripe = data_disks *
+ ((mddev->chunk_sectors << 9) / PAGE_SIZE);
+ if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
+ mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
- mddev->queue->unplug_fn = raid5_unplug_device;
- mddev->queue->backing_dev_info.congested_data = mddev;
- mddev->queue->backing_dev_info.congested_fn = raid5_congested;
+ blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
- md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
+ mddev->queue->backing_dev_info.congested_data = mddev;
+ mddev->queue->backing_dev_info.congested_fn = raid5_congested;
+ mddev->queue->queue_lock = &conf->device_lock;
+ mddev->queue->unplug_fn = raid5_unplug_queue;
- blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
- chunk_size = mddev->chunk_sectors << 9;
- blk_queue_io_min(mddev->queue, chunk_size);
- blk_queue_io_opt(mddev->queue, chunk_size *
- (conf->raid_disks - conf->max_degraded));
+ chunk_size = mddev->chunk_sectors << 9;
+ blk_queue_io_min(mddev->queue, chunk_size);
+ blk_queue_io_opt(mddev->queue, chunk_size *
+ (conf->raid_disks - conf->max_degraded));
- list_for_each_entry(rdev, &mddev->disks, same_set)
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
+ list_for_each_entry(rdev, &mddev->disks, same_set)
+ disk_stack_limits(mddev->gendisk, rdev->bdev,
+ rdev->data_offset << 9);
+ }
return 0;
abort:
@@ -5200,8 +5237,9 @@ static int stop(mddev_t *mddev)
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
- mddev->queue->backing_dev_info.congested_fn = NULL;
- blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
+ if (mddev->queue)
+ mddev->queue->backing_dev_info.congested_fn = NULL;
+ plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/
free_conf(conf);
mddev->private = NULL;
mddev->to_remove = &raid5_attrs_group;
@@ -5291,6 +5329,8 @@ static int raid5_spare_active(mddev_t *mddev)
int i;
raid5_conf_t *conf = mddev->private;
struct disk_info *tmp;
+ int count = 0;
+ unsigned long flags;
for (i = 0; i < conf->raid_disks; i++) {
tmp = conf->disks + i;
@@ -5298,14 +5338,15 @@ static int raid5_spare_active(mddev_t *mddev)
&& tmp->rdev->recovery_offset == MaxSector
&& !test_bit(Faulty, &tmp->rdev->flags)
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
- unsigned long flags;
- spin_lock_irqsave(&conf->device_lock, flags);
- mddev->degraded--;
- spin_unlock_irqrestore(&conf->device_lock, flags);
+ count++;
+ sysfs_notify_dirent(tmp->rdev->sysfs_state);
}
}
+ spin_lock_irqsave(&conf->device_lock, flags);
+ mddev->degraded -= count;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
print_raid5_conf(conf);
- return 0;
+ return count;
}
static int raid5_remove_disk(mddev_t *mddev, int number)
@@ -5545,10 +5586,7 @@ static int raid5_start_reshape(mddev_t *mddev)
sprintf(nm, "rd%d", rdev->raid_disk);
if (sysfs_create_link(&mddev->kobj,
&rdev->kobj, nm))
- printk(KERN_WARNING
- "md/raid:%s: failed to create "
- " link %s\n",
- mdname(mddev), nm);
+ /* Failure here is OK */;
} else
break;
}
@@ -5603,7 +5641,7 @@ static void end_reshape(raid5_conf_t *conf)
/* read-ahead size must cover two whole stripes, which is
* 2 * (datadisks) * chunksize where 'n' is the number of raid devices
*/
- {
+ if (conf->mddev->queue) {
int data_disks = conf->raid_disks - conf->max_degraded;
int stripe = data_disks * ((conf->chunk_sectors << 9)
/ PAGE_SIZE);