Diffstat (limited to 'drivers/md/raid10.c')
 drivers/md/raid10.c | 88
 1 file changed, 55 insertions(+), 33 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 42e64e4e5e25..c67aa54694ae 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -120,7 +120,7 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
* Allocate bios.
*/
for (j = nalloc ; j-- ; ) {
- bio = bio_alloc(gfp_flags, RESYNC_PAGES);
+ bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
if (!bio)
goto out_free_bio;
r10_bio->devs[j].bio = bio;
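The bio_alloc() -> bio_kmalloc() change above is about allocation source, not interface: bio_alloc() draws from the shared fs_bio_set mempool, which md's long-lived resync buffers could pin down while the filesystems and devices around the array need it, whereas bio_kmalloc() is a plain kmalloc-backed allocation (which is why the !bio check stays). For reference, the two prototypes, assuming the fs/bio.c of this kernel era:

struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs);   /* mempool-backed, fs_bio_set */
struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs); /* plain allocation, may fail */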
@@ -799,13 +799,13 @@ static int make_request(mddev_t *mddev, struct bio * bio)
int i;
int chunk_sects = conf->chunk_mask + 1;
const int rw = bio_data_dir(bio);
- const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
- struct bio_list bl;
+ const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
+ const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
unsigned long flags;
mdk_rdev_t *blocked_rdev;
- if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
- md_barrier_request(mddev, bio);
+ if (unlikely(bio->bi_rw & REQ_FLUSH)) {
+ md_flush_request(mddev, bio);
return 0;
}
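These flag conversions track the block layer's replacement of barriers with FLUSH/FUA: BIO_RW_* tests via bio_rw_flagged() become direct masks on bio->bi_rw, and md_barrier_request() becomes md_flush_request(). Keeping do_sync and do_fua as the masked unsigned long values rather than bools is deliberate, since the bit can then be OR-ed straight into a clone's bi_rw with no re-shifting. In outline, using lines from the hunks in this patch:

/* bool flavour: the shift must be recreated at every use */
const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);

/* masked flavour: the bit ORs back in unchanged */
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
read_bio->bi_rw = READ | do_sync;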
@@ -825,11 +825,29 @@ static int make_request(mddev_t *mddev, struct bio * bio)
*/
bp = bio_split(bio,
chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
+
+ /* Each of these 'make_request' calls will call 'wait_barrier'.
+ * If the first succeeds but the second blocks due to the resync
+ * thread raising the barrier, we will deadlock because the
+ * IO to the underlying device will be queued in generic_make_request
+ * and will never complete, so will never reduce nr_pending.
+ * So increment nr_waiting here so no new raise_barriers will
+ * succeed, and so the second wait_barrier cannot block.
+ */
+ spin_lock_irq(&conf->resync_lock);
+ conf->nr_waiting++;
+ spin_unlock_irq(&conf->resync_lock);
+
if (make_request(mddev, &bp->bio1))
generic_make_request(&bp->bio1);
if (make_request(mddev, &bp->bio2))
generic_make_request(&bp->bio2);
+ spin_lock_irq(&conf->resync_lock);
+ conf->nr_waiting--;
+ wake_up(&conf->wait_barrier);
+ spin_unlock_irq(&conf->resync_lock);
+
bio_pair_release(bp);
return 0;
bad_map:
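The deadlock the new comment describes is between this split path and the resync barrier. A simplified sketch of the two sides, approximating the raise_barrier()/wait_barrier() pair in the raid10.c of this era (the RESYNC_DEPTH limit and exact macro plumbing are elided):

/* Resync side: refuses to raise the barrier while any normal IO waits. */
static void raise_barrier(conf_t *conf)
{
        spin_lock_irq(&conf->resync_lock);
        /* first wait until no normal IO is queued waiting ... */
        wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
                            conf->resync_lock,
                            raid10_unplug(conf->mddev->queue));
        conf->barrier++;
        /* ... then wait for in-flight IO to drain */
        wait_event_lock_irq(conf->wait_barrier, !conf->nr_pending,
                            conf->resync_lock,
                            raid10_unplug(conf->mddev->queue));
        spin_unlock_irq(&conf->resync_lock);
}

/* IO side: blocks while a barrier is up, else counts itself pending. */
static void wait_barrier(conf_t *conf)
{
        spin_lock_irq(&conf->resync_lock);
        if (conf->barrier) {
                conf->nr_waiting++;
                wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
                                    conf->resync_lock,
                                    raid10_unplug(conf->mddev->queue));
                conf->nr_waiting--;
        }
        conf->nr_pending++;
        spin_unlock_irq(&conf->resync_lock);
}

Without the nr_waiting bump, raise_barrier() could slip in between the two recursive make_request() calls: bp->bio1's device IO sits queued inside generic_make_request() (so nr_pending never falls to zero) while bp->bio2 blocks in wait_barrier(), and both sides wait forever. Holding nr_waiting above zero across both calls keeps raise_barrier() from starting at all.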
@@ -871,7 +889,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
}
mirror = conf->mirrors + disk;
- read_bio = bio_clone(bio, GFP_NOIO);
+ read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
r10_bio->devs[slot].bio = read_bio;
@@ -879,7 +897,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
mirror->rdev->data_offset;
read_bio->bi_bdev = mirror->rdev->bdev;
read_bio->bi_end_io = raid10_end_read_request;
- read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
+ read_bio->bi_rw = READ | do_sync;
read_bio->bi_private = r10_bio;
generic_make_request(read_bio);
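bio_clone() allocates the clone from the global fs_bio_set, the same mempool every filesystem uses; an md array sitting between a filesystem and its member devices can deadlock if both levels are waiting on that one pool. bio_clone_mddev() clones from a bio set owned by the mddev instead. Assumed prototype from the md core side of this series:

/* clone from mddev's private bio set rather than the shared fs_bio_set */
struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, mddev_t *mddev);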
@@ -931,42 +949,42 @@ static int make_request(mddev_t *mddev, struct bio * bio)
goto retry_write;
}
- atomic_set(&r10_bio->remaining, 0);
+ atomic_set(&r10_bio->remaining, 1);
+ bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0);
- bio_list_init(&bl);
for (i = 0; i < conf->copies; i++) {
struct bio *mbio;
int d = r10_bio->devs[i].devnum;
if (!r10_bio->devs[i].bio)
continue;
- mbio = bio_clone(bio, GFP_NOIO);
+ mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
r10_bio->devs[i].bio = mbio;
mbio->bi_sector = r10_bio->devs[i].addr+
conf->mirrors[d].rdev->data_offset;
mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
mbio->bi_end_io = raid10_end_write_request;
- mbio->bi_rw = WRITE | (do_sync << BIO_RW_SYNCIO);
+ mbio->bi_rw = WRITE | do_sync | do_fua;
mbio->bi_private = r10_bio;
atomic_inc(&r10_bio->remaining);
- bio_list_add(&bl, mbio);
+ spin_lock_irqsave(&conf->device_lock, flags);
+ bio_list_add(&conf->pending_bio_list, mbio);
+ blk_plug_device(mddev->queue);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
}
- if (unlikely(!atomic_read(&r10_bio->remaining))) {
- /* the array is dead */
+ if (atomic_dec_and_test(&r10_bio->remaining)) {
+ /* This matches the end of raid10_end_write_request() */
+ bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
+ r10_bio->sectors,
+ !test_bit(R10BIO_Degraded, &r10_bio->state),
+ 0);
md_write_end(mddev);
raid_end_bio_io(r10_bio);
- return 0;
}
- bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0);
- spin_lock_irqsave(&conf->device_lock, flags);
- bio_list_merge(&conf->pending_bio_list, &bl);
- blk_plug_device(mddev->queue);
- spin_unlock_irqrestore(&conf->device_lock, flags);
-
/* In case raid10d snuck in to freeze_array */
wake_up(&conf->wait_barrier);
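Initialising remaining to 1 instead of 0 is the usual bias-reference idiom, and it is what lets the old "array is dead" special case disappear: the submitting thread holds one reference for itself, each queued mirror write holds one, and whoever drops the count to zero (here, or in raid10_end_write_request()) runs the single completion path. In outline, using the names from this hunk:

atomic_set(&r10_bio->remaining, 1);          /* the submitter's own ref */
for (i = 0; i < conf->copies; i++) {
        if (!r10_bio->devs[i].bio)
                continue;                    /* no usable rdev for this copy */
        atomic_inc(&r10_bio->remaining);     /* one ref per queued write */
        /* ... queue mbio; raid10_end_write_request() drops its ref ... */
}
if (atomic_dec_and_test(&r10_bio->remaining)) {
        /* all writes finished first, or none were queued at all:
         * end the bitmap write and complete the master bio here */
}

If no device was writable, the loop queues nothing, the dec_and_test fires immediately, and the bio still completes, which is exactly the case the deleted !atomic_read() branch used to handle.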
@@ -1098,6 +1116,8 @@ static int raid10_spare_active(mddev_t *mddev)
int i;
conf_t *conf = mddev->private;
mirror_info_t *tmp;
+ int count = 0;
+ unsigned long flags;
/*
* Find all non-in_sync disks within the RAID10 configuration
@@ -1108,15 +1128,16 @@ static int raid10_spare_active(mddev_t *mddev)
if (tmp->rdev
&& !test_bit(Faulty, &tmp->rdev->flags)
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
- unsigned long flags;
- spin_lock_irqsave(&conf->device_lock, flags);
- mddev->degraded--;
- spin_unlock_irqrestore(&conf->device_lock, flags);
+ count++;
+ sysfs_notify_dirent(tmp->rdev->sysfs_state);
}
}
+ spin_lock_irqsave(&conf->device_lock, flags);
+ mddev->degraded -= count;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
print_conf(conf);
- return 0;
+ return count;
}
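Collecting activated spares in count and applying one locked decrement replaces a lock/unlock round-trip per device, and the new return value lets the md core emit a single "degraded" sysfs notification only when something actually changed. Assumed caller pattern on the md.c side of the same series (paraphrased, not shown in this diff):

/* in md_check_recovery(), after a successful recovery pass: */
if (mddev->pers->spare_active(mddev))
        sysfs_notify(&mddev->kobj, NULL, "degraded");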
@@ -1536,7 +1557,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
test_bit(In_sync, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- success = sync_page_io(rdev->bdev,
+ success = sync_page_io(rdev,
r10_bio->devs[sl].addr +
sect + rdev->data_offset,
s<<9,
@@ -1575,7 +1596,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
atomic_add(s, &rdev->corrected_errors);
- if (sync_page_io(rdev->bdev,
+ if (sync_page_io(rdev,
r10_bio->devs[sl].addr +
sect + rdev->data_offset,
s<<9, conf->tmppage, WRITE)
@@ -1612,7 +1633,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
char b[BDEVNAME_SIZE];
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- if (sync_page_io(rdev->bdev,
+ if (sync_page_io(rdev,
r10_bio->devs[sl].addr +
sect + rdev->data_offset,
s<<9, conf->tmppage,
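All three fix_read_error() call sites change identically: sync_page_io() now takes the mdk_rdev_t rather than its bare block device, so the helper can reach per-rdev state itself (note the callers here still add rdev->data_offset to the sector). Old and assumed new prototypes:

/* old: the caller had to hand over the raw block device */
int sync_page_io(struct block_device *bdev, sector_t sector, int size,
                 struct page *page, int rw);

/* new: pass the rdev; the helper dereferences rdev->bdev internally */
int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
                 struct page *page, int rw);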
@@ -1716,7 +1737,7 @@ static void raid10d(mddev_t *mddev)
raid_end_bio_io(r10_bio);
bio_put(bio);
} else {
- const bool do_sync = bio_rw_flagged(r10_bio->master_bio, BIO_RW_SYNCIO);
+ const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
bio_put(bio);
rdev = conf->mirrors[mirror].rdev;
if (printk_ratelimit())
@@ -1725,12 +1746,13 @@ static void raid10d(mddev_t *mddev)
mdname(mddev),
bdevname(rdev->bdev,b),
(unsigned long long)r10_bio->sector);
- bio = bio_clone(r10_bio->master_bio, GFP_NOIO);
+ bio = bio_clone_mddev(r10_bio->master_bio,
+ GFP_NOIO, mddev);
r10_bio->devs[r10_bio->read_slot].bio = bio;
bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
+ rdev->data_offset;
bio->bi_bdev = rdev->bdev;
- bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
+ bio->bi_rw = READ | do_sync;
bio->bi_private = r10_bio;
bio->bi_end_io = raid10_end_read_request;
unplug = 1;
@@ -1798,7 +1820,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
int disk;
int i;
int max_sync;
- int sync_blocks;
+ sector_t sync_blocks;
sector_t sectors_skipped = 0;
int chunks_skipped = 0;
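The int -> sector_t change for sync_blocks matters because the variable is passed by address into the bitmap code, whose block-count parameter is sector_t *; with an int the pointer types do not match, and a block count on a large array can exceed 32 bits anyway. Assumed bitmap-side signatures this now lines up with (md/bitmap.h of this era):

int  bitmap_start_sync(struct bitmap *bitmap, sector_t offset,
                       sector_t *blocks, int degraded);
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset,
                     sector_t *blocks, int aborted);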