diff options
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 88 |
1 files changed, 55 insertions, 33 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 42e64e4e5e25..c67aa54694ae 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -120,7 +120,7 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) * Allocate bios. */ for (j = nalloc ; j-- ; ) { - bio = bio_alloc(gfp_flags, RESYNC_PAGES); + bio = bio_kmalloc(gfp_flags, RESYNC_PAGES); if (!bio) goto out_free_bio; r10_bio->devs[j].bio = bio; @@ -799,13 +799,13 @@ static int make_request(mddev_t *mddev, struct bio * bio) int i; int chunk_sects = conf->chunk_mask + 1; const int rw = bio_data_dir(bio); - const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO); - struct bio_list bl; + const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); + const unsigned long do_fua = (bio->bi_rw & REQ_FUA); unsigned long flags; mdk_rdev_t *blocked_rdev; - if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { - md_barrier_request(mddev, bio); + if (unlikely(bio->bi_rw & REQ_FLUSH)) { + md_flush_request(mddev, bio); return 0; } @@ -825,11 +825,29 @@ static int make_request(mddev_t *mddev, struct bio * bio) */ bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); + + /* Each of these 'make_request' calls will call 'wait_barrier'. + * If the first succeeds but the second blocks due to the resync + * thread raising the barrier, we will deadlock because the + * IO to the underlying device will be queued in generic_make_request + * and will never complete, so will never reduce nr_pending. + * So increment nr_waiting here so no new raise_barriers will + * succeed, and so the second wait_barrier cannot block. + */ + spin_lock_irq(&conf->resync_lock); + conf->nr_waiting++; + spin_unlock_irq(&conf->resync_lock); + if (make_request(mddev, &bp->bio1)) generic_make_request(&bp->bio1); if (make_request(mddev, &bp->bio2)) generic_make_request(&bp->bio2); + spin_lock_irq(&conf->resync_lock); + conf->nr_waiting--; + wake_up(&conf->wait_barrier); + spin_unlock_irq(&conf->resync_lock); + bio_pair_release(bp); return 0; bad_map: @@ -871,7 +889,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) } mirror = conf->mirrors + disk; - read_bio = bio_clone(bio, GFP_NOIO); + read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev); r10_bio->devs[slot].bio = read_bio; @@ -879,7 +897,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) mirror->rdev->data_offset; read_bio->bi_bdev = mirror->rdev->bdev; read_bio->bi_end_io = raid10_end_read_request; - read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); + read_bio->bi_rw = READ | do_sync; read_bio->bi_private = r10_bio; generic_make_request(read_bio); @@ -931,42 +949,42 @@ static int make_request(mddev_t *mddev, struct bio * bio) goto retry_write; } - atomic_set(&r10_bio->remaining, 0); + atomic_set(&r10_bio->remaining, 1); + bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0); - bio_list_init(&bl); for (i = 0; i < conf->copies; i++) { struct bio *mbio; int d = r10_bio->devs[i].devnum; if (!r10_bio->devs[i].bio) continue; - mbio = bio_clone(bio, GFP_NOIO); + mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); r10_bio->devs[i].bio = mbio; mbio->bi_sector = r10_bio->devs[i].addr+ conf->mirrors[d].rdev->data_offset; mbio->bi_bdev = conf->mirrors[d].rdev->bdev; mbio->bi_end_io = raid10_end_write_request; - mbio->bi_rw = WRITE | (do_sync << BIO_RW_SYNCIO); + mbio->bi_rw = WRITE | do_sync | do_fua; mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); - bio_list_add(&bl, mbio); + spin_lock_irqsave(&conf->device_lock, flags); + bio_list_add(&conf->pending_bio_list, mbio); + blk_plug_device(mddev->queue); + spin_unlock_irqrestore(&conf->device_lock, flags); } - if (unlikely(!atomic_read(&r10_bio->remaining))) { - /* the array is dead */ + if (atomic_dec_and_test(&r10_bio->remaining)) { + /* This matches the end of raid10_end_write_request() */ + bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector, + r10_bio->sectors, + !test_bit(R10BIO_Degraded, &r10_bio->state), + 0); md_write_end(mddev); raid_end_bio_io(r10_bio); - return 0; } - bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0); - spin_lock_irqsave(&conf->device_lock, flags); - bio_list_merge(&conf->pending_bio_list, &bl); - blk_plug_device(mddev->queue); - spin_unlock_irqrestore(&conf->device_lock, flags); - /* In case raid10d snuck in to freeze_array */ wake_up(&conf->wait_barrier); @@ -1098,6 +1116,8 @@ static int raid10_spare_active(mddev_t *mddev) int i; conf_t *conf = mddev->private; mirror_info_t *tmp; + int count = 0; + unsigned long flags; /* * Find all non-in_sync disks within the RAID10 configuration @@ -1108,15 +1128,16 @@ static int raid10_spare_active(mddev_t *mddev) if (tmp->rdev && !test_bit(Faulty, &tmp->rdev->flags) && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded--; - spin_unlock_irqrestore(&conf->device_lock, flags); + count++; + sysfs_notify_dirent(tmp->rdev->sysfs_state); } } + spin_lock_irqsave(&conf->device_lock, flags); + mddev->degraded -= count; + spin_unlock_irqrestore(&conf->device_lock, flags); print_conf(conf); - return 0; + return count; } @@ -1536,7 +1557,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) test_bit(In_sync, &rdev->flags)) { atomic_inc(&rdev->nr_pending); rcu_read_unlock(); - success = sync_page_io(rdev->bdev, + success = sync_page_io(rdev, r10_bio->devs[sl].addr + sect + rdev->data_offset, s<<9, @@ -1575,7 +1596,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) atomic_inc(&rdev->nr_pending); rcu_read_unlock(); atomic_add(s, &rdev->corrected_errors); - if (sync_page_io(rdev->bdev, + if (sync_page_io(rdev, r10_bio->devs[sl].addr + sect + rdev->data_offset, s<<9, conf->tmppage, WRITE) @@ -1612,7 +1633,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) char b[BDEVNAME_SIZE]; atomic_inc(&rdev->nr_pending); rcu_read_unlock(); - if (sync_page_io(rdev->bdev, + if (sync_page_io(rdev, r10_bio->devs[sl].addr + sect + rdev->data_offset, s<<9, conf->tmppage, @@ -1716,7 +1737,7 @@ static void raid10d(mddev_t *mddev) raid_end_bio_io(r10_bio); bio_put(bio); } else { - const bool do_sync = bio_rw_flagged(r10_bio->master_bio, BIO_RW_SYNCIO); + const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); bio_put(bio); rdev = conf->mirrors[mirror].rdev; if (printk_ratelimit()) @@ -1725,12 +1746,13 @@ static void raid10d(mddev_t *mddev) mdname(mddev), bdevname(rdev->bdev,b), (unsigned long long)r10_bio->sector); - bio = bio_clone(r10_bio->master_bio, GFP_NOIO); + bio = bio_clone_mddev(r10_bio->master_bio, + GFP_NOIO, mddev); r10_bio->devs[r10_bio->read_slot].bio = bio; bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr + rdev->data_offset; bio->bi_bdev = rdev->bdev; - bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); + bio->bi_rw = READ | do_sync; bio->bi_private = r10_bio; bio->bi_end_io = raid10_end_read_request; unplug = 1; @@ -1798,7 +1820,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i int disk; int i; int max_sync; - int sync_blocks; + sector_t sync_blocks; sector_t sectors_skipped = 0; int chunks_skipped = 0; |