diff options
Diffstat (limited to 'drivers/md/raid5-cache.c')
-rw-r--r-- | drivers/md/raid5-cache.c | 365 |
1 files changed, 296 insertions, 69 deletions
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index d7bfb6fc8aef..3f307be01b10 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -20,6 +20,7 @@ #include <linux/crc32c.h> #include <linux/random.h> #include <linux/kthread.h> +#include <linux/types.h> #include "md.h" #include "raid5.h" #include "bitmap.h" @@ -162,9 +163,62 @@ struct r5l_log { /* to submit async io_units, to fulfill ordering of flush */ struct work_struct deferred_io_work; + /* to disable write back during in degraded mode */ + struct work_struct disable_writeback_work; + + /* to for chunk_aligned_read in writeback mode, details below */ + spinlock_t tree_lock; + struct radix_tree_root big_stripe_tree; }; /* + * Enable chunk_aligned_read() with write back cache. + * + * Each chunk may contain more than one stripe (for example, a 256kB + * chunk contains 64 4kB-page, so this chunk contain 64 stripes). For + * chunk_aligned_read, these stripes are grouped into one "big_stripe". + * For each big_stripe, we count how many stripes of this big_stripe + * are in the write back cache. These data are tracked in a radix tree + * (big_stripe_tree). We use radix_tree item pointer as the counter. + * r5c_tree_index() is used to calculate keys for the radix tree. + * + * chunk_aligned_read() calls r5c_big_stripe_cached() to look up + * big_stripe of each chunk in the tree. If this big_stripe is in the + * tree, chunk_aligned_read() aborts. This look up is protected by + * rcu_read_lock(). + * + * It is necessary to remember whether a stripe is counted in + * big_stripe_tree. Instead of adding new flag, we reuses existing flags: + * STRIPE_R5C_PARTIAL_STRIPE and STRIPE_R5C_FULL_STRIPE. If either of these + * two flags are set, the stripe is counted in big_stripe_tree. This + * requires moving set_bit(STRIPE_R5C_PARTIAL_STRIPE) to + * r5c_try_caching_write(); and moving clear_bit of + * STRIPE_R5C_PARTIAL_STRIPE and STRIPE_R5C_FULL_STRIPE to + * r5c_finish_stripe_write_out(). + */ + +/* + * radix tree requests lowest 2 bits of data pointer to be 2b'00. + * So it is necessary to left shift the counter by 2 bits before using it + * as data pointer of the tree. + */ +#define R5C_RADIX_COUNT_SHIFT 2 + +/* + * calculate key for big_stripe_tree + * + * sect: align_bi->bi_iter.bi_sector or sh->sector + */ +static inline sector_t r5c_tree_index(struct r5conf *conf, + sector_t sect) +{ + sector_t offset; + + offset = sector_div(sect, conf->chunk_sectors); + return sect; +} + +/* * an IO range starts from a meta data block and end at the next meta data * block. The io unit's the meta data block tracks data/parity followed it. io * unit is written to log disk with normal write, as we always flush log disk @@ -335,17 +389,30 @@ void r5c_check_cached_full_stripe(struct r5conf *conf) /* * Total log space (in sectors) needed to flush all data in cache * - * Currently, writing-out phase automatically includes all pending writes - * to the same sector. So the reclaim of each stripe takes up to - * (conf->raid_disks + 1) pages of log space. + * To avoid deadlock due to log space, it is necessary to reserve log + * space to flush critical stripes (stripes that occupying log space near + * last_checkpoint). This function helps check how much log space is + * required to flush all cached stripes. * - * To totally avoid deadlock due to log space, the code reserves - * (conf->raid_disks + 1) pages for each stripe in cache, which is not - * necessary in most cases. + * To reduce log space requirements, two mechanisms are used to give cache + * flush higher priorities: + * 1. In handle_stripe_dirtying() and schedule_reconstruction(), + * stripes ALREADY in journal can be flushed w/o pending writes; + * 2. In r5l_write_stripe() and r5c_cache_data(), stripes NOT in journal + * can be delayed (r5l_add_no_space_stripe). * - * To improve this, we will need writing-out phase to be able to NOT include - * pending writes, which will reduce the requirement to - * (conf->max_degraded + 1) pages per stripe in cache. + * In cache flush, the stripe goes through 1 and then 2. For a stripe that + * already passed 1, flushing it requires at most (conf->max_degraded + 1) + * pages of journal space. For stripes that has not passed 1, flushing it + * requires (conf->raid_disks + 1) pages of journal space. There are at + * most (conf->group_cnt + 1) stripe that passed 1. So total journal space + * required to flush all cached stripes (in pages) is: + * + * (stripe_in_journal_count - group_cnt - 1) * (max_degraded + 1) + + * (group_cnt + 1) * (raid_disks + 1) + * or + * (stripe_in_journal_count) * (max_degraded + 1) + + * (group_cnt + 1) * (raid_disks - max_degraded) */ static sector_t r5c_log_required_to_flush_cache(struct r5conf *conf) { @@ -354,8 +421,9 @@ static sector_t r5c_log_required_to_flush_cache(struct r5conf *conf) if (!r5c_is_writeback(log)) return 0; - return BLOCK_SECTORS * (conf->raid_disks + 1) * - atomic_read(&log->stripe_in_journal_count); + return BLOCK_SECTORS * + ((conf->max_degraded + 1) * atomic_read(&log->stripe_in_journal_count) + + (conf->raid_disks - conf->max_degraded) * (conf->group_cnt + 1)); } /* @@ -410,16 +478,6 @@ void r5c_make_stripe_write_out(struct stripe_head *sh) if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) atomic_inc(&conf->preread_active_stripes); - - if (test_and_clear_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) { - BUG_ON(atomic_read(&conf->r5c_cached_partial_stripes) == 0); - atomic_dec(&conf->r5c_cached_partial_stripes); - } - - if (test_and_clear_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) { - BUG_ON(atomic_read(&conf->r5c_cached_full_stripes) == 0); - atomic_dec(&conf->r5c_cached_full_stripes); - } } static void r5c_handle_data_cached(struct stripe_head *sh) @@ -611,6 +669,21 @@ static void r5l_submit_io_async(struct work_struct *work) r5l_do_submit_io(log, io); } +static void r5c_disable_writeback_async(struct work_struct *work) +{ + struct r5l_log *log = container_of(work, struct r5l_log, + disable_writeback_work); + struct mddev *mddev = log->rdev->mddev; + + if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) + return; + pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n", + mdname(mddev)); + mddev_suspend(mddev); + log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; + mddev_resume(mddev); +} + static void r5l_submit_current_io(struct r5l_log *log) { struct r5l_io_unit *io = log->current_io; @@ -1254,6 +1327,10 @@ static void r5c_flush_stripe(struct r5conf *conf, struct stripe_head *sh) atomic_inc(&conf->active_stripes); r5c_make_stripe_write_out(sh); + if (test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) + atomic_inc(&conf->r5c_flushing_partial_stripes); + else + atomic_inc(&conf->r5c_flushing_full_stripes); raid5_release_stripe(sh); } @@ -1296,12 +1373,16 @@ static void r5c_do_reclaim(struct r5conf *conf) unsigned long flags; int total_cached; int stripes_to_flush; + int flushing_partial, flushing_full; if (!r5c_is_writeback(log)) return; + flushing_partial = atomic_read(&conf->r5c_flushing_partial_stripes); + flushing_full = atomic_read(&conf->r5c_flushing_full_stripes); total_cached = atomic_read(&conf->r5c_cached_partial_stripes) + - atomic_read(&conf->r5c_cached_full_stripes); + atomic_read(&conf->r5c_cached_full_stripes) - + flushing_full - flushing_partial; if (total_cached > conf->min_nr_stripes * 3 / 4 || atomic_read(&conf->empty_inactive_list_nr) > 0) @@ -1311,7 +1392,7 @@ static void r5c_do_reclaim(struct r5conf *conf) */ stripes_to_flush = R5C_RECLAIM_STRIPE_GROUP; else if (total_cached > conf->min_nr_stripes * 1 / 2 || - atomic_read(&conf->r5c_cached_full_stripes) > + atomic_read(&conf->r5c_cached_full_stripes) - flushing_full > R5C_FULL_STRIPE_FLUSH_BATCH) /* * if stripe cache pressure moderate, or if there is many full @@ -1345,9 +1426,9 @@ static void r5c_do_reclaim(struct r5conf *conf) !test_bit(STRIPE_HANDLE, &sh->state) && atomic_read(&sh->count) == 0) { r5c_flush_stripe(conf, sh); + if (count++ >= R5C_RECLAIM_STRIPE_GROUP) + break; } - if (count++ >= R5C_RECLAIM_STRIPE_GROUP) - break; } spin_unlock(&conf->device_lock); spin_unlock_irqrestore(&log->stripe_in_journal_lock, flags); @@ -1393,8 +1474,6 @@ static void r5l_do_reclaim(struct r5l_log *log) next_checkpoint = r5c_calculate_new_cp(conf); spin_unlock_irq(&log->io_list_lock); - BUG_ON(reclaimable < 0); - if (reclaimable == 0 || !write_super) return; @@ -1682,8 +1761,7 @@ out: static struct stripe_head * r5c_recovery_alloc_stripe(struct r5conf *conf, - sector_t stripe_sect, - sector_t log_start) + sector_t stripe_sect) { struct stripe_head *sh; @@ -1692,7 +1770,6 @@ r5c_recovery_alloc_stripe(struct r5conf *conf, return NULL; /* no more stripe available */ r5l_recovery_reset_stripe(sh); - sh->log_start = log_start; return sh; } @@ -1862,7 +1939,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, stripe_sect); if (!sh) { - sh = r5c_recovery_alloc_stripe(conf, stripe_sect, ctx->pos); + sh = r5c_recovery_alloc_stripe(conf, stripe_sect); /* * cannot get stripe from raid5_get_active_stripe * try replay some stripes @@ -1871,7 +1948,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, r5c_recovery_replay_stripes( cached_stripe_list, ctx); sh = r5c_recovery_alloc_stripe( - conf, stripe_sect, ctx->pos); + conf, stripe_sect); } if (!sh) { pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n", @@ -1879,8 +1956,8 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, conf->min_nr_stripes * 2); raid5_set_cache_size(mddev, conf->min_nr_stripes * 2); - sh = r5c_recovery_alloc_stripe( - conf, stripe_sect, ctx->pos); + sh = r5c_recovery_alloc_stripe(conf, + stripe_sect); } if (!sh) { pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n", @@ -1894,7 +1971,6 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, if (!test_bit(STRIPE_R5C_CACHING, &sh->state) && test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags)) { r5l_recovery_replay_one_stripe(conf, sh, ctx); - sh->log_start = ctx->pos; list_move_tail(&sh->lru, cached_stripe_list); } r5l_recovery_load_data(log, sh, ctx, payload, @@ -1933,8 +2009,6 @@ static void r5c_recovery_load_one_stripe(struct r5l_log *log, set_bit(R5_UPTODATE, &dev->flags); } } - list_add_tail(&sh->r5c, &log->stripe_in_journal_list); - atomic_inc(&log->stripe_in_journal_count); } /* @@ -2067,9 +2141,10 @@ static int r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, struct r5l_recovery_ctx *ctx) { - struct stripe_head *sh, *next; + struct stripe_head *sh; struct mddev *mddev = log->rdev->mddev; struct page *page; + sector_t next_checkpoint = MaxSector; page = alloc_page(GFP_KERNEL); if (!page) { @@ -2078,7 +2153,9 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, return -ENOMEM; } - list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) { + WARN_ON(list_empty(&ctx->cached_list)); + + list_for_each_entry(sh, &ctx->cached_list, lru) { struct r5l_meta_block *mb; int i; int offset; @@ -2123,14 +2200,42 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page, REQ_OP_WRITE, REQ_FUA, false); sh->log_start = ctx->pos; + list_add_tail(&sh->r5c, &log->stripe_in_journal_list); + atomic_inc(&log->stripe_in_journal_count); ctx->pos = write_pos; ctx->seq += 1; + next_checkpoint = sh->log_start; + } + log->next_checkpoint = next_checkpoint; + __free_page(page); + return 0; +} + +static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log, + struct r5l_recovery_ctx *ctx) +{ + struct mddev *mddev = log->rdev->mddev; + struct r5conf *conf = mddev->private; + struct stripe_head *sh, *next; + + if (ctx->data_only_stripes == 0) + return; + log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_BACK; + + list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) { + r5c_make_stripe_write_out(sh); + set_bit(STRIPE_HANDLE, &sh->state); list_del_init(&sh->lru); raid5_release_stripe(sh); } - __free_page(page); - return 0; + + md_wakeup_thread(conf->mddev->thread); + /* reuse conf->wait_for_quiescent in recovery */ + wait_event(conf->wait_for_quiescent, + atomic_read(&conf->active_stripes) == 0); + + log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; } static int r5l_recovery_log(struct r5l_log *log) @@ -2139,7 +2244,6 @@ static int r5l_recovery_log(struct r5l_log *log) struct r5l_recovery_ctx ctx; int ret; sector_t pos; - struct stripe_head *sh; ctx.pos = log->last_checkpoint; ctx.seq = log->last_cp_seq; @@ -2160,35 +2264,31 @@ static int r5l_recovery_log(struct r5l_log *log) pos = ctx.pos; ctx.seq += 10000; - if (ctx.data_only_stripes == 0) { - log->next_checkpoint = ctx.pos; - r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++); - ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS); - } else { - sh = list_last_entry(&ctx.cached_list, struct stripe_head, lru); - log->next_checkpoint = sh->log_start; - } if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0)) pr_debug("md/raid:%s: starting from clean shutdown\n", mdname(mddev)); - else { - pr_debug("md/raid:%s: recoverying %d data-only stripes and %d data-parity stripes\n", + else + pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n", mdname(mddev), ctx.data_only_stripes, ctx.data_parity_stripes); - if (ctx.data_only_stripes > 0) - if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) { - pr_err("md/raid:%s: failed to rewrite stripes to journal\n", - mdname(mddev)); - return -EIO; - } + if (ctx.data_only_stripes == 0) { + log->next_checkpoint = ctx.pos; + r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++); + ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS); + } else if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) { + pr_err("md/raid:%s: failed to rewrite stripes to journal\n", + mdname(mddev)); + return -EIO; } log->log_start = ctx.pos; log->seq = ctx.seq; log->last_checkpoint = pos; r5l_write_super(log, pos); + + r5c_recovery_flush_data_only_stripes(log, &ctx); return 0; } @@ -2250,6 +2350,10 @@ static ssize_t r5c_journal_mode_store(struct mddev *mddev, val > R5C_JOURNAL_MODE_WRITE_BACK) return -EINVAL; + if (raid5_calc_degraded(conf) > 0 && + val == R5C_JOURNAL_MODE_WRITE_BACK) + return -EINVAL; + mddev_suspend(mddev); conf->log->r5c_journal_mode = val; mddev_resume(mddev); @@ -2280,6 +2384,10 @@ int r5c_try_caching_write(struct r5conf *conf, int i; struct r5dev *dev; int to_cache = 0; + void **pslot; + sector_t tree_index; + int ret; + uintptr_t refcount; BUG_ON(!r5c_is_writeback(log)); @@ -2304,6 +2412,16 @@ int r5c_try_caching_write(struct r5conf *conf, set_bit(STRIPE_R5C_CACHING, &sh->state); } + /* + * When run in degraded mode, array is set to write-through mode. + * This check helps drain pending write safely in the transition to + * write-through mode. + */ + if (s->failed) { + r5c_make_stripe_write_out(sh); + return -EAGAIN; + } + for (i = disks; i--; ) { dev = &sh->dev[i]; /* if non-overwrite, use writing-out phase */ @@ -2314,6 +2432,44 @@ int r5c_try_caching_write(struct r5conf *conf, } } + /* if the stripe is not counted in big_stripe_tree, add it now */ + if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) && + !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) { + tree_index = r5c_tree_index(conf, sh->sector); + spin_lock(&log->tree_lock); + pslot = radix_tree_lookup_slot(&log->big_stripe_tree, + tree_index); + if (pslot) { + refcount = (uintptr_t)radix_tree_deref_slot_protected( + pslot, &log->tree_lock) >> + R5C_RADIX_COUNT_SHIFT; + radix_tree_replace_slot( + &log->big_stripe_tree, pslot, + (void *)((refcount + 1) << R5C_RADIX_COUNT_SHIFT)); + } else { + /* + * this radix_tree_insert can fail safely, so no + * need to call radix_tree_preload() + */ + ret = radix_tree_insert( + &log->big_stripe_tree, tree_index, + (void *)(1 << R5C_RADIX_COUNT_SHIFT)); + if (ret) { + spin_unlock(&log->tree_lock); + r5c_make_stripe_write_out(sh); + return -EAGAIN; + } + } + spin_unlock(&log->tree_lock); + + /* + * set STRIPE_R5C_PARTIAL_STRIPE, this shows the stripe is + * counted in the radix tree + */ + set_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state); + atomic_inc(&conf->r5c_cached_partial_stripes); + } + for (i = disks; i--; ) { dev = &sh->dev[i]; if (dev->towrite) { @@ -2354,6 +2510,8 @@ void r5c_release_extra_page(struct stripe_head *sh) struct page *p = sh->dev[i].orig_page; sh->dev[i].orig_page = sh->dev[i].page; + clear_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags); + if (!using_disk_info_extra_page) put_page(p); } @@ -2386,17 +2544,20 @@ void r5c_finish_stripe_write_out(struct r5conf *conf, struct stripe_head *sh, struct stripe_head_state *s) { + struct r5l_log *log = conf->log; int i; int do_wakeup = 0; + sector_t tree_index; + void **pslot; + uintptr_t refcount; - if (!conf->log || - !test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags)) + if (!log || !test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags)) return; WARN_ON(test_bit(STRIPE_R5C_CACHING, &sh->state)); clear_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags); - if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) + if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) return; for (i = sh->disks; i--; ) { @@ -2418,15 +2579,45 @@ void r5c_finish_stripe_write_out(struct r5conf *conf, if (do_wakeup) wake_up(&conf->wait_for_overlap); - if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) - return; - - spin_lock_irq(&conf->log->stripe_in_journal_lock); + spin_lock_irq(&log->stripe_in_journal_lock); list_del_init(&sh->r5c); - spin_unlock_irq(&conf->log->stripe_in_journal_lock); + spin_unlock_irq(&log->stripe_in_journal_lock); sh->log_start = MaxSector; - atomic_dec(&conf->log->stripe_in_journal_count); - r5c_update_log_state(conf->log); + + atomic_dec(&log->stripe_in_journal_count); + r5c_update_log_state(log); + + /* stop counting this stripe in big_stripe_tree */ + if (test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) || + test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) { + tree_index = r5c_tree_index(conf, sh->sector); + spin_lock(&log->tree_lock); + pslot = radix_tree_lookup_slot(&log->big_stripe_tree, + tree_index); + BUG_ON(pslot == NULL); + refcount = (uintptr_t)radix_tree_deref_slot_protected( + pslot, &log->tree_lock) >> + R5C_RADIX_COUNT_SHIFT; + if (refcount == 1) + radix_tree_delete(&log->big_stripe_tree, tree_index); + else + radix_tree_replace_slot( + &log->big_stripe_tree, pslot, + (void *)((refcount - 1) << R5C_RADIX_COUNT_SHIFT)); + spin_unlock(&log->tree_lock); + } + + if (test_and_clear_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) { + BUG_ON(atomic_read(&conf->r5c_cached_partial_stripes) == 0); + atomic_dec(&conf->r5c_flushing_partial_stripes); + atomic_dec(&conf->r5c_cached_partial_stripes); + } + + if (test_and_clear_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) { + BUG_ON(atomic_read(&conf->r5c_cached_full_stripes) == 0); + atomic_dec(&conf->r5c_flushing_full_stripes); + atomic_dec(&conf->r5c_cached_full_stripes); + } } int @@ -2486,6 +2677,22 @@ r5c_cache_data(struct r5l_log *log, struct stripe_head *sh, return 0; } +/* check whether this big stripe is in write back cache. */ +bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect) +{ + struct r5l_log *log = conf->log; + sector_t tree_index; + void *slot; + + if (!log) + return false; + + WARN_ON_ONCE(!rcu_read_lock_held()); + tree_index = r5c_tree_index(conf, sect); + slot = radix_tree_lookup(&log->big_stripe_tree, tree_index); + return slot != NULL; +} + static int r5l_load_log(struct r5l_log *log) { struct md_rdev *rdev = log->rdev; @@ -2561,6 +2768,19 @@ ioerr: return ret; } +void r5c_update_on_rdev_error(struct mddev *mddev) +{ + struct r5conf *conf = mddev->private; + struct r5l_log *log = conf->log; + + if (!log) + return; + + if (raid5_calc_degraded(conf) > 0 && + conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK) + schedule_work(&log->disable_writeback_work); +} + int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) { struct request_queue *q = bdev_get_queue(rdev->bdev); @@ -2619,6 +2839,9 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) if (!log->meta_pool) goto out_mempool; + spin_lock_init(&log->tree_lock); + INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT | __GFP_NOWARN); + log->reclaim_thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev, "reclaim"); if (!log->reclaim_thread) @@ -2633,20 +2856,23 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) spin_lock_init(&log->no_space_stripes_lock); INIT_WORK(&log->deferred_io_work, r5l_submit_io_async); + INIT_WORK(&log->disable_writeback_work, r5c_disable_writeback_async); log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; INIT_LIST_HEAD(&log->stripe_in_journal_list); spin_lock_init(&log->stripe_in_journal_lock); atomic_set(&log->stripe_in_journal_count, 0); + rcu_assign_pointer(conf->log, log); + if (r5l_load_log(log)) goto error; - rcu_assign_pointer(conf->log, log); set_bit(MD_HAS_JOURNAL, &conf->mddev->flags); return 0; error: + rcu_assign_pointer(conf->log, NULL); md_unregister_thread(&log->reclaim_thread); reclaim_thread: mempool_destroy(log->meta_pool); @@ -2663,6 +2889,7 @@ io_kc: void r5l_exit_log(struct r5l_log *log) { + flush_work(&log->disable_writeback_work); md_unregister_thread(&log->reclaim_thread); mempool_destroy(log->meta_pool); bioset_free(log->bs); |