From b330e6a49dc3e9145de5c986b29bbbb884351e92 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Mar 2019 23:31:06 -0700 Subject: md: convert to kvmalloc The code really just wants a big flat buffer, so just do that. Link: http://lkml.kernel.org/r/20181217131929.11727-3-kent.overstreet@gmail.com Signed-off-by: Kent Overstreet Reviewed-by: Matthew Wilcox Cc: Shaohua Li Cc: Alexey Dobriyan Cc: Al Viro Cc: Dave Hansen Cc: Eric Paris Cc: Marcelo Ricardo Leitner Cc: Neil Horman Cc: Paul Moore Cc: Pravin B Shelar Cc: Stephen Smalley Cc: Vlad Yasevich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid5.c | 87 ++++++++++++++++++++++++------------------------------ 1 file changed, 39 insertions(+), 48 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index cecea901ab8c..77ffd09be486 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -54,7 +54,6 @@ #include #include #include -#include #include #include @@ -1394,22 +1393,16 @@ static void ops_complete_compute(void *stripe_head_ref) } /* return a pointer to the address conversion region of the scribble buffer */ -static addr_conv_t *to_addr_conv(struct stripe_head *sh, - struct raid5_percpu *percpu, int i) +static struct page **to_addr_page(struct raid5_percpu *percpu, int i) { - void *addr; - - addr = flex_array_get(percpu->scribble, i); - return addr + sizeof(struct page *) * (sh->disks + 2); + return percpu->scribble + i * percpu->scribble_obj_size; } /* return a pointer to the address conversion region of the scribble buffer */ -static struct page **to_addr_page(struct raid5_percpu *percpu, int i) +static addr_conv_t *to_addr_conv(struct stripe_head *sh, + struct raid5_percpu *percpu, int i) { - void *addr; - - addr = flex_array_get(percpu->scribble, i); - return addr; + return (void *) (to_addr_page(percpu, i) + sh->disks + 2); } static struct dma_async_tx_descriptor * @@ -2238,21 +2231,23 @@ static int grow_stripes(struct r5conf *conf, int num) * calculate over all devices (not just the data blocks), using zeros in place * of the P and Q blocks. */ -static struct flex_array *scribble_alloc(int num, int cnt, gfp_t flags) +static int scribble_alloc(struct raid5_percpu *percpu, + int num, int cnt, gfp_t flags) { - struct flex_array *ret; - size_t len; + size_t obj_size = + sizeof(struct page *) * (num+2) + + sizeof(addr_conv_t) * (num+2); + void *scribble; - len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2); - ret = flex_array_alloc(len, cnt, flags); - if (!ret) - return NULL; - /* always prealloc all elements, so no locking is required */ - if (flex_array_prealloc(ret, 0, cnt, flags)) { - flex_array_free(ret); - return NULL; - } - return ret; + scribble = kvmalloc_array(cnt, obj_size, flags); + if (!scribble) + return -ENOMEM; + + kvfree(percpu->scribble); + + percpu->scribble = scribble; + percpu->scribble_obj_size = obj_size; + return 0; } static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) @@ -2270,23 +2265,18 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) return 0; mddev_suspend(conf->mddev); get_online_cpus(); + for_each_present_cpu(cpu) { struct raid5_percpu *percpu; - struct flex_array *scribble; percpu = per_cpu_ptr(conf->percpu, cpu); - scribble = scribble_alloc(new_disks, - new_sectors / STRIPE_SECTORS, - GFP_NOIO); - - if (scribble) { - flex_array_free(percpu->scribble); - percpu->scribble = scribble; - } else { - err = -ENOMEM; + err = scribble_alloc(percpu, new_disks, + new_sectors / STRIPE_SECTORS, + GFP_NOIO); + if (err) break; - } } + put_online_cpus(); mddev_resume(conf->mddev); if (!err) { @@ -6742,25 +6732,26 @@ raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks) static void free_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu) { safe_put_page(percpu->spare_page); - if (percpu->scribble) - flex_array_free(percpu->scribble); percpu->spare_page = NULL; + kvfree(percpu->scribble); percpu->scribble = NULL; } static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu) { - if (conf->level == 6 && !percpu->spare_page) + if (conf->level == 6 && !percpu->spare_page) { percpu->spare_page = alloc_page(GFP_KERNEL); - if (!percpu->scribble) - percpu->scribble = scribble_alloc(max(conf->raid_disks, - conf->previous_raid_disks), - max(conf->chunk_sectors, - conf->prev_chunk_sectors) - / STRIPE_SECTORS, - GFP_KERNEL); - - if (!percpu->scribble || (conf->level == 6 && !percpu->spare_page)) { + if (!percpu->spare_page) + return -ENOMEM; + } + + if (scribble_alloc(percpu, + max(conf->raid_disks, + conf->previous_raid_disks), + max(conf->chunk_sectors, + conf->prev_chunk_sectors) + / STRIPE_SECTORS, + GFP_KERNEL)) { free_scratch_buffer(conf, percpu); return -ENOMEM; } -- cgit v1.2.3-55-g7522 From a596d08677320925b69e70c0fdc4c0f59384a65e Mon Sep 17 00:00:00 2001 From: Mariusz Dabrowski Date: Mon, 18 Feb 2019 15:04:09 +0100 Subject: raid5: set write hint for PPL When the Partial Parity Log is enabled, circular buffer is used to store PPL data. Each write to RAID device causes overwrite of data in this buffer so some write_hint can be set to those request to help drives handle garbage collection. This patch adds new sysfs attribute which can be used to specify which write_hint should be assigned to PPL. Acked-by: Guoqing Jiang Signed-off-by: Mariusz Dabrowski Signed-off-by: Song Liu --- Documentation/admin-guide/md.rst | 3 ++ drivers/md/raid5-log.h | 1 + drivers/md/raid5-ppl.c | 63 ++++++++++++++++++++++++++++++++++++++++ drivers/md/raid5.c | 1 + 4 files changed, 68 insertions(+) (limited to 'drivers/md/raid5.c') diff --git a/Documentation/admin-guide/md.rst b/Documentation/admin-guide/md.rst index 84de718f24a4..3c51084ffd37 100644 --- a/Documentation/admin-guide/md.rst +++ b/Documentation/admin-guide/md.rst @@ -756,3 +756,6 @@ These currently include: The cache mode for raid5. raid5 could include an extra disk for caching. The mode can be "write-throuth" and "write-back". The default is "write-through". + + ppl_write_hint + NVMe stream ID to be set for each PPL write request. diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h index bfb811407061..43c714a8798c 100644 --- a/drivers/md/raid5-log.h +++ b/drivers/md/raid5-log.h @@ -45,6 +45,7 @@ extern void ppl_stripe_write_finished(struct stripe_head *sh); extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add); extern void ppl_quiesce(struct r5conf *conf, int quiesce); extern int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio); +extern struct md_sysfs_entry ppl_write_hint; static inline bool raid5_has_log(struct r5conf *conf) { diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 3a7c36326589..f2b3020c2ac8 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -21,6 +21,7 @@ #include #include "md.h" #include "raid5.h" +#include "raid5-log.h" /* * PPL consists of a 4KB header (struct ppl_header) and at least 128KB for @@ -116,6 +117,8 @@ struct ppl_conf { /* stripes to retry if failed to allocate io_unit */ struct list_head no_mem_stripes; spinlock_t no_mem_stripes_lock; + + unsigned short write_hint; }; struct ppl_log { @@ -476,6 +479,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) bio_set_dev(bio, log->rdev->bdev); bio->bi_iter.bi_sector = log->next_io_sector; bio_add_page(bio, io->header_page, PAGE_SIZE, 0); + bio->bi_write_hint = ppl_conf->write_hint; pr_debug("%s: log->current_io_sector: %llu\n", __func__, (unsigned long long)log->next_io_sector); @@ -505,6 +509,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, &ppl_conf->bs); bio->bi_opf = prev->bi_opf; + bio->bi_write_hint = prev->bi_write_hint; bio_copy_dev(bio, prev); bio->bi_iter.bi_sector = bio_end_sector(prev); bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0); @@ -1409,6 +1414,7 @@ int ppl_init_log(struct r5conf *conf) atomic64_set(&ppl_conf->seq, 0); INIT_LIST_HEAD(&ppl_conf->no_mem_stripes); spin_lock_init(&ppl_conf->no_mem_stripes_lock); + ppl_conf->write_hint = RWF_WRITE_LIFE_NOT_SET; if (!mddev->external) { ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid)); @@ -1503,3 +1509,60 @@ int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add) return ret; } + +static ssize_t +ppl_write_hint_show(struct mddev *mddev, char *buf) +{ + size_t ret = 0; + struct r5conf *conf; + struct ppl_conf *ppl_conf = NULL; + + spin_lock(&mddev->lock); + conf = mddev->private; + if (conf && raid5_has_ppl(conf)) + ppl_conf = conf->log_private; + ret = sprintf(buf, "%d\n", ppl_conf ? ppl_conf->write_hint : 0); + spin_unlock(&mddev->lock); + + return ret; +} + +static ssize_t +ppl_write_hint_store(struct mddev *mddev, const char *page, size_t len) +{ + struct r5conf *conf; + struct ppl_conf *ppl_conf; + int err = 0; + unsigned short new; + + if (len >= PAGE_SIZE) + return -EINVAL; + if (kstrtou16(page, 10, &new)) + return -EINVAL; + + err = mddev_lock(mddev); + if (err) + return err; + + conf = mddev->private; + if (!conf) { + err = -ENODEV; + } else if (raid5_has_ppl(conf)) { + ppl_conf = conf->log_private; + if (!ppl_conf) + err = -EINVAL; + else + ppl_conf->write_hint = new; + } else { + err = -EINVAL; + } + + mddev_unlock(mddev); + + return err ?: len; +} + +struct md_sysfs_entry +ppl_write_hint = __ATTR(ppl_write_hint, S_IRUGO | S_IWUSR, + ppl_write_hint_show, + ppl_write_hint_store); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index cecea901ab8c..09562d7cc080 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6660,6 +6660,7 @@ static struct attribute *raid5_attrs[] = { &raid5_skip_copy.attr, &raid5_rmw_level.attr, &r5c_journal_mode.attr, + &ppl_write_hint.attr, NULL, }; static struct attribute_group raid5_attrs_group = { -- cgit v1.2.3-55-g7522 From e406f12dde1a8375d77ea02d91f313fb1a9c6aec Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Mon, 4 Mar 2019 16:48:54 -0600 Subject: md: Fix failed allocation of md_register_thread mddev->sync_thread can be set to NULL on kzalloc failure downstream. The patch checks for such a scenario and frees allocated resources. Committer node: Added similar fix to raid5.c, as suggested by Guoqing. Cc: stable@vger.kernel.org # v3.16+ Acked-by: Guoqing Jiang Signed-off-by: Aditya Pakki Signed-off-by: Song Liu --- drivers/md/raid10.c | 2 ++ drivers/md/raid5.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ecef42bfe19d..3b6880dd648d 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3939,6 +3939,8 @@ static int raid10_run(struct mddev *mddev) set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, "reshape"); + if (!mddev->sync_thread) + goto out_free_conf; } return 0; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 09562d7cc080..992fd08437d8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7403,6 +7403,8 @@ static int raid5_run(struct mddev *mddev) set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, "reshape"); + if (!mddev->sync_thread) + goto abort; } /* Ok, everything is just fine now */ -- cgit v1.2.3-55-g7522