diff options
Diffstat (limited to 'drivers/mtd/ubi/scan.c')
-rw-r--r-- | drivers/mtd/ubi/scan.c | 426 |
1 files changed, 324 insertions, 102 deletions
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index aed19f33b8f3..3c631863bf40 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -29,7 +29,7 @@ * objects which are kept in volume RB-tree with root at the @volumes field. * The RB-tree is indexed by the volume ID. * - * Found logical eraseblocks are represented by &struct ubi_scan_leb objects. + * Scanned logical eraseblocks are represented by &struct ubi_scan_leb objects. * These objects are kept in per-volume RB-trees with the root at the * corresponding &struct ubi_scan_volume object. To put it differently, we keep * an RB-tree of per-volume objects and each of these objects is the root of @@ -38,12 +38,40 @@ * Corrupted physical eraseblocks are put to the @corr list, free physical * eraseblocks are put to the @free list and the physical eraseblock to be * erased are put to the @erase list. + * + * UBI tries to distinguish between 2 types of corruptions. + * 1. Corruptions caused by power cuts. These are harmless and expected + * corruptions and UBI tries to handle them gracefully, without printing too + * many warnings and error messages. The idea is that we do not lose + * important data in these case - we may lose only the data which was being + * written to the media just before the power cut happened, and the upper + * layers (e.g., UBIFS) are supposed to handle these situations. UBI puts + * these PEBs to the head of the @erase list and they are scheduled for + * erasure. + * + * 2. Unexpected corruptions which are not caused by power cuts. During + * scanning, such PEBs are put to the @corr list and UBI preserves them. + * Obviously, this lessens the amount of available PEBs, and if at some + * point UBI runs out of free PEBs, it switches to R/O mode. UBI also loudly + * informs about such PEBs every time the MTD device is attached. + * + * However, it is difficult to reliably distinguish between these types of + * corruptions and UBI's strategy is as follows. UBI assumes (2.) if the VID + * header is corrupted and the data area does not contain all 0xFFs, and there + * were not bit-flips or integrity errors while reading the data area. Otherwise + * UBI assumes (1.). The assumptions are: + * o if the data area contains only 0xFFs, there is no data, and it is safe + * to just erase this PEB. + * o if the data area has bit-flips and data integrity errors (ECC errors on + * NAND), it is probably a PEB which was being erased when power cut + * happened. */ #include <linux/err.h> #include <linux/slab.h> #include <linux/crc32.h> #include <linux/math64.h> +#include <linux/random.h> #include "ubi.h" #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID @@ -61,27 +89,30 @@ static struct ubi_vid_hdr *vidh; * @si: scanning information * @pnum: physical eraseblock number to add * @ec: erase counter of the physical eraseblock + * @to_head: if not zero, add to the head of the list * @list: the list to add to * - * This function adds physical eraseblock @pnum to free, erase, corrupted or - * alien lists. Returns zero in case of success and a negative error code in - * case of failure. + * This function adds physical eraseblock @pnum to free, erase, or alien lists. + * If @to_head is not zero, PEB will be added to the head of the list, which + * basically means it will be processed first later. E.g., we add corrupted + * PEBs (corrupted due to power cuts) to the head of the erase list to make + * sure we erase them first and get rid of corruptions ASAP. This function + * returns zero in case of success and a negative error code in case of + * failure. */ -static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, +static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, int to_head, struct list_head *list) { struct ubi_scan_leb *seb; - if (list == &si->free) + if (list == &si->free) { dbg_bld("add to free: PEB %d, EC %d", pnum, ec); - else if (list == &si->erase) + } else if (list == &si->erase) { dbg_bld("add to erase: PEB %d, EC %d", pnum, ec); - else if (list == &si->corr) { - dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec); - si->corr_count += 1; - } else if (list == &si->alien) + } else if (list == &si->alien) { dbg_bld("add to alien: PEB %d, EC %d", pnum, ec); - else + si->alien_peb_count += 1; + } else BUG(); seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL); @@ -90,7 +121,37 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, seb->pnum = pnum; seb->ec = ec; - list_add_tail(&seb->u.list, list); + if (to_head) + list_add(&seb->u.list, list); + else + list_add_tail(&seb->u.list, list); + return 0; +} + +/** + * add_corrupted - add a corrupted physical eraseblock. + * @si: scanning information + * @pnum: physical eraseblock number to add + * @ec: erase counter of the physical eraseblock + * + * This function adds corrupted physical eraseblock @pnum to the 'corr' list. + * The corruption was presumably not caused by a power cut. Returns zero in + * case of success and a negative error code in case of failure. + */ +static int add_corrupted(struct ubi_scan_info *si, int pnum, int ec) +{ + struct ubi_scan_leb *seb; + + dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec); + + seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL); + if (!seb) + return -ENOMEM; + + si->corr_peb_count += 1; + seb->pnum = pnum; + seb->ec = ec; + list_add(&seb->u.list, &si->corr); return 0; } @@ -254,8 +315,8 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb, * created before sequence numbers support has been added. At * that times we used 32-bit LEB versions stored in logical * eraseblocks. That was before UBI got into mainline. We do not - * support these images anymore. Well, those images will work - * still work, but only if no unclean reboots happened. + * support these images anymore. Well, those images still work, + * but only if no unclean reboots happened. */ ubi_err("unsupported on-flash UBI format\n"); return -EINVAL; @@ -281,19 +342,25 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb, return 1; } } else { - pnum = seb->pnum; + if (!seb->copy_flag) { + /* It is not a copy, so it is newer */ + dbg_bld("first PEB %d is newer, copy_flag is unset", + pnum); + return bitflips << 1; + } vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); if (!vh) return -ENOMEM; + pnum = seb->pnum; err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0); if (err) { if (err == UBI_IO_BITFLIPS) bitflips = 1; else { dbg_err("VID of PEB %d header is bad, but it " - "was OK earlier", pnum); + "was OK earlier, err %d", pnum, err); if (err > 0) err = -EIO; @@ -301,14 +368,6 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb, } } - if (!vh->copy_flag) { - /* It is not a copy, so it is newer */ - dbg_bld("first PEB %d is newer, copy_flag is unset", - pnum); - err = bitflips << 1; - goto out_free_vidh; - } - vid_hdr = vh; } @@ -459,18 +518,15 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, if (err) return err; - if (cmp_res & 4) - err = add_to_list(si, seb->pnum, seb->ec, - &si->corr); - else - err = add_to_list(si, seb->pnum, seb->ec, - &si->erase); + err = add_to_list(si, seb->pnum, seb->ec, cmp_res & 4, + &si->erase); if (err) return err; seb->ec = ec; seb->pnum = pnum; seb->scrub = ((cmp_res & 2) || bitflips); + seb->copy_flag = vid_hdr->copy_flag; seb->sqnum = sqnum; if (sv->highest_lnum == lnum) @@ -483,10 +539,8 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, * This logical eraseblock is older than the one found * previously. */ - if (cmp_res & 4) - return add_to_list(si, pnum, ec, &si->corr); - else - return add_to_list(si, pnum, ec, &si->erase); + return add_to_list(si, pnum, ec, cmp_res & 4, + &si->erase); } } @@ -506,8 +560,9 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, seb->ec = ec; seb->pnum = pnum; seb->lnum = lnum; - seb->sqnum = sqnum; seb->scrub = bitflips; + seb->copy_flag = vid_hdr->copy_flag; + seb->sqnum = sqnum; if (sv->highest_lnum <= lnum) { sv->highest_lnum = lnum; @@ -663,8 +718,8 @@ out_free: struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi, struct ubi_scan_info *si) { - int err = 0, i; - struct ubi_scan_leb *seb; + int err = 0; + struct ubi_scan_leb *seb, *tmp_seb; if (!list_empty(&si->free)) { seb = list_entry(si->free.next, struct ubi_scan_leb, u.list); @@ -673,38 +728,86 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi, return seb; } - for (i = 0; i < 2; i++) { - struct list_head *head; - struct ubi_scan_leb *tmp_seb; + /* + * We try to erase the first physical eraseblock from the erase list + * and pick it if we succeed, or try to erase the next one if not. And + * so forth. We don't want to take care about bad eraseblocks here - + * they'll be handled later. + */ + list_for_each_entry_safe(seb, tmp_seb, &si->erase, u.list) { + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; + + err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1); + if (err) + continue; - if (i == 0) - head = &si->erase; - else - head = &si->corr; + seb->ec += 1; + list_del(&seb->u.list); + dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec); + return seb; + } + ubi_err("no free eraseblocks"); + return ERR_PTR(-ENOSPC); +} + +/** + * check_corruption - check the data area of PEB. + * @ubi: UBI device description object + * @vid_hrd: the (corrupted) VID header of this PEB + * @pnum: the physical eraseblock number to check + * + * This is a helper function which is used to distinguish between VID header + * corruptions caused by power cuts and other reasons. If the PEB contains only + * 0xFF bytes in the data area, the VID header is most probably corrupted + * because of a power cut (%0 is returned in this case). Otherwise, it was + * probably corrupted for some other reasons (%1 is returned in this case). A + * negative error code is returned if a read error occurred. + * + * If the corruption reason was a power cut, UBI can safely erase this PEB. + * Otherwise, it should preserve it to avoid possibly destroying important + * information. + */ +static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, + int pnum) +{ + int err; + + mutex_lock(&ubi->buf_mutex); + memset(ubi->peb_buf1, 0x00, ubi->leb_size); + + err = ubi_io_read(ubi, ubi->peb_buf1, pnum, ubi->leb_start, + ubi->leb_size); + if (err == UBI_IO_BITFLIPS || err == -EBADMSG) { /* - * We try to erase the first physical eraseblock from the @head - * list and pick it if we succeed, or try to erase the - * next one if not. And so forth. We don't want to take care - * about bad eraseblocks here - they'll be handled later. + * Bit-flips or integrity errors while reading the data area. + * It is difficult to say for sure what type of corruption is + * this, but presumably a power cut happened while this PEB was + * erased, so it became unstable and corrupted, and should be + * erased. */ - list_for_each_entry_safe(seb, tmp_seb, head, u.list) { - if (seb->ec == UBI_SCAN_UNKNOWN_EC) - seb->ec = si->mean_ec; + return 0; + } - err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1); - if (err) - continue; + if (err) + return err; - seb->ec += 1; - list_del(&seb->u.list); - dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec); - return seb; - } + if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size)) { + mutex_unlock(&ubi->buf_mutex); + return 0; } - ubi_err("no eraseblocks found"); - return ERR_PTR(-ENOSPC); + ubi_err("PEB %d contains corrupted VID header, and the data does not " + "contain all 0xFF, this may be a non-UBI PEB or a severe VID " + "header corruption which requires manual inspection", pnum); + ubi_dbg_dump_vid_hdr(vid_hdr); + dbg_msg("hexdump of PEB %d offset %d, length %d", + pnum, ubi->leb_start, ubi->leb_size); + ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + ubi->peb_buf1, ubi->leb_size, 1); + mutex_unlock(&ubi->buf_mutex); + return 1; } /** @@ -720,7 +823,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum) { long long uninitialized_var(ec); - int err, bitflips = 0, vol_id, ec_corr = 0; + int err, bitflips = 0, vol_id, ec_err = 0; dbg_bld("scan PEB %d", pnum); @@ -741,24 +844,37 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0); if (err < 0) return err; - else if (err == UBI_IO_BITFLIPS) + switch (err) { + case 0: + break; + case UBI_IO_BITFLIPS: bitflips = 1; - else if (err == UBI_IO_PEB_EMPTY) - return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, &si->erase); - else if (err == UBI_IO_BAD_EC_HDR) { + break; + case UBI_IO_FF: + si->empty_peb_count += 1; + return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, 0, + &si->erase); + case UBI_IO_FF_BITFLIPS: + si->empty_peb_count += 1; + return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, 1, + &si->erase); + case UBI_IO_BAD_HDR_EBADMSG: + case UBI_IO_BAD_HDR: /* * We have to also look at the VID header, possibly it is not * corrupted. Set %bitflips flag in order to make this PEB be * moved and EC be re-created. */ - ec_corr = 1; + ec_err = err; ec = UBI_SCAN_UNKNOWN_EC; bitflips = 1; + break; + default: + ubi_err("'ubi_io_read_ec_hdr()' returned unknown code %d", err); + return -EINVAL; } - si->is_empty = 0; - - if (!ec_corr) { + if (!ec_err) { int image_seq; /* Make sure UBI version is OK */ @@ -811,21 +927,67 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0); if (err < 0) return err; - else if (err == UBI_IO_BITFLIPS) + switch (err) { + case 0: + break; + case UBI_IO_BITFLIPS: bitflips = 1; - else if (err == UBI_IO_BAD_VID_HDR || - (err == UBI_IO_PEB_FREE && ec_corr)) { - /* VID header is corrupted */ - err = add_to_list(si, pnum, ec, &si->corr); + break; + case UBI_IO_BAD_HDR_EBADMSG: + if (ec_err == UBI_IO_BAD_HDR_EBADMSG) + /* + * Both EC and VID headers are corrupted and were read + * with data integrity error, probably this is a bad + * PEB, bit it is not marked as bad yet. This may also + * be a result of power cut during erasure. + */ + si->maybe_bad_peb_count += 1; + case UBI_IO_BAD_HDR: + if (ec_err) + /* + * Both headers are corrupted. There is a possibility + * that this a valid UBI PEB which has corresponding + * LEB, but the headers are corrupted. However, it is + * impossible to distinguish it from a PEB which just + * contains garbage because of a power cut during erase + * operation. So we just schedule this PEB for erasure. + */ + err = 0; + else + /* + * The EC was OK, but the VID header is corrupted. We + * have to check what is in the data area. + */ + err = check_corruption(ubi, vidh, pnum); + + if (err < 0) + return err; + else if (!err) + /* This corruption is caused by a power cut */ + err = add_to_list(si, pnum, ec, 1, &si->erase); + else + /* This is an unexpected corruption */ + err = add_corrupted(si, pnum, ec); if (err) return err; goto adjust_mean_ec; - } else if (err == UBI_IO_PEB_FREE) { - /* No VID header - the physical eraseblock is free */ - err = add_to_list(si, pnum, ec, &si->free); + case UBI_IO_FF_BITFLIPS: + err = add_to_list(si, pnum, ec, 1, &si->erase); if (err) return err; goto adjust_mean_ec; + case UBI_IO_FF: + if (ec_err) + err = add_to_list(si, pnum, ec, 1, &si->erase); + else + err = add_to_list(si, pnum, ec, 0, &si->free); + if (err) + return err; + goto adjust_mean_ec; + default: + ubi_err("'ubi_io_read_vid_hdr()' returned unknown code %d", + err); + return -EINVAL; } vol_id = be32_to_cpu(vidh->vol_id); @@ -836,11 +998,11 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, switch (vidh->compat) { case UBI_COMPAT_DELETE: ubi_msg("\"delete\" compatible internal volume %d:%d" - " found, remove it", vol_id, lnum); - err = add_to_list(si, pnum, ec, &si->corr); + " found, will remove it", vol_id, lnum); + err = add_to_list(si, pnum, ec, 1, &si->erase); if (err) return err; - break; + return 0; case UBI_COMPAT_RO: ubi_msg("read-only compatible internal volume %d:%d" @@ -852,10 +1014,9 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, case UBI_COMPAT_PRESERVE: ubi_msg("\"preserve\" compatible internal volume %d:%d" " found", vol_id, lnum); - err = add_to_list(si, pnum, ec, &si->alien); + err = add_to_list(si, pnum, ec, 0, &si->alien); if (err) return err; - si->alien_peb_count += 1; return 0; case UBI_COMPAT_REJECT: @@ -865,7 +1026,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, } } - if (ec_corr) + if (ec_err) ubi_warn("valid VID header but corrupted EC header at PEB %d", pnum); err = ubi_scan_add_used(ubi, si, pnum, ec, vidh, bitflips); @@ -873,7 +1034,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, return err; adjust_mean_ec: - if (!ec_corr) { + if (!ec_err) { si->ec_sum += ec; si->ec_count += 1; if (ec > si->max_ec) @@ -886,6 +1047,80 @@ adjust_mean_ec: } /** + * check_what_we_have - check what PEB were found by scanning. + * @ubi: UBI device description object + * @si: scanning information + * + * This is a helper function which takes a look what PEBs were found by + * scanning, and decides whether the flash is empty and should be formatted and + * whether there are too many corrupted PEBs and we should not attach this + * MTD device. Returns zero if we should proceed with attaching the MTD device, + * and %-EINVAL if we should not. + */ +static int check_what_we_have(struct ubi_device *ubi, struct ubi_scan_info *si) +{ + struct ubi_scan_leb *seb; + int max_corr, peb_count; + + peb_count = ubi->peb_count - si->bad_peb_count - si->alien_peb_count; + max_corr = peb_count / 20 ?: 8; + + /* + * Few corrupted PEBs is not a problem and may be just a result of + * unclean reboots. However, many of them may indicate some problems + * with the flash HW or driver. + */ + if (si->corr_peb_count) { + ubi_err("%d PEBs are corrupted and preserved", + si->corr_peb_count); + printk(KERN_ERR "Corrupted PEBs are:"); + list_for_each_entry(seb, &si->corr, u.list) + printk(KERN_CONT " %d", seb->pnum); + printk(KERN_CONT "\n"); + + /* + * If too many PEBs are corrupted, we refuse attaching, + * otherwise, only print a warning. + */ + if (si->corr_peb_count >= max_corr) { + ubi_err("too many corrupted PEBs, refusing this device"); + return -EINVAL; + } + } + + if (si->empty_peb_count + si->maybe_bad_peb_count == peb_count) { + /* + * All PEBs are empty, or almost all - a couple PEBs look like + * they may be bad PEBs which were not marked as bad yet. + * + * This piece of code basically tries to distinguish between + * the following situations: + * + * 1. Flash is empty, but there are few bad PEBs, which are not + * marked as bad so far, and which were read with error. We + * want to go ahead and format this flash. While formatting, + * the faulty PEBs will probably be marked as bad. + * + * 2. Flash contains non-UBI data and we do not want to format + * it and destroy possibly important information. + */ + if (si->maybe_bad_peb_count <= 2) { + si->is_empty = 1; + ubi_msg("empty MTD device detected"); + get_random_bytes(&ubi->image_seq, + sizeof(ubi->image_seq)); + } else { + ubi_err("MTD device is not UBI-formatted and possibly " + "contains non-UBI data - refusing it"); + return -EINVAL; + } + + } + + return 0; +} + +/** * ubi_scan - scan an MTD device. * @ubi: UBI device description object * @@ -909,7 +1144,6 @@ struct ubi_scan_info *ubi_scan(struct ubi_device *ubi) INIT_LIST_HEAD(&si->erase); INIT_LIST_HEAD(&si->alien); si->volumes = RB_ROOT; - si->is_empty = 1; err = -ENOMEM; ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); @@ -935,21 +1169,9 @@ struct ubi_scan_info *ubi_scan(struct ubi_device *ubi) if (si->ec_count) si->mean_ec = div_u64(si->ec_sum, si->ec_count); - if (si->is_empty) - ubi_msg("empty MTD device detected"); - - /* - * Few corrupted PEBs are not a problem and may be just a result of - * unclean reboots. However, many of them may indicate some problems - * with the flash HW or driver. Print a warning in this case. - */ - if (si->corr_count >= 8 || si->corr_count >= ubi->peb_count / 4) { - ubi_warn("%d PEBs are corrupted", si->corr_count); - printk(KERN_WARNING "corrupted PEBs are:"); - list_for_each_entry(seb, &si->corr, u.list) - printk(KERN_CONT " %d", seb->pnum); - printk(KERN_CONT "\n"); - } + err = check_what_we_have(ubi, si); + if (err) + goto out_vidh; /* * In case of unknown erase counter we use the mean erase counter |