From dedb0d48a9d4d57086526b94a4b64da789a646e4 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 9 Jan 2009 21:02:37 +0200 Subject: UBIFS: do not commit twice VFS calls '->sync_fs()' twice - first time with @wait = 0, second time with @wait = 1. As a result, we may commit and synchronize write-buffers twice. Avoid doing this by returning immediatelly if @wait = 0. Signed-off-by: Artem Bityutskiy --- fs/ubifs/super.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 89556ee72518..a7fc97f4d9de 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -432,18 +432,19 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) int i, err; struct ubifs_info *c = sb->s_fs_info; struct writeback_control wbc = { - .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, + .sync_mode = WB_SYNC_ALL, .range_start = 0, .range_end = LLONG_MAX, .nr_to_write = LONG_MAX, }; /* - * Note by akpm about WB_SYNC_NONE used above: zero @wait is just an - * advisory thing to help the file system shove lots of data into the - * queues. If some gets missed then it'll be picked up on the second + * Zero @wait is just an advisory thing to help the file system shove + * lots of data into the queues, and there will be the second * '->sync_fs()' call, with non-zero @wait. */ + if (!wait) + return 0; if (sb->s_flags & MS_RDONLY) return 0; -- cgit v1.2.3-55-g7522 From e8b815663b1bfd9c255af5176604ec0eafdf6ed7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 15 Jan 2009 17:43:23 +0200 Subject: UBIFS: constify operations Mark super, file, and inode operation structcutes with 'const'. Signed-off-by: Artem Bityutskiy --- fs/ubifs/dir.c | 4 ++-- fs/ubifs/file.c | 8 ++++---- fs/ubifs/super.c | 2 +- fs/ubifs/ubifs.h | 14 +++++++------- 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index f448ab1f9c38..d29b771cce45 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1199,7 +1199,7 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, return 0; } -struct inode_operations ubifs_dir_inode_operations = { +const struct inode_operations ubifs_dir_inode_operations = { .lookup = ubifs_lookup, .create = ubifs_create, .link = ubifs_link, @@ -1219,7 +1219,7 @@ struct inode_operations ubifs_dir_inode_operations = { #endif }; -struct file_operations ubifs_dir_operations = { +const struct file_operations ubifs_dir_operations = { .llseek = ubifs_dir_llseek, .release = ubifs_dir_release, .read = generic_read_dir, diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index bf37374567fa..17443d97e6f1 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1541,7 +1541,7 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -struct address_space_operations ubifs_file_address_operations = { +const struct address_space_operations ubifs_file_address_operations = { .readpage = ubifs_readpage, .writepage = ubifs_writepage, .write_begin = ubifs_write_begin, @@ -1551,7 +1551,7 @@ struct address_space_operations ubifs_file_address_operations = { .releasepage = ubifs_releasepage, }; -struct inode_operations ubifs_file_inode_operations = { +const struct inode_operations ubifs_file_inode_operations = { .setattr = ubifs_setattr, .getattr = ubifs_getattr, #ifdef CONFIG_UBIFS_FS_XATTR @@ -1562,14 +1562,14 @@ struct inode_operations ubifs_file_inode_operations = { #endif }; -struct inode_operations ubifs_symlink_inode_operations = { +const struct inode_operations ubifs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = ubifs_follow_link, .setattr = ubifs_setattr, .getattr = ubifs_getattr, }; -struct file_operations ubifs_file_operations = { +const struct file_operations ubifs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index a7fc97f4d9de..53811e567a69 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1778,7 +1778,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) return 0; } -struct super_operations ubifs_super_operations = { +const struct super_operations ubifs_super_operations = { .alloc_inode = ubifs_alloc_inode, .destroy_inode = ubifs_destroy_inode, .put_super = ubifs_put_super, diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index fc2a4cc66d03..0881897a4208 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1405,13 +1405,13 @@ extern struct list_head ubifs_infos; extern spinlock_t ubifs_infos_lock; extern atomic_long_t ubifs_clean_zn_cnt; extern struct kmem_cache *ubifs_inode_slab; -extern struct super_operations ubifs_super_operations; -extern struct address_space_operations ubifs_file_address_operations; -extern struct file_operations ubifs_file_operations; -extern struct inode_operations ubifs_file_inode_operations; -extern struct file_operations ubifs_dir_operations; -extern struct inode_operations ubifs_dir_inode_operations; -extern struct inode_operations ubifs_symlink_inode_operations; +extern const struct super_operations ubifs_super_operations; +extern const struct address_space_operations ubifs_file_address_operations; +extern const struct file_operations ubifs_file_operations; +extern const struct inode_operations ubifs_file_inode_operations; +extern const struct file_operations ubifs_dir_operations; +extern const struct inode_operations ubifs_dir_inode_operations; +extern const struct inode_operations ubifs_symlink_inode_operations; extern struct backing_dev_info ubifs_backing_dev_info; extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; -- cgit v1.2.3-55-g7522 From a50412e3f8ce95d7ed558370d7dde5171fd04283 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 6 Jan 2009 19:54:02 +0200 Subject: UBIFS: do not treat all data as short term UBIFS wrongly tells UBI that all data is short term. Use proper hints instead. Thanks to Xiaochuan-Xu for noticing this. Signed-off-by: Artem Bityutskiy --- fs/ubifs/journal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 9b7c54e0cd2a..a11ca0958a23 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -208,7 +208,7 @@ again: offs = 0; out: - err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, UBI_SHORTTERM); + err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); if (err) goto out_unlock; -- cgit v1.2.3-55-g7522 From 7078202e55b565582fcbd831a8dd3069bdc72610 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 19 Jan 2009 19:57:27 +0200 Subject: UBIFS: document dark_wm and dead_wm better Just add more commentaries. Also some commentary fixes for lprops flags. Signed-off-by: Artem Bityutskiy --- fs/ubifs/gc.c | 20 ++++++++++++++++++++ fs/ubifs/super.c | 11 ++--------- fs/ubifs/ubifs.h | 4 ++-- 3 files changed, 24 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 9832f9abe28e..b2e5f1133377 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -31,6 +31,26 @@ * to be reused. Garbage collection will cause the number of dirty index nodes * to grow, however sufficient space is reserved for the index to ensure the * commit will never run out of space. + * + * Notes about dead watermark. At current UBIFS implementation we assume that + * LEBs which have less than @c->dead_wm bytes of free + dirty space are full + * and not worth garbage-collecting. The dead watermark is one min. I/O unit + * size, or min. UBIFS node size, depending on what is greater. Indeed, UBIFS + * Garbage Collector has to synchronize the GC head's write buffer before + * returning, so this is about wasting one min. I/O unit. However, UBIFS GC can + * actually reclaim even very small pieces of dirty space by garbage collecting + * enough dirty LEBs, but we do not bother doing this at this implementation. + * + * Notes about dark watermark. The results of GC work depends on how big are + * the UBIFS nodes GC deals with. Large nodes make GC waste more space. Indeed, + * if GC move data from LEB A to LEB B and nodes in LEB A are large, GC would + * have to waste large pieces of free space at the end of LEB B, because nodes + * from LEB A would not fit. And the worst situation is when all nodes are of + * maximum size. So dark watermark is the amount of free + dirty space in LEB + * which are guaranteed to be reclaimable. If LEB has less space, the GC migh + * be unable to reclaim it. So, LEBs with free + dirty greater than dark + * watermark are "good" LEBs from GC's point of few. The other LEBs are not so + * good, and GC takes extra care when moving them. */ #include diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 53811e567a69..da99da098efd 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -573,15 +573,8 @@ static int init_constants_early(struct ubifs_info *c) c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX; /* - * Initialize dead and dark LEB space watermarks. - * - * Dead space is the space which cannot be used. Its watermark is - * equivalent to min. I/O unit or minimum node size if it is greater - * then min. I/O unit. - * - * Dark space is the space which might be used, or might not, depending - * on which node should be written to the LEB. Its watermark is - * equivalent to maximum UBIFS node size. + * Initialize dead and dark LEB space watermarks. See gc.c for comments + * about these values. */ c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 0881897a4208..2e78d6ac007e 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -426,9 +426,9 @@ struct ubifs_unclean_leb { * LEB properties flags. * * LPROPS_UNCAT: not categorized - * LPROPS_DIRTY: dirty > 0, not index + * LPROPS_DIRTY: dirty > free, dirty >= @c->dead_wm, not index * LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sze and index - * LPROPS_FREE: free > 0, not empty, not index + * LPROPS_FREE: free > 0, dirty < @c->dead_wm, not empty, not index * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs * LPROPS_EMPTY: LEB is empty, not taken * LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken -- cgit v1.2.3-55-g7522 From 82c1593cad3dfc97661764c8bc62aa1a416e9ea8 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 20 Jan 2009 16:46:02 +0200 Subject: UBIFS: simplify locking This patch simplifies lock_[23]_inodes functions. We do not have to care about locking order, because UBIFS does this for @i_mutex and this is enough. Thanks to Al Viro for suggesting this. Signed-off-by: Artem Bityutskiy --- fs/ubifs/dir.c | 92 +++++++++++++++++++++++----------------------------------- 1 file changed, 36 insertions(+), 56 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index d29b771cce45..f55d523c52bb 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -482,30 +482,29 @@ static int ubifs_dir_release(struct inode *dir, struct file *file) } /** - * lock_2_inodes - lock two UBIFS inodes. + * lock_2_inodes - a wrapper for locking two UBIFS inodes. * @inode1: first inode * @inode2: second inode + * + * We do not implement any tricks to guarantee strict lock ordering, because + * VFS has already done it for us on the @i_mutex. So this is just a simple + * wrapper function. */ static void lock_2_inodes(struct inode *inode1, struct inode *inode2) { - if (inode1->i_ino < inode2->i_ino) { - mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_2); - mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_3); - } else { - mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); - mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_3); - } + mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); + mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); } /** - * unlock_2_inodes - unlock two UBIFS inodes inodes. + * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. * @inode1: first inode * @inode2: second inode */ static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) { - mutex_unlock(&ubifs_inode(inode1)->ui_mutex); mutex_unlock(&ubifs_inode(inode2)->ui_mutex); + mutex_unlock(&ubifs_inode(inode1)->ui_mutex); } static int ubifs_link(struct dentry *old_dentry, struct inode *dir, @@ -527,6 +526,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu", dentry->d_name.len, dentry->d_name.name, inode->i_ino, inode->i_nlink, dir->i_ino); + ubifs_assert(mutex_is_locked(&dir->i_mutex)); + ubifs_assert(mutex_is_locked(&inode->i_mutex)); err = dbg_check_synced_i_size(inode); if (err) return err; @@ -580,6 +581,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu", dentry->d_name.len, dentry->d_name.name, inode->i_ino, inode->i_nlink, dir->i_ino); + ubifs_assert(mutex_is_locked(&dir->i_mutex)); + ubifs_assert(mutex_is_locked(&inode->i_mutex)); err = dbg_check_synced_i_size(inode); if (err) return err; @@ -667,7 +670,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len, dentry->d_name.name, inode->i_ino, dir->i_ino); - + ubifs_assert(mutex_is_locked(&dir->i_mutex)); + ubifs_assert(mutex_is_locked(&inode->i_mutex)); err = check_dir_empty(c, dentry->d_inode); if (err) return err; @@ -922,59 +926,30 @@ out_budg: } /** - * lock_3_inodes - lock three UBIFS inodes for rename. + * lock_3_inodes - a wrapper for locking three UBIFS inodes. * @inode1: first inode * @inode2: second inode * @inode3: third inode * - * For 'ubifs_rename()', @inode1 may be the same as @inode2 whereas @inode3 may - * be null. + * This function is used for 'ubifs_rename()' and @inode1 may be the same as + * @inode2 whereas @inode3 may be %NULL. + * + * We do not implement any tricks to guarantee strict lock ordering, because + * VFS has already done it for us on the @i_mutex. So this is just a simple + * wrapper function. */ static void lock_3_inodes(struct inode *inode1, struct inode *inode2, struct inode *inode3) { - struct inode *i1, *i2, *i3; - - if (!inode3) { - if (inode1 != inode2) { - lock_2_inodes(inode1, inode2); - return; - } - mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); - return; - } - - if (inode1 == inode2) { - lock_2_inodes(inode1, inode3); - return; - } - - /* 3 different inodes */ - if (inode1 < inode2) { - i3 = inode2; - if (inode1 < inode3) { - i1 = inode1; - i2 = inode3; - } else { - i1 = inode3; - i2 = inode1; - } - } else { - i3 = inode1; - if (inode2 < inode3) { - i1 = inode2; - i2 = inode3; - } else { - i1 = inode3; - i2 = inode2; - } - } - mutex_lock_nested(&ubifs_inode(i1)->ui_mutex, WB_MUTEX_1); - lock_2_inodes(i2, i3); + mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); + if (inode2 != inode1) + mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); + if (inode3) + mutex_lock_nested(&ubifs_inode(inode3)->ui_mutex, WB_MUTEX_3); } /** - * unlock_3_inodes - unlock three UBIFS inodes for rename. + * unlock_3_inodes - a wrapper for unlocking three UBIFS inodes for rename. * @inode1: first inode * @inode2: second inode * @inode3: third inode @@ -982,11 +957,11 @@ static void lock_3_inodes(struct inode *inode1, struct inode *inode2, static void unlock_3_inodes(struct inode *inode1, struct inode *inode2, struct inode *inode3) { - mutex_unlock(&ubifs_inode(inode1)->ui_mutex); - if (inode1 != inode2) - mutex_unlock(&ubifs_inode(inode2)->ui_mutex); if (inode3) mutex_unlock(&ubifs_inode(inode3)->ui_mutex); + if (inode1 != inode2) + mutex_unlock(&ubifs_inode(inode2)->ui_mutex); + mutex_unlock(&ubifs_inode(inode1)->ui_mutex); } static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, @@ -1020,6 +995,11 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name, old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, new_dentry->d_name.name, new_dir->i_ino); + ubifs_assert(mutex_is_locked(&old_dir->i_mutex)); + ubifs_assert(mutex_is_locked(&new_dir->i_mutex)); + if (unlink) + ubifs_assert(mutex_is_locked(&new_inode->i_mutex)); + if (unlink && is_dir) { err = check_dir_empty(c, new_inode); -- cgit v1.2.3-55-g7522 From e4d9b6cbfc98d696a28d2c24a3d49768695811ee Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Jan 2009 14:17:36 +0200 Subject: UBIFS: fix LEB list freeing When freeing the c->idx_lebs list, we have to release the LEBs as well, because we might be called from mount to read-only mode code. Otherwise the LEBs stay taken forever, which may cause problems when we re-mount back ro RW mode. Signed-off-by: Artem Bityutskiy --- fs/ubifs/gc.c | 16 ++++++++++++---- fs/ubifs/lprops.c | 8 ++++++++ fs/ubifs/super.c | 42 +++++++++++++++++++++++++++--------------- fs/ubifs/ubifs.h | 2 +- 4 files changed, 48 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index b2e5f1133377..9760154d874b 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -830,21 +830,29 @@ out: * ubifs_destroy_idx_gc - destroy idx_gc list. * @c: UBIFS file-system description object * - * This function destroys the idx_gc list. It is called when unmounting or - * remounting read-only so locks are not needed. + * This function destroys the @c->idx_gc list. It is called when unmounting or + * remounting read-only so locks are not needed. Returns zero in case of + * success and a negative error code in case of failure. */ -void ubifs_destroy_idx_gc(struct ubifs_info *c) +int ubifs_destroy_idx_gc(struct ubifs_info *c) { + int ret = 0; + while (!list_empty(&c->idx_gc)) { + int err; struct ubifs_gced_idx_leb *idx_gc; idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, list); - c->idx_gc_cnt -= 1; + err = ubifs_change_one_lp(c, idx_gc->lnum, LPROPS_NC, + LPROPS_NC, 0, LPROPS_TAKEN, -1); + if (err && !ret) + ret = err; list_del(&idx_gc->list); kfree(idx_gc); } + return ret; } /** diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index dfd2bcece27a..68328c59762b 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -678,6 +678,9 @@ int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, out: ubifs_release_lprops(c); + if (err) + ubifs_err("cannot change properties of LEB %d, error %d", + lnum, err); return err; } @@ -714,6 +717,9 @@ int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, out: ubifs_release_lprops(c); + if (err) + ubifs_err("cannot update properties of LEB %d, error %d", + lnum, err); return err; } @@ -737,6 +743,8 @@ int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp) lpp = ubifs_lpt_lookup(c, lnum); if (IS_ERR(lpp)) { err = PTR_ERR(lpp); + ubifs_err("cannot read properties of LEB %d, error %d", + lnum, err); goto out; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index da99da098efd..807bbd3c8b4b 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1469,9 +1469,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) { int err, lnum; - if (c->ro_media) - return -EINVAL; - mutex_lock(&c->umount_mutex); c->remounting_rw = 1; c->always_chk_crc = 1; @@ -1605,9 +1602,13 @@ out: */ static void commit_on_unmount(struct ubifs_info *c) { - struct super_block *sb = c->vfs_sb; long long bud_bytes; + if (!c->fast_unmount) { + dbg_gen("skip committing - fast unmount enabled"); + return; + } + /* * This function is called before the background thread is stopped, so * we may race with ongoing commit, which means we have to take @@ -1617,8 +1618,11 @@ static void commit_on_unmount(struct ubifs_info *c) bud_bytes = c->bud_bytes; spin_unlock(&c->buds_lock); - if (!c->fast_unmount && !(sb->s_flags & MS_RDONLY) && bud_bytes) + if (bud_bytes) { + dbg_gen("run commit"); ubifs_run_commit(c); + } else + dbg_gen("journal is empty, do not run commit"); } /** @@ -1633,6 +1637,8 @@ static void ubifs_remount_ro(struct ubifs_info *c) int i, err; ubifs_assert(!c->need_recovery); + ubifs_assert(!c->ro_media); + commit_on_unmount(c); mutex_lock(&c->umount_mutex); @@ -1646,16 +1652,17 @@ static void ubifs_remount_ro(struct ubifs_info *c) del_timer_sync(&c->jheads[i].wbuf.timer); } - if (!c->ro_media) { - c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); - c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); - c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); - err = ubifs_write_master(c); - if (err) - ubifs_ro_mode(c, err); - } + c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); + c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); + err = ubifs_write_master(c); + if (err) + ubifs_ro_mode(c, err); + + err = ubifs_destroy_idx_gc(c); + if (err) + ubifs_ro_mode(c, err); - ubifs_destroy_idx_gc(c); free_wbufs(c); vfree(c->orph_buf); c->orph_buf = NULL; @@ -1754,6 +1761,11 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) } if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { + if (c->ro_media) { + ubifs_msg("cannot re-mount R/W, UBIFS is working in " + "R/O mode"); + return -EINVAL; + } err = ubifs_remount_rw(c); if (err) return err; @@ -2044,7 +2056,7 @@ static void ubifs_kill_sb(struct super_block *sb) * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()' * in order to be outside BKL. */ - if (sb->s_root) + if (sb->s_root && !(sb->s_flags & MS_RDONLY)) commit_on_unmount(c); /* The un-mount routine is actually done in put_super() */ generic_shutdown_super(sb); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 2e78d6ac007e..ee9517a7b024 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1593,7 +1593,7 @@ int ubifs_replay_journal(struct ubifs_info *c); int ubifs_garbage_collect(struct ubifs_info *c, int anyway); int ubifs_gc_start_commit(struct ubifs_info *c); int ubifs_gc_end_commit(struct ubifs_info *c); -void ubifs_destroy_idx_gc(struct ubifs_info *c); +int ubifs_destroy_idx_gc(struct ubifs_info *c); int ubifs_get_idx_gc_leb(struct ubifs_info *c); int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp); -- cgit v1.2.3-55-g7522 From 84abf972ccff5c13d10b672972949eba431a6e0e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Jan 2009 14:54:59 +0200 Subject: UBIFS: add re-mount debugging checks We observe space corrupted accounting when re-mounting. So add some debbugging checks to catch problems like this. Signed-off-by: Artem Bityutskiy --- fs/ubifs/budget.c | 35 +++++++++++----- fs/ubifs/debug.c | 120 +++++++++++++++++++++++++++++++++++++++--------------- fs/ubifs/debug.h | 36 +++++++++------- fs/ubifs/file.c | 1 - fs/ubifs/lprops.c | 4 +- fs/ubifs/super.c | 14 +++++-- fs/ubifs/ubifs.h | 3 +- 7 files changed, 148 insertions(+), 65 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 175f9c590b77..f393620890ee 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -689,7 +689,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free) } /** - * ubifs_get_free_space - return amount of free space. + * ubifs_get_free_space_nolock - return amount of free space. * @c: UBIFS file-system description object * * This function calculates amount of free space to report to user-space. @@ -704,16 +704,14 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free) * traditional file-systems, because they have way less overhead than UBIFS. * So, to keep users happy, UBIFS tries to take the overhead into account. */ -long long ubifs_get_free_space(struct ubifs_info *c) +long long ubifs_get_free_space_nolock(struct ubifs_info *c) { - int min_idx_lebs, rsvd_idx_lebs, lebs; + int rsvd_idx_lebs, lebs; long long available, outstanding, free; - spin_lock(&c->space_lock); - min_idx_lebs = c->min_idx_lebs; - ubifs_assert(min_idx_lebs == ubifs_calc_min_idx_lebs(c)); + ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); outstanding = c->budg_data_growth + c->budg_dd_growth; - available = ubifs_calc_available(c, min_idx_lebs); + available = ubifs_calc_available(c, c->min_idx_lebs); /* * When reporting free space to user-space, UBIFS guarantees that it is @@ -726,15 +724,14 @@ long long ubifs_get_free_space(struct ubifs_info *c) * Note, the calculations below are similar to what we have in * 'do_budget_space()', so refer there for comments. */ - if (min_idx_lebs > c->lst.idx_lebs) - rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; + if (c->min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; else rsvd_idx_lebs = 0; lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - c->lst.taken_empty_lebs; lebs -= rsvd_idx_lebs; available += lebs * (c->dark_wm - c->leb_overhead); - spin_unlock(&c->space_lock); if (available > outstanding) free = ubifs_reported_space(c, available - outstanding); @@ -742,3 +739,21 @@ long long ubifs_get_free_space(struct ubifs_info *c) free = 0; return free; } + +/** + * ubifs_get_free_space - return amount of free space. + * @c: UBIFS file-system description object + * + * This function calculates and retuns amount of free space to report to + * user-space. + */ +long long ubifs_get_free_space(struct ubifs_info *c) +{ + long long free; + + spin_lock(&c->space_lock); + free = ubifs_get_free_space_nolock(c); + spin_unlock(&c->space_lock); + + return free; +} diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 792c5a16c182..9a41f6f245b7 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -620,9 +620,11 @@ void dbg_dump_budg(struct ubifs_info *c) c->dark_wm, c->dead_wm, c->max_idx_node_sz); printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", c->gc_lnum, c->ihead_lnum); - for (i = 0; i < c->jhead_cnt; i++) - printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", - c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum); + /* If we are in R/O mode, journal heads do not exist */ + if (c->jheads) + for (i = 0; i < c->jhead_cnt; i++) + printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", + c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum); for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { bud = rb_entry(rb, struct ubifs_bud, rb); printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); @@ -637,10 +639,7 @@ void dbg_dump_budg(struct ubifs_info *c) /* Print budgeting predictions */ available = ubifs_calc_available(c, c->min_idx_lebs); outstanding = c->budg_data_growth + c->budg_dd_growth; - if (available > outstanding) - free = ubifs_reported_space(c, available - outstanding); - else - free = 0; + free = ubifs_get_free_space_nolock(c); printk(KERN_DEBUG "Budgeting predictions:\n"); printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", available, outstanding, free); @@ -860,6 +859,65 @@ void dbg_dump_index(struct ubifs_info *c) dbg_walk_index(c, NULL, dump_znode, NULL); } +/** + * dbg_save_space_info - save information about flash space. + * @c: UBIFS file-system description object + * + * This function saves information about UBIFS free space, dirty space, etc, in + * order to check it later. + */ +void dbg_save_space_info(struct ubifs_info *c) +{ + struct ubifs_debug_info *d = c->dbg; + + ubifs_get_lp_stats(c, &d->saved_lst); + + spin_lock(&c->space_lock); + d->saved_free = ubifs_get_free_space_nolock(c); + spin_unlock(&c->space_lock); +} + +/** + * dbg_check_space_info - check flash space information. + * @c: UBIFS file-system description object + * + * This function compares current flash space information with the information + * which was saved when the 'dbg_save_space_info()' function was called. + * Returns zero if the information has not changed, and %-EINVAL it it has + * changed. + */ +int dbg_check_space_info(struct ubifs_info *c) +{ + struct ubifs_debug_info *d = c->dbg; + struct ubifs_lp_stats lst; + long long avail, free; + + spin_lock(&c->space_lock); + avail = ubifs_calc_available(c, c->min_idx_lebs); + spin_unlock(&c->space_lock); + free = ubifs_get_free_space(c); + + if (free != d->saved_free) { + ubifs_err("free space changed from %lld to %lld", + d->saved_free, free); + goto out; + } + + return 0; + +out: + ubifs_msg("saved lprops statistics dump"); + dbg_dump_lstats(&d->saved_lst); + ubifs_get_lp_stats(c, &lst); + ubifs_msg("current lprops statistics dump"); + dbg_dump_lstats(&d->saved_lst); + spin_lock(&c->space_lock); + dbg_dump_budg(c); + spin_unlock(&c->space_lock); + dump_stack(); + return -EINVAL; +} + /** * dbg_check_synced_i_size - check synchronized inode size. * @inode: inode to check @@ -2409,7 +2467,7 @@ void ubifs_debugging_exit(struct ubifs_info *c) * Root directory for UBIFS stuff in debugfs. Contains sub-directories which * contain the stuff specific to particular file-system mounts. */ -static struct dentry *debugfs_rootdir; +static struct dentry *dfs_rootdir; /** * dbg_debugfs_init - initialize debugfs file-system. @@ -2421,9 +2479,9 @@ static struct dentry *debugfs_rootdir; */ int dbg_debugfs_init(void) { - debugfs_rootdir = debugfs_create_dir("ubifs", NULL); - if (IS_ERR(debugfs_rootdir)) { - int err = PTR_ERR(debugfs_rootdir); + dfs_rootdir = debugfs_create_dir("ubifs", NULL); + if (IS_ERR(dfs_rootdir)) { + int err = PTR_ERR(dfs_rootdir); ubifs_err("cannot create \"ubifs\" debugfs directory, " "error %d\n", err); return err; @@ -2437,7 +2495,7 @@ int dbg_debugfs_init(void) */ void dbg_debugfs_exit(void) { - debugfs_remove(debugfs_rootdir); + debugfs_remove(dfs_rootdir); } static int open_debugfs_file(struct inode *inode, struct file *file) @@ -2452,13 +2510,13 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf, struct ubifs_info *c = file->private_data; struct ubifs_debug_info *d = c->dbg; - if (file->f_path.dentry == d->dump_lprops) + if (file->f_path.dentry == d->dfs_dump_lprops) dbg_dump_lprops(c); - else if (file->f_path.dentry == d->dump_budg) { + else if (file->f_path.dentry == d->dfs_dump_budg) { spin_lock(&c->space_lock); dbg_dump_budg(c); spin_unlock(&c->space_lock); - } else if (file->f_path.dentry == d->dump_tnc) { + } else if (file->f_path.dentry == d->dfs_dump_tnc) { mutex_lock(&c->tnc_mutex); dbg_dump_tnc(c); mutex_unlock(&c->tnc_mutex); @@ -2469,7 +2527,7 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf, return count; } -static const struct file_operations debugfs_fops = { +static const struct file_operations dfs_fops = { .open = open_debugfs_file, .write = write_debugfs_file, .owner = THIS_MODULE, @@ -2494,36 +2552,32 @@ int dbg_debugfs_init_fs(struct ubifs_info *c) struct dentry *dent; struct ubifs_debug_info *d = c->dbg; - sprintf(d->debugfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); - d->debugfs_dir = debugfs_create_dir(d->debugfs_dir_name, - debugfs_rootdir); - if (IS_ERR(d->debugfs_dir)) { - err = PTR_ERR(d->debugfs_dir); + sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); + d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir); + if (IS_ERR(d->dfs_dir)) { + err = PTR_ERR(d->dfs_dir); ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", - d->debugfs_dir_name, err); + d->dfs_dir_name, err); goto out; } fname = "dump_lprops"; - dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, - &debugfs_fops); + dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); if (IS_ERR(dent)) goto out_remove; - d->dump_lprops = dent; + d->dfs_dump_lprops = dent; fname = "dump_budg"; - dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, - &debugfs_fops); + dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); if (IS_ERR(dent)) goto out_remove; - d->dump_budg = dent; + d->dfs_dump_budg = dent; fname = "dump_tnc"; - dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, - &debugfs_fops); + dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); if (IS_ERR(dent)) goto out_remove; - d->dump_tnc = dent; + d->dfs_dump_tnc = dent; return 0; @@ -2531,7 +2585,7 @@ out_remove: err = PTR_ERR(dent); ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", fname, err); - debugfs_remove_recursive(d->debugfs_dir); + debugfs_remove_recursive(d->dfs_dir); out: return err; } @@ -2542,7 +2596,7 @@ out: */ void dbg_debugfs_exit_fs(struct ubifs_info *c) { - debugfs_remove_recursive(c->dbg->debugfs_dir); + debugfs_remove_recursive(c->dbg->dfs_dir); } #endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 9820d6999f7e..c1cd73b2e06e 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -41,15 +41,17 @@ * @chk_lpt_wastage: used by LPT tree size checker * @chk_lpt_lebs: used by LPT tree size checker * @new_nhead_offs: used by LPT tree size checker - * @new_ihead_lnum: used by debugging to check ihead_lnum - * @new_ihead_offs: used by debugging to check ihead_offs + * @new_ihead_lnum: used by debugging to check @c->ihead_lnum + * @new_ihead_offs: used by debugging to check @c->ihead_offs * - * debugfs_dir_name: name of debugfs directory containing this file-system's - * files - * debugfs_dir: direntry object of the file-system debugfs directory - * dump_lprops: "dump lprops" debugfs knob - * dump_budg: "dump budgeting information" debugfs knob - * dump_tnc: "dump TNC" debugfs knob + * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') + * @saved_free: saved free space (used by 'dbg_save_space_info()') + * + * dfs_dir_name: name of debugfs directory containing this file-system's files + * dfs_dir: direntry object of the file-system debugfs directory + * dfs_dump_lprops: "dump lprops" debugfs knob + * dfs_dump_budg: "dump budgeting information" debugfs knob + * dfs_dump_tnc: "dump TNC" debugfs knob */ struct ubifs_debug_info { void *buf; @@ -69,11 +71,14 @@ struct ubifs_debug_info { int new_ihead_lnum; int new_ihead_offs; - char debugfs_dir_name[100]; - struct dentry *debugfs_dir; - struct dentry *dump_lprops; - struct dentry *dump_budg; - struct dentry *dump_tnc; + struct ubifs_lp_stats saved_lst; + long long saved_free; + + char dfs_dir_name[100]; + struct dentry *dfs_dir; + struct dentry *dfs_dump_lprops; + struct dentry *dfs_dump_budg; + struct dentry *dfs_dump_tnc; }; #define ubifs_assert(expr) do { \ @@ -297,7 +302,8 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, dbg_znode_callback znode_cb, void *priv); /* Checking functions */ - +void dbg_save_space_info(struct ubifs_info *c); +int dbg_check_space_info(struct ubifs_info *c); int dbg_check_lprops(struct ubifs_info *c); int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); @@ -439,6 +445,8 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); #define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 #define dbg_old_index_check_init(c, zroot) 0 +#define dbg_save_space_info(c) ({}) +#define dbg_check_space_info(c) 0 #define dbg_check_old_index(c, zroot) 0 #define dbg_check_cats(c) 0 #define dbg_check_ltab(c) 0 diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 17443d97e6f1..93b6de51f261 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -432,7 +432,6 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, int uninitialized_var(err), appending = !!(pos + len > inode->i_size); struct page *page; - ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); if (unlikely(c->ro_media)) diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 68328c59762b..4cdd284dea56 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -635,10 +635,10 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, * @c: UBIFS file-system description object * @st: return statistics */ -void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *st) +void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst) { spin_lock(&c->space_lock); - memcpy(st, &c->lst, sizeof(struct ubifs_lp_stats)); + memcpy(lst, &c->lst, sizeof(struct ubifs_lp_stats)); spin_unlock(&c->space_lock); } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 807bbd3c8b4b..5c814a71f33a 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1470,6 +1470,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) int err, lnum; mutex_lock(&c->umount_mutex); + dbg_save_space_info(c); c->remounting_rw = 1; c->always_chk_crc = 1; @@ -1573,8 +1574,9 @@ static int ubifs_remount_rw(struct ubifs_info *c) c->vfs_sb->s_flags &= ~MS_RDONLY; c->remounting_rw = 0; c->always_chk_crc = 0; + err = dbg_check_space_info(c); mutex_unlock(&c->umount_mutex); - return 0; + return err; out: vfree(c->orph_buf); @@ -1629,8 +1631,8 @@ static void commit_on_unmount(struct ubifs_info *c) * ubifs_remount_ro - re-mount in read-only mode. * @c: UBIFS file-system description object * - * We rely on VFS to have stopped writing. Possibly the background thread could - * be running a commit, however kthread_stop will wait in that case. + * We assume VFS has stopped writing. Possibly the background thread could be + * running a commit, however kthread_stop will wait in that case. */ static void ubifs_remount_ro(struct ubifs_info *c) { @@ -1640,13 +1642,14 @@ static void ubifs_remount_ro(struct ubifs_info *c) ubifs_assert(!c->ro_media); commit_on_unmount(c); - mutex_lock(&c->umount_mutex); if (c->bgt) { kthread_stop(c->bgt); c->bgt = NULL; } + dbg_save_space_info(c); + for (i = 0; i < c->jhead_cnt; i++) { ubifs_wbuf_sync(&c->jheads[i].wbuf); del_timer_sync(&c->jheads[i].wbuf.timer); @@ -1669,6 +1672,9 @@ static void ubifs_remount_ro(struct ubifs_info *c) vfree(c->ileb_buf); c->ileb_buf = NULL; ubifs_lpt_free(c, 1); + err = dbg_check_space_info(c); + if (err) + ubifs_ro_mode(c, err); mutex_unlock(&c->umount_mutex); } diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index ee9517a7b024..f1754354029f 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1495,6 +1495,7 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, struct ubifs_budget_req *req); long long ubifs_get_free_space(struct ubifs_info *c); +long long ubifs_get_free_space_nolock(struct ubifs_info *c); int ubifs_calc_min_idx_lebs(struct ubifs_info *c); void ubifs_convert_page_budget(struct ubifs_info *c); long long ubifs_reported_space(const struct ubifs_info *c, long long free); @@ -1646,7 +1647,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, const struct ubifs_lprops *lp, int free, int dirty, int flags, int idx_gc_cnt); -void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats); +void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst); void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, int cat); void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, -- cgit v1.2.3-55-g7522 From b4978e949104844224ecf786170c9263efa601f3 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Jan 2009 18:23:03 +0200 Subject: UBIFS: always clean up GC LEB space When we mount UBIFS, GC LEB may contain out-of-date information, and UBIFS should update lprops and set free space for thei LEB. Currently UBIFS does this only if mounted R/W. But for R/O mount we have to do the same, because otherwise we will have incorrect FS free space reported to user-space. Signed-off-by: Artem Bityutskiy --- fs/ubifs/super.c | 47 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 5c814a71f33a..336073e4c391 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -397,6 +397,7 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = UBIFS_MAX_NLEN; buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); + ubifs_assert(buf->f_bfree <= c->block_cnt); return 0; } @@ -735,12 +736,12 @@ static void init_constants_master(struct ubifs_info *c) * take_gc_lnum - reserve GC LEB. * @c: UBIFS file-system description object * - * This function ensures that the LEB reserved for garbage collection is - * unmapped and is marked as "taken" in lprops. We also have to set free space - * to LEB size and dirty space to zero, because lprops may contain out-of-date - * information if the file-system was un-mounted before it has been committed. - * This function returns zero in case of success and a negative error code in - * case of failure. + * This function ensures that the LEB reserved for garbage collection is marked + * as "taken" in lprops. We also have to set free space to LEB size and dirty + * space to zero, because lprops may contain out-of-date information if the + * file-system was un-mounted before it has been committed. This function + * returns zero in case of success and a negative error code in case of + * failure. */ static int take_gc_lnum(struct ubifs_info *c) { @@ -751,10 +752,6 @@ static int take_gc_lnum(struct ubifs_info *c) return -EINVAL; } - err = ubifs_leb_unmap(c, c->gc_lnum); - if (err) - return err; - /* And we have to tell lprops that this LEB is taken */ err = ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0, LPROPS_TAKEN, 0, 0); @@ -1280,10 +1277,19 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_orphans; err = ubifs_rcvry_gc_commit(c); - } else + } else { err = take_gc_lnum(c); - if (err) - goto out_orphans; + if (err) + goto out_orphans; + + /* + * GC LEB may contain garbage if there was an unclean + * reboot, and it should be un-mapped. + */ + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) + return err; + } err = dbg_check_lprops(c); if (err) @@ -1292,6 +1298,16 @@ static int mount_ubifs(struct ubifs_info *c) err = ubifs_recover_size(c); if (err) goto out_orphans; + } else { + /* + * Even if we mount read-only, we have to set space in GC LEB + * to proper value because this affects UBIFS free space + * reporting. We do not want to have a situation when + * re-mounting from R/O to R/W changes amount of free space. + */ + err = take_gc_lnum(c); + if (err) + goto out_orphans; } spin_lock(&ubifs_infos_lock); @@ -1316,6 +1332,8 @@ static int mount_ubifs(struct ubifs_info *c) goto out_infos; c->always_chk_crc = 0; + /* GC LEB has to be empty and taken at this point */ + ubifs_assert(c->lst.taken_empty_lebs == 1); ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", c->vi.ubi_num, c->vi.vol_id, c->vi.name); @@ -1561,7 +1579,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) if (c->need_recovery) err = ubifs_rcvry_gc_commit(c); else - err = take_gc_lnum(c); + err = ubifs_leb_unmap(c, c->gc_lnum); if (err) goto out; @@ -1786,6 +1804,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) c->bu.buf = NULL; } + ubifs_assert(c->lst.taken_empty_lebs == 1); return 0; } -- cgit v1.2.3-55-g7522 From 49d128aa60751a010640f4763d11577e2f508853 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 26 Jan 2009 10:55:40 +0200 Subject: UBIFS: ensure orphan area head is initialized When mounting read-only the orphan area head is not initialized. It must be initialized when remounting read/write, but it was not. This patch fixes that. [Artem: sorry, added comment tweaking noise] Signed-off-by: Adrian Hunter Signed-off-by: Artem Bityutskiy --- fs/ubifs/orphan.c | 38 +++++++++++++++++++------------------- fs/ubifs/super.c | 6 ++++++ fs/ubifs/ubifs.h | 1 + 3 files changed, 26 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 9e6f403f170e..152a7b34a141 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -46,7 +46,7 @@ * Orphans are accumulated in a rb-tree. When an inode's link count drops to * zero, the inode number is added to the rb-tree. It is removed from the tree * when the inode is deleted. Any new orphans that are in the orphan tree when - * the commit is run, are written to the orphan area in 1 or more orph nodes. + * the commit is run, are written to the orphan area in 1 or more orphan nodes. * If the orphan area is full, it is consolidated to make space. There is * always enough space because validation prevents the user from creating more * than the maximum number of orphans allowed. @@ -231,7 +231,7 @@ static int tot_avail_orphs(struct ubifs_info *c) } /** - * do_write_orph_node - write a node + * do_write_orph_node - write a node to the orphan head. * @c: UBIFS file-system description object * @len: length of node * @atomic: write atomically @@ -264,11 +264,11 @@ static int do_write_orph_node(struct ubifs_info *c, int len, int atomic) } /** - * write_orph_node - write an orph node + * write_orph_node - write an orphan node. * @c: UBIFS file-system description object * @atomic: write atomically * - * This function builds an orph node from the cnext list and writes it to the + * This function builds an orphan node from the cnext list and writes it to the * orphan head. On success, %0 is returned, otherwise a negative error code * is returned. */ @@ -326,11 +326,11 @@ static int write_orph_node(struct ubifs_info *c, int atomic) } /** - * write_orph_nodes - write orph nodes until there are no more to commit + * write_orph_nodes - write orphan nodes until there are no more to commit. * @c: UBIFS file-system description object * @atomic: write atomically * - * This function writes orph nodes for all the orphans to commit. On success, + * This function writes orphan nodes for all the orphans to commit. On success, * %0 is returned, otherwise a negative error code is returned. */ static int write_orph_nodes(struct ubifs_info *c, int atomic) @@ -478,14 +478,14 @@ int ubifs_orphan_end_commit(struct ubifs_info *c) } /** - * clear_orphans - erase all LEBs used for orphans. + * ubifs_clear_orphans - erase all LEBs used for orphans. * @c: UBIFS file-system description object * * If recovery is not required, then the orphans from the previous session * are not needed. This function locates the LEBs used to record * orphans, and un-maps them. */ -static int clear_orphans(struct ubifs_info *c) +int ubifs_clear_orphans(struct ubifs_info *c) { int lnum, err; @@ -547,9 +547,9 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) * do_kill_orphans - remove orphan inodes from the index. * @c: UBIFS file-system description object * @sleb: scanned LEB - * @last_cmt_no: cmt_no of last orph node read is passed and returned here + * @last_cmt_no: cmt_no of last orphan node read is passed and returned here * @outofdate: whether the LEB is out of date is returned here - * @last_flagged: whether the end orph node is encountered + * @last_flagged: whether the end orphan node is encountered * * This function is a helper to the 'kill_orphans()' function. It goes through * every orphan node in a LEB and for every inode number recorded, removes @@ -580,8 +580,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, /* * The commit number on the master node may be less, because * of a failed commit. If there are several failed commits in a - * row, the commit number written on orph nodes will continue to - * increase (because the commit number is adjusted here) even + * row, the commit number written on orphan nodes will continue + * to increase (because the commit number is adjusted here) even * though the commit number on the master node stays the same * because the master node has not been re-written. */ @@ -589,9 +589,9 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, c->cmt_no = cmt_no; if (cmt_no < *last_cmt_no && *last_flagged) { /* - * The last orph node had a higher commit number and was - * flagged as the last written for that commit number. - * That makes this orph node, out of date. + * The last orphan node had a higher commit number and + * was flagged as the last written for that commit + * number. That makes this orphan node, out of date. */ if (!first) { ubifs_err("out of order commit number %llu in " @@ -658,10 +658,10 @@ static int kill_orphans(struct ubifs_info *c) /* * Orph nodes always start at c->orph_first and are written to each * successive LEB in turn. Generally unused LEBs will have been unmapped - * but may contain out of date orph nodes if the unmap didn't go - * through. In addition, the last orph node written for each commit is + * but may contain out of date orphan nodes if the unmap didn't go + * through. In addition, the last orphan node written for each commit is * marked (top bit of orph->cmt_no is set to 1). It is possible that - * there are orph nodes from the next commit (i.e. the commit did not + * there are orphan nodes from the next commit (i.e. the commit did not * complete successfully). In that case, no orphans will have been lost * due to the way that orphans are written, and any orphans added will * be valid orphans anyway and so can be deleted. @@ -718,7 +718,7 @@ int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only) if (unclean) err = kill_orphans(c); else if (!read_only) - err = clear_orphans(c); + err = ubifs_clear_orphans(c); return err; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 336073e4c391..fd7fc7f3b7a6 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1524,6 +1524,12 @@ static int ubifs_remount_rw(struct ubifs_info *c) err = ubifs_recover_inl_heads(c, c->sbuf); if (err) goto out; + } else { + /* A readonly mount is not allowed to have orphans */ + ubifs_assert(c->tot_orphans == 0); + err = ubifs_clear_orphans(c); + if (err) + goto out; } if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) { diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index f1754354029f..9999ff0aaa43 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1604,6 +1604,7 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum); int ubifs_orphan_start_commit(struct ubifs_info *c); int ubifs_orphan_end_commit(struct ubifs_info *c); int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only); +int ubifs_clear_orphans(struct ubifs_info *c); /* lpt.c */ int ubifs_calc_lpt_geom(struct ubifs_info *c); -- cgit v1.2.3-55-g7522 From 6ba87c9b920bea8c2703308d31eb7de925242c30 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 26 Jan 2009 16:12:20 +0200 Subject: UBIFS: fix assertions I introduce wrong assertions in one of the previous commits, this patch fixes them. Also, initialize debugfs after the debugging check. This is a little nicer because we want the FS data to be accessible to external users after everything has been initialized. Signed-off-by: Artem Bityutskiy --- fs/ubifs/super.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index fd7fc7f3b7a6..dbfc88714716 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1320,20 +1320,21 @@ static int mount_ubifs(struct ubifs_info *c) else { c->need_recovery = 0; ubifs_msg("recovery completed"); + /* GC LEB has to be empty and taken at this point */ + ubifs_assert(c->lst.taken_empty_lebs == 1); } - } + } else + ubifs_assert(c->lst.taken_empty_lebs == 1); - err = dbg_debugfs_init_fs(c); + err = dbg_check_filesystem(c); if (err) goto out_infos; - err = dbg_check_filesystem(c); + err = dbg_debugfs_init_fs(c); if (err) goto out_infos; c->always_chk_crc = 0; - /* GC LEB has to be empty and taken at this point */ - ubifs_assert(c->lst.taken_empty_lebs == 1); ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", c->vi.ubi_num, c->vi.vol_id, c->vi.name); @@ -1663,7 +1664,7 @@ static void ubifs_remount_ro(struct ubifs_info *c) int i, err; ubifs_assert(!c->need_recovery); - ubifs_assert(!c->ro_media); + ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); commit_on_unmount(c); mutex_lock(&c->umount_mutex); -- cgit v1.2.3-55-g7522 From 6f7ab6d458bbfc2f55d295fa3e6b9e69cdb1d517 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 27 Jan 2009 16:12:31 +0200 Subject: UBIFS: fix no_chk_data_crc When data CRC checking is disabled, UBIFS returns incorrect return code from the 'try_read_node()' function (0 instead of 1, which means CRC error), which make the caller re-read the data node again, but using a different code patch, so the second read is fine. Thus, we read the same node twice. And the result of this is that UBIFS is slower with no_chk_data_crc option than it is with chk_data_crc option. This patches fixes the problem. Reported-by: Reuben Dowle Signed-off-by: Artem Bityutskiy --- fs/ubifs/io.c | 22 ++++++++++++++-------- fs/ubifs/tnc.c | 12 ++++++++---- fs/ubifs/ubifs.h | 2 +- 3 files changed, 23 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 01682713af69..e8e632a1dcdf 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -29,7 +29,7 @@ * would have been wasted for padding to the nearest minimal I/O unit boundary. * Instead, data first goes to the write-buffer and is flushed when the * buffer is full or when it is not used for some time (by timer). This is - * similarto the mechanism is used by JFFS2. + * similar to the mechanism is used by JFFS2. * * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by * mutexes defined inside these objects. Since sometimes upper-level code @@ -75,7 +75,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) * @lnum: logical eraseblock number * @offs: offset within the logical eraseblock * @quiet: print no messages - * @chk_crc: indicates whether to always check the CRC + * @must_chk_crc: indicates whether to always check the CRC * * This function checks node magic number and CRC checksum. This function also * validates node length to prevent UBIFS from becoming crazy when an attacker @@ -83,11 +83,17 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) * node length in the common header could cause UBIFS to read memory outside of * allocated buffer when checking the CRC checksum. * - * This function returns zero in case of success %-EUCLEAN in case of bad CRC - * or magic. + * This function may skip data nodes CRC checking if @c->no_chk_data_crc is + * true, which is controlled by corresponding UBIFS mount option. However, if + * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is + * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is + * ignored and CRC is checked. + * + * This function returns zero in case of success and %-EUCLEAN in case of bad + * CRC or magic. */ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, - int offs, int quiet, int chk_crc) + int offs, int quiet, int must_chk_crc) { int err = -EINVAL, type, node_len; uint32_t crc, node_crc, magic; @@ -123,9 +129,9 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, node_len > c->ranges[type].max_len) goto out_len; - if (!chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc) - if (c->no_chk_data_crc) - return 0; + if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && + c->no_chk_data_crc) + return 0; crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); node_crc = le32_to_cpu(ch->crc); diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index f7e36f545527..fa28a84c6a1b 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -443,6 +443,11 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, * This function performs that same function as ubifs_read_node except that * it does not require that there is actually a node present and instead * the return code indicates if a node was read. + * + * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc + * is true (it is controlled by corresponding mount option). However, if + * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always + * checked. */ static int try_read_node(const struct ubifs_info *c, void *buf, int type, int len, int lnum, int offs) @@ -470,9 +475,8 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, if (node_len != len) return 0; - if (type == UBIFS_DATA_NODE && !c->always_chk_crc) - if (c->no_chk_data_crc) - return 0; + if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) + return 1; crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); node_crc = le32_to_cpu(ch->crc); @@ -1506,7 +1510,7 @@ out: * * Note, if the bulk-read buffer length (@bu->buf_len) is known, this function * makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares - * maxumum possible amount of nodes for bulk-read. + * maximum possible amount of nodes for bulk-read. */ int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu) { diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 9999ff0aaa43..29dfa816077b 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1428,7 +1428,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, int offs, int dtype); int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, - int offs, int quiet, int chk_crc); + int offs, int quiet, int must_chk_crc); void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); int ubifs_io_init(struct ubifs_info *c); -- cgit v1.2.3-55-g7522 From f0e0059b9c18426cffdcc04161062251a8f9741e Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sun, 25 Jan 2009 20:53:00 -0600 Subject: don't reallocate sxp variable passed into xfs_swapext fixes kernel.org bugzilla 12538, xfs_fsr fails on 2.6.29-rc kernels Regression caused by 743bb4650da9e2595d6cedd01c680b5b9398c74a This was an embarrasing mistake, reallocating the sxp pointer passed in from the main ioctl switch. Signed-off-by: Eric Sandeen Tested-by: Paul Martin Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/xfs_dfrag.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index b4c1ee713492..f8278cfcc1d3 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -55,17 +55,11 @@ xfs_swapext( struct file *file, *target_file; int error = 0; - sxp = kmem_alloc(sizeof(xfs_swapext_t), KM_MAYFAIL); - if (!sxp) { - error = XFS_ERROR(ENOMEM); - goto out; - } - /* Pull information for the target fd */ file = fget((int)sxp->sx_fdtarget); if (!file) { error = XFS_ERROR(EINVAL); - goto out_free_sxp; + goto out; } if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) { @@ -109,8 +103,6 @@ xfs_swapext( fput(target_file); out_put_file: fput(file); - out_free_sxp: - kmem_free(sxp); out: return error; } -- cgit v1.2.3-55-g7522 From 4a29d2005b0f28d018d36d209c47f3973a725df5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 27 Jan 2009 15:22:54 +0200 Subject: UBIFS: fix LPT out-of-space bug (again) The function to traverse and dirty the LPT was still not dirtying all nodes, with the result that the LPT could run out of space. Signed-off-by: Adrian Hunter Signed-off-by: Artem Bityutskiy --- fs/ubifs/lpt_commit.c | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 96ca95707175..3216a1f277f8 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -556,23 +556,23 @@ no_space: } /** - * next_pnode - find next pnode. + * next_pnode_to_dirty - find next pnode to dirty. * @c: UBIFS file-system description object * @pnode: pnode * - * This function returns the next pnode or %NULL if there are no more pnodes. + * This function returns the next pnode to dirty or %NULL if there are no more + * pnodes. Note that pnodes that have never been written (lnum == 0) are + * skipped. */ -static struct ubifs_pnode *next_pnode(struct ubifs_info *c, - struct ubifs_pnode *pnode) +static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c, + struct ubifs_pnode *pnode) { struct ubifs_nnode *nnode; int iip; /* Try to go right */ nnode = pnode->parent; - iip = pnode->iip + 1; - if (iip < UBIFS_LPT_FANOUT) { - /* We assume here that LEB zero is never an LPT LEB */ + for (iip = pnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) { if (nnode->nbranch[iip].lnum) return ubifs_get_pnode(c, nnode, iip); } @@ -583,8 +583,11 @@ static struct ubifs_pnode *next_pnode(struct ubifs_info *c, nnode = nnode->parent; if (!nnode) return NULL; - /* We assume here that LEB zero is never an LPT LEB */ - } while (iip >= UBIFS_LPT_FANOUT || !nnode->nbranch[iip].lnum); + for (; iip < UBIFS_LPT_FANOUT; iip++) { + if (nnode->nbranch[iip].lnum) + break; + } + } while (iip >= UBIFS_LPT_FANOUT); /* Go right */ nnode = ubifs_get_nnode(c, nnode, iip); @@ -593,12 +596,29 @@ static struct ubifs_pnode *next_pnode(struct ubifs_info *c, /* Go down to level 1 */ while (nnode->level > 1) { - nnode = ubifs_get_nnode(c, nnode, 0); + for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) { + if (nnode->nbranch[iip].lnum) + break; + } + if (iip >= UBIFS_LPT_FANOUT) { + /* + * Should not happen, but we need to keep going + * if it does. + */ + iip = 0; + } + nnode = ubifs_get_nnode(c, nnode, iip); if (IS_ERR(nnode)) return (void *)nnode; } - return ubifs_get_pnode(c, nnode, 0); + for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) + if (nnode->nbranch[iip].lnum) + break; + if (iip >= UBIFS_LPT_FANOUT) + /* Should not happen, but we need to keep going if it does */ + iip = 0; + return ubifs_get_pnode(c, nnode, iip); } /** @@ -688,7 +708,7 @@ static int make_tree_dirty(struct ubifs_info *c) pnode = pnode_lookup(c, 0); while (pnode) { do_make_pnode_dirty(c, pnode); - pnode = next_pnode(c, pnode); + pnode = next_pnode_to_dirty(c, pnode); if (IS_ERR(pnode)) return PTR_ERR(pnode); } -- cgit v1.2.3-55-g7522 From 3eb14297c4b85af0c5e6605e18d93b6031330d71 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 29 Jan 2009 11:17:24 +0200 Subject: UBIFS: sync wbufs after syncing inodes and pages All writes go through wbufs so they must be sync'd after syncing inodes and pages. Signed-off-by: Adrian Hunter Signed-off-by: Artem Bityutskiy --- fs/ubifs/super.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index dbfc88714716..3ddd754262b4 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -450,16 +450,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) if (sb->s_flags & MS_RDONLY) return 0; - /* - * Synchronize write buffers, because 'ubifs_run_commit()' does not - * do this if it waits for an already running commit. - */ - for (i = 0; i < c->jhead_cnt; i++) { - err = ubifs_wbuf_sync(&c->jheads[i].wbuf); - if (err) - return err; - } - /* * VFS calls '->sync_fs()' before synchronizing all dirty inodes and * pages, so synchronize them first, then commit the journal. Strictly @@ -471,6 +461,16 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) */ generic_sync_sb_inodes(sb, &wbc); + /* + * Synchronize write buffers, because 'ubifs_run_commit()' does not + * do this if it waits for an already running commit. + */ + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + return err; + } + err = ubifs_run_commit(c); if (err) return err; -- cgit v1.2.3-55-g7522 From 227c75c91dbfa037d109ab7ef45b7f5ba9cab6d0 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 29 Jan 2009 11:53:51 +0200 Subject: UBIFS: spelling fix 'date' -> 'data' Signed-off-by: Adrian Hunter Signed-off-by: Artem Bityutskiy --- fs/ubifs/debug.c | 2 +- fs/ubifs/gc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 9a41f6f245b7..e975bd82f38b 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -1407,7 +1407,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra) * @c: UBIFS file-system description object * @leaf_cb: called for each leaf node * @znode_cb: called for each indexing node - * @priv: private date which is passed to callbacks + * @priv: private data which is passed to callbacks * * This function walks the UBIFS index and calls the @leaf_cb for each leaf * node and @znode_cb for each indexing node. Returns zero in case of success diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 9760154d874b..bad3339a800d 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -401,7 +401,7 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) /* * Don't release the LEB until after the next commit, because - * it may contain date which is needed for recovery. So + * it may contain data which is needed for recovery. So * although we freed this LEB, it will become usable only after * the commit. */ -- cgit v1.2.3-55-g7522 From b466f17d780c5b72427f36aef22ecdec9f1d0689 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 29 Jan 2009 12:59:33 +0200 Subject: UBIFS: remount ro fixes - preserve the idx_gc list - it will be needed in the same state, should UBIFS be remounted rw again - prevent remounting ro if we have switched to read only mode (due to a fatal error) Signed-off-by: Adrian Hunter Signed-off-by: Artem Bityutskiy --- fs/ubifs/gc.c | 18 +++++------------- fs/ubifs/super.c | 14 +++++++------- fs/ubifs/ubifs.h | 2 +- 3 files changed, 13 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index bad3339a800d..a711d33b3d3e 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -830,29 +830,21 @@ out: * ubifs_destroy_idx_gc - destroy idx_gc list. * @c: UBIFS file-system description object * - * This function destroys the @c->idx_gc list. It is called when unmounting or - * remounting read-only so locks are not needed. Returns zero in case of - * success and a negative error code in case of failure. + * This function destroys the @c->idx_gc list. It is called when unmounting + * so locks are not needed. Returns zero in case of success and a negative + * error code in case of failure. */ -int ubifs_destroy_idx_gc(struct ubifs_info *c) +void ubifs_destroy_idx_gc(struct ubifs_info *c) { - int ret = 0; - while (!list_empty(&c->idx_gc)) { - int err; struct ubifs_gced_idx_leb *idx_gc; idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, list); - err = ubifs_change_one_lp(c, idx_gc->lnum, LPROPS_NC, - LPROPS_NC, 0, LPROPS_TAKEN, -1); - if (err && !ret) - ret = err; + c->idx_gc_cnt -= 1; list_del(&idx_gc->list); kfree(idx_gc); } - - return ret; } /** diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 3ddd754262b4..daa679d3a03e 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1687,10 +1687,6 @@ static void ubifs_remount_ro(struct ubifs_info *c) if (err) ubifs_ro_mode(c, err); - err = ubifs_destroy_idx_gc(c); - if (err) - ubifs_ro_mode(c, err); - free_wbufs(c); vfree(c->orph_buf); c->orph_buf = NULL; @@ -1793,15 +1789,19 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { if (c->ro_media) { - ubifs_msg("cannot re-mount R/W, UBIFS is working in " - "R/O mode"); + ubifs_msg("cannot re-mount due to prior errors"); return -EINVAL; } err = ubifs_remount_rw(c); if (err) return err; - } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) + } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { + if (c->ro_media) { + ubifs_msg("cannot re-mount due to prior errors"); + return -EINVAL; + } ubifs_remount_ro(c); + } if (c->bulk_read == 1) bu_init(c); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 29dfa816077b..535f87426791 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1594,7 +1594,7 @@ int ubifs_replay_journal(struct ubifs_info *c); int ubifs_garbage_collect(struct ubifs_info *c, int anyway); int ubifs_gc_start_commit(struct ubifs_info *c); int ubifs_gc_end_commit(struct ubifs_info *c); -int ubifs_destroy_idx_gc(struct ubifs_info *c); +void ubifs_destroy_idx_gc(struct ubifs_info *c); int ubifs_get_idx_gc_leb(struct ubifs_info *c); int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp); -- cgit v1.2.3-55-g7522 From a2b9df3ff691db8e5e521dccd231a8098bbf7416 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 29 Jan 2009 16:22:54 +0200 Subject: UBIFS: return sensible error codes When mounting/re-mounting, UBIFS returns EINVAL even if the ENOSPC or EROFS codes are are much better, just because we have not found references to ENOSPC/EROFS in mount (2) man pages. This patch changes this behaviour and makes UBIFS return real error code, because: 1. It is just less confusing and more logical 2. mount is not described in SuSv3, so it seems to be not really well-standartized 3. we do not cover all cases, and any random undocumented in man pages error code may be returned anyway Signed-off-by: Artem Bityutskiy --- fs/ubifs/master.c | 2 +- fs/ubifs/super.c | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 71d5493bf565..a88f33801b98 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -354,7 +354,7 @@ int ubifs_write_master(struct ubifs_info *c) int err, lnum, offs, len; if (c->ro_media) - return -EINVAL; + return -EROFS; lnum = UBIFS_MST_LNUM; offs = c->mst_offs + c->mst_node_alsz; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index daa679d3a03e..ab85eb8cce79 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1085,12 +1085,7 @@ static int check_free_space(struct ubifs_info *c) ubifs_err("insufficient free space to mount in read/write mode"); dbg_dump_budg(c); dbg_dump_lprops(c); - /* - * We return %-EINVAL instead of %-ENOSPC because it seems to - * be the closest error code mentioned in the mount function - * documentation. - */ - return -EINVAL; + return -ENOSPC; } return 0; } @@ -1790,7 +1785,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { if (c->ro_media) { ubifs_msg("cannot re-mount due to prior errors"); - return -EINVAL; + return -EROFS; } err = ubifs_remount_rw(c); if (err) @@ -1798,7 +1793,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { if (c->ro_media) { ubifs_msg("cannot re-mount due to prior errors"); - return -EINVAL; + return -EROFS; } ubifs_remount_ro(c); } -- cgit v1.2.3-55-g7522 From 27ad27993313312a4ad0047d0a944c425cd511a5 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 29 Jan 2009 16:34:30 +0200 Subject: UBIFS: remove fast unmounting This UBIFS feature has never worked properly, and it was a mistake to add it because we simply have no use-cases. So, lets still accept the fast_unmount mount option, but ignore it. This does not change much, because UBIFS commit in sync_fs anyway, and sync_fs is called while unmounting. Signed-off-by: Artem Bityutskiy --- Documentation/filesystems/ubifs.txt | 7 ------ fs/ubifs/super.c | 50 ++++--------------------------------- fs/ubifs/ubifs.h | 2 -- 3 files changed, 5 insertions(+), 54 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt index 84da2a4ba25a..12fedb7834c6 100644 --- a/Documentation/filesystems/ubifs.txt +++ b/Documentation/filesystems/ubifs.txt @@ -79,13 +79,6 @@ Mount options (*) == default. -norm_unmount (*) commit on unmount; the journal is committed - when the file-system is unmounted so that the - next mount does not have to replay the journal - and it becomes very fast; -fast_unmount do not commit on unmount; this option makes - unmount faster, but the next mount slower - because of the need to replay the journal. bulk_read read more in one go to take advantage of flash media that read faster sequentially no_bulk_read (*) do not bulk-read diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index ab85eb8cce79..1182b66a5491 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -957,13 +957,16 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, token = match_token(p, tokens, args); switch (token) { + /* + * %Opt_fast_unmount and %Opt_norm_unmount options are ignored. + * We accepte them in order to be backware-compatible. But this + * should be removed at some point. + */ case Opt_fast_unmount: c->mount_opts.unmount_mode = 2; - c->fast_unmount = 1; break; case Opt_norm_unmount: c->mount_opts.unmount_mode = 1; - c->fast_unmount = 0; break; case Opt_bulk_read: c->mount_opts.bulk_read = 2; @@ -1359,7 +1362,6 @@ static int mount_ubifs(struct ubifs_info *c) c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7], c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11], c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]); - dbg_msg("fast unmount: %d", c->fast_unmount); dbg_msg("big_lpt %d", c->big_lpt); dbg_msg("log LEBs: %d (%d - %d)", c->log_lebs, UBIFS_LOG_LNUM, c->log_last); @@ -1615,38 +1617,6 @@ out: return err; } -/** - * commit_on_unmount - commit the journal when un-mounting. - * @c: UBIFS file-system description object - * - * This function is called during un-mounting and re-mounting, and it commits - * the journal unless the "fast unmount" mode is enabled. - */ -static void commit_on_unmount(struct ubifs_info *c) -{ - long long bud_bytes; - - if (!c->fast_unmount) { - dbg_gen("skip committing - fast unmount enabled"); - return; - } - - /* - * This function is called before the background thread is stopped, so - * we may race with ongoing commit, which means we have to take - * @c->bud_lock to access @c->bud_bytes. - */ - spin_lock(&c->buds_lock); - bud_bytes = c->bud_bytes; - spin_unlock(&c->buds_lock); - - if (bud_bytes) { - dbg_gen("run commit"); - ubifs_run_commit(c); - } else - dbg_gen("journal is empty, do not run commit"); -} - /** * ubifs_remount_ro - re-mount in read-only mode. * @c: UBIFS file-system description object @@ -1661,7 +1631,6 @@ static void ubifs_remount_ro(struct ubifs_info *c) ubifs_assert(!c->need_recovery); ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); - commit_on_unmount(c); mutex_lock(&c->umount_mutex); if (c->bgt) { kthread_stop(c->bgt); @@ -2077,15 +2046,6 @@ out_close: static void ubifs_kill_sb(struct super_block *sb) { - struct ubifs_info *c = sb->s_fs_info; - - /* - * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()' - * in order to be outside BKL. - */ - if (sb->s_root && !(sb->s_flags & MS_RDONLY)) - commit_on_unmount(c); - /* The un-mount routine is actually done in put_super() */ generic_shutdown_super(sb); } diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 535f87426791..039a68bee29a 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -961,7 +961,6 @@ struct ubifs_debug_info; * @cs_lock: commit state lock * @cmt_wq: wait queue to sleep on if the log is full and a commit is running * - * @fast_unmount: do not run journal commit before un-mounting * @big_lpt: flag that LPT is too big to write whole during commit * @no_chk_data_crc: do not check CRCs when reading data nodes (except during * recovery) @@ -1202,7 +1201,6 @@ struct ubifs_info { spinlock_t cs_lock; wait_queue_head_t cmt_wq; - unsigned int fast_unmount:1; unsigned int big_lpt:1; unsigned int no_chk_data_crc:1; unsigned int bulk_read:1; -- cgit v1.2.3-55-g7522 From ea455f8ab68338ba69f5d3362b342c115bea8e13 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 12 Jan 2009 23:20:31 +0100 Subject: ocfs2: Push out dropping of dentry lock to ocfs2_wq Dropping of last reference to dentry lock is a complicated operation involving dropping of reference to inode. This can get complicated and quota code in particular needs to obtain some quota locks which leads to potential deadlock. Thus we defer dropping of inode reference to ocfs2_wq. Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/dcache.c | 42 +++++++++++++++++++++++++++++++++++++++--- fs/ocfs2/dcache.h | 9 ++++++++- fs/ocfs2/ocfs2.h | 6 ++++++ fs/ocfs2/super.c | 3 +++ 4 files changed, 56 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index b1cc7c381e88..e9d7c2038c0f 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -38,6 +38,7 @@ #include "dlmglue.h" #include "file.h" #include "inode.h" +#include "super.h" static int ocfs2_dentry_revalidate(struct dentry *dentry, @@ -294,6 +295,34 @@ out_attach: return ret; } +static DEFINE_SPINLOCK(dentry_list_lock); + +/* We limit the number of dentry locks to drop in one go. We have + * this limit so that we don't starve other users of ocfs2_wq. */ +#define DL_INODE_DROP_COUNT 64 + +/* Drop inode references from dentry locks */ +void ocfs2_drop_dl_inodes(struct work_struct *work) +{ + struct ocfs2_super *osb = container_of(work, struct ocfs2_super, + dentry_lock_work); + struct ocfs2_dentry_lock *dl; + int drop_count = DL_INODE_DROP_COUNT; + + spin_lock(&dentry_list_lock); + while (osb->dentry_lock_list && drop_count--) { + dl = osb->dentry_lock_list; + osb->dentry_lock_list = dl->dl_next; + spin_unlock(&dentry_list_lock); + iput(dl->dl_inode); + kfree(dl); + spin_lock(&dentry_list_lock); + } + if (osb->dentry_lock_list) + queue_work(ocfs2_wq, &osb->dentry_lock_work); + spin_unlock(&dentry_list_lock); +} + /* * ocfs2_dentry_iput() and friends. * @@ -318,16 +347,23 @@ out_attach: static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, struct ocfs2_dentry_lock *dl) { - iput(dl->dl_inode); ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); ocfs2_lock_res_free(&dl->dl_lockres); - kfree(dl); + + /* We leave dropping of inode reference to ocfs2_wq as that can + * possibly lead to inode deletion which gets tricky */ + spin_lock(&dentry_list_lock); + if (!osb->dentry_lock_list) + queue_work(ocfs2_wq, &osb->dentry_lock_work); + dl->dl_next = osb->dentry_lock_list; + osb->dentry_lock_list = dl; + spin_unlock(&dentry_list_lock); } void ocfs2_dentry_lock_put(struct ocfs2_super *osb, struct ocfs2_dentry_lock *dl) { - int unlock = 0; + int unlock; BUG_ON(dl->dl_count == 0); diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h index c091c34d9883..d06e16c06640 100644 --- a/fs/ocfs2/dcache.h +++ b/fs/ocfs2/dcache.h @@ -29,8 +29,13 @@ extern struct dentry_operations ocfs2_dentry_ops; struct ocfs2_dentry_lock { + /* Use count of dentry lock */ unsigned int dl_count; - u64 dl_parent_blkno; + union { + /* Linked list of dentry locks to release */ + struct ocfs2_dentry_lock *dl_next; + u64 dl_parent_blkno; + }; /* * The ocfs2_dentry_lock keeps an inode reference until @@ -47,6 +52,8 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, void ocfs2_dentry_lock_put(struct ocfs2_super *osb, struct ocfs2_dentry_lock *dl); +void ocfs2_drop_dl_inodes(struct work_struct *work); + struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, int skip_unhashed); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index ad5c24a29edd..077384135f4e 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -210,6 +210,7 @@ struct ocfs2_journal; struct ocfs2_slot_info; struct ocfs2_recovery_map; struct ocfs2_quota_recovery; +struct ocfs2_dentry_lock; struct ocfs2_super { struct task_struct *commit_task; @@ -325,6 +326,11 @@ struct ocfs2_super struct list_head blocked_lock_list; unsigned long blocked_lock_count; + /* List of dentry locks to release. Anyone can add locks to + * the list, ocfs2_wq processes the list */ + struct ocfs2_dentry_lock *dentry_lock_list; + struct work_struct dentry_lock_work; + wait_queue_head_t osb_mount_event; /* Truncate log info */ diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 43ed11345b59..b1cb38fbe807 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1887,6 +1887,9 @@ static int ocfs2_initialize_super(struct super_block *sb, INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); journal->j_state = OCFS2_JOURNAL_FREE; + INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes); + osb->dentry_lock_list = NULL; + /* get some pseudo constants for clustersize bits */ osb->s_clustersize_bits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); -- cgit v1.2.3-55-g7522 From f8afead7169f0f28a4b421bcbdb510e52a2d094d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 12 Jan 2009 23:20:32 +0100 Subject: ocfs2: Fix possible deadlock in ocfs2_write_dquot() It could happen that some limit has been set via quotactl() and in parallel ->mark_dirty() is called from another thread doing e.g. dquot_alloc_space(). In such case ocfs2_write_dquot() must not try to sync the dquot because that needs global quota lock but that ranks above transaction start. Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/quota_global.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index f4efa89baee5..1ed0f7c86869 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -754,7 +754,9 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot) if (dquot->dq_flags & mask) sync = 1; spin_unlock(&dq_data_lock); - if (!sync) { + /* This is a slight hack but we can't afford getting global quota + * lock if we already have a transaction started. */ + if (!sync || journal_current_handle()) { status = ocfs2_write_dquot(dquot); goto out; } -- cgit v1.2.3-55-g7522 From 0e0333429a6280e6eb3c98845e4eed90d5f8078a Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 17 Dec 2008 14:23:52 -0800 Subject: configfs: Silence lockdep on mkdir(), rmdir() and configfs_depend_item() When attaching default groups (subdirs) of a new group (in mkdir() or in configfs_register()), configfs recursively takes inode's mutexes along the path from the parent of the new group to the default subdirs. This is needed to ensure that the VFS will not race with operations on these sub-dirs. This is safe for the following reasons: - the VFS allows one to lock first an inode and second one of its children (The lock subclasses for this pattern are respectively I_MUTEX_PARENT and I_MUTEX_CHILD); - from this rule any inode path can be recursively locked in descending order as long as it stays under a single mountpoint and does not follow symlinks. Unfortunately lockdep does not know (yet?) how to handle such recursion. I've tried to use Peter Zijlstra's lock_set_subclass() helper to upgrade i_mutexes from I_MUTEX_CHILD to I_MUTEX_PARENT when we know that we might recursively lock some of their descendant, but this usage does not seem to fit the purpose of lock_set_subclass() because it leads to several i_mutex locked with subclass I_MUTEX_PARENT by the same task. >From inside configfs it is not possible to serialize those recursive locking with a top-level one, because mkdir() and rmdir() are already called with inodes locked by the VFS. So using some mutex_lock_nest_lock() is not an option. I am proposing two solutions: 1) one that wraps recursive mutex_lock()s with lockdep_off()/lockdep_on(). 2) (as suggested earlier by Peter Zijlstra) one that puts the i_mutexes recursively locked in different classes based on their depth from the top-level config_group created. This induces an arbitrary limit (MAX_LOCK_DEPTH - 2 == 46) on the nesting of configfs default groups whenever lockdep is activated but this limit looks reasonably high. Unfortunately, this alos isolates VFS operations on configfs default groups from the others and thus lowers the chances to detect locking issues. This patch implements solution 1). Solution 2) looks better from lockdep's point of view, but fails with configfs_depend_item(). This needs to rework the locking scheme of configfs_depend_item() by removing the variable lock recursion depth, and I think that it's doable thanks to the configfs_dirent_lock. For now, let's stick to solution 1). Signed-off-by: Louis Rilling Acked-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/configfs/dir.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'fs') diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 8e93341f3e82..9c2358391147 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -553,12 +553,24 @@ static void detach_groups(struct config_group *group) child = sd->s_dentry; + /* + * Note: we hide this from lockdep since we have no way + * to teach lockdep about recursive + * I_MUTEX_PARENT -> I_MUTEX_CHILD patterns along a path + * in an inode tree, which are valid as soon as + * I_MUTEX_PARENT -> I_MUTEX_CHILD is valid from a + * parent inode to one of its children. + */ + lockdep_off(); mutex_lock(&child->d_inode->i_mutex); + lockdep_on(); configfs_detach_group(sd->s_element); child->d_inode->i_flags |= S_DEAD; + lockdep_off(); mutex_unlock(&child->d_inode->i_mutex); + lockdep_on(); d_delete(child); dput(child); @@ -748,11 +760,22 @@ static int configfs_attach_item(struct config_item *parent_item, * We are going to remove an inode and its dentry but * the VFS may already have hit and used them. Thus, * we must lock them as rmdir() would. + * + * Note: we hide this from lockdep since we have no way + * to teach lockdep about recursive + * I_MUTEX_PARENT -> I_MUTEX_CHILD patterns along a path + * in an inode tree, which are valid as soon as + * I_MUTEX_PARENT -> I_MUTEX_CHILD is valid from a + * parent inode to one of its children. */ + lockdep_off(); mutex_lock(&dentry->d_inode->i_mutex); + lockdep_on(); configfs_remove_dir(item); dentry->d_inode->i_flags |= S_DEAD; + lockdep_off(); mutex_unlock(&dentry->d_inode->i_mutex); + lockdep_on(); d_delete(dentry); } } @@ -787,14 +810,25 @@ static int configfs_attach_group(struct config_item *parent_item, * * We must also lock the inode to remove it safely in case of * error, as rmdir() would. + * + * Note: we hide this from lockdep since we have no way + * to teach lockdep about recursive + * I_MUTEX_PARENT -> I_MUTEX_CHILD patterns along a path + * in an inode tree, which are valid as soon as + * I_MUTEX_PARENT -> I_MUTEX_CHILD is valid from a + * parent inode to one of its children. */ + lockdep_off(); mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); + lockdep_on(); ret = populate_groups(to_config_group(item)); if (ret) { configfs_detach_item(item); dentry->d_inode->i_flags |= S_DEAD; } + lockdep_off(); mutex_unlock(&dentry->d_inode->i_mutex); + lockdep_on(); if (ret) d_delete(dentry); } @@ -956,7 +990,17 @@ static int configfs_depend_prep(struct dentry *origin, BUG_ON(!origin || !sd); /* Lock this guy on the way down */ + /* + * Note: we hide this from lockdep since we have no way + * to teach lockdep about recursive + * I_MUTEX_PARENT -> I_MUTEX_CHILD patterns along a path + * in an inode tree, which are valid as soon as + * I_MUTEX_PARENT -> I_MUTEX_CHILD is valid from a + * parent inode to one of its children. + */ + lockdep_off(); mutex_lock(&sd->s_dentry->d_inode->i_mutex); + lockdep_on(); if (sd->s_element == target) /* Boo-yah */ goto out; @@ -970,7 +1014,9 @@ static int configfs_depend_prep(struct dentry *origin, } /* We looped all our children and didn't find target */ + lockdep_off(); mutex_unlock(&sd->s_dentry->d_inode->i_mutex); + lockdep_on(); ret = -ENOENT; out: @@ -990,11 +1036,16 @@ static void configfs_depend_rollback(struct dentry *origin, struct dentry *dentry = item->ci_dentry; while (dentry != origin) { + /* See comments in configfs_depend_prep() */ + lockdep_off(); mutex_unlock(&dentry->d_inode->i_mutex); + lockdep_on(); dentry = dentry->d_parent; } + lockdep_off(); mutex_unlock(&origin->d_inode->i_mutex); + lockdep_on(); } int configfs_depend_item(struct configfs_subsystem *subsys, @@ -1329,8 +1380,16 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) } /* Wait until the racing operation terminates */ + /* + * Note: we hide this from lockdep since we are locked + * with subclass I_MUTEX_NORMAL from vfs_rmdir() (why + * not I_MUTEX_CHILD?), and I_MUTEX_XATTR or + * I_MUTEX_QUOTA are not relevant for the locked inode. + */ + lockdep_off(); mutex_lock(wait_mutex); mutex_unlock(wait_mutex); + lockdep_on(); } } while (ret == -EAGAIN); -- cgit v1.2.3-55-g7522 From 554e7f9e043e29da79c044f7a55efe4fad40701e Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 8 Jan 2009 08:21:43 +0800 Subject: ocfs2: Access the xattr bucket only before modifying it. In ocfs2_xattr_value_truncate, we may call b-tree codes which will extend the journal transaction. It has a potential problem that it may let the already-accessed-but-not-dirtied buffers gone. So we'd better access the bucket after we call ocfs2_xattr_value_truncate. And as for the root buffer for the xattr value, b-tree code will acess and dirty it, so we don't need to worry about it. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index e1d638af6ac3..915039fffe6e 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -4729,13 +4729,6 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, vb.vb_xv = (struct ocfs2_xattr_value_root *) (vb.vb_bh->b_data + offset % blocksize); - ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret) { - mlog_errno(ret); - goto out; - } - /* * From here on out we have to dirty the bucket. The generic * value calls only modify one of the bucket's bhs, but we need @@ -4748,12 +4741,18 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); if (ret) { mlog_errno(ret); - goto out_dirty; + goto out; + } + + ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; } xe->xe_value_size = cpu_to_le64(len); -out_dirty: ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); out: -- cgit v1.2.3-55-g7522 From a4b91965d39d5d53b470d6aa62cba155a6f3ffe1 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 29 Jan 2009 17:12:31 -0800 Subject: ocfs2: Wakeup the downconvert thread after a successful cancel convert When two nodes holding PR locks on a resource concurrently attempt to upconvert the locks to EX, the master sends a BAST to one of the nodes. This message tells that node to first cancel convert the upconvert request, followed by downconvert to a NL. Only when this lock is downconverted to NL, can the master upconvert the first node's lock to EX. While the fs was doing the cancel convert, it was forgetting to wake up the dc thread after a successful cancel, leading to a deadlock. Reported-and-Tested-by: David Teigland Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/dlmglue.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index b0c4cadd4c45..206a2370876a 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2860,6 +2860,10 @@ static void ocfs2_unlock_ast(void *opaque, int error) case OCFS2_UNLOCK_CANCEL_CONVERT: mlog(0, "Cancel convert success for %s\n", lockres->l_name); lockres->l_action = OCFS2_AST_INVALID; + /* Downconvert thread may have requeued this lock, we + * need to wake it. */ + if (lockres->l_flags & OCFS2_LOCK_BLOCKED) + ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres)); break; case OCFS2_UNLOCK_DROP_LOCK: lockres->l_level = DLM_LOCK_IV; -- cgit v1.2.3-55-g7522 From fd4ef231962ab44fd1004e87f9d7c6809f00cd64 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Thu, 29 Jan 2009 15:06:21 -0800 Subject: ocfs2: add quota call to ocfs2_remove_btree_range() We weren't reclaiming the clusters which get free'd from this function, so any user punching holes in a file would still have those bytes accounted against him/her. Add the call to vfs_dq_free_space_nodirty() to fix this. Interestingly enough, the journal credits calculation already took this into account. Signed-off-by: Mark Fasheh Acked-by: Jan Kara --- fs/ocfs2/alloc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index d861096c9d81..60fe74035db5 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5390,6 +5390,9 @@ int ocfs2_remove_btree_range(struct inode *inode, goto out; } + vfs_dq_free_space_nodirty(inode, + ocfs2_clusters_to_bytes(inode->i_sb, len)); + ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac, dealloc); if (ret) { -- cgit v1.2.3-55-g7522 From 6139a2360987f55e4490a7813cf69df74ec8b93a Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 22 Jan 2009 15:37:47 +1100 Subject: xfs: Check buffer lengths in log recovery Before trying to obtain, read or write a buffer, check that the buffer length is actually valid. If it is not valid, then something read in the recovery process has been corrupted and we should abort recovery. Reported-by: Eric Sesterhenn Tested-by: Eric Sesterhenn Reviewed-by: Christoph Hellwig Reviewed-by: Felix Blyakher Signed-off-by: Dave Chinner Signed-off-by: Felix Blyakher --- fs/xfs/xfs_log_recover.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 35cca98bd94c..b1047de2fffd 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -70,16 +70,21 @@ STATIC void xlog_recover_check_summary(xlog_t *); xfs_buf_t * xlog_get_bp( xlog_t *log, - int num_bblks) + int nbblks) { - ASSERT(num_bblks > 0); + if (nbblks <= 0 || nbblks > log->l_logBBsize) { + xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks); + XFS_ERROR_REPORT("xlog_get_bp(1)", + XFS_ERRLEVEL_HIGH, log->l_mp); + return NULL; + } if (log->l_sectbb_log) { - if (num_bblks > 1) - num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); - num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks); + if (nbblks > 1) + nbblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); + nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); } - return xfs_buf_get_noaddr(BBTOB(num_bblks), log->l_mp->m_logdev_targp); + return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); } void @@ -102,6 +107,13 @@ xlog_bread( { int error; + if (nbblks <= 0 || nbblks > log->l_logBBsize) { + xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks); + XFS_ERROR_REPORT("xlog_bread(1)", + XFS_ERRLEVEL_HIGH, log->l_mp); + return EFSCORRUPTED; + } + if (log->l_sectbb_log) { blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); @@ -139,6 +151,13 @@ xlog_bwrite( { int error; + if (nbblks <= 0 || nbblks > log->l_logBBsize) { + xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks); + XFS_ERROR_REPORT("xlog_bwrite(1)", + XFS_ERRLEVEL_HIGH, log->l_mp); + return EFSCORRUPTED; + } + if (log->l_sectbb_log) { blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); -- cgit v1.2.3-55-g7522 From 43f3f057c56d030546145696627f13f95735be95 Mon Sep 17 00:00:00 2001 From: Felix Blyakher Date: Thu, 22 Jan 2009 21:34:05 -0600 Subject: [XFS] Warn on transaction in flight on read-only remount Till VFS can correctly support read-only remount without racing, use WARN_ON instead of BUG_ON on detecting transaction in flight after quiescing filesystem. Signed-off-by: Felix Blyakher Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 2ed035354c26..a608e72fa405 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -371,7 +371,11 @@ xfs_quiesce_attr( /* flush inodes and push all remaining buffers out to disk */ xfs_quiesce_fs(mp); - ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); + /* + * Just warn here till VFS can correctly support + * read-only remount without racing. + */ + WARN_ON(atomic_read(&mp->m_active_trans) != 0); /* Push the superblock and write an unmount record */ error = xfs_log_sbcount(mp, 1); -- cgit v1.2.3-55-g7522