From 89a55897a2fbbceb94480952784004bf23911d38 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 14 Oct 2010 14:52:27 -0400 Subject: Btrfs: fix df regression The new ENOSPC stuff breaks out the raid types which breaks the way we were reporting df to the system. This fixes it back so that Available is the total space available to data and used is the actual bytes used by the filesystem. This means that Available is Total - data used - all of the metadata space. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/super.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f2393b390318..afab6ca14d03 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -716,18 +716,25 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) struct list_head *head = &root->fs_info->space_info; struct btrfs_space_info *found; u64 total_used = 0; + u64 total_used_data = 0; int bits = dentry->d_sb->s_blocksize_bits; __be32 *fsid = (__be32 *)root->fs_info->fsid; rcu_read_lock(); - list_for_each_entry_rcu(found, head, list) + list_for_each_entry_rcu(found, head, list) { + if (found->flags & (BTRFS_BLOCK_GROUP_METADATA | + BTRFS_BLOCK_GROUP_SYSTEM)) + total_used_data += found->disk_total; + else + total_used_data += found->disk_used; total_used += found->disk_used; + } rcu_read_unlock(); buf->f_namelen = BTRFS_NAME_LEN; buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; buf->f_bfree = buf->f_blocks - (total_used >> bits); - buf->f_bavail = buf->f_bfree; + buf->f_bavail = buf->f_blocks - (total_used_data >> bits); buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_type = BTRFS_SUPER_MAGIC; -- cgit v1.2.3-55-g7522 From 0e78340f3c1fc603e8016c8ac304766bcc65506e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 22 Oct 2010 15:26:53 -0400 Subject: Btrfs: fix error handling in btrfs_get_sb If we failed to find the root subvol id, or the subvol=, we would deactivate the locked super and close the devices. The problem is at this point we have gotten the SB all setup, which includes setting super_operations, so when we'd deactiveate the super, we'd do a close_ctree() which closes the devices, so we'd end up closing the devices twice. So if you do something like this mount /dev/sda1 /mnt/test1 mount /dev/sda1 /mnt/test2 -o subvol=xxx umount /mnt/test1 it would blow up (if subvol xxx doesn't exist). This patch fixes that problem. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/super.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index afab6ca14d03..d1867cda92a7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -629,7 +629,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, if (IS_ERR(root)) { error = PTR_ERR(root); deactivate_locked_super(s); - goto error; + goto error_free_subvol_name; } /* if they gave us a subvolume name bind mount into that */ if (strcmp(subvol_name, ".")) { @@ -643,14 +643,14 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, deactivate_locked_super(s); error = PTR_ERR(new_root); dput(root); - goto error_close_devices; + goto error_free_subvol_name; } if (!new_root->d_inode) { dput(root); dput(new_root); deactivate_locked_super(s); error = -ENXIO; - goto error_close_devices; + goto error_free_subvol_name; } dput(root); root = new_root; @@ -668,7 +668,6 @@ error_close_devices: btrfs_close_devices(fs_devices); error_free_subvol_name: kfree(subvol_name); -error: return error; } -- cgit v1.2.3-55-g7522 From 0af3d00bad38d3bb9912a60928ad0669f17bdb76 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 21 Jun 2010 14:48:16 -0400 Subject: Btrfs: create special free space cache inode In order to save free space cache, we need an inode to hold the data, and we need a special item to point at the right inode for the right block group. So first, create a special item that will point to the right inode, and the number of extent entries we will have and the number of bitmaps we will have. We truncate and pre-allocate space everytime to make sure it's uptodate. This feature will be turned on as soon as you mount with -o space_cache, however it is safe to boot into old kernels, they will just generate the cache the old fashion way. When you boot back into a newer kernel we will notice that we modified and not the cache and automatically discard the cache. Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 74 ++++++++++++-- fs/btrfs/disk-io.c | 3 +- fs/btrfs/extent-tree.c | 231 ++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/free-space-cache.c | 155 +++++++++++++++++++++++++++++ fs/btrfs/free-space-cache.h | 11 +++ fs/btrfs/inode.c | 95 ++++++++++++++---- fs/btrfs/relocation.c | 91 ++++++++++++++++- fs/btrfs/super.c | 7 +- fs/btrfs/transaction.c | 43 ++++++--- fs/btrfs/transaction.h | 4 + 10 files changed, 668 insertions(+), 46 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index eaf286abad17..46f52e1beade 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -99,6 +99,9 @@ struct btrfs_ordered_sum; */ #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL +/* For storing free space cache */ +#define BTRFS_FREE_SPACE_OBJECTID -11ULL + /* dummy objectid represents multiple objectids */ #define BTRFS_MULTIPLE_OBJECTIDS -255ULL @@ -265,6 +268,22 @@ struct btrfs_chunk { /* additional stripes go here */ } __attribute__ ((__packed__)); +#define BTRFS_FREE_SPACE_EXTENT 1 +#define BTRFS_FREE_SPACE_BITMAP 2 + +struct btrfs_free_space_entry { + __le64 offset; + __le64 bytes; + u8 type; +} __attribute__ ((__packed__)); + +struct btrfs_free_space_header { + struct btrfs_disk_key location; + __le64 generation; + __le64 num_entries; + __le64 num_bitmaps; +} __attribute__ ((__packed__)); + static inline unsigned long btrfs_chunk_item_size(int num_stripes) { BUG_ON(num_stripes == 0); @@ -365,8 +384,10 @@ struct btrfs_super_block { char label[BTRFS_LABEL_SIZE]; + __le64 cache_generation; + /* future expansion */ - __le64 reserved[32]; + __le64 reserved[31]; u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; } __attribute__ ((__packed__)); @@ -375,12 +396,12 @@ struct btrfs_super_block { * ones specified below then we will fail to mount */ #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) -#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (2ULL << 0) +#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) #define BTRFS_FEATURE_COMPAT_SUPP 0ULL #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL -#define BTRFS_FEATURE_INCOMPAT_SUPP \ - (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ +#define BTRFS_FEATURE_INCOMPAT_SUPP \ + (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL) /* @@ -750,6 +771,14 @@ enum btrfs_caching_type { BTRFS_CACHE_FINISHED = 2, }; +enum btrfs_disk_cache_state { + BTRFS_DC_WRITTEN = 0, + BTRFS_DC_ERROR = 1, + BTRFS_DC_CLEAR = 2, + BTRFS_DC_SETUP = 3, + BTRFS_DC_NEED_WRITE = 4, +}; + struct btrfs_caching_control { struct list_head list; struct mutex mutex; @@ -763,6 +792,7 @@ struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; struct btrfs_fs_info *fs_info; + struct inode *inode; spinlock_t lock; u64 pinned; u64 reserved; @@ -773,8 +803,11 @@ struct btrfs_block_group_cache { int extents_thresh; int free_extents; int total_bitmaps; - int ro; - int dirty; + int ro:1; + int dirty:1; + int iref:1; + + int disk_cache_state; /* cache tracking stuff */ int cached; @@ -1192,6 +1225,7 @@ struct btrfs_root { #define BTRFS_MOUNT_NOSSD (1 << 9) #define BTRFS_MOUNT_DISCARD (1 << 10) #define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) +#define BTRFS_MOUNT_SPACE_CACHE (1 << 12) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) @@ -1665,6 +1699,27 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, write_eb_member(eb, item, struct btrfs_dir_item, location, key); } +BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header, + num_entries, 64); +BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header, + num_bitmaps, 64); +BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header, + generation, 64); + +static inline void btrfs_free_space_key(struct extent_buffer *eb, + struct btrfs_free_space_header *h, + struct btrfs_disk_key *key) +{ + read_eb_member(eb, h, struct btrfs_free_space_header, location, key); +} + +static inline void btrfs_set_free_space_key(struct extent_buffer *eb, + struct btrfs_free_space_header *h, + struct btrfs_disk_key *key) +{ + write_eb_member(eb, h, struct btrfs_free_space_header, location, key); +} + /* struct btrfs_disk_key */ BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, objectid, 64); @@ -1876,6 +1931,8 @@ BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block, incompat_flags, 64); BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, csum_type, 16); +BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, + cache_generation, 64); static inline int btrfs_super_csum_size(struct btrfs_super_block *s) { @@ -2115,6 +2172,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, struct btrfs_block_group_cache *cache); int btrfs_set_block_group_rw(struct btrfs_root *root, struct btrfs_block_group_cache *cache); +void btrfs_put_block_group_cache(struct btrfs_fs_info *info); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, int level, int *slot); @@ -2426,6 +2484,10 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root); int btrfs_prealloc_file_range(struct inode *inode, int mode, u64 start, u64 num_bytes, u64 min_size, loff_t actual_len, u64 *alloc_hint); +int btrfs_prealloc_file_range_trans(struct inode *inode, + struct btrfs_trans_handle *trans, int mode, + u64 start, u64 num_bytes, u64 min_size, + loff_t actual_len, u64 *alloc_hint); extern const struct dentry_operations btrfs_dentry_operations; /* ioctl.c */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 64f10082f048..45cf64fc1e3e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1685,7 +1685,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - bh = btrfs_read_dev_super(fs_devices->latest_bdev); if (!bh) goto fail_iput; @@ -1993,6 +1992,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!(sb->s_flags & MS_RDONLY)) { down_read(&fs_info->cleanup_work_sem); btrfs_orphan_cleanup(fs_info->fs_root); + btrfs_orphan_cleanup(fs_info->tree_root); up_read(&fs_info->cleanup_work_sem); } @@ -2421,6 +2421,7 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 1; smp_mb(); + btrfs_put_block_group_cache(fs_info); if (!(fs_info->sb->s_flags & MS_RDONLY)) { ret = btrfs_commit_super(root); if (ret) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 32d094002a57..aab40fb3faed 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2688,6 +2688,109 @@ next_block_group(struct btrfs_root *root, return cache; } +static int cache_save_setup(struct btrfs_block_group_cache *block_group, + struct btrfs_trans_handle *trans, + struct btrfs_path *path) +{ + struct btrfs_root *root = block_group->fs_info->tree_root; + struct inode *inode = NULL; + u64 alloc_hint = 0; + int num_pages = 0; + int retries = 0; + int ret = 0; + + /* + * If this block group is smaller than 100 megs don't bother caching the + * block group. + */ + if (block_group->key.offset < (100 * 1024 * 1024)) { + spin_lock(&block_group->lock); + block_group->disk_cache_state = BTRFS_DC_WRITTEN; + spin_unlock(&block_group->lock); + return 0; + } + +again: + inode = lookup_free_space_inode(root, block_group, path); + if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { + ret = PTR_ERR(inode); + btrfs_release_path(root, path); + goto out; + } + + if (IS_ERR(inode)) { + BUG_ON(retries); + retries++; + + if (block_group->ro) + goto out_free; + + ret = create_free_space_inode(root, trans, block_group, path); + if (ret) + goto out_free; + goto again; + } + + /* + * We want to set the generation to 0, that way if anything goes wrong + * from here on out we know not to trust this cache when we load up next + * time. + */ + BTRFS_I(inode)->generation = 0; + ret = btrfs_update_inode(trans, root, inode); + WARN_ON(ret); + + if (i_size_read(inode) > 0) { + ret = btrfs_truncate_free_space_cache(root, trans, path, + inode); + if (ret) + goto out_put; + } + + spin_lock(&block_group->lock); + if (block_group->cached != BTRFS_CACHE_FINISHED) { + spin_unlock(&block_group->lock); + goto out_put; + } + spin_unlock(&block_group->lock); + + num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); + if (!num_pages) + num_pages = 1; + + /* + * Just to make absolutely sure we have enough space, we're going to + * preallocate 12 pages worth of space for each block group. In + * practice we ought to use at most 8, but we need extra space so we can + * add our header and have a terminator between the extents and the + * bitmaps. + */ + num_pages *= 16; + num_pages *= PAGE_CACHE_SIZE; + + ret = btrfs_check_data_free_space(inode, num_pages); + if (ret) + goto out_put; + + ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, + num_pages, num_pages, + &alloc_hint); + btrfs_free_reserved_data_space(inode, num_pages); +out_put: + iput(inode); +out_free: + btrfs_release_path(root, path); +out: + spin_lock(&block_group->lock); + if (ret) + block_group->disk_cache_state = BTRFS_DC_ERROR; + else + block_group->disk_cache_state = BTRFS_DC_SETUP; + spin_unlock(&block_group->lock); + + return ret; +} + int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -2700,6 +2803,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; +again: + while (1) { + cache = btrfs_lookup_first_block_group(root->fs_info, last); + while (cache) { + if (cache->disk_cache_state == BTRFS_DC_CLEAR) + break; + cache = next_block_group(root, cache); + } + if (!cache) { + if (last == 0) + break; + last = 0; + continue; + } + err = cache_save_setup(cache, trans, path); + last = cache->key.objectid + cache->key.offset; + btrfs_put_block_group(cache); + } + while (1) { if (last == 0) { err = btrfs_run_delayed_refs(trans, root, @@ -2709,6 +2831,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, cache = btrfs_lookup_first_block_group(root->fs_info, last); while (cache) { + if (cache->disk_cache_state == BTRFS_DC_CLEAR) { + btrfs_put_block_group(cache); + goto again; + } + if (cache->dirty) break; cache = next_block_group(root, cache); @@ -2883,11 +3010,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) struct btrfs_space_info *data_sinfo; struct btrfs_root *root = BTRFS_I(inode)->root; u64 used; - int ret = 0, committed = 0; + int ret = 0, committed = 0, alloc_chunk = 1; /* make sure bytes are sectorsize aligned */ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + if (root == root->fs_info->tree_root) { + alloc_chunk = 0; + committed = 1; + } + data_sinfo = BTRFS_I(inode)->space_info; if (!data_sinfo) goto alloc; @@ -2906,7 +3038,7 @@ again: * if we don't have enough free bytes in this space then we need * to alloc a new chunk. */ - if (!data_sinfo->full) { + if (!data_sinfo->full && alloc_chunk) { u64 alloc_target; data_sinfo->force_alloc = 1; @@ -3777,12 +3909,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc) { - struct btrfs_block_group_cache *cache; + struct btrfs_block_group_cache *cache = NULL; struct btrfs_fs_info *info = root->fs_info; - int factor; u64 total = num_bytes; u64 old_val; u64 byte_in_group; + int factor; /* block accounting for super block */ spin_lock(&info->delalloc_lock); @@ -3804,11 +3936,17 @@ static int update_block_group(struct btrfs_trans_handle *trans, factor = 2; else factor = 1; + byte_in_group = bytenr - cache->key.objectid; WARN_ON(byte_in_group > cache->key.offset); spin_lock(&cache->space_info->lock); spin_lock(&cache->lock); + + if (btrfs_super_cache_generation(&info->super_copy) != 0 && + cache->disk_cache_state < BTRFS_DC_CLEAR) + cache->disk_cache_state = BTRFS_DC_CLEAR; + cache->dirty = 1; old_val = btrfs_block_group_used(&cache->item); num_bytes = min(total, cache->key.offset - byte_in_group); @@ -7814,6 +7952,40 @@ out: return ret; } +void btrfs_put_block_group_cache(struct btrfs_fs_info *info) +{ + struct btrfs_block_group_cache *block_group; + u64 last = 0; + + while (1) { + struct inode *inode; + + block_group = btrfs_lookup_first_block_group(info, last); + while (block_group) { + spin_lock(&block_group->lock); + if (block_group->iref) + break; + spin_unlock(&block_group->lock); + block_group = next_block_group(info->tree_root, + block_group); + } + if (!block_group) { + if (last == 0) + break; + last = 0; + continue; + } + + inode = block_group->inode; + block_group->iref = 0; + block_group->inode = NULL; + spin_unlock(&block_group->lock); + iput(inode); + last = block_group->key.objectid + block_group->key.offset; + btrfs_put_block_group(block_group); + } +} + int btrfs_free_block_groups(struct btrfs_fs_info *info) { struct btrfs_block_group_cache *block_group; @@ -7897,6 +8069,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; + int need_clear = 0; + u64 cache_gen; root = info->extent_root; key.objectid = 0; @@ -7906,6 +8080,11 @@ int btrfs_read_block_groups(struct btrfs_root *root) if (!path) return -ENOMEM; + cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); + if (cache_gen != 0 && + btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) + need_clear = 1; + while (1) { ret = find_first_block_group(root, path, &key); if (ret > 0) @@ -7928,6 +8107,9 @@ int btrfs_read_block_groups(struct btrfs_root *root) INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); + if (need_clear) + cache->disk_cache_state = BTRFS_DC_CLEAR; + /* * we only want to have 32k of ram per block group for keeping * track of free space, and if we pass 1/2 of that we want to @@ -8032,6 +8214,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache->key.offset = size; cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; cache->sectorsize = root->sectorsize; + cache->fs_info = root->fs_info; /* * we only want to have 32k of ram per block group for keeping track @@ -8088,7 +8271,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_block_group_cache *block_group; struct btrfs_free_cluster *cluster; + struct btrfs_root *tree_root = root->fs_info->tree_root; struct btrfs_key key; + struct inode *inode; int ret; root = root->fs_info->extent_root; @@ -8097,8 +8282,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, BUG_ON(!block_group); BUG_ON(!block_group->ro); - memcpy(&key, &block_group->key, sizeof(key)); - /* make sure this block group isn't part of an allocation cluster */ cluster = &root->fs_info->data_alloc_cluster; spin_lock(&cluster->refill_lock); @@ -8117,6 +8300,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); + inode = lookup_free_space_inode(root, block_group, path); + if (!IS_ERR(inode)) { + btrfs_orphan_add(trans, inode); + clear_nlink(inode); + /* One for the block groups ref */ + spin_lock(&block_group->lock); + if (block_group->iref) { + block_group->iref = 0; + block_group->inode = NULL; + spin_unlock(&block_group->lock); + iput(inode); + } else { + spin_unlock(&block_group->lock); + } + /* One for our lookup ref */ + iput(inode); + } + + key.objectid = BTRFS_FREE_SPACE_OBJECTID; + key.offset = block_group->key.objectid; + key.type = 0; + + ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); + if (ret < 0) + goto out; + if (ret > 0) + btrfs_release_path(tree_root, path); + if (ret == 0) { + ret = btrfs_del_item(trans, tree_root, path); + if (ret) + goto out; + btrfs_release_path(tree_root, path); + } + spin_lock(&root->fs_info->block_group_cache_lock); rb_erase(&block_group->cache_node, &root->fs_info->block_group_cache_tree); @@ -8140,6 +8357,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, block_group->space_info->bytes_readonly -= block_group->key.offset; spin_unlock(&block_group->space_info->lock); + memcpy(&key, &block_group->key, sizeof(key)); + btrfs_clear_space_info_full(root->fs_info); btrfs_put_block_group(block_group); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f488fac04d99..05efcc7061a7 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -23,10 +23,165 @@ #include "ctree.h" #include "free-space-cache.h" #include "transaction.h" +#include "disk-io.h" #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) +struct inode *lookup_free_space_inode(struct btrfs_root *root, + struct btrfs_block_group_cache + *block_group, struct btrfs_path *path) +{ + struct btrfs_key key; + struct btrfs_key location; + struct btrfs_disk_key disk_key; + struct btrfs_free_space_header *header; + struct extent_buffer *leaf; + struct inode *inode = NULL; + int ret; + + spin_lock(&block_group->lock); + if (block_group->inode) + inode = igrab(block_group->inode); + spin_unlock(&block_group->lock); + if (inode) + return inode; + + key.objectid = BTRFS_FREE_SPACE_OBJECTID; + key.offset = block_group->key.objectid; + key.type = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) { + btrfs_release_path(root, path); + return ERR_PTR(-ENOENT); + } + + leaf = path->nodes[0]; + header = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_free_space_header); + btrfs_free_space_key(leaf, header, &disk_key); + btrfs_disk_key_to_cpu(&location, &disk_key); + btrfs_release_path(root, path); + + inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); + if (!inode) + return ERR_PTR(-ENOENT); + if (IS_ERR(inode)) + return inode; + if (is_bad_inode(inode)) { + iput(inode); + return ERR_PTR(-ENOENT); + } + + spin_lock(&block_group->lock); + if (!root->fs_info->closing) { + block_group->inode = igrab(inode); + block_group->iref = 1; + } + spin_unlock(&block_group->lock); + + return inode; +} + +int create_free_space_inode(struct btrfs_root *root, + struct btrfs_trans_handle *trans, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path) +{ + struct btrfs_key key; + struct btrfs_disk_key disk_key; + struct btrfs_free_space_header *header; + struct btrfs_inode_item *inode_item; + struct extent_buffer *leaf; + u64 objectid; + int ret; + + ret = btrfs_find_free_objectid(trans, root, 0, &objectid); + if (ret < 0) + return ret; + + ret = btrfs_insert_empty_inode(trans, root, path, objectid); + if (ret) + return ret; + + leaf = path->nodes[0]; + inode_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_inode_item); + btrfs_item_key(leaf, &disk_key, path->slots[0]); + memset_extent_buffer(leaf, 0, (unsigned long)inode_item, + sizeof(*inode_item)); + btrfs_set_inode_generation(leaf, inode_item, trans->transid); + btrfs_set_inode_size(leaf, inode_item, 0); + btrfs_set_inode_nbytes(leaf, inode_item, 0); + btrfs_set_inode_uid(leaf, inode_item, 0); + btrfs_set_inode_gid(leaf, inode_item, 0); + btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); + btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | + BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); + btrfs_set_inode_nlink(leaf, inode_item, 1); + btrfs_set_inode_transid(leaf, inode_item, trans->transid); + btrfs_set_inode_block_group(leaf, inode_item, + block_group->key.objectid); + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(root, path); + + key.objectid = BTRFS_FREE_SPACE_OBJECTID; + key.offset = block_group->key.objectid; + key.type = 0; + + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(struct btrfs_free_space_header)); + if (ret < 0) { + btrfs_release_path(root, path); + return ret; + } + leaf = path->nodes[0]; + header = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_free_space_header); + memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header)); + btrfs_set_free_space_key(leaf, header, &disk_key); + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(root, path); + + return 0; +} + +int btrfs_truncate_free_space_cache(struct btrfs_root *root, + struct btrfs_trans_handle *trans, + struct btrfs_path *path, + struct inode *inode) +{ + loff_t oldsize; + int ret = 0; + + trans->block_rsv = root->orphan_block_rsv; + ret = btrfs_block_rsv_check(trans, root, + root->orphan_block_rsv, + 0, 5); + if (ret) + return ret; + + oldsize = i_size_read(inode); + btrfs_i_size_write(inode, 0); + truncate_pagecache(inode, oldsize, 0); + + /* + * We don't need an orphan item because truncating the free space cache + * will never be split across transactions. + */ + ret = btrfs_truncate_inode_items(trans, root, inode, + 0, BTRFS_EXTENT_DATA_KEY); + if (ret) { + WARN_ON(1); + return ret; + } + + return btrfs_update_inode(trans, root, inode); +} + static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, u64 offset) { diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 890a8e79011b..45be29e5f01e 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -27,6 +27,17 @@ struct btrfs_free_space { struct list_head list; }; +struct inode *lookup_free_space_inode(struct btrfs_root *root, + struct btrfs_block_group_cache + *block_group, struct btrfs_path *path); +int create_free_space_inode(struct btrfs_root *root, + struct btrfs_trans_handle *trans, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path); +int btrfs_truncate_free_space_cache(struct btrfs_root *root, + struct btrfs_trans_handle *trans, + struct btrfs_path *path, + struct inode *inode); int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, u64 bytenr, u64 size); int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c03864406af3..1af1ea88e8a8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1700,6 +1700,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->len); BUG_ON(ret); } else { + BUG_ON(root == root->fs_info->tree_root); ret = insert_reserved_file_extent(trans, inode, ordered_extent->file_offset, ordered_extent->start, @@ -3196,7 +3197,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); - if (root->ref_cows) + if (root->ref_cows || root == root->fs_info->tree_root) btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); path = btrfs_alloc_path(); @@ -3344,7 +3345,8 @@ delete: } else { break; } - if (found_extent && root->ref_cows) { + if (found_extent && (root->ref_cows || + root == root->fs_info->tree_root)) { btrfs_set_path_blocking(path); ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, 0, @@ -3675,7 +3677,8 @@ void btrfs_evict_inode(struct inode *inode) int ret; truncate_inode_pages(&inode->i_data, 0); - if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0) + if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || + root == root->fs_info->tree_root)) goto no_delete; if (is_bad_inode(inode)) { @@ -3888,7 +3891,14 @@ static void inode_tree_del(struct inode *inode) } spin_unlock(&root->inode_lock); - if (empty && btrfs_root_refs(&root->root_item) == 0) { + /* + * Free space cache has inodes in the tree root, but the tree root has a + * root_refs of 0, so this could end up dropping the tree root as a + * snapshot, so we need the extra !root->fs_info->tree_root check to + * make sure we don't drop it. + */ + if (empty && btrfs_root_refs(&root->root_item) == 0 && + root != root->fs_info->tree_root) { synchronize_srcu(&root->fs_info->subvol_srcu); spin_lock(&root->inode_lock); empty = RB_EMPTY_ROOT(&root->inode_tree); @@ -4282,14 +4292,24 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; int ret = 0; + bool nolock = false; if (BTRFS_I(inode)->dummy_inode) return 0; + smp_mb(); + nolock = (root->fs_info->closing && root == root->fs_info->tree_root); + if (wbc->sync_mode == WB_SYNC_ALL) { - trans = btrfs_join_transaction(root, 1); + if (nolock) + trans = btrfs_join_transaction_nolock(root, 1); + else + trans = btrfs_join_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); - ret = btrfs_commit_transaction(trans, root); + if (nolock) + ret = btrfs_end_transaction_nolock(trans, root); + else + ret = btrfs_commit_transaction(trans, root); } return ret; } @@ -6308,6 +6328,21 @@ void btrfs_destroy_inode(struct inode *inode) spin_unlock(&root->fs_info->ordered_extent_lock); } + if (root == root->fs_info->tree_root) { + struct btrfs_block_group_cache *block_group; + + block_group = btrfs_lookup_block_group(root->fs_info, + BTRFS_I(inode)->block_group); + if (block_group && block_group->inode == inode) { + spin_lock(&block_group->lock); + block_group->inode = NULL; + spin_unlock(&block_group->lock); + btrfs_put_block_group(block_group); + } else if (block_group) { + btrfs_put_block_group(block_group); + } + } + spin_lock(&root->orphan_lock); if (!list_empty(&BTRFS_I(inode)->i_orphan)) { printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", @@ -6340,7 +6375,8 @@ int btrfs_drop_inode(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; - if (btrfs_root_refs(&root->root_item) == 0) + if (btrfs_root_refs(&root->root_item) == 0 && + root != root->fs_info->tree_root) return 1; else return generic_drop_inode(inode); @@ -6757,27 +6793,33 @@ out_unlock: return err; } -int btrfs_prealloc_file_range(struct inode *inode, int mode, - u64 start, u64 num_bytes, u64 min_size, - loff_t actual_len, u64 *alloc_hint) +static int __btrfs_prealloc_file_range(struct inode *inode, int mode, + u64 start, u64 num_bytes, u64 min_size, + loff_t actual_len, u64 *alloc_hint, + struct btrfs_trans_handle *trans) { - struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 cur_offset = start; int ret = 0; + bool own_trans = true; + if (trans) + own_trans = false; while (num_bytes > 0) { - trans = btrfs_start_transaction(root, 3); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - break; + if (own_trans) { + trans = btrfs_start_transaction(root, 3); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } } ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, 0, *alloc_hint, (u64)-1, &ins, 1); if (ret) { - btrfs_end_transaction(trans, root); + if (own_trans) + btrfs_end_transaction(trans, root); break; } @@ -6810,11 +6852,30 @@ int btrfs_prealloc_file_range(struct inode *inode, int mode, ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); - btrfs_end_transaction(trans, root); + if (own_trans) + btrfs_end_transaction(trans, root); } return ret; } +int btrfs_prealloc_file_range(struct inode *inode, int mode, + u64 start, u64 num_bytes, u64 min_size, + loff_t actual_len, u64 *alloc_hint) +{ + return __btrfs_prealloc_file_range(inode, mode, start, num_bytes, + min_size, actual_len, alloc_hint, + NULL); +} + +int btrfs_prealloc_file_range_trans(struct inode *inode, + struct btrfs_trans_handle *trans, int mode, + u64 start, u64 num_bytes, u64 min_size, + loff_t actual_len, u64 *alloc_hint) +{ + return __btrfs_prealloc_file_range(inode, mode, start, num_bytes, + min_size, actual_len, alloc_hint, trans); +} + static long btrfs_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) { diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b37d723b9d4a..af339eee55b8 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -29,6 +29,7 @@ #include "locking.h" #include "btrfs_inode.h" #include "async-thread.h" +#include "free-space-cache.h" /* * backref_node, mapping_node and tree_block start with this @@ -3191,6 +3192,54 @@ static int block_use_full_backref(struct reloc_control *rc, return ret; } +static int delete_block_group_cache(struct btrfs_fs_info *fs_info, + struct inode *inode, u64 ino) +{ + struct btrfs_key key; + struct btrfs_path *path; + struct btrfs_root *root = fs_info->tree_root; + struct btrfs_trans_handle *trans; + unsigned long nr; + int ret = 0; + + if (inode) + goto truncate; + + key.objectid = ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + inode = btrfs_iget(fs_info->sb, &key, root, NULL); + if (!inode || IS_ERR(inode) || is_bad_inode(inode)) { + if (inode && !IS_ERR(inode)) + iput(inode); + return -ENOENT; + } + +truncate: + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + trans = btrfs_join_transaction(root, 0); + if (IS_ERR(trans)) { + btrfs_free_path(path); + goto out; + } + + ret = btrfs_truncate_free_space_cache(root, trans, path, inode); + + btrfs_free_path(path); + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root, nr); +out: + iput(inode); + return ret; +} + /* * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY * this function scans fs tree to find blocks reference the data extent @@ -3217,15 +3266,27 @@ static int find_data_references(struct reloc_control *rc, int counted; int ret; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - ref_root = btrfs_extent_data_ref_root(leaf, ref); ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); ref_offset = btrfs_extent_data_ref_offset(leaf, ref); ref_count = btrfs_extent_data_ref_count(leaf, ref); + /* + * This is an extent belonging to the free space cache, lets just delete + * it and redo the search. + */ + if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { + ret = delete_block_group_cache(rc->extent_root->fs_info, + NULL, ref_objectid); + if (ret != -ENOENT) + return ret; + ret = 0; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + root = read_fs_root(rc->extent_root->fs_info, ref_root); if (IS_ERR(root)) { err = PTR_ERR(root); @@ -3860,6 +3921,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) { struct btrfs_fs_info *fs_info = extent_root->fs_info; struct reloc_control *rc; + struct inode *inode; + struct btrfs_path *path; int ret; int rw = 0; int err = 0; @@ -3882,6 +3945,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) rw = 1; } + path = btrfs_alloc_path(); + if (!path) { + err = -ENOMEM; + goto out; + } + + inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group, + path); + btrfs_free_path(path); + + if (!IS_ERR(inode)) + ret = delete_block_group_cache(fs_info, inode, 0); + else + ret = PTR_ERR(inode); + + if (ret && ret != -ENOENT) { + err = ret; + goto out; + } + rc->data_inode = create_reloc_inode(fs_info, rc->block_group); if (IS_ERR(rc->data_inode)) { err = PTR_ERR(rc->data_inode); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1776dbd8dc98..5c23eb8d6ba3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -68,7 +68,7 @@ enum { Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, - Opt_discard, Opt_err, + Opt_discard, Opt_space_cache, Opt_err, }; static match_table_t tokens = { @@ -92,6 +92,7 @@ static match_table_t tokens = { {Opt_flushoncommit, "flushoncommit"}, {Opt_ratio, "metadata_ratio=%d"}, {Opt_discard, "discard"}, + {Opt_space_cache, "space_cache"}, {Opt_err, NULL}, }; @@ -235,6 +236,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) case Opt_discard: btrfs_set_opt(info->mount_opt, DISCARD); break; + case Opt_space_cache: + printk(KERN_INFO "btrfs: enabling disk space caching\n"); + btrfs_set_opt(info->mount_opt, SPACE_CACHE); + break; case Opt_err: printk(KERN_INFO "btrfs: unrecognized mount option " "'%s'\n", p); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 66e4c66cc63b..e7144c48ed79 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -163,6 +163,7 @@ enum btrfs_trans_type { TRANS_START, TRANS_JOIN, TRANS_USERSPACE, + TRANS_JOIN_NOLOCK, }; static int may_wait_transaction(struct btrfs_root *root, int type) @@ -186,7 +187,8 @@ again: if (!h) return ERR_PTR(-ENOMEM); - mutex_lock(&root->fs_info->trans_mutex); + if (type != TRANS_JOIN_NOLOCK) + mutex_lock(&root->fs_info->trans_mutex); if (may_wait_transaction(root, type)) wait_current_trans(root); @@ -195,7 +197,8 @@ again: cur_trans = root->fs_info->running_transaction; cur_trans->use_count++; - mutex_unlock(&root->fs_info->trans_mutex); + if (type != TRANS_JOIN_NOLOCK) + mutex_unlock(&root->fs_info->trans_mutex); h->transid = cur_trans->transid; h->transaction = cur_trans; @@ -224,9 +227,11 @@ again: } } - mutex_lock(&root->fs_info->trans_mutex); + if (type != TRANS_JOIN_NOLOCK) + mutex_lock(&root->fs_info->trans_mutex); record_root_in_trans(h, root); - mutex_unlock(&root->fs_info->trans_mutex); + if (type != TRANS_JOIN_NOLOCK) + mutex_unlock(&root->fs_info->trans_mutex); if (!current->journal_info && type != TRANS_USERSPACE) current->journal_info = h; @@ -244,6 +249,12 @@ struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, return start_transaction(root, 0, TRANS_JOIN); } +struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, + int num_blocks) +{ + return start_transaction(root, 0, TRANS_JOIN_NOLOCK); +} + struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, int num_blocks) { @@ -348,7 +359,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, } static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root, int throttle) + struct btrfs_root *root, int throttle, int lock) { struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_fs_info *info = root->fs_info; @@ -376,18 +387,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_trans_release_metadata(trans, root); - if (!root->fs_info->open_ioctl_trans && + if (lock && !root->fs_info->open_ioctl_trans && should_end_transaction(trans, root)) trans->transaction->blocked = 1; - if (cur_trans->blocked && !cur_trans->in_commit) { + if (lock && cur_trans->blocked && !cur_trans->in_commit) { if (throttle) return btrfs_commit_transaction(trans, root); else wake_up_process(info->transaction_kthread); } - mutex_lock(&info->trans_mutex); + if (lock) + mutex_lock(&info->trans_mutex); WARN_ON(cur_trans != info->running_transaction); WARN_ON(cur_trans->num_writers < 1); cur_trans->num_writers--; @@ -395,7 +407,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); put_transaction(cur_trans); - mutex_unlock(&info->trans_mutex); + if (lock) + mutex_unlock(&info->trans_mutex); if (current->journal_info == trans) current->journal_info = NULL; @@ -411,13 +424,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return __btrfs_end_transaction(trans, root, 0); + return __btrfs_end_transaction(trans, root, 0, 1); } int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return __btrfs_end_transaction(trans, root, 1); + return __btrfs_end_transaction(trans, root, 1, 1); +} + +int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + return __btrfs_end_transaction(trans, root, 0, 0); } /* @@ -966,6 +985,8 @@ static void update_super_roots(struct btrfs_root *root) super->root = root_item->bytenr; super->generation = root_item->generation; super->root_level = root_item->level; + if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE)) + super->cache_generation = root_item->generation; } int btrfs_transaction_in_commit(struct btrfs_fs_info *info) diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index e104986d0bfd..15f83e1c1ef7 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -87,10 +87,14 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, + struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_items); struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, int num_blocks); +struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, + int num_blocks); struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, int num_blocks); int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, -- cgit v1.2.3-55-g7522 From 88c2ba3b069f1e0f4694124d02985fa7620a19f1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 21 Sep 2010 14:21:34 -0400 Subject: Btrfs: Add a clear_cache mount option If something goes wrong with the free space cache we need a way to make sure it's not loaded on mount and that it's cleared for everybody. When you pass the clear_cache option it will make it so all block groups are setup to be cleared, which keeps them from being loaded and then they will be truncated when the transaction is committed. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 2 ++ fs/btrfs/free-space-cache.c | 2 -- fs/btrfs/super.c | 6 +++++- 4 files changed, 8 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b155a0e49eeb..633e559e000e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1229,6 +1229,7 @@ struct btrfs_root { #define BTRFS_MOUNT_DISCARD (1 << 10) #define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) #define BTRFS_MOUNT_SPACE_CACHE (1 << 12) +#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 137833e1fc26..1a94ee4c4fbb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8198,6 +8198,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) if (cache_gen != 0 && btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) need_clear = 1; + if (btrfs_test_opt(root, CLEAR_CACHE)) + need_clear = 1; while (1) { ret = find_first_block_group(root, path, &key); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index baa193423fb8..22ee0dc2e6b8 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -242,8 +242,6 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, */ spin_lock(&block_group->lock); if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { - printk(KERN_ERR "not reading block group %llu, dcs is %d\n", block_group->key.objectid, - block_group->disk_cache_state); spin_unlock(&block_group->lock); return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5c23eb8d6ba3..5f56213908e7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -68,7 +68,7 @@ enum { Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, - Opt_discard, Opt_space_cache, Opt_err, + Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err, }; static match_table_t tokens = { @@ -93,6 +93,7 @@ static match_table_t tokens = { {Opt_ratio, "metadata_ratio=%d"}, {Opt_discard, "discard"}, {Opt_space_cache, "space_cache"}, + {Opt_clear_cache, "clear_cache"}, {Opt_err, NULL}, }; @@ -239,6 +240,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) case Opt_space_cache: printk(KERN_INFO "btrfs: enabling disk space caching\n"); btrfs_set_opt(info->mount_opt, SPACE_CACHE); + case Opt_clear_cache: + printk(KERN_INFO "btrfs: force clearing of disk cache\n"); + btrfs_set_opt(info->mount_opt, CLEAR_CACHE); break; case Opt_err: printk(KERN_INFO "btrfs: unrecognized mount option " -- cgit v1.2.3-55-g7522 From d0b678cb0a26783ab7238784f1e7e608e5caafa3 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 29 Oct 2010 15:14:23 -0400 Subject: Btrfs: Use ERR_CAST helpers Use ERR_CAST(x) rather than ERR_PTR(PTR_ERR(x)). The former makes more clear what is the purpose of the operation, which otherwise looks like a no-op. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ type T; T x; identifier f; @@ T f (...) { <+... - ERR_PTR(PTR_ERR(x)) + x ...+> } @@ expression x; @@ - ERR_PTR(PTR_ERR(x)) + ERR_CAST(x) // Signed-off-by: Julia Lawall Cc: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 4 ++-- fs/btrfs/super.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 454ca52d6451..23cb8da3ff66 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -335,7 +335,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, goto out; } if (IS_ERR(rb_node)) { - em = ERR_PTR(PTR_ERR(rb_node)); + em = ERR_CAST(rb_node); goto out; } em = rb_entry(rb_node, struct extent_map, rb_node); @@ -384,7 +384,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree, goto out; } if (IS_ERR(rb_node)) { - em = ERR_PTR(PTR_ERR(rb_node)); + em = ERR_CAST(rb_node); goto out; } em = rb_entry(rb_node, struct extent_map, rb_node); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 65b62daa3f80..d7fb2733d028 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -389,7 +389,7 @@ static struct dentry *get_default_root(struct super_block *sb, find_root: new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); if (IS_ERR(new_root)) - return ERR_PTR(PTR_ERR(new_root)); + return ERR_CAST(new_root); if (btrfs_root_refs(&new_root->root_item) == 0) return ERR_PTR(-ENOENT); -- cgit v1.2.3-55-g7522 From 559af8211433b8c0b20e6c43c61409cb9c9c2996 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 29 Oct 2010 15:14:37 -0400 Subject: Btrfs: cleanup warnings from gcc 4.6 (nonbugs) These are all the cases where a variable is set, but not read which are not bugs as far as I can see, but simply leftovers. Still needs more review. Found by gcc 4.6's new warnings Signed-off-by: Andi Kleen Cc: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Chris Mason --- fs/btrfs/compression.c | 2 -- fs/btrfs/ctree.c | 20 ++------------------ fs/btrfs/disk-io.c | 11 ----------- fs/btrfs/extent-tree.c | 2 -- fs/btrfs/extent_io.c | 9 --------- fs/btrfs/inode.c | 14 -------------- fs/btrfs/ioctl.c | 2 -- fs/btrfs/ordered-data.c | 2 -- fs/btrfs/root-tree.c | 2 -- fs/btrfs/super.c | 6 ++---- fs/btrfs/tree-defrag.c | 2 -- fs/btrfs/tree-log.c | 15 --------------- fs/btrfs/volumes.c | 4 ---- fs/btrfs/xattr.c | 2 -- fs/btrfs/zlib.c | 5 ----- 15 files changed, 4 insertions(+), 94 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 396039b3a8a2..7845d1f7d1d9 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -163,7 +163,6 @@ fail: */ static void end_compressed_bio_read(struct bio *bio, int err) { - struct extent_io_tree *tree; struct compressed_bio *cb = bio->bi_private; struct inode *inode; struct page *page; @@ -187,7 +186,6 @@ static void end_compressed_bio_read(struct bio *bio, int err) /* ok, we're the last bio for this extent, lets start * the decompression. */ - tree = &BTRFS_I(inode)->io_tree; ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, cb->start, cb->orig_bio->bi_io_vec, diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6921231e0efb..9ac171599258 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -200,7 +200,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, struct extent_buffer **cow_ret, u64 new_root_objectid) { struct extent_buffer *cow; - u32 nritems; int ret = 0; int level; struct btrfs_disk_key disk_key; @@ -210,7 +209,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, WARN_ON(root->ref_cows && trans->transid != root->last_trans); level = btrfs_header_level(buf); - nritems = btrfs_header_nritems(buf); if (level == 0) btrfs_item_key(buf, &disk_key, 0); else @@ -1008,7 +1006,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, int wret; int pslot; int orig_slot = path->slots[level]; - int err_on_enospc = 0; u64 orig_ptr; if (level == 0) @@ -1071,8 +1068,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; - if (btrfs_header_nritems(mid) < 2) - err_on_enospc = 1; + btrfs_header_nritems(mid); left = read_node_slot(root, parent, pslot - 1); if (left) { @@ -1103,8 +1099,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, wret = push_node_left(trans, root, left, mid, 1); if (wret < 0) ret = wret; - if (btrfs_header_nritems(mid) < 2) - err_on_enospc = 1; + btrfs_header_nritems(mid); } /* @@ -1224,14 +1219,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, int wret; int pslot; int orig_slot = path->slots[level]; - u64 orig_ptr; if (level == 0) return 1; mid = path->nodes[level]; WARN_ON(btrfs_header_generation(mid) != trans->transid); - orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) parent = path->nodes[level + 1]; @@ -2567,7 +2560,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, { struct btrfs_disk_key disk_key; struct extent_buffer *right = path->nodes[0]; - int slot; int i; int push_space = 0; int push_items = 0; @@ -2579,8 +2571,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, u32 this_item_size; u32 old_left_item_size; - slot = path->slots[1]; - if (empty) nr = min(right_nritems, max_slot); else @@ -3349,7 +3339,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, { int ret = 0; int slot; - int slot_orig; struct extent_buffer *leaf; struct btrfs_item *item; u32 nritems; @@ -3359,7 +3348,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, unsigned int size_diff; int i; - slot_orig = path->slots[0]; leaf = path->nodes[0]; slot = path->slots[0]; @@ -3464,7 +3452,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, { int ret = 0; int slot; - int slot_orig; struct extent_buffer *leaf; struct btrfs_item *item; u32 nritems; @@ -3473,7 +3460,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, unsigned int old_size; int i; - slot_orig = path->slots[0]; leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); @@ -3806,7 +3792,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, struct btrfs_key *cpu_key, u32 *data_size, int nr) { - struct extent_buffer *leaf; int ret = 0; int slot; int i; @@ -3823,7 +3808,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, if (ret < 0) goto out; - leaf = path->nodes[0]; slot = path->slots[0]; BUG_ON(slot < 0); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 77e5dabfd45a..e163424c7fce 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -338,7 +338,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) struct extent_io_tree *tree; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 found_start; - int found_level; unsigned long len; struct extent_buffer *eb; int ret; @@ -369,8 +368,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) WARN_ON(1); goto err; } - found_level = btrfs_header_level(eb); - csum_tree_block(root, eb, 0); err: free_extent_buffer(eb); @@ -543,11 +540,9 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone) static void run_one_async_start(struct btrfs_work *work) { - struct btrfs_fs_info *fs_info; struct async_submit_bio *async; async = container_of(work, struct async_submit_bio, work); - fs_info = BTRFS_I(async->inode)->root->fs_info; async->submit_bio_start(async->inode, async->rw, async->bio, async->mirror_num, async->bio_flags, async->bio_offset); @@ -860,12 +855,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid) { struct extent_buffer *buf = NULL; - struct inode *btree_inode = root->fs_info->btree_inode; - struct extent_io_tree *io_tree; int ret; - io_tree = &BTRFS_I(btree_inode)->io_tree; - buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; @@ -1387,7 +1378,6 @@ static int bio_ready_for_csum(struct bio *bio) u64 start = 0; struct page *page; struct extent_io_tree *io_tree = NULL; - struct btrfs_fs_info *info = NULL; struct bio_vec *bvec; int i; int ret; @@ -1406,7 +1396,6 @@ static int bio_ready_for_csum(struct bio *bio) buf_len = page->private >> 2; start = page_offset(page) + bvec->bv_offset; io_tree = &BTRFS_I(page->mapping->host)->io_tree; - info = BTRFS_I(page->mapping->host)->root->fs_info; } /* are we fully contained in this bio? */ if (buf_len <= length) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 59c8daaacf0c..df754108b952 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5719,7 +5719,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, u64 generation; u64 refs; u64 flags; - u64 last = 0; u32 nritems; u32 blocksize; struct btrfs_key key; @@ -5787,7 +5786,6 @@ reada: generation); if (ret) break; - last = bytenr + blocksize; nread++; } wc->reada_slot = slot; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7dc31c39ca59..3b7eaee0f912 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1857,10 +1857,8 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, struct page *page = bvec->bv_page; struct extent_io_tree *tree = bio->bi_private; u64 start; - u64 end; start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; - end = start + bvec->bv_len - 1; bio->bi_private = NULL; @@ -2160,7 +2158,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 last_byte = i_size_read(inode); u64 block_start; u64 iosize; - u64 unlock_start; sector_t sector; struct extent_state *cached_state = NULL; struct extent_map *em; @@ -2285,7 +2282,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (tree->ops && tree->ops->writepage_end_io_hook) tree->ops->writepage_end_io_hook(page, start, page_end, NULL, 1); - unlock_start = page_end + 1; goto done; } @@ -2296,7 +2292,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (tree->ops && tree->ops->writepage_end_io_hook) tree->ops->writepage_end_io_hook(page, cur, page_end, NULL, 1); - unlock_start = page_end + 1; break; } em = epd->get_extent(inode, page, pg_offset, cur, @@ -2343,7 +2338,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, cur += iosize; pg_offset += iosize; - unlock_start = cur; continue; } /* leave this out until we have a page_mkwrite call */ @@ -2429,7 +2423,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, pgoff_t index; pgoff_t end; /* Inclusive */ int scanned = 0; - int range_whole = 0; pagevec_init(&pvec, 0); if (wbc->range_cyclic) { @@ -2438,8 +2431,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, } else { index = wbc->range_start >> PAGE_CACHE_SHIFT; end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) - range_whole = 1; scanned = 1; } retry: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0aa24717cd58..609f3bbbd1ed 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -319,8 +319,6 @@ static noinline int compress_file_range(struct inode *inode, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; u64 num_bytes; - u64 orig_start; - u64 disk_num_bytes; u64 blocksize = root->sectorsize; u64 actual_end; u64 isize = i_size_read(inode); @@ -335,8 +333,6 @@ static noinline int compress_file_range(struct inode *inode, int i; int will_compress; - orig_start = start; - actual_end = min_t(u64, isize, end + 1); again: will_compress = 0; @@ -371,7 +367,6 @@ again: total_compressed = min(total_compressed, max_uncompressed); num_bytes = (end - start + blocksize) & ~(blocksize - 1); num_bytes = max(blocksize, num_bytes); - disk_num_bytes = num_bytes; total_in = 0; ret = 0; @@ -467,7 +462,6 @@ again: if (total_compressed >= total_in) { will_compress = 0; } else { - disk_num_bytes = total_compressed; num_bytes = total_in; } } @@ -757,8 +751,6 @@ static noinline int cow_file_range(struct inode *inode, u64 disk_num_bytes; u64 cur_alloc_size; u64 blocksize = root->sectorsize; - u64 actual_end; - u64 isize = i_size_read(inode); struct btrfs_key ins; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; @@ -770,8 +762,6 @@ static noinline int cow_file_range(struct inode *inode, btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; - actual_end = min_t(u64, isize, end + 1); - num_bytes = (end - start + blocksize) & ~(blocksize - 1); num_bytes = max(blocksize, num_bytes); disk_num_bytes = num_bytes; @@ -2274,7 +2264,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) { struct btrfs_path *path; struct extent_buffer *leaf; - struct btrfs_item *item; struct btrfs_key key, found_key; struct btrfs_trans_handle *trans; struct inode *inode; @@ -2312,7 +2301,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) /* pull out the item */ leaf = path->nodes[0]; - item = btrfs_item_nr(leaf, path->slots[0]); btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); /* make sure the item matches what we want */ @@ -5701,7 +5689,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_dio_private *dip; struct bio_vec *bvec = bio->bi_io_vec; - u64 start; int skip_sum; int write = rw & REQ_WRITE; int ret = 0; @@ -5727,7 +5714,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, dip->inode = inode; dip->logical_offset = file_offset; - start = dip->logical_offset; dip->bytes = 0; do { dip->bytes += bvec->bv_len; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8079ebfeaf50..60f662c4778b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -708,7 +708,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, char *sizestr; char *devstr = NULL; int ret = 0; - int namelen; int mod = 0; if (root->fs_info->sb->s_flags & MS_RDONLY) @@ -722,7 +721,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, return PTR_ERR(vol_args); vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; - namelen = strlen(vol_args->name); mutex_lock(&root->fs_info->volume_mutex); sizestr = vol_args->name; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index e56c72bc5add..f4621f6deca1 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -526,7 +526,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) { u64 end; u64 orig_end; - u64 wait_end; struct btrfs_ordered_extent *ordered; int found; @@ -537,7 +536,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) if (orig_end > INT_LIMIT(loff_t)) orig_end = INT_LIMIT(loff_t); } - wait_end = orig_end; again: /* start IO across the range first to instantiate any delalloc * extents diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 2d958be761c8..6a1086e83ffc 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -181,7 +181,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid) { struct btrfs_root *dead_root; - struct btrfs_item *item; struct btrfs_root_item *ri; struct btrfs_key key; struct btrfs_key found_key; @@ -214,7 +213,6 @@ again: nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; } - item = btrfs_item_nr(leaf, slot); btrfs_item_key_to_cpu(leaf, &key, slot); if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) goto next; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d7fb2733d028..0002e6d1a16f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -61,6 +61,8 @@ static void btrfs_put_super(struct super_block *sb) ret = close_ctree(root); sb->s_fs_info = NULL; + + (void)ret; /* FIXME: need to fix VFS to return error? */ } enum { @@ -445,7 +447,6 @@ static int btrfs_fill_super(struct super_block *sb, { struct inode *inode; struct dentry *root_dentry; - struct btrfs_super_block *disk_super; struct btrfs_root *tree_root; struct btrfs_key key; int err; @@ -467,7 +468,6 @@ static int btrfs_fill_super(struct super_block *sb, return PTR_ERR(tree_root); } sb->s_fs_info = tree_root; - disk_super = &tree_root->fs_info->super_copy; key.objectid = BTRFS_FIRST_FREE_OBJECTID; key.type = BTRFS_INODE_ITEM_KEY; @@ -580,7 +580,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, char *subvol_name = NULL; u64 subvol_objectid = 0; int error = 0; - int found = 0; if (!(flags & MS_RDONLY)) mode |= FMODE_WRITE; @@ -616,7 +615,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, goto error_close_devices; } - found = 1; btrfs_close_devices(fs_devices); } else { char b[BDEVNAME_SIZE]; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index f7ac8e013ed7..992ab425599d 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -36,7 +36,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int ret = 0; int wret; int level; - int orig_level; int is_extent = 0; int next_key_ret = 0; u64 last_ret = 0; @@ -64,7 +63,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, return -ENOMEM; level = btrfs_header_level(root->node); - orig_level = level; if (level == 0) goto out; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 224fb5b3daad..a29f19384a27 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -786,7 +786,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, { struct inode *dir; int ret; - struct btrfs_key location; struct btrfs_inode_ref *ref; struct btrfs_dir_item *di; struct inode *inode; @@ -795,10 +794,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, unsigned long ref_ptr; unsigned long ref_end; - location.objectid = key->objectid; - location.type = BTRFS_INODE_ITEM_KEY; - location.offset = 0; - /* * it is possible that we didn't log all the parent directories * for a given inode. If we don't find the dir, just don't @@ -1583,7 +1578,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, struct btrfs_path *path; struct btrfs_root *root = wc->replay_dest; struct btrfs_key key; - u32 item_size; int level; int i; int ret; @@ -1601,7 +1595,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, nritems = btrfs_header_nritems(eb); for (i = 0; i < nritems; i++) { btrfs_item_key_to_cpu(eb, &key, i); - item_size = btrfs_item_size_nr(eb, i); /* inode keys are done during the first stage */ if (key.type == BTRFS_INODE_ITEM_KEY && @@ -1668,7 +1661,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, struct walk_control *wc) { u64 root_owner; - u64 root_gen; u64 bytenr; u64 ptr_gen; struct extent_buffer *next; @@ -1698,7 +1690,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, parent = path->nodes[*level]; root_owner = btrfs_header_owner(parent); - root_gen = btrfs_header_generation(parent); next = btrfs_find_create_tree_block(root, bytenr, blocksize); @@ -1749,7 +1740,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, struct walk_control *wc) { u64 root_owner; - u64 root_gen; int i; int slot; int ret; @@ -1757,8 +1747,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { - struct extent_buffer *node; - node = path->nodes[i]; path->slots[i]++; *level = i; WARN_ON(*level == 0); @@ -1771,7 +1759,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, parent = path->nodes[*level + 1]; root_owner = btrfs_header_owner(parent); - root_gen = btrfs_header_generation(parent); wc->process_func(root, path->nodes[*level], wc, btrfs_header_generation(path->nodes[*level])); if (wc->free) { @@ -2729,7 +2716,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_key max_key; struct btrfs_root *log = root->log_root; struct extent_buffer *src = NULL; - u32 size; int err = 0; int ret; int nritems; @@ -2793,7 +2779,6 @@ again: break; src = path->nodes[0]; - size = btrfs_item_size_nr(src, path->slots[0]); if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { ins_nr++; goto next_slot; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 28681e729b1d..91851b555e2e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1901,7 +1901,6 @@ int btrfs_balance(struct btrfs_root *dev_root) u64 size_to_free; struct btrfs_path *path; struct btrfs_key key; - struct btrfs_chunk *chunk; struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; struct btrfs_trans_handle *trans; struct btrfs_key found_key; @@ -1965,9 +1964,6 @@ int btrfs_balance(struct btrfs_root *dev_root) if (found_key.objectid != key.objectid) break; - chunk = btrfs_item_ptr(path->nodes[0], - path->slots[0], - struct btrfs_chunk); /* chunk zero is special */ if (found_key.offset == 0) break; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 88ecbb215878..698fdd2c739c 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -178,7 +178,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) struct inode *inode = dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_path *path; - struct btrfs_item *item; struct extent_buffer *leaf; struct btrfs_dir_item *di; int ret = 0, slot, advance; @@ -234,7 +233,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) } advance = 1; - item = btrfs_item_nr(leaf, slot); btrfs_item_key_to_cpu(leaf, &found_key, slot); /* check to make sure this item is what we want */ diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 3e2b90eaa239..b9cd5445f71c 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -199,8 +199,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, int nr_pages = 0; struct page *in_page = NULL; struct page *out_page = NULL; - int out_written = 0; - int in_read = 0; unsigned long bytes_left; *out_pages = 0; @@ -233,9 +231,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, workspace->def_strm.avail_out = PAGE_CACHE_SIZE; workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); - out_written = 0; - in_read = 0; - while (workspace->def_strm.total_in < len) { ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); if (ret != Z_OK) { -- cgit v1.2.3-55-g7522 From 4260f7c7516f4c209cf0ca34fda99cc9a0847772 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 29 Oct 2010 15:46:43 -0400 Subject: Btrfs: allow subvol deletion by unprivileged user with -o user_subvol_rm_allowed Add a mount option user_subvol_rm_allowed that allows users to delete a (potentially non-empty!) subvol when they would otherwise we allowed to do an rmdir(2). We duplicate the may_delete() checks from the core VFS code to implement identical security checks (minus the directory size check). We additionally require that the user has write+exec permission on the subvol root inode. Signed-off-by: Sage Weil Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/ioctl.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/super.c | 5 +++ 3 files changed, 116 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e5d66b13c175..8db9234f6b41 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1234,6 +1234,7 @@ struct btrfs_root { #define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) #define BTRFS_MOUNT_SPACE_CACHE (1 << 12) #define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) +#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fdd88f2f1ece..463d91b4dd3a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -409,6 +409,76 @@ fail: return ret; } +/* copy of check_sticky in fs/namei.c() +* It's inline, so penalty for filesystems that don't use sticky bit is +* minimal. +*/ +static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) +{ + uid_t fsuid = current_fsuid(); + + if (!(dir->i_mode & S_ISVTX)) + return 0; + if (inode->i_uid == fsuid) + return 0; + if (dir->i_uid == fsuid) + return 0; + return !capable(CAP_FOWNER); +} + +/* copy of may_delete in fs/namei.c() + * Check whether we can remove a link victim from directory dir, check + * whether the type of victim is right. + * 1. We can't do it if dir is read-only (done in permission()) + * 2. We should have write and exec permissions on dir + * 3. We can't remove anything from append-only dir + * 4. We can't do anything with immutable dir (done in permission()) + * 5. If the sticky bit on dir is set we should either + * a. be owner of dir, or + * b. be owner of victim, or + * c. have CAP_FOWNER capability + * 6. If the victim is append-only or immutable we can't do antyhing with + * links pointing to it. + * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. + * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. + * 9. We can't remove a root or mountpoint. + * 10. We don't allow removal of NFS sillyrenamed files; it's handled by + * nfs_async_unlink(). + */ + +static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir) +{ + int error; + + if (!victim->d_inode) + return -ENOENT; + + BUG_ON(victim->d_parent->d_inode != dir); + audit_inode_child(victim, dir); + + error = inode_permission(dir, MAY_WRITE | MAY_EXEC); + if (error) + return error; + if (IS_APPEND(dir)) + return -EPERM; + if (btrfs_check_sticky(dir, victim->d_inode)|| + IS_APPEND(victim->d_inode)|| + IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) + return -EPERM; + if (isdir) { + if (!S_ISDIR(victim->d_inode->i_mode)) + return -ENOTDIR; + if (IS_ROOT(victim)) + return -EBUSY; + } else if (S_ISDIR(victim->d_inode->i_mode)) + return -EISDIR; + if (IS_DEADDIR(dir)) + return -ENOENT; + if (victim->d_flags & DCACHE_NFSFS_RENAMED) + return -EBUSY; + return 0; +} + /* copy of may_create in fs/namei.c() */ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) { @@ -1274,9 +1344,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, int ret; int err = 0; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); @@ -1306,13 +1373,51 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, } inode = dentry->d_inode; + dest = BTRFS_I(inode)->root; + if (!capable(CAP_SYS_ADMIN)){ + /* + * Regular user. Only allow this with a special mount + * option, when the user has write+exec access to the + * subvol root, and when rmdir(2) would have been + * allowed. + * + * Note that this is _not_ check that the subvol is + * empty or doesn't contain data that we wouldn't + * otherwise be able to delete. + * + * Users who want to delete empty subvols should try + * rmdir(2). + */ + err = -EPERM; + if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) + goto out_dput; + + /* + * Do not allow deletion if the parent dir is the same + * as the dir to be deleted. That means the ioctl + * must be called on the dentry referencing the root + * of the subvol, not a random directory contained + * within it. + */ + err = -EINVAL; + if (root == dest) + goto out_dput; + + err = inode_permission(inode, MAY_WRITE | MAY_EXEC); + if (err) + goto out_dput; + + /* check if subvolume may be deleted by a non-root user */ + err = btrfs_may_delete(dir, dentry, 1); + if (err) + goto out_dput; + } + if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { err = -EINVAL; goto out_dput; } - dest = BTRFS_I(inode)->root; - mutex_lock(&inode->i_mutex); err = d_invalidate(dentry); if (err) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0002e6d1a16f..718b10de2049 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -71,6 +71,7 @@ enum { Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err, + Opt_user_subvol_rm_allowed, }; static match_table_t tokens = { @@ -96,6 +97,7 @@ static match_table_t tokens = { {Opt_discard, "discard"}, {Opt_space_cache, "space_cache"}, {Opt_clear_cache, "clear_cache"}, + {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, {Opt_err, NULL}, }; @@ -246,6 +248,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) printk(KERN_INFO "btrfs: force clearing of disk cache\n"); btrfs_set_opt(info->mount_opt, CLEAR_CACHE); break; + case Opt_user_subvol_rm_allowed: + btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); + break; case Opt_err: printk(KERN_INFO "btrfs: unrecognized mount option " "'%s'\n", p); -- cgit v1.2.3-55-g7522