summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/backref.c22
-rw-r--r--fs/btrfs/compression.c6
-rw-r--r--fs/btrfs/ctree.c36
-rw-r--r--fs/btrfs/ctree.h66
-rw-r--r--fs/btrfs/delayed-inode.c6
-rw-r--r--fs/btrfs/delayed-inode.h2
-rw-r--r--fs/btrfs/delayed-ref.c12
-rw-r--r--fs/btrfs/dev-replace.c132
-rw-r--r--fs/btrfs/dev-replace.h7
-rw-r--r--fs/btrfs/disk-io.c68
-rw-r--r--fs/btrfs/extent-tree.c38
-rw-r--r--fs/btrfs/extent_io.c85
-rw-r--r--fs/btrfs/extent_io.h8
-rw-r--r--fs/btrfs/extent_map.c3
-rw-r--r--fs/btrfs/file-item.c92
-rw-r--r--fs/btrfs/file.c111
-rw-r--r--fs/btrfs/inode.c315
-rw-r--r--fs/btrfs/ioctl.c140
-rw-r--r--fs/btrfs/ordered-data.c3
-rw-r--r--fs/btrfs/print-tree.c23
-rw-r--r--fs/btrfs/reada.c268
-rw-r--r--fs/btrfs/root-tree.c2
-rw-r--r--fs/btrfs/scrub.c30
-rw-r--r--fs/btrfs/send.c36
-rw-r--r--fs/btrfs/super.c45
-rw-r--r--fs/btrfs/transaction.c13
-rw-r--r--fs/btrfs/volumes.c47
-rw-r--r--fs/btrfs/xattr.c2
-rw-r--r--fs/devpts/inode.c20
-rw-r--r--fs/xfs/xfs_log_recover.c4
30 files changed, 994 insertions, 648 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index b90cd3776f8e..80e8472d618b 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -148,8 +148,7 @@ int __init btrfs_prelim_ref_init(void)
void btrfs_prelim_ref_exit(void)
{
- if (btrfs_prelim_ref_cache)
- kmem_cache_destroy(btrfs_prelim_ref_cache);
+ kmem_cache_destroy(btrfs_prelim_ref_cache);
}
/*
@@ -566,17 +565,14 @@ static void __merge_refs(struct list_head *head, int mode)
struct __prelim_ref *pos2 = pos1, *tmp;
list_for_each_entry_safe_continue(pos2, tmp, head, list) {
- struct __prelim_ref *xchg, *ref1 = pos1, *ref2 = pos2;
+ struct __prelim_ref *ref1 = pos1, *ref2 = pos2;
struct extent_inode_elem *eie;
if (!ref_for_same_block(ref1, ref2))
continue;
if (mode == 1) {
- if (!ref1->parent && ref2->parent) {
- xchg = ref1;
- ref1 = ref2;
- ref2 = xchg;
- }
+ if (!ref1->parent && ref2->parent)
+ swap(ref1, ref2);
} else {
if (ref1->parent != ref2->parent)
continue;
@@ -1406,7 +1402,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
read_extent_buffer(eb, dest + bytes_left,
name_off, name_len);
if (eb != eb_in) {
- btrfs_tree_read_unlock_blocking(eb);
+ if (!path->skip_locking)
+ btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
}
ret = btrfs_find_item(fs_root, path, parent, 0,
@@ -1426,9 +1423,10 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
eb = path->nodes[0];
/* make sure we can use eb after releasing the path */
if (eb != eb_in) {
- atomic_inc(&eb->refs);
- btrfs_tree_read_lock(eb);
- btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+ if (!path->skip_locking)
+ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+ path->nodes[0] = NULL;
+ path->locks[0] = 0;
}
btrfs_release_path(path);
iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c473c42d7d6c..3346cd8f9910 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -637,11 +637,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
faili = nr_pages - 1;
cb->nr_pages = nr_pages;
- /* In the parent-locked case, we only locked the range we are
- * interested in. In all other cases, we can opportunistically
- * cache decompressed data that goes beyond the requested range. */
- if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
- add_ra_bio_pages(inode, em_start + em_len, cb);
+ add_ra_bio_pages(inode, em_start + em_len, cb);
/* include any pages we added in add_ra-bio_pages */
uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 769e0ff1b4ce..77592931ab4f 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -311,7 +311,7 @@ struct tree_mod_root {
struct tree_mod_elem {
struct rb_node node;
- u64 index; /* shifted logical */
+ u64 logical;
u64 seq;
enum mod_log_op op;
@@ -435,11 +435,11 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
/*
* key order of the log:
- * index -> sequence
+ * node/leaf start address -> sequence
*
- * the index is the shifted logical of the *new* root node for root replace
- * operations, or the shifted logical of the affected block for all other
- * operations.
+ * The 'start address' is the logical address of the *new* root node
+ * for root replace operations, or the logical address of the affected
+ * block for all other operations.
*
* Note: must be called with write lock (tree_mod_log_write_lock).
*/
@@ -460,9 +460,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
while (*new) {
cur = container_of(*new, struct tree_mod_elem, node);
parent = *new;
- if (cur->index < tm->index)
+ if (cur->logical < tm->logical)
new = &((*new)->rb_left);
- else if (cur->index > tm->index)
+ else if (cur->logical > tm->logical)
new = &((*new)->rb_right);
else if (cur->seq < tm->seq)
new = &((*new)->rb_left);
@@ -523,7 +523,7 @@ alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
if (!tm)
return NULL;
- tm->index = eb->start >> PAGE_CACHE_SHIFT;
+ tm->logical = eb->start;
if (op != MOD_LOG_KEY_ADD) {
btrfs_node_key(eb, &tm->key, slot);
tm->blockptr = btrfs_node_blockptr(eb, slot);
@@ -588,7 +588,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
goto free_tms;
}
- tm->index = eb->start >> PAGE_CACHE_SHIFT;
+ tm->logical = eb->start;
tm->slot = src_slot;
tm->move.dst_slot = dst_slot;
tm->move.nr_items = nr_items;
@@ -699,7 +699,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
goto free_tms;
}
- tm->index = new_root->start >> PAGE_CACHE_SHIFT;
+ tm->logical = new_root->start;
tm->old_root.logical = old_root->start;
tm->old_root.level = btrfs_header_level(old_root);
tm->generation = btrfs_header_generation(old_root);
@@ -739,16 +739,15 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
struct rb_node *node;
struct tree_mod_elem *cur = NULL;
struct tree_mod_elem *found = NULL;
- u64 index = start >> PAGE_CACHE_SHIFT;
tree_mod_log_read_lock(fs_info);
tm_root = &fs_info->tree_mod_log;
node = tm_root->rb_node;
while (node) {
cur = container_of(node, struct tree_mod_elem, node);
- if (cur->index < index) {
+ if (cur->logical < start) {
node = node->rb_left;
- } else if (cur->index > index) {
+ } else if (cur->logical > start) {
node = node->rb_right;
} else if (cur->seq < min_seq) {
node = node->rb_left;
@@ -1230,9 +1229,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
return NULL;
/*
- * the very last operation that's logged for a root is the replacement
- * operation (if it is replaced at all). this has the index of the *new*
- * root, making it the very first operation that's logged for this root.
+ * the very last operation that's logged for a root is the
+ * replacement operation (if it is replaced at all). this has
+ * the logical address of the *new* root, making it the very
+ * first operation that's logged for this root.
*/
while (1) {
tm = tree_mod_log_search_oldest(fs_info, root_logical,
@@ -1336,7 +1336,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
if (!next)
break;
tm = container_of(next, struct tree_mod_elem, node);
- if (tm->index != first_tm->index)
+ if (tm->logical != first_tm->logical)
break;
}
tree_mod_log_read_unlock(fs_info);
@@ -5361,7 +5361,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
goto out;
}
- tmp_buf = kmalloc(left_root->nodesize, GFP_NOFS);
+ tmp_buf = kmalloc(left_root->nodesize, GFP_KERNEL);
if (!tmp_buf) {
ret = -ENOMEM;
goto out;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6661ad8b4088..c3e634924149 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -100,6 +100,9 @@ struct btrfs_ordered_sum;
/* tracks free space in block groups. */
#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+/* device stats in the device tree */
+#define BTRFS_DEV_STATS_OBJECTID 0ULL
+
/* for storing balance parameters in the root tree */
#define BTRFS_BALANCE_OBJECTID -4ULL
@@ -1002,8 +1005,10 @@ struct btrfs_dev_replace {
pid_t lock_owner;
atomic_t nesting_level;
struct mutex lock_finishing_cancel_unmount;
- struct mutex lock_management_lock;
- struct mutex lock;
+ rwlock_t lock;
+ atomic_t read_locks;
+ atomic_t blocking_readers;
+ wait_queue_head_t read_lock_wq;
struct btrfs_scrub_progress scrub_progress;
};
@@ -1822,6 +1827,9 @@ struct btrfs_fs_info {
spinlock_t reada_lock;
struct radix_tree_root reada_tree;
+ /* readahead works cnt */
+ atomic_t reada_works_cnt;
+
/* Extent buffer radix tree */
spinlock_t buffer_lock;
struct radix_tree_root buffer_radix;
@@ -2185,13 +2193,43 @@ struct btrfs_ioctl_defrag_range_args {
*/
#define BTRFS_QGROUP_RELATION_KEY 246
+/*
+ * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY.
+ */
#define BTRFS_BALANCE_ITEM_KEY 248
/*
- * Persistantly stores the io stats in the device tree.
- * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid).
+ * The key type for tree items that are stored persistently, but do not need to
+ * exist for extended period of time. The items can exist in any tree.
+ *
+ * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data]
+ *
+ * Existing items:
+ *
+ * - balance status item
+ * (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0)
*/
-#define BTRFS_DEV_STATS_KEY 249
+#define BTRFS_TEMPORARY_ITEM_KEY 248
+
+/*
+ * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY
+ */
+#define BTRFS_DEV_STATS_KEY 249
+
+/*
+ * The key type for tree items that are stored persistently and usually exist
+ * for a long period, eg. filesystem lifetime. The item kinds can be status
+ * information, stats or preference values. The item can exist in any tree.
+ *
+ * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data]
+ *
+ * Existing items:
+ *
+ * - device statistics, store IO stats in the device tree, one key for all
+ * stats
+ * (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)
+ */
+#define BTRFS_PERSISTENT_ITEM_KEY 249
/*
* Persistantly stores the device replace state in the device tree.
@@ -2241,7 +2279,7 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
-#define BTRFS_MOUNT_RECOVERY (1 << 18)
+#define BTRFS_MOUNT_USEBACKUPROOT (1 << 18)
#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19)
#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20)
#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
@@ -2250,6 +2288,7 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24)
#define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25)
#define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26)
+#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27)
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
#define BTRFS_DEFAULT_MAX_INLINE (2048)
@@ -2353,6 +2392,9 @@ struct btrfs_map_token {
unsigned long offset;
};
+#define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \
+ ((bytes) >> (fs_info)->sb->s_blocksize_bits)
+
static inline void btrfs_init_map_token (struct btrfs_map_token *token)
{
token->kaddr = NULL;
@@ -3448,8 +3490,7 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes);
static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
unsigned num_items)
{
- return (root->nodesize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
- 2 * num_items;
+ return root->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
}
/*
@@ -4027,7 +4068,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *dir, u64 objectid,
const char *name, int name_len);
-int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
+int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
int front);
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -4151,7 +4192,8 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info);
ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
/* super.c */
-int btrfs_parse_options(struct btrfs_root *root, char *options);
+int btrfs_parse_options(struct btrfs_root *root, char *options,
+ unsigned long new_flags);
int btrfs_sync_fs(struct super_block *sb, int wait);
#ifdef CONFIG_PRINTK
@@ -4525,8 +4567,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
struct btrfs_key *start, struct btrfs_key *end);
int btrfs_reada_wait(void *handle);
void btrfs_reada_detach(void *handle);
-int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
- u64 start, int err);
+int btree_readahead_hook(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb, u64 start, int err);
static inline int is_fstree(u64 rootid)
{
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0be47e4b8136..a20d541bb190 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -43,8 +43,7 @@ int __init btrfs_delayed_inode_init(void)
void btrfs_delayed_inode_exit(void)
{
- if (delayed_node_cache)
- kmem_cache_destroy(delayed_node_cache);
+ kmem_cache_destroy(delayed_node_cache);
}
static inline void btrfs_init_delayed_node(
@@ -1689,7 +1688,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
*
*/
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
- struct list_head *ins_list)
+ struct list_head *ins_list, bool *emitted)
{
struct btrfs_dir_item *di;
struct btrfs_delayed_item *curr, *next;
@@ -1733,6 +1732,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
if (over)
return 1;
+ *emitted = true;
}
return 0;
}
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index f70119f25421..0167853c84ae 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -144,7 +144,7 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
int btrfs_should_delete_dir_index(struct list_head *del_list,
u64 index);
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
- struct list_head *ins_list);
+ struct list_head *ins_list, bool *emitted);
/* for init */
int __init btrfs_delayed_inode_init(void);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 914ac13bd92f..430b3689b112 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -929,14 +929,10 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
void btrfs_delayed_ref_exit(void)
{
- if (btrfs_delayed_ref_head_cachep)
- kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
- if (btrfs_delayed_tree_ref_cachep)
- kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
- if (btrfs_delayed_data_ref_cachep)
- kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
- if (btrfs_delayed_extent_op_cachep)
- kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
+ kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
+ kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
+ kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
+ kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}
int btrfs_delayed_ref_init(void)
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index cbb7dbfb3fff..ff2db7a6c894 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -202,13 +202,13 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
struct btrfs_dev_replace_item *ptr;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 0);
if (!dev_replace->is_valid ||
!dev_replace->item_needs_writeback) {
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
return 0;
}
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
key.objectid = 0;
key.type = BTRFS_DEV_REPLACE_KEY;
@@ -264,7 +264,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
ptr = btrfs_item_ptr(eb, path->slots[0],
struct btrfs_dev_replace_item);
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 1);
if (dev_replace->srcdev)
btrfs_set_dev_replace_src_devid(eb, ptr,
dev_replace->srcdev->devid);
@@ -287,7 +287,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
btrfs_set_dev_replace_cursor_right(eb, ptr,
dev_replace->cursor_right);
dev_replace->item_needs_writeback = 0;
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_mark_buffer_dirty(eb);
@@ -356,7 +356,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
return PTR_ERR(trans);
}
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -395,7 +395,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
dev_replace->is_valid = 1;
dev_replace->item_needs_writeback = 1;
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
if (ret)
@@ -407,7 +407,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 1);
goto leave;
}
@@ -433,7 +433,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
leave:
dev_replace->srcdev = NULL;
dev_replace->tgtdev = NULL;
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
return ret;
}
@@ -471,18 +471,18 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
/* don't allow cancel or unmount to disturb the finishing procedure */
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 0);
/* was the operation canceled, or is it finished? */
if (dev_replace->replace_state !=
BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) {
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return 0;
}
tgt_device = dev_replace->tgtdev;
src_device = dev_replace->srcdev;
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
/*
* flush all outstanding I/O and inode extent mappings before the
@@ -507,7 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
/* keep away write_all_supers() during the finishing procedure */
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
mutex_lock(&root->fs_info->chunk_mutex);
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 1);
dev_replace->replace_state =
scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
: BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
@@ -528,7 +528,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
rcu_str_deref(src_device->name),
src_device->devid,
rcu_str_deref(tgt_device->name), scrub_ret);
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
mutex_unlock(&root->fs_info->chunk_mutex);
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
mutex_unlock(&uuid_mutex);
@@ -565,7 +565,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
fs_info->fs_devices->rw_devices++;
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_rm_dev_replace_blocked(fs_info);
@@ -649,7 +649,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
struct btrfs_device *srcdev;
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 0);
/* even if !dev_replace_is_valid, the values are good enough for
* the replace_status ioctl */
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
@@ -675,7 +675,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
break;
}
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
}
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
@@ -698,13 +698,13 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
return -EROFS;
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
goto leave;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
@@ -717,7 +717,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
dev_replace->time_stopped = get_seconds();
dev_replace->item_needs_writeback = 1;
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_scrub_cancel(fs_info);
trans = btrfs_start_transaction(root, 0);
@@ -740,7 +740,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -756,7 +756,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
break;
}
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
}
@@ -766,12 +766,12 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
struct task_struct *task;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
return 0;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
break;
@@ -784,10 +784,10 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing");
btrfs_info(fs_info,
"you may cancel the operation after 'mount -o degraded'");
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
return 0;
}
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
WARN_ON(atomic_xchg(
&fs_info->mutually_exclusive_operation_running, 1));
@@ -802,7 +802,7 @@ static int btrfs_dev_replace_kthread(void *data)
struct btrfs_ioctl_dev_replace_args *status_args;
u64 progress;
- status_args = kzalloc(sizeof(*status_args), GFP_NOFS);
+ status_args = kzalloc(sizeof(*status_args), GFP_KERNEL);
if (status_args) {
btrfs_dev_replace_status(fs_info, status_args);
progress = status_args->status.progress_1000;
@@ -865,48 +865,58 @@ int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
return 1;
}
-void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace)
+void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw)
{
- /* the beginning is just an optimization for the typical case */
- if (atomic_read(&dev_replace->nesting_level) == 0) {
-acquire_lock:
- /* this is not a nested case where the same thread
- * is trying to acqurire the same lock twice */
- mutex_lock(&dev_replace->lock);
- mutex_lock(&dev_replace->lock_management_lock);
- dev_replace->lock_owner = current->pid;
- atomic_inc(&dev_replace->nesting_level);
- mutex_unlock(&dev_replace->lock_management_lock);
- return;
+ if (rw == 1) {
+ /* write */
+again:
+ wait_event(dev_replace->read_lock_wq,
+ atomic_read(&dev_replace->blocking_readers) == 0);
+ write_lock(&dev_replace->lock);
+ if (atomic_read(&dev_replace->blocking_readers)) {
+ write_unlock(&dev_replace->lock);
+ goto again;
+ }
+ } else {
+ read_lock(&dev_replace->lock);
+ atomic_inc(&dev_replace->read_locks);
}
+}
- mutex_lock(&dev_replace->lock_management_lock);
- if (atomic_read(&dev_replace->nesting_level) > 0 &&
- dev_replace->lock_owner == current->pid) {
- WARN_ON(!mutex_is_locked(&dev_replace->lock));
- atomic_inc(&dev_replace->nesting_level);
- mutex_unlock(&dev_replace->lock_management_lock);
- return;
+void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw)
+{
+ if (rw == 1) {
+ /* write */
+ ASSERT(atomic_read(&dev_replace->blocking_readers) == 0);
+ write_unlock(&dev_replace->lock);
+ } else {
+ ASSERT(atomic_read(&dev_replace->read_locks) > 0);
+ atomic_dec(&dev_replace->read_locks);
+ read_unlock(&dev_replace->lock);
}
+}
- mutex_unlock(&dev_replace->lock_management_lock);
- goto acquire_lock;
+/* inc blocking cnt and release read lock */
+void btrfs_dev_replace_set_lock_blocking(
+ struct btrfs_dev_replace *dev_replace)
+{
+ /* only set blocking for read lock */
+ ASSERT(atomic_read(&dev_replace->read_locks) > 0);
+ atomic_inc(&dev_replace->blocking_readers);
+ read_unlock(&dev_replace->lock);
}
-void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace)
+/* acquire read lock and dec blocking cnt */
+void btrfs_dev_replace_clear_lock_blocking(
+ struct btrfs_dev_replace *dev_replace)
{
- WARN_ON(!mutex_is_locked(&dev_replace->lock));
- mutex_lock(&dev_replace->lock_management_lock);
- WARN_ON(atomic_read(&dev_replace->nesting_level) < 1);
- WARN_ON(dev_replace->lock_owner != current->pid);
- atomic_dec(&dev_replace->nesting_level);
- if (atomic_read(&dev_replace->nesting_level) == 0) {
- dev_replace->lock_owner = 0;
- mutex_unlock(&dev_replace->lock_management_lock);
- mutex_unlock(&dev_replace->lock);
- } else {
- mutex_unlock(&dev_replace->lock_management_lock);
- }
+ /* only set blocking for read lock */
+ ASSERT(atomic_read(&dev_replace->read_locks) > 0);
+ ASSERT(atomic_read(&dev_replace->blocking_readers) > 0);
+ read_lock(&dev_replace->lock);
+ if (atomic_dec_and_test(&dev_replace->blocking_readers) &&
+ waitqueue_active(&dev_replace->read_lock_wq))
+ wake_up(&dev_replace->read_lock_wq);
}
void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h
index 20035cbbf021..29e3ef5f96bd 100644
--- a/fs/btrfs/dev-replace.h
+++ b/fs/btrfs/dev-replace.h
@@ -34,8 +34,11 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
-void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace);
-void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace);
+void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw);
+void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw);
+void btrfs_dev_replace_set_lock_blocking(struct btrfs_dev_replace *dev_replace);
+void btrfs_dev_replace_clear_lock_blocking(
+ struct btrfs_dev_replace *dev_replace);
static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value)
{
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4545e2e2ad45..a998ef15ec6d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -110,8 +110,7 @@ int __init btrfs_end_io_wq_init(void)
void btrfs_end_io_wq_exit(void)
{
- if (btrfs_end_io_wq_cache)
- kmem_cache_destroy(btrfs_end_io_wq_cache);
+ kmem_cache_destroy(btrfs_end_io_wq_cache);
}
/*
@@ -612,6 +611,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
int found_level;
struct extent_buffer *eb;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0;
int reads_done;
@@ -637,21 +637,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
- btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu",
- found_start, eb->start);
+ btrfs_err_rl(fs_info, "bad tree block start %llu %llu",
+ found_start, eb->start);
ret = -EIO;
goto err;
}
- if (check_tree_block_fsid(root->fs_info, eb)) {
- btrfs_err_rl(eb->fs_info, "bad fsid on block %llu",
- eb->start);
+ if (check_tree_block_fsid(fs_info, eb)) {
+ btrfs_err_rl(fs_info, "bad fsid on block %llu",
+ eb->start);
ret = -EIO;
goto err;
}
found_level = btrfs_header_level(eb);
if (found_level >= BTRFS_MAX_LEVEL) {
- btrfs_err(root->fs_info, "bad tree block level %d",
- (int)btrfs_header_level(eb));
+ btrfs_err(fs_info, "bad tree block level %d",
+ (int)btrfs_header_level(eb));
ret = -EIO;
goto err;
}
@@ -659,7 +659,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
eb, found_level);
- ret = csum_tree_block(root->fs_info, eb, 1);
+ ret = csum_tree_block(fs_info, eb, 1);
if (ret) {
ret = -EIO;
goto err;
@@ -680,7 +680,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
err:
if (reads_done &&
test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
- btree_readahead_hook(root, eb, eb->start, ret);
+ btree_readahead_hook(fs_info, eb, eb->start, ret);
if (ret) {
/*
@@ -699,14 +699,13 @@ out:
static int btree_io_failed_hook(struct page *page, int failed_mirror)
{
struct extent_buffer *eb;
- struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
eb = (struct extent_buffer *)page->private;
set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = failed_mirror;
atomic_dec(&eb->io_pages);
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
- btree_readahead_hook(root, eb, eb->start, -EIO);
+ btree_readahead_hook(eb->fs_info, eb, eb->start, -EIO);
return -EIO; /* we fixed nothing */
}
@@ -1296,9 +1295,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
spin_lock_init(&root->root_item_lock);
}
-static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
+static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
+ gfp_t flags)
{
- struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS);
+ struct btrfs_root *root = kzalloc(sizeof(*root), flags);
if (root)
root->fs_info = fs_info;
return root;
@@ -1310,7 +1310,7 @@ struct btrfs_root *btrfs_alloc_dummy_root(void)
{
struct btrfs_root *root;
- root = btrfs_alloc_root(NULL);
+ root = btrfs_alloc_root(NULL, GFP_KERNEL);
if (!root)
return ERR_PTR(-ENOMEM);
__setup_root(4096, 4096, 4096, root, NULL, 1);
@@ -1332,7 +1332,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
int ret = 0;
uuid_le uuid;
- root = btrfs_alloc_root(fs_info);
+ root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!root)
return ERR_PTR(-ENOMEM);
@@ -1408,7 +1408,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root = fs_info->tree_root;
struct extent_buffer *leaf;
- root = btrfs_alloc_root(fs_info);
+ root = btrfs_alloc_root(fs_info, GFP_NOFS);
if (!root)
return ERR_PTR(-ENOMEM);
@@ -1506,7 +1506,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
if (!path)
return ERR_PTR(-ENOMEM);
- root = btrfs_alloc_root(fs_info);
+ root = btrfs_alloc_root(fs_info, GFP_NOFS);
if (!root) {
ret = -ENOMEM;
goto alloc_fail;
@@ -2272,9 +2272,11 @@ static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
fs_info->dev_replace.lock_owner = 0;
atomic_set(&fs_info->dev_replace.nesting_level, 0);
mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
- mutex_init(&fs_info->dev_replace.lock_management_lock);
- mutex_init(&fs_info->dev_replace.lock);
+ rwlock_init(&fs_info->dev_replace.lock);
+ atomic_set(&fs_info->dev_replace.read_locks, 0);
+ atomic_set(&fs_info->dev_replace.blocking_readers, 0);
init_waitqueue_head(&fs_info->replace_wait);
+ init_waitqueue_head(&fs_info->dev_replace.read_lock_wq);
}
static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
@@ -2385,7 +2387,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
return -EIO;
}
- log_tree_root = btrfs_alloc_root(fs_info);
+ log_tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!log_tree_root)
return -ENOMEM;
@@ -2510,8 +2512,8 @@ int open_ctree(struct super_block *sb,
int backup_index = 0;
int max_active;
- tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
- chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
+ tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
+ chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!tree_root || !chunk_root) {
err = -ENOMEM;
goto fail;
@@ -2603,6 +2605,7 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->nr_async_bios, 0);
atomic_set(&fs_info->defrag_running, 0);
atomic_set(&fs_info->qgroup_op_seq, 0);
+ atomic_set(&fs_info->reada_works_cnt, 0);
atomic64_set(&fs_info->tree_mod_seq, 0);
fs_info->sb = sb;
fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
@@ -2622,7 +2625,7 @@ int open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->ordered_roots);
spin_lock_init(&fs_info->ordered_root_lock);
fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
- GFP_NOFS);
+ GFP_KERNEL);
if (!fs_info->delayed_root) {
err = -ENOMEM;
goto fail_iput;
@@ -2750,7 +2753,7 @@ int open_ctree(struct super_block *sb,
*/
fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
- ret = btrfs_parse_options(tree_root, options);
+ ret = btrfs_parse_options(tree_root, options, sb->s_flags);
if (ret) {
err = ret;
goto fail_alloc;
@@ -3029,8 +3032,9 @@ retry_root_backup:
if (ret)
goto fail_trans_kthread;
- /* do not make disk changes in broken FS */
- if (btrfs_super_log_root(disk_super) != 0) {
+ /* do not make disk changes in broken FS or nologreplay is given */
+ if (btrfs_super_log_root(disk_super) != 0 &&
+ !btrfs_test_opt(tree_root, NOLOGREPLAY)) {
ret = btrfs_replay_log(fs_info, fs_devices);
if (ret) {
err = ret;
@@ -3146,6 +3150,12 @@ retry_root_backup:
fs_info->open = 1;
+ /*
+ * backuproot only affect mount behavior, and if open_ctree succeeded,
+ * no need to keep the flag
+ */
+ btrfs_clear_opt(fs_info->mount_opt, USEBACKUPROOT);
+
return 0;
fail_qgroup:
@@ -3200,7 +3210,7 @@ fail:
return err;
recovery_tree_root:
- if (!btrfs_test_opt(tree_root, RECOVERY))
+ if (!btrfs_test_opt(tree_root, USEBACKUPROOT))
goto fail_tree_roots;
free_root_pointers(fs_info, 0);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e2287c7c10be..083783b53536 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4838,7 +4838,7 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
u64 thresh = div_factor_fine(space_info->total_bytes, 98);
/* If we're just plain full then async reclaim just slows us down. */
- if (space_info->bytes_used >= thresh)
+ if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
return 0;
return (used >= thresh && !btrfs_fs_closing(fs_info) &&
@@ -5373,27 +5373,33 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
block_rsv->size = min_t(u64, num_bytes, SZ_512M);
- num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
- sinfo->bytes_reserved + sinfo->bytes_readonly +
- sinfo->bytes_may_use;
-
- if (sinfo->total_bytes > num_bytes) {
- num_bytes = sinfo->total_bytes - num_bytes;
- block_rsv->reserved += num_bytes;
- sinfo->bytes_may_use += num_bytes;
- trace_btrfs_space_reservation(fs_info, "space_info",
- sinfo->flags, num_bytes, 1);
- }
-
- if (block_rsv->reserved >= block_rsv->size) {
+ if (block_rsv->reserved < block_rsv->size) {
+ num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
+ sinfo->bytes_reserved + sinfo->bytes_readonly +
+ sinfo->bytes_may_use;
+ if (sinfo->total_bytes > num_bytes) {
+ num_bytes = sinfo->total_bytes - num_bytes;
+ num_bytes = min(num_bytes,
+ block_rsv->size - block_rsv->reserved);
+ block_rsv->reserved += num_bytes;
+ sinfo->bytes_may_use += num_bytes;
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ sinfo->flags, num_bytes,
+ 1);
+ }
+ } else if (block_rsv->reserved > block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
sinfo->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
sinfo->flags, num_bytes, 0);
block_rsv->reserved = block_rsv->size;
- block_rsv->full = 1;
}
+ if (block_rsv->reserved == block_rsv->size)
+ block_rsv->full = 1;
+ else
+ block_rsv->full = 0;
+
spin_unlock(&block_rsv->lock);
spin_unlock(&sinfo->lock);
}
@@ -7018,7 +7024,7 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
int delalloc)
{
- struct btrfs_block_group_cache *used_bg;
+ struct btrfs_block_group_cache *used_bg = NULL;
bool locked = false;
again:
spin_lock(&cluster->refill_lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e7c97a3f344..76a0c8597d98 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -206,10 +206,8 @@ void extent_io_exit(void)
* destroy caches.
*/
rcu_barrier();
- if (extent_state_cache)
- kmem_cache_destroy(extent_state_cache);
- if (extent_buffer_cache)
- kmem_cache_destroy(extent_buffer_cache);
+ kmem_cache_destroy(extent_state_cache);
+ kmem_cache_destroy(extent_buffer_cache);
if (btrfs_bioset)
bioset_free(btrfs_bioset);
}
@@ -232,7 +230,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
if (!state)
return state;
state->state = 0;
- state->private = 0;
+ state->failrec = NULL;
RB_CLEAR_NODE(&state->rb_node);
btrfs_leak_debug_add(&state->leak_list, &states);
atomic_set(&state->refs, 1);
@@ -1844,7 +1842,8 @@ out:
* set the private field for a given byte offset in the tree. If there isn't
* an extent_state there already, this does nothing.
*/
-static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
+static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start,
+ struct io_failure_record *failrec)
{
struct rb_node *node;
struct extent_state *state;
@@ -1865,13 +1864,14 @@ static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private
ret = -ENOENT;
goto out;
}
- state->private = private;
+ state->failrec = failrec;
out:
spin_unlock(&tree->lock);
return ret;
}
-int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
+static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start,
+ struct io_failure_record **failrec)
{
struct rb_node *node;
struct extent_state *state;
@@ -1892,7 +1892,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
ret = -ENOENT;
goto out;
}
- *private = state->private;
+ *failrec = state->failrec;
out:
spin_unlock(&tree->lock);
return ret;
@@ -1972,7 +1972,7 @@ int free_io_failure(struct inode *inode, struct io_failure_record *rec)
int err = 0;
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
- set_state_private(failure_tree, rec->start, 0);
+ set_state_failrec(failure_tree, rec->start, NULL);
ret = clear_extent_bits(failure_tree, rec->start,
rec->start + rec->len - 1,
EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
@@ -2089,7 +2089,6 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page,
unsigned int pg_offset)
{
u64 private;
- u64 private_failure;
struct io_failure_record *failrec;
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct extent_state *state;
@@ -2102,12 +2101,11 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page,
if (!ret)
return 0;
- ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start,
- &private_failure);
+ ret = get_state_failrec(&BTRFS_I(inode)->io_failure_tree, start,
+ &failrec);
if (ret)
return 0;
- failrec = (struct io_failure_record *)(unsigned long) private_failure;
BUG_ON(!failrec->this_mirror);
if (failrec->in_validation) {
@@ -2167,7 +2165,7 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
next = next_state(state);
- failrec = (struct io_failure_record *)(unsigned long)state->private;
+ failrec = state->failrec;
free_extent_state(state);
kfree(failrec);
@@ -2177,10 +2175,9 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
}
int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
- struct io_failure_record **failrec_ret)
+ struct io_failure_record **failrec_ret)
{
struct io_failure_record *failrec;
- u64 private;
struct extent_map *em;
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
@@ -2188,7 +2185,7 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
int ret;
u64 logical;
- ret = get_state_private(failure_tree, start, &private);
+ ret = get_state_failrec(failure_tree, start, &failrec);
if (ret) {
failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
if (!failrec)
@@ -2237,8 +2234,7 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
ret = set_extent_bits(failure_tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
if (ret >= 0)
- ret = set_state_private(failure_tree, start,
- (u64)(unsigned long)failrec);
+ ret = set_state_failrec(failure_tree, start, failrec);
/* set the bits in the inode's tree */
if (ret >= 0)
ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED,
@@ -2248,7 +2244,6 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
return ret;
}
} else {
- failrec = (struct io_failure_record *)(unsigned long)private;
pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n",
failrec->logical, failrec->start, failrec->len,
failrec->in_validation);
@@ -2897,12 +2892,11 @@ static int __do_readpage(struct extent_io_tree *tree,
struct block_device *bdev;
int ret;
int nr = 0;
- int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
size_t pg_offset = 0;
size_t iosize;
size_t disk_io_size;
size_t blocksize = inode->i_sb->s_blocksize;
- unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
+ unsigned long this_bio_flag = 0;
set_page_extent_mapped(page);
@@ -2942,18 +2936,16 @@ static int __do_readpage(struct extent_io_tree *tree,
kunmap_atomic(userpage);
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
- if (!parent_locked)
- unlock_extent_cached(tree, cur,
- cur + iosize - 1,
- &cached, GFP_NOFS);
+ unlock_extent_cached(tree, cur,
+ cur + iosize - 1,
+ &cached, GFP_NOFS);
break;
}
em = __get_extent_map(inode, page, pg_offset, cur,
end - cur + 1, get_extent, em_cached);
if (IS_ERR_OR_NULL(em)) {
SetPageError(page);
- if (!parent_locked)
- unlock_extent(tree, cur, end);
+ unlock_extent(tree, cur, end);
break;
}
extent_offset = cur - em->start;
@@ -3038,12 +3030,9 @@ static int __do_readpage(struct extent_io_tree *tree,
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
- if (parent_locked)
- free_extent_state(cached);
- else
- unlock_extent_cached(tree, cur,
- cur + iosize - 1,
- &cached, GFP_NOFS);
+ unlock_extent_cached(tree, cur,
+ cur + iosize - 1,
+ &cached, GFP_NOFS);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -3052,8 +3041,7 @@ static int __do_readpage(struct extent_io_tree *tree,
if (test_range_bit(tree, cur, cur_end,
EXTENT_UPTODATE, 1, NULL)) {
check_page_uptodate(tree, page);
- if (!parent_locked)
- unlock_extent(tree, cur, cur + iosize - 1);
+ unlock_extent(tree, cur, cur + iosize - 1);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -3063,8 +3051,7 @@ static int __do_readpage(struct extent_io_tree *tree,
*/
if (block_start == EXTENT_MAP_INLINE) {
SetPageError(page);
- if (!parent_locked)
- unlock_extent(tree, cur, cur + iosize - 1);
+ unlock_extent(tree, cur, cur + iosize - 1);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -3083,8 +3070,7 @@ static int __do_readpage(struct extent_io_tree *tree,
*bio_flags = this_bio_flag;
} else {
SetPageError(page);
- if (!parent_locked)
- unlock_extent(tree, cur, cur + iosize - 1);
+ unlock_extent(tree, cur, cur + iosize - 1);
}
cur = cur + iosize;
pg_offset += iosize;
@@ -3186,7 +3172,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
while (1) {
lock_extent(tree, start, end);
- ordered = btrfs_lookup_ordered_extent(inode, start);
+ ordered = btrfs_lookup_ordered_range(inode, start,
+ PAGE_CACHE_SIZE);
if (!ordered)
break;
unlock_extent(tree, start, end);
@@ -3213,20 +3200,6 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
return ret;
}
-int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
- get_extent_t *get_extent, int mirror_num)
-{
- struct bio *bio = NULL;
- unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
- int ret;
-
- ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
- &bio_flags, READ, NULL);
- if (bio)
- ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
- return ret;
-}
-
static noinline void update_nr_written(struct page *page,
struct writeback_control *wbc,
unsigned long nr_written)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 0377413bd4b9..5dbf92e68fbd 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -29,7 +29,6 @@
*/
#define EXTENT_BIO_COMPRESSED 1
#define EXTENT_BIO_TREE_LOG 2
-#define EXTENT_BIO_PARENT_LOCKED 4
#define EXTENT_BIO_FLAG_SHIFT 16
/* these are bit numbers for test/set bit */
@@ -62,6 +61,7 @@
struct extent_state;
struct btrfs_root;
struct btrfs_io_bio;
+struct io_failure_record;
typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
struct bio *bio, int mirror_num,
@@ -112,8 +112,7 @@ struct extent_state {
atomic_t refs;
unsigned state;
- /* for use by the FS */
- u64 private;
+ struct io_failure_record *failrec;
#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
@@ -210,8 +209,6 @@ static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num);
-int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
- get_extent_t *get_extent, int mirror_num);
int __init extent_io_init(void);
void extent_io_exit(void);
@@ -345,7 +342,6 @@ int extent_readpages(struct extent_io_tree *tree,
get_extent_t get_extent);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent);
-int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 84fb56d5c018..cdbadeaef202 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -20,8 +20,7 @@ int __init extent_map_init(void)
void extent_map_exit(void)
{
- if (extent_map_cache)
- kmem_cache_destroy(extent_map_cache);
+ kmem_cache_destroy(extent_map_cache);
}
/**
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a67e1c828d0f..1c50a7b09b4e 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -172,6 +172,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
u64 item_start_offset = 0;
u64 item_last_offset = 0;
u64 disk_bytenr;
+ u64 page_bytes_left;
u32 diff;
int nblocks;
int bio_index = 0;
@@ -220,6 +221,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;
if (dio)
offset = logical_offset;
+
+ page_bytes_left = bvec->bv_len;
while (bio_index < bio->bi_vcnt) {
if (!dio)
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
@@ -243,7 +246,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
if (BTRFS_I(inode)->root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
set_extent_bits(io_tree, offset,
- offset + bvec->bv_len - 1,
+ offset + root->sectorsize - 1,
EXTENT_NODATASUM, GFP_NOFS);
} else {
btrfs_info(BTRFS_I(inode)->root->fs_info,
@@ -281,11 +284,17 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
found:
csum += count * csum_size;
nblocks -= count;
- bio_index += count;
+
while (count--) {
- disk_bytenr += bvec->bv_len;
- offset += bvec->bv_len;
- bvec++;
+ disk_bytenr += root->sectorsize;
+ offset += root->sectorsize;
+ page_bytes_left -= root->sectorsize;
+ if (!page_bytes_left) {
+ bio_index++;
+ bvec++;
+ page_bytes_left = bvec->bv_len;
+ }
+
}
}
btrfs_free_path(path);
@@ -432,6 +441,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
struct bio_vec *bvec = bio->bi_io_vec;
int bio_index = 0;
int index;
+ int nr_sectors;
+ int i;
unsigned long total_bytes = 0;
unsigned long this_sum_bytes = 0;
u64 offset;
@@ -459,41 +470,56 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
if (!contig)
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
- if (offset >= ordered->file_offset + ordered->len ||
- offset < ordered->file_offset) {
- unsigned long bytes_left;
- sums->len = this_sum_bytes;
- this_sum_bytes = 0;
- btrfs_add_ordered_sum(inode, ordered, sums);
- btrfs_put_ordered_extent(ordered);
+ data = kmap_atomic(bvec->bv_page);
- bytes_left = bio->bi_iter.bi_size - total_bytes;
+ nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+ bvec->bv_len + root->sectorsize
+ - 1);
+
+ for (i = 0; i < nr_sectors; i++) {
+ if (offset >= ordered->file_offset + ordered->len ||
+ offset < ordered->file_offset) {
+ unsigned long bytes_left;
+
+ kunmap_atomic(data);
+ sums->len = this_sum_bytes;
+ this_sum_bytes = 0;
+ btrfs_add_ordered_sum(inode, ordered, sums);
+ btrfs_put_ordered_extent(ordered);
+
+ bytes_left = bio->bi_iter.bi_size - total_bytes;
+
+ sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
+ GFP_NOFS);
+ BUG_ON(!sums); /* -ENOMEM */
+ sums->len = bytes_left;
+ ordered = btrfs_lookup_ordered_extent(inode,
+ offset);
+ ASSERT(ordered); /* Logic error */
+ sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9)
+ + total_bytes;
+ index = 0;
+
+ data = kmap_atomic(bvec->bv_page);
+ }
- sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
- GFP_NOFS);
- BUG_ON(!sums); /* -ENOMEM */
- sums->len = bytes_left;
- ordered = btrfs_lookup_ordered_extent(inode, offset);
- BUG_ON(!ordered); /* Logic error */
- sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) +
- total_bytes;
- index = 0;
+ sums->sums[index] = ~(u32)0;
+ sums->sums[index]
+ = btrfs_csum_data(data + bvec->bv_offset
+ + (i * root->sectorsize),
+ sums->sums[index],
+ root->sectorsize);
+ btrfs_csum_final(sums->sums[index],
+ (char *)(sums->sums + index));
+ index++;
+ offset += root->sectorsize;
+ this_sum_bytes += root->sectorsize;
+ total_bytes += root->sectorsize;
}
- data = kmap_atomic(bvec->bv_page);
- sums->sums[index] = ~(u32)0;
- sums->sums[index] = btrfs_csum_data(data + bvec->bv_offset,
- sums->sums[index],
- bvec->bv_len);
kunmap_atomic(data);
- btrfs_csum_final(sums->sums[index],
- (char *)(sums->sums + index));
bio_index++;
- index++;
- total_bytes += bvec->bv_len;
- this_sum_bytes += bvec->bv_len;
- offset += bvec->bv_len;
bvec++;
}
this_sum_bytes = 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index cadfebaaf8c4..03de2466db23 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -498,7 +498,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
loff_t isize = i_size_read(inode);
start_pos = pos & ~((u64)root->sectorsize - 1);
- num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize);
+ num_bytes = round_up(write_bytes + pos - start_pos, root->sectorsize);
end_of_last_block = start_pos + num_bytes - 1;
err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
@@ -1379,16 +1379,19 @@ fail:
static noinline int
lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
size_t num_pages, loff_t pos,
+ size_t write_bytes,
u64 *lockstart, u64 *lockend,
struct extent_state **cached_state)
{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
u64 start_pos;
u64 last_pos;
int i;
int ret = 0;
- start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
- last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1;
+ start_pos = round_down(pos, root->sectorsize);
+ last_pos = start_pos
+ + round_up(pos + write_bytes - start_pos, root->sectorsize) - 1;
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
@@ -1503,6 +1506,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
while (iov_iter_count(i) > 0) {
size_t offset = pos & (PAGE_CACHE_SIZE - 1);
+ size_t sector_offset;
size_t write_bytes = min(iov_iter_count(i),
nrptrs * (size_t)PAGE_CACHE_SIZE -
offset);
@@ -1511,6 +1515,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
size_t reserve_bytes;
size_t dirty_pages;
size_t copied;
+ size_t dirty_sectors;
+ size_t num_sectors;
WARN_ON(num_pages > nrptrs);
@@ -1523,7 +1529,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
break;
}
- reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
+ sector_offset = pos & (root->sectorsize - 1);
+ reserve_bytes = round_up(write_bytes + sector_offset,
+ root->sectorsize);
if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) &&
@@ -1539,7 +1547,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
*/
num_pages = DIV_ROUND_UP(write_bytes + offset,
PAGE_CACHE_SIZE);
- reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
+ reserve_bytes = round_up(write_bytes + sector_offset,
+ root->sectorsize);
goto reserve_metadata;
}
@@ -1573,8 +1582,8 @@ again:
break;
ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
- pos, &lockstart, &lockend,
- &cached_state);
+ pos, write_bytes, &lockstart,
+ &lockend, &cached_state);
if (ret < 0) {
if (ret == -EAGAIN)
goto again;
@@ -1609,9 +1618,16 @@ again:
* we still have an outstanding extent for the chunk we actually
* managed to copy.
*/
- if (num_pages > dirty_pages) {
- release_bytes = (num_pages - dirty_pages) <<
- PAGE_CACHE_SHIFT;
+ num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+ reserve_bytes);
+ dirty_sectors = round_up(copied + sector_offset,
+ root->sectorsize);
+ dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+ dirty_sectors);
+
+ if (num_sectors > dirty_sectors) {
+ release_bytes = (write_bytes - copied)
+ & ~((u64)root->sectorsize - 1);
if (copied > 0) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
@@ -1630,7 +1646,8 @@ again:
}
}
- release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
+ release_bytes = round_up(copied + sector_offset,
+ root->sectorsize);
if (copied > 0)
ret = btrfs_dirty_pages(root, inode, pages,
@@ -1651,8 +1668,7 @@ again:
if (only_release_metadata && copied > 0) {
lockstart = round_down(pos, root->sectorsize);
- lockend = lockstart +
- (dirty_pages << PAGE_CACHE_SHIFT) - 1;
+ lockend = round_up(pos + copied, root->sectorsize) - 1;
set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, EXTENT_NORESERVE, NULL,
@@ -1758,6 +1774,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
ssize_t err;
loff_t pos;
size_t count;
+ loff_t oldsize;
+ int clean_page = 0;
inode_lock(inode);
err = generic_write_checks(iocb, from);
@@ -1796,14 +1814,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
pos = iocb->ki_pos;
count = iov_iter_count(from);
start_pos = round_down(pos, root->sectorsize);
- if (start_pos > i_size_read(inode)) {
+ oldsize = i_size_read(inode);
+ if (start_pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */
end_pos = round_up(pos + count, root->sectorsize);
- err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
+ err = btrfs_cont_expand(inode, oldsize, end_pos);
if (err) {
inode_unlock(inode);
goto out;
}
+ if (start_pos > round_up(oldsize, root->sectorsize))
+ clean_page = 1;
}
if (sync)
@@ -1815,6 +1836,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
num_written = __btrfs_buffered_write(file, from, pos);
if (num_written > 0)
iocb->ki_pos = pos + num_written;
+ if (clean_page)
+ pagecache_isize_extended(inode, oldsize,
+ i_size_read(inode));
}
inode_unlock(inode);
@@ -2290,10 +2314,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
int ret = 0;
int err = 0;
unsigned int rsv_count;
- bool same_page;
+ bool same_block;
bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
u64 ino_size;
- bool truncated_page = false;
+ bool truncated_block = false;
bool updated_inode = false;
ret = btrfs_wait_ordered_range(inode, offset, len);
@@ -2301,7 +2325,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
return ret;
inode_lock(inode);
- ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
+ ino_size = round_up(inode->i_size, root->sectorsize);
ret = find_first_non_hole(inode, &offset, &len);
if (ret < 0)
goto out_only_mutex;
@@ -2314,31 +2338,30 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
lockend = round_down(offset + len,
BTRFS_I(inode)->root->sectorsize) - 1;
- same_page = ((offset >> PAGE_CACHE_SHIFT) ==
- ((offset + len - 1) >> PAGE_CACHE_SHIFT));
-
+ same_block = (BTRFS_BYTES_TO_BLKS(root->fs_info, offset))
+ == (BTRFS_BYTES_TO_BLKS(root->fs_info, offset + len - 1));
/*
- * We needn't truncate any page which is beyond the end of the file
+ * We needn't truncate any block which is beyond the end of the file
* because we are sure there is no data there.
*/
/*
- * Only do this if we are in the same page and we aren't doing the
- * entire page.
+ * Only do this if we are in the same block and we aren't doing the
+ * entire block.
*/
- if (same_page && len < PAGE_CACHE_SIZE) {
+ if (same_block && len < root->sectorsize) {
if (offset < ino_size) {
- truncated_page = true;
- ret = btrfs_truncate_page(inode, offset, len, 0);
+ truncated_block = true;
+ ret = btrfs_truncate_block(inode, offset, len, 0);
} else {
ret = 0;
}
goto out_only_mutex;
}
- /* zero back part of the first page */
+ /* zero back part of the first block */
if (offset < ino_size) {
- truncated_page = true;
- ret = btrfs_truncate_page(inode, offset, 0, 0);
+ truncated_block = true;
+ ret = btrfs_truncate_block(inode, offset, 0, 0);
if (ret) {
inode_unlock(inode);
return ret;
@@ -2373,9 +2396,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (!ret) {
/* zero the front end of the last page */
if (tail_start + tail_len < ino_size) {
- truncated_page = true;
- ret = btrfs_truncate_page(inode,
- tail_start + tail_len, 0, 1);
+ truncated_block = true;
+ ret = btrfs_truncate_block(inode,
+ tail_start + tail_len,
+ 0, 1);
if (ret)
goto out_only_mutex;
}
@@ -2541,7 +2565,7 @@ out_trans:
goto out_free;
inode_inc_iversion(inode);
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
trans->block_rsv = &root->fs_info->trans_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
@@ -2555,7 +2579,7 @@ out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS);
out_only_mutex:
- if (!updated_inode && truncated_page && !ret && !err) {
+ if (!updated_inode && truncated_block && !ret && !err) {
/*
* If we only end up zeroing part of a page, we still need to
* update the inode item, so that all the time fields are
@@ -2608,7 +2632,7 @@ static int add_falloc_range(struct list_head *head, u64 start, u64 len)
return 0;
}
insert:
- range = kmalloc(sizeof(*range), GFP_NOFS);
+ range = kmalloc(sizeof(*range), GFP_KERNEL);
if (!range)
return -ENOMEM;
range->start = start;
@@ -2675,10 +2699,10 @@ static long btrfs_fallocate(struct file *file, int mode,
} else if (offset + len > inode->i_size) {
/*
* If we are fallocating from the end of the file onward we
- * need to zero out the end of the page if i_size lands in the
- * middle of a page.
+ * need to zero out the end of the block if i_size lands in the
+ * middle of a block.
*/
- ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
+ ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
if (ret)
goto out;
}
@@ -2709,7 +2733,7 @@ static long btrfs_fallocate(struct file *file, int mode,
btrfs_put_ordered_extent(ordered);
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
alloc_start, locked_end,
- &cached_state, GFP_NOFS);
+ &cached_state, GFP_KERNEL);
/*
* we can't wait on the range with the transaction
* running or with the extent lock held
@@ -2791,7 +2815,7 @@ static long btrfs_fallocate(struct file *file, int mode,
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
} else {
- inode->i_ctime = CURRENT_TIME;
+ inode->i_ctime = current_fs_time(inode->i_sb);
i_size_write(inode, actual_end);
btrfs_ordered_update_i_size(inode, actual_end, NULL);
ret = btrfs_update_inode(trans, root, inode);
@@ -2803,7 +2827,7 @@ static long btrfs_fallocate(struct file *file, int mode,
}
out_unlock:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
- &cached_state, GFP_NOFS);
+ &cached_state, GFP_KERNEL);
out:
/*
* As we waited the extent range, the data_rsv_map must be empty
@@ -2936,8 +2960,7 @@ const struct file_operations btrfs_file_operations = {
void btrfs_auto_defrag_exit(void)
{
- if (btrfs_inode_defrag_cachep)
- kmem_cache_destroy(btrfs_inode_defrag_cachep);
+ kmem_cache_destroy(btrfs_inode_defrag_cachep);
}
int btrfs_auto_defrag_init(void)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5f06eb1f4384..d0d152f39771 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -263,7 +263,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
data_len = compressed_size;
if (start > 0 ||
- actual_end > PAGE_CACHE_SIZE ||
+ actual_end > root->sectorsize ||
data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) ||
(!compressed_size &&
(actual_end & (root->sectorsize - 1)) == 0) ||
@@ -2002,7 +2002,8 @@ again:
if (PagePrivate2(page))
goto out;
- ordered = btrfs_lookup_ordered_extent(inode, page_start);
+ ordered = btrfs_lookup_ordered_range(inode, page_start,
+ PAGE_CACHE_SIZE);
if (ordered) {
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
page_end, &cached_state, GFP_NOFS);
@@ -4013,7 +4014,8 @@ err:
btrfs_i_size_write(dir, dir->i_size - name_len * 2);
inode_inc_iversion(inode);
inode_inc_iversion(dir);
- inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ inode->i_ctime = dir->i_mtime =
+ dir->i_ctime = current_fs_time(inode->i_sb);
ret = btrfs_update_inode(trans, root, dir);
out:
return ret;
@@ -4156,7 +4158,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
btrfs_i_size_write(dir, dir->i_size - name_len * 2);
inode_inc_iversion(dir);
- dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
ret = btrfs_update_inode_fallback(trans, root, dir);
if (ret)
btrfs_abort_transaction(trans, root, ret);
@@ -4211,11 +4213,20 @@ static int truncate_space_check(struct btrfs_trans_handle *trans,
{
int ret;
+ /*
+ * This is only used to apply pressure to the enospc system, we don't
+ * intend to use this reservation at all.
+ */
bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted);
+ bytes_deleted *= root->nodesize;
ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv,
bytes_deleted, BTRFS_RESERVE_NO_FLUSH);
- if (!ret)
+ if (!ret) {
+ trace_btrfs_space_reservation(root->fs_info, "transaction",
+ trans->transid,
+ bytes_deleted, 1);
trans->bytes_reserved += bytes_deleted;
+ }
return ret;
}
@@ -4248,7 +4259,8 @@ static int truncate_inline_extent(struct inode *inode,
* read the extent item from disk (data not in the page cache).
*/
btrfs_release_path(path);
- return btrfs_truncate_page(inode, offset, page_end - offset, 0);
+ return btrfs_truncate_block(inode, offset, page_end - offset,
+ 0);
}
btrfs_set_file_extent_ram_bytes(leaf, fi, size);
@@ -4601,17 +4613,17 @@ error:
}
/*
- * btrfs_truncate_page - read, zero a chunk and write a page
+ * btrfs_truncate_block - read, zero a chunk and write a block
* @inode - inode that we're zeroing
* @from - the offset to start zeroing
* @len - the length to zero, 0 to zero the entire range respective to the
* offset
* @front - zero up to the offset instead of from the offset on
*
- * This will find the page for the "from" offset and cow the page and zero the
+ * This will find the block for the "from" offset and cow the block and zero the
* part we want to zero. This is used with truncate and hole punching.
*/
-int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
+int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
int front)
{
struct address_space *mapping = inode->i_mapping;
@@ -4622,18 +4634,19 @@ int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
char *kaddr;
u32 blocksize = root->sectorsize;
pgoff_t index = from >> PAGE_CACHE_SHIFT;
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
+ unsigned offset = from & (blocksize - 1);
struct page *page;
gfp_t mask = btrfs_alloc_write_mask(mapping);
int ret = 0;
- u64 page_start;
- u64 page_end;
+ u64 block_start;
+ u64 block_end;
if ((offset & (blocksize - 1)) == 0 &&
(!len || ((len & (blocksize - 1)) == 0)))
goto out;
+
ret = btrfs_delalloc_reserve_space(inode,
- round_down(from, PAGE_CACHE_SIZE), PAGE_CACHE_SIZE);
+ round_down(from, blocksize), blocksize);
if (ret)
goto out;
@@ -4641,14 +4654,14 @@ again:
page = find_or_create_page(mapping, index, mask);
if (!page) {
btrfs_delalloc_release_space(inode,
- round_down(from, PAGE_CACHE_SIZE),
- PAGE_CACHE_SIZE);
+ round_down(from, blocksize),
+ blocksize);
ret = -ENOMEM;
goto out;
}
- page_start = page_offset(page);
- page_end = page_start + PAGE_CACHE_SIZE - 1;
+ block_start = round_down(from, blocksize);
+ block_end = block_start + blocksize - 1;
if (!PageUptodate(page)) {
ret = btrfs_readpage(NULL, page);
@@ -4665,12 +4678,12 @@ again:
}
wait_on_page_writeback(page);
- lock_extent_bits(io_tree, page_start, page_end, &cached_state);
+ lock_extent_bits(io_tree, block_start, block_end, &cached_state);
set_page_extent_mapped(page);
- ordered = btrfs_lookup_ordered_extent(inode, page_start);
+ ordered = btrfs_lookup_ordered_extent(inode, block_start);
if (ordered) {
- unlock_extent_cached(io_tree, page_start, page_end,
+ unlock_extent_cached(io_tree, block_start, block_end,
&cached_state, GFP_NOFS);
unlock_page(page);
page_cache_release(page);
@@ -4679,39 +4692,41 @@ again:
goto again;
}
- clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, &cached_state, GFP_NOFS);
- ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
+ ret = btrfs_set_extent_delalloc(inode, block_start, block_end,
&cached_state);
if (ret) {
- unlock_extent_cached(io_tree, page_start, page_end,
+ unlock_extent_cached(io_tree, block_start, block_end,
&cached_state, GFP_NOFS);
goto out_unlock;
}
- if (offset != PAGE_CACHE_SIZE) {
+ if (offset != blocksize) {
if (!len)
- len = PAGE_CACHE_SIZE - offset;
+ len = blocksize - offset;
kaddr = kmap(page);
if (front)
- memset(kaddr, 0, offset);
+ memset(kaddr + (block_start - page_offset(page)),
+ 0, offset);
else
- memset(kaddr + offset, 0, len);
+ memset(kaddr + (block_start - page_offset(page)) + offset,
+ 0, len);
flush_dcache_page(page);
kunmap(page);
}
ClearPageChecked(page);
set_page_dirty(page);
- unlock_extent_cached(io_tree, page_start, page_end, &cached_state,
+ unlock_extent_cached(io_tree, block_start, block_end, &cached_state,
GFP_NOFS);
out_unlock:
if (ret)
- btrfs_delalloc_release_space(inode, page_start,
- PAGE_CACHE_SIZE);
+ btrfs_delalloc_release_space(inode, block_start,
+ blocksize);
unlock_page(page);
page_cache_release(page);
out:
@@ -4782,11 +4797,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
int err = 0;
/*
- * If our size started in the middle of a page we need to zero out the
- * rest of the page before we expand the i_size, otherwise we could
+ * If our size started in the middle of a block we need to zero out the
+ * rest of the block before we expand the i_size, otherwise we could
* expose stale data.
*/
- err = btrfs_truncate_page(inode, oldsize, 0, 0);
+ err = btrfs_truncate_block(inode, oldsize, 0, 0);
if (err)
return err;
@@ -4895,7 +4910,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
}
if (newsize > oldsize) {
- truncate_pagecache(inode, newsize);
/*
* Don't do an expanding truncate while snapshoting is ongoing.
* This is to ensure the snapshot captures a fully consistent
@@ -4918,6 +4932,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
i_size_write(inode, newsize);
btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
+ pagecache_isize_extended(inode, oldsize, newsize);
ret = btrfs_update_inode(trans, root, inode);
btrfs_end_write_no_snapshoting(root);
btrfs_end_transaction(trans, root);
@@ -5588,7 +5603,7 @@ static struct inode *new_simple_dir(struct super_block *s,
inode->i_op = &btrfs_dir_ro_inode_operations;
inode->i_fop = &simple_dir_operations;
inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
- inode->i_mtime = CURRENT_TIME;
+ inode->i_mtime = current_fs_time(inode->i_sb);
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
BTRFS_I(inode)->i_otime = inode->i_mtime;
@@ -5717,6 +5732,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
char *name_ptr;
int name_len;
int is_curr = 0; /* ctx->pos points to the current index? */
+ bool emitted;
/* FIXME, use a real flag for deciding about the key type */
if (root->fs_info->tree_root == root)
@@ -5745,6 +5761,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
if (ret < 0)
goto err;
+ emitted = false;
while (1) {
leaf = path->nodes[0];
slot = path->slots[0];
@@ -5788,7 +5805,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
if (name_len <= sizeof(tmp_name)) {
name_ptr = tmp_name;
} else {
- name_ptr = kmalloc(name_len, GFP_NOFS);
+ name_ptr = kmalloc(name_len, GFP_KERNEL);
if (!name_ptr) {
ret = -ENOMEM;
goto err;
@@ -5824,6 +5841,7 @@ skip:
if (over)
goto nopos;
+ emitted = true;
di_len = btrfs_dir_name_len(leaf, di) +
btrfs_dir_data_len(leaf, di) + sizeof(*di);
di_cur += di_len;
@@ -5836,11 +5854,20 @@ next:
if (key_type == BTRFS_DIR_INDEX_KEY) {
if (is_curr)
ctx->pos++;
- ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
+ ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted);
if (ret)
goto nopos;
}
+ /*
+ * If we haven't emitted any dir entry, we must not touch ctx->pos as
+ * it was was set to the termination value in previous call. We assume
+ * that "." and ".." were emitted if we reach this point and set the
+ * termination value as well for an empty directory.
+ */
+ if (ctx->pos > 2 && !emitted)
+ goto nopos;
+
/* Reached end of directory/root. Bump pos past the last item. */
ctx->pos++;
@@ -6160,7 +6187,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode_init_owner(inode, dir, mode);
inode_set_bytes(inode, 0);
- inode->i_mtime = CURRENT_TIME;
+ inode->i_mtime = current_fs_time(inode->i_sb);
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
BTRFS_I(inode)->i_otime = inode->i_mtime;
@@ -6273,7 +6300,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
btrfs_i_size_write(parent_inode, parent_inode->i_size +
name_len * 2);
inode_inc_iversion(parent_inode);
- parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
+ parent_inode->i_mtime = parent_inode->i_ctime =
+ current_fs_time(parent_inode->i_sb);
ret = btrfs_update_inode(trans, root, parent_inode);
if (ret)
btrfs_abort_transaction(trans, root, ret);
@@ -6491,7 +6519,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
BTRFS_I(inode)->dir_index = 0ULL;
inc_nlink(inode);
inode_inc_iversion(inode);
- inode->i_ctime = CURRENT_TIME;
+ inode->i_ctime = current_fs_time(inode->i_sb);
ihold(inode);
set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
@@ -7752,9 +7780,9 @@ static int btrfs_check_dio_repairable(struct inode *inode,
}
static int dio_read_error(struct inode *inode, struct bio *failed_bio,
- struct page *page, u64 start, u64 end,
- int failed_mirror, bio_end_io_t *repair_endio,
- void *repair_arg)
+ struct page *page, unsigned int pgoff,
+ u64 start, u64 end, int failed_mirror,
+ bio_end_io_t *repair_endio, void *repair_arg)
{
struct io_failure_record *failrec;
struct bio *bio;
@@ -7775,7 +7803,9 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
return -EIO;
}
- if (failed_bio->bi_vcnt > 1)
+ if ((failed_bio->bi_vcnt > 1)
+ || (failed_bio->bi_io_vec->bv_len
+ > BTRFS_I(inode)->root->sectorsize))
read_mode = READ_SYNC | REQ_FAILFAST_DEV;
else
read_mode = READ_SYNC;
@@ -7783,7 +7813,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
isector = start - btrfs_io_bio(failed_bio)->logical;
isector >>= inode->i_sb->s_blocksize_bits;
bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
- 0, isector, repair_endio, repair_arg);
+ pgoff, isector, repair_endio, repair_arg);
if (!bio) {
free_io_failure(inode, failrec);
return -EIO;
@@ -7813,12 +7843,17 @@ struct btrfs_retry_complete {
static void btrfs_retry_endio_nocsum(struct bio *bio)
{
struct btrfs_retry_complete *done = bio->bi_private;
+ struct inode *inode;
struct bio_vec *bvec;
int i;
if (bio->bi_error)
goto end;
+ ASSERT(bio->bi_vcnt == 1);
+ inode = bio->bi_io_vec->bv_page->mapping->host;
+ ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize);
+
done->uptodate = 1;
bio_for_each_segment_all(bvec, bio, i)
clean_io_failure(done->inode, done->start, bvec->bv_page, 0);
@@ -7830,25 +7865,35 @@ end:
static int __btrfs_correct_data_nocsum(struct inode *inode,
struct btrfs_io_bio *io_bio)
{
+ struct btrfs_fs_info *fs_info;
struct bio_vec *bvec;
struct btrfs_retry_complete done;
u64 start;
+ unsigned int pgoff;
+ u32 sectorsize;
+ int nr_sectors;
int i;
int ret;
+ fs_info = BTRFS_I(inode)->root->fs_info;
+ sectorsize = BTRFS_I(inode)->root->sectorsize;
+
start = io_bio->logical;
done.inode = inode;
bio_for_each_segment_all(bvec, &io_bio->bio, i) {
-try_again:
+ nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
+ pgoff = bvec->bv_offset;
+
+next_block_or_try_again:
done.uptodate = 0;
done.start = start;
init_completion(&done.done);
- ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start,
- start + bvec->bv_len - 1,
- io_bio->mirror_num,
- btrfs_retry_endio_nocsum, &done);
+ ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
+ pgoff, start, start + sectorsize - 1,
+ io_bio->mirror_num,
+ btrfs_retry_endio_nocsum, &done);
if (ret)
return ret;
@@ -7856,10 +7901,15 @@ try_again:
if (!done.uptodate) {
/* We might have another mirror, so try again */
- goto try_again;
+ goto next_block_or_try_again;
}
- start += bvec->bv_len;
+ start += sectorsize;
+
+ if (nr_sectors--) {
+ pgoff += sectorsize;
+ goto next_block_or_try_again;
+ }
}
return 0;
@@ -7869,7 +7919,9 @@ static void btrfs_retry_endio(struct bio *bio)
{
struct btrfs_retry_complete *done = bio->bi_private;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+ struct inode *inode;
struct bio_vec *bvec;
+ u64 start;
int uptodate;
int ret;
int i;
@@ -7878,13 +7930,20 @@ static void btrfs_retry_endio(struct bio *bio)
goto end;
uptodate = 1;
+
+ start = done->start;
+
+ ASSERT(bio->bi_vcnt == 1);
+ inode = bio->bi_io_vec->bv_page->mapping->host;
+ ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize);
+
bio_for_each_segment_all(bvec, bio, i) {
ret = __readpage_endio_check(done->inode, io_bio, i,
- bvec->bv_page, 0,
- done->start, bvec->bv_len);
+ bvec->bv_page, bvec->bv_offset,
+ done->start, bvec->bv_len);
if (!ret)
clean_io_failure(done->inode, done->start,
- bvec->bv_page, 0);
+ bvec->bv_page, bvec->bv_offset);
else
uptodate = 0;
}
@@ -7898,20 +7957,34 @@ end:
static int __btrfs_subio_endio_read(struct inode *inode,
struct btrfs_io_bio *io_bio, int err)
{
+ struct btrfs_fs_info *fs_info;
struct bio_vec *bvec;
struct btrfs_retry_complete done;
u64 start;
u64 offset = 0;
+ u32 sectorsize;
+ int nr_sectors;
+ unsigned int pgoff;
+ int csum_pos;
int i;
int ret;
+ fs_info = BTRFS_I(inode)->root->fs_info;
+ sectorsize = BTRFS_I(inode)->root->sectorsize;
+
err = 0;
start = io_bio->logical;
done.inode = inode;
bio_for_each_segment_all(bvec, &io_bio->bio, i) {
- ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
- 0, start, bvec->bv_len);
+ nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
+
+ pgoff = bvec->bv_offset;
+next_block:
+ csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset);
+ ret = __readpage_endio_check(inode, io_bio, csum_pos,
+ bvec->bv_page, pgoff, start,
+ sectorsize);
if (likely(!ret))
goto next;
try_again:
@@ -7919,10 +7992,10 @@ try_again:
done.start = start;
init_completion(&done.done);
- ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start,
- start + bvec->bv_len - 1,
- io_bio->mirror_num,
- btrfs_retry_endio, &done);
+ ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
+ pgoff, start, start + sectorsize - 1,
+ io_bio->mirror_num,
+ btrfs_retry_endio, &done);
if (ret) {
err = ret;
goto next;
@@ -7935,8 +8008,15 @@ try_again:
goto try_again;
}
next:
- offset += bvec->bv_len;
- start += bvec->bv_len;
+ offset += sectorsize;
+ start += sectorsize;
+
+ ASSERT(nr_sectors);
+
+ if (--nr_sectors) {
+ pgoff += sectorsize;
+ goto next_block;
+ }
}
return err;
@@ -8188,9 +8268,11 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
u64 file_offset = dip->logical_offset;
u64 submit_len = 0;
u64 map_length;
- int nr_pages = 0;
- int ret;
+ u32 blocksize = root->sectorsize;
int async_submit = 0;
+ int nr_sectors;
+ int ret;
+ int i;
map_length = orig_bio->bi_iter.bi_size;
ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
@@ -8220,9 +8302,12 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
atomic_inc(&dip->pending_bios);
while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
- if (map_length < submit_len + bvec->bv_len ||
- bio_add_page(bio, bvec->bv_page, bvec->bv_len,
- bvec->bv_offset) < bvec->bv_len) {
+ nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, bvec->bv_len);
+ i = 0;
+next_block:
+ if (unlikely(map_length < submit_len + blocksize ||
+ bio_add_page(bio, bvec->bv_page, blocksize,
+ bvec->bv_offset + (i * blocksize)) < blocksize)) {
/*
* inc the count before we submit the bio so
* we know the end IO handler won't happen before
@@ -8243,7 +8328,6 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
file_offset += submit_len;
submit_len = 0;
- nr_pages = 0;
bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
start_sector, GFP_NOFS);
@@ -8261,9 +8345,14 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
bio_put(bio);
goto out_err;
}
+
+ goto next_block;
} else {
- submit_len += bvec->bv_len;
- nr_pages++;
+ submit_len += blocksize;
+ if (--nr_sectors) {
+ i++;
+ goto next_block;
+ }
bvec++;
}
}
@@ -8628,6 +8717,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
struct extent_state *cached_state = NULL;
u64 page_start = page_offset(page);
u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+ u64 start;
+ u64 end;
int inode_evicting = inode->i_state & I_FREEING;
/*
@@ -8647,14 +8738,18 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
if (!inode_evicting)
lock_extent_bits(tree, page_start, page_end, &cached_state);
- ordered = btrfs_lookup_ordered_extent(inode, page_start);
+again:
+ start = page_start;
+ ordered = btrfs_lookup_ordered_range(inode, start,
+ page_end - start + 1);
if (ordered) {
+ end = min(page_end, ordered->file_offset + ordered->len - 1);
/*
* IO on this page will never be started, so we need
* to account for any ordered extents now
*/
if (!inode_evicting)
- clear_extent_bit(tree, page_start, page_end,
+ clear_extent_bit(tree, start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 1, 0, &cached_state,
@@ -8671,22 +8766,26 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
spin_lock_irq(&tree->lock);
set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
- new_len = page_start - ordered->file_offset;
+ new_len = start - ordered->file_offset;
if (new_len < ordered->truncated_len)
ordered->truncated_len = new_len;
spin_unlock_irq(&tree->lock);
if (btrfs_dec_test_ordered_pending(inode, &ordered,
- page_start,
- PAGE_CACHE_SIZE, 1))
+ start,
+ end - start + 1, 1))
btrfs_finish_ordered_io(ordered);
}
btrfs_put_ordered_extent(ordered);
if (!inode_evicting) {
cached_state = NULL;
- lock_extent_bits(tree, page_start, page_end,
+ lock_extent_bits(tree, start, end,
&cached_state);
}
+
+ start = end + 1;
+ if (start < page_end)
+ goto again;
}
/*
@@ -8747,15 +8846,28 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
loff_t size;
int ret;
int reserved = 0;
+ u64 reserved_space;
u64 page_start;
u64 page_end;
+ u64 end;
+
+ reserved_space = PAGE_CACHE_SIZE;
sb_start_pagefault(inode->i_sb);
page_start = page_offset(page);
page_end = page_start + PAGE_CACHE_SIZE - 1;
+ end = page_end;
+ /*
+ * Reserving delalloc space after obtaining the page lock can lead to
+ * deadlock. For example, if a dirty page is locked by this function
+ * and the call to btrfs_delalloc_reserve_space() ends up triggering
+ * dirty page write out, then the btrfs_writepage() function could
+ * end up waiting indefinitely to get a lock on the page currently
+ * being processed by btrfs_page_mkwrite() function.
+ */
ret = btrfs_delalloc_reserve_space(inode, page_start,
- PAGE_CACHE_SIZE);
+ reserved_space);
if (!ret) {
ret = file_update_time(vma->vm_file);
reserved = 1;
@@ -8789,7 +8901,7 @@ again:
* we can't set the delalloc bits if there are pending ordered
* extents. Drop our locks and wait for them to finish
*/
- ordered = btrfs_lookup_ordered_extent(inode, page_start);
+ ordered = btrfs_lookup_ordered_range(inode, page_start, page_end);
if (ordered) {
unlock_extent_cached(io_tree, page_start, page_end,
&cached_state, GFP_NOFS);
@@ -8799,6 +8911,18 @@ again:
goto again;
}
+ if (page->index == ((size - 1) >> PAGE_CACHE_SHIFT)) {
+ reserved_space = round_up(size - page_start, root->sectorsize);
+ if (reserved_space < PAGE_CACHE_SIZE) {
+ end = page_start + reserved_space - 1;
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+ spin_unlock(&BTRFS_I(inode)->lock);
+ btrfs_delalloc_release_space(inode, page_start,
+ PAGE_CACHE_SIZE - reserved_space);
+ }
+ }
+
/*
* XXX - page_mkwrite gets called every time the page is dirtied, even
* if it was already dirty, so for space accounting reasons we need to
@@ -8806,12 +8930,12 @@ again:
* is probably a better way to do this, but for now keep consistent with
* prepare_pages in the normal write path.
*/
- clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, &cached_state, GFP_NOFS);
- ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
+ ret = btrfs_set_extent_delalloc(inode, page_start, end,
&cached_state);
if (ret) {
unlock_extent_cached(io_tree, page_start, page_end,
@@ -8850,7 +8974,7 @@ out_unlock:
}
unlock_page(page);
out:
- btrfs_delalloc_release_space(inode, page_start, PAGE_CACHE_SIZE);
+ btrfs_delalloc_release_space(inode, page_start, reserved_space);
out_noreserve:
sb_end_pagefault(inode->i_sb);
return ret;
@@ -9176,16 +9300,11 @@ void btrfs_destroy_cachep(void)
* destroy cache.
*/
rcu_barrier();
- if (btrfs_inode_cachep)
- kmem_cache_destroy(btrfs_inode_cachep);
- if (btrfs_trans_handle_cachep)
- kmem_cache_destroy(btrfs_trans_handle_cachep);
- if (btrfs_transaction_cachep)
- kmem_cache_destroy(btrfs_transaction_cachep);
- if (btrfs_path_cachep)
- kmem_cache_destroy(btrfs_path_cachep);
- if (btrfs_free_space_cachep)
- kmem_cache_destroy(btrfs_free_space_cachep);
+ kmem_cache_destroy(btrfs_inode_cachep);
+ kmem_cache_destroy(btrfs_trans_handle_cachep);
+ kmem_cache_destroy(btrfs_transaction_cachep);
+ kmem_cache_destroy(btrfs_path_cachep);
+ kmem_cache_destroy(btrfs_free_space_cachep);
}
int btrfs_init_cachep(void)
@@ -9236,7 +9355,6 @@ static int btrfs_getattr(struct vfsmount *mnt,
generic_fillattr(inode, stat);
stat->dev = BTRFS_I(inode)->root->anon_dev;
- stat->blksize = PAGE_CACHE_SIZE;
spin_lock(&BTRFS_I(inode)->lock);
delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
@@ -9254,7 +9372,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct btrfs_root *dest = BTRFS_I(new_dir)->root;
struct inode *new_inode = d_inode(new_dentry);
struct inode *old_inode = d_inode(old_dentry);
- struct timespec ctime = CURRENT_TIME;
u64 index = 0;
u64 root_objectid;
int ret;
@@ -9351,9 +9468,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
inode_inc_iversion(old_dir);
inode_inc_iversion(new_dir);
inode_inc_iversion(old_inode);
- old_dir->i_ctime = old_dir->i_mtime = ctime;
- new_dir->i_ctime = new_dir->i_mtime = ctime;
- old_inode->i_ctime = ctime;
+ old_dir->i_ctime = old_dir->i_mtime =
+ new_dir->i_ctime = new_dir->i_mtime =
+ old_inode->i_ctime = current_fs_time(old_dir->i_sb);
if (old_dentry->d_parent != new_dentry->d_parent)
btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
@@ -9378,7 +9495,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_inode) {
inode_inc_iversion(new_inode);
- new_inode->i_ctime = CURRENT_TIME;
+ new_inode->i_ctime = current_fs_time(new_inode->i_sb);
if (unlikely(btrfs_ino(new_inode) ==
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
root_objectid = BTRFS_I(new_inode)->location.objectid;
@@ -9856,7 +9973,7 @@ next:
*alloc_hint = ins.objectid + ins.offset;
inode_inc_iversion(inode);
- inode->i_ctime = CURRENT_TIME;
+ inode->i_ctime = current_fs_time(inode->i_sb);
BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
(actual_len > inode->i_size) &&
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ab390c7958f5..c1193b0c5e49 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -347,7 +347,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
btrfs_update_iflags(inode);
inode_inc_iversion(inode);
- inode->i_ctime = CURRENT_TIME;
+ inode->i_ctime = current_fs_time(inode->i_sb);
ret = btrfs_update_inode(trans, root, inode);
btrfs_end_transaction(trans, root);
@@ -443,7 +443,7 @@ static noinline int create_subvol(struct inode *dir,
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *new_root;
struct btrfs_block_rsv block_rsv;
- struct timespec cur_time = CURRENT_TIME;
+ struct timespec cur_time = current_fs_time(dir->i_sb);
struct inode *inode;
int ret;
int err;
@@ -844,10 +844,6 @@ static noinline int btrfs_mksubvol(struct path *parent,
if (IS_ERR(dentry))
goto out_unlock;
- error = -EEXIST;
- if (d_really_is_positive(dentry))
- goto out_dput;
-
error = btrfs_may_create(dir, dentry);
if (error)
goto out_dput;
@@ -2792,24 +2788,29 @@ out:
static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
{
struct page *page;
- struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
page = grab_cache_page(inode->i_mapping, index);
if (!page)
- return NULL;
+ return ERR_PTR(-ENOMEM);
if (!PageUptodate(page)) {
- if (extent_read_full_page_nolock(tree, page, btrfs_get_extent,
- 0))
- return NULL;
+ int ret;
+
+ ret = btrfs_readpage(NULL, page);
+ if (ret)
+ return ERR_PTR(ret);
lock_page(page);
if (!PageUptodate(page)) {
unlock_page(page);
page_cache_release(page);
- return NULL;
+ return ERR_PTR(-EIO);
+ }
+ if (page->mapping != inode->i_mapping) {
+ unlock_page(page);
+ page_cache_release(page);
+ return ERR_PTR(-EAGAIN);
}
}
- unlock_page(page);
return page;
}
@@ -2821,17 +2822,31 @@ static int gather_extent_pages(struct inode *inode, struct page **pages,
pgoff_t index = off >> PAGE_CACHE_SHIFT;
for (i = 0; i < num_pages; i++) {
+again:
pages[i] = extent_same_get_page(inode, index + i);
- if (!pages[i])
- return -ENOMEM;
+ if (IS_ERR(pages[i])) {
+ int err = PTR_ERR(pages[i]);
+
+ if (err == -EAGAIN)
+ goto again;
+ pages[i] = NULL;
+ return err;
+ }
}
return 0;
}
-static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
+static int lock_extent_range(struct inode *inode, u64 off, u64 len,
+ bool retry_range_locking)
{
- /* do any pending delalloc/csum calc on src, one way or
- another, and lock file content */
+ /*
+ * Do any pending delalloc/csum calculations on inode, one way or
+ * another, and lock file content.
+ * The locking order is:
+ *
+ * 1) pages
+ * 2) range in the inode's io tree
+ */
while (1) {
struct btrfs_ordered_extent *ordered;
lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
@@ -2849,8 +2864,11 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
if (ordered)
btrfs_put_ordered_extent(ordered);
+ if (!retry_range_locking)
+ return -EAGAIN;
btrfs_wait_ordered_range(inode, off, len);
}
+ return 0;
}
static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
@@ -2875,15 +2893,24 @@ static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
}
-static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
- struct inode *inode2, u64 loff2, u64 len)
+static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+ struct inode *inode2, u64 loff2, u64 len,
+ bool retry_range_locking)
{
+ int ret;
+
if (inode1 < inode2) {
swap(inode1, inode2);
swap(loff1, loff2);
}
- lock_extent_range(inode1, loff1, len);
- lock_extent_range(inode2, loff2, len);
+ ret = lock_extent_range(inode1, loff1, len, retry_range_locking);
+ if (ret)
+ return ret;
+ ret = lock_extent_range(inode2, loff2, len, retry_range_locking);
+ if (ret)
+ unlock_extent(&BTRFS_I(inode1)->io_tree, loff1,
+ loff1 + len - 1);
+ return ret;
}
struct cmp_pages {
@@ -2899,11 +2926,15 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)
for (i = 0; i < cmp->num_pages; i++) {
pg = cmp->src_pages[i];
- if (pg)
+ if (pg) {
+ unlock_page(pg);
page_cache_release(pg);
+ }
pg = cmp->dst_pages[i];
- if (pg)
+ if (pg) {
+ unlock_page(pg);
page_cache_release(pg);
+ }
}
kfree(cmp->src_pages);
kfree(cmp->dst_pages);
@@ -2923,8 +2954,8 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
* of the array is bounded by len, which is in turn bounded by
* BTRFS_MAX_DEDUPE_LEN.
*/
- src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
- dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
+ src_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+ dst_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
if (!src_pgarr || !dst_pgarr) {
kfree(src_pgarr);
kfree(dst_pgarr);
@@ -2964,6 +2995,8 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
src_page = cmp->src_pages[i];
dst_page = cmp->dst_pages[i];
+ ASSERT(PageLocked(src_page));
+ ASSERT(PageLocked(dst_page));
addr = kmap_atomic(src_page);
dst_addr = kmap_atomic(dst_page);
@@ -3076,14 +3109,46 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
goto out_unlock;
}
+again:
ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
if (ret)
goto out_unlock;
if (same_inode)
- lock_extent_range(src, same_lock_start, same_lock_len);
+ ret = lock_extent_range(src, same_lock_start, same_lock_len,
+ false);
else
- btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
+ ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len,
+ false);
+ /*
+ * If one of the inodes has dirty pages in the respective range or
+ * ordered extents, we need to flush dellaloc and wait for all ordered
+ * extents in the range. We must unlock the pages and the ranges in the
+ * io trees to avoid deadlocks when flushing delalloc (requires locking
+ * pages) and when waiting for ordered extents to complete (they require
+ * range locking).
+ */
+ if (ret == -EAGAIN) {
+ /*
+ * Ranges in the io trees already unlocked. Now unlock all
+ * pages before waiting for all IO to complete.
+ */
+ btrfs_cmp_data_free(&cmp);
+ if (same_inode) {
+ btrfs_wait_ordered_range(src, same_lock_start,
+ same_lock_len);
+ } else {
+ btrfs_wait_ordered_range(src, loff, len);
+ btrfs_wait_ordered_range(dst, dst_loff, len);
+ }
+ goto again;
+ }
+ ASSERT(ret == 0);
+ if (WARN_ON(ret)) {
+ /* ranges in the io trees already unlocked */
+ btrfs_cmp_data_free(&cmp);
+ return ret;
+ }
/* pass original length for comparison so we stay within i_size */
ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
@@ -3146,7 +3211,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
inode_inc_iversion(inode);
if (!no_time_update)
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
/*
* We round up to the block size at eof when determining which
* extents to clone above, but shouldn't round up the file size.
@@ -3793,9 +3858,15 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
u64 lock_start = min_t(u64, off, destoff);
u64 lock_len = max_t(u64, off, destoff) + len - lock_start;
- lock_extent_range(src, lock_start, lock_len);
+ ret = lock_extent_range(src, lock_start, lock_len, true);
} else {
- btrfs_double_extent_lock(src, off, inode, destoff, len);
+ ret = btrfs_double_extent_lock(src, off, inode, destoff, len,
+ true);
+ }
+ ASSERT(ret == 0);
+ if (WARN_ON(ret)) {
+ /* ranges in the io trees already unlocked */
+ goto out_unlock;
}
ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
@@ -3812,8 +3883,9 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
* Truncate page cache pages so that future reads will see the cloned
* data immediately and not the previous data.
*/
- truncate_inode_pages_range(&inode->i_data, destoff,
- PAGE_CACHE_ALIGN(destoff + len) - 1);
+ truncate_inode_pages_range(&inode->i_data,
+ round_down(destoff, PAGE_CACHE_SIZE),
+ round_up(destoff + len, PAGE_CACHE_SIZE) - 1);
out_unlock:
if (!same_inode)
btrfs_double_inode_unlock(src, inode);
@@ -4954,7 +5026,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_root_item *root_item = &root->root_item;
struct btrfs_trans_handle *trans;
- struct timespec ct = CURRENT_TIME;
+ struct timespec ct = current_fs_time(inode->i_sb);
int ret = 0;
int received_uuid_changed;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 8c27292ea9ea..988eb1513aa5 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -1114,6 +1114,5 @@ int __init ordered_data_init(void)
void ordered_data_exit(void)
{
- if (btrfs_ordered_extent_cache)
- kmem_cache_destroy(btrfs_ordered_extent_cache);
+ kmem_cache_destroy(btrfs_ordered_extent_cache);
}
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 647ab12fdf5d..147dc6ca5de1 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -295,8 +295,27 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
btrfs_dev_extent_chunk_offset(l, dev_extent),
btrfs_dev_extent_length(l, dev_extent));
break;
- case BTRFS_DEV_STATS_KEY:
- printk(KERN_INFO "\t\tdevice stats\n");
+ case BTRFS_PERSISTENT_ITEM_KEY:
+ printk(KERN_INFO "\t\tpersistent item objectid %llu offset %llu\n",
+ key.objectid, key.offset);
+ switch (key.objectid) {
+ case BTRFS_DEV_STATS_OBJECTID:
+ printk(KERN_INFO "\t\tdevice stats\n");
+ break;
+ default:
+ printk(KERN_INFO "\t\tunknown persistent item\n");
+ }
+ break;
+ case BTRFS_TEMPORARY_ITEM_KEY:
+ printk(KERN_INFO "\t\ttemporary item objectid %llu offset %llu\n",
+ key.objectid, key.offset);
+ switch (key.objectid) {
+ case BTRFS_BALANCE_OBJECTID:
+ printk(KERN_INFO "\t\tbalance status\n");
+ break;
+ default:
+ printk(KERN_INFO "\t\tunknown temporary item\n");
+ }
break;
case BTRFS_DEV_REPLACE_KEY:
printk(KERN_INFO "\t\tdev replace\n");
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 619f92963e27..b892914968c1 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -72,7 +72,7 @@ struct reada_extent {
spinlock_t lock;
struct reada_zone *zones[BTRFS_MAX_MIRRORS];
int nzones;
- struct btrfs_device *scheduled_for;
+ int scheduled;
};
struct reada_zone {
@@ -101,67 +101,53 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info);
static void __reada_start_machine(struct btrfs_fs_info *fs_info);
static int reada_add_block(struct reada_control *rc, u64 logical,
- struct btrfs_key *top, int level, u64 generation);
+ struct btrfs_key *top, u64 generation);
/* recurses */
/* in case of err, eb might be NULL */
-static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
- u64 start, int err)
+static void __readahead_hook(struct btrfs_fs_info *fs_info,
+ struct reada_extent *re, struct extent_buffer *eb,
+ u64 start, int err)
{
int level = 0;
int nritems;
int i;
u64 bytenr;
u64 generation;
- struct reada_extent *re;
- struct btrfs_fs_info *fs_info = root->fs_info;
struct list_head list;
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- struct btrfs_device *for_dev;
if (eb)
level = btrfs_header_level(eb);
- /* find extent */
- spin_lock(&fs_info->reada_lock);
- re = radix_tree_lookup(&fs_info->reada_tree, index);
- if (re)
- re->refcnt++;
- spin_unlock(&fs_info->reada_lock);
-
- if (!re)
- return -1;
-
spin_lock(&re->lock);
/*
* just take the full list from the extent. afterwards we
* don't need the lock anymore
*/
list_replace_init(&re->extctl, &list);
- for_dev = re->scheduled_for;
- re->scheduled_for = NULL;
+ re->scheduled = 0;
spin_unlock(&re->lock);
- if (err == 0) {
- nritems = level ? btrfs_header_nritems(eb) : 0;
- generation = btrfs_header_generation(eb);
- /*
- * FIXME: currently we just set nritems to 0 if this is a leaf,
- * effectively ignoring the content. In a next step we could
- * trigger more readahead depending from the content, e.g.
- * fetch the checksums for the extents in the leaf.
- */
- } else {
- /*
- * this is the error case, the extent buffer has not been
- * read correctly. We won't access anything from it and
- * just cleanup our data structures. Effectively this will
- * cut the branch below this node from read ahead.
- */
- nritems = 0;
- generation = 0;
- }
+ /*
+ * this is the error case, the extent buffer has not been
+ * read correctly. We won't access anything from it and
+ * just cleanup our data structures. Effectively this will
+ * cut the branch below this node from read ahead.
+ */
+ if (err)
+ goto cleanup;
+ /*
+ * FIXME: currently we just set nritems to 0 if this is a leaf,
+ * effectively ignoring the content. In a next step we could
+ * trigger more readahead depending from the content, e.g.
+ * fetch the checksums for the extents in the leaf.
+ */
+ if (!level)
+ goto cleanup;
+
+ nritems = btrfs_header_nritems(eb);
+ generation = btrfs_header_generation(eb);
for (i = 0; i < nritems; i++) {
struct reada_extctl *rec;
u64 n_gen;
@@ -188,19 +174,20 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
*/
#ifdef DEBUG
if (rec->generation != generation) {
- btrfs_debug(root->fs_info,
- "generation mismatch for (%llu,%d,%llu) %llu != %llu",
- key.objectid, key.type, key.offset,
- rec->generation, generation);
+ btrfs_debug(fs_info,
+ "generation mismatch for (%llu,%d,%llu) %llu != %llu",
+ key.objectid, key.type, key.offset,
+ rec->generation, generation);
}
#endif
if (rec->generation == generation &&
btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
- reada_add_block(rc, bytenr, &next_key,
- level - 1, n_gen);
+ reada_add_block(rc, bytenr, &next_key, n_gen);
}
}
+
+cleanup:
/*
* free extctl records
*/
@@ -222,26 +209,37 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
reada_extent_put(fs_info, re); /* one ref for each entry */
}
- reada_extent_put(fs_info, re); /* our ref */
- if (for_dev)
- atomic_dec(&for_dev->reada_in_flight);
- return 0;
+ return;
}
/*
* start is passed separately in case eb in NULL, which may be the case with
* failed I/O
*/
-int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
- u64 start, int err)
+int btree_readahead_hook(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb, u64 start, int err)
{
- int ret;
+ int ret = 0;
+ struct reada_extent *re;
- ret = __readahead_hook(root, eb, start, err);
+ /* find extent */
+ spin_lock(&fs_info->reada_lock);
+ re = radix_tree_lookup(&fs_info->reada_tree,
+ start >> PAGE_CACHE_SHIFT);
+ if (re)
+ re->refcnt++;
+ spin_unlock(&fs_info->reada_lock);
+ if (!re) {
+ ret = -1;
+ goto start_machine;
+ }
- reada_start_machine(root->fs_info);
+ __readahead_hook(fs_info, re, eb, start, err);
+ reada_extent_put(fs_info, re); /* our ref */
+start_machine:
+ reada_start_machine(fs_info);
return ret;
}
@@ -260,18 +258,14 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
spin_lock(&fs_info->reada_lock);
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
logical >> PAGE_CACHE_SHIFT, 1);
- if (ret == 1)
+ if (ret == 1 && logical >= zone->start && logical <= zone->end) {
kref_get(&zone->refcnt);
- spin_unlock(&fs_info->reada_lock);
-
- if (ret == 1) {
- if (logical >= zone->start && logical < zone->end)
- return zone;
- spin_lock(&fs_info->reada_lock);
- kref_put(&zone->refcnt, reada_zone_release);
spin_unlock(&fs_info->reada_lock);
+ return zone;
}
+ spin_unlock(&fs_info->reada_lock);
+
cache = btrfs_lookup_block_group(fs_info, logical);
if (!cache)
return NULL;
@@ -280,7 +274,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
end = start + cache->key.offset - 1;
btrfs_put_block_group(cache);
- zone = kzalloc(sizeof(*zone), GFP_NOFS);
+ zone = kzalloc(sizeof(*zone), GFP_KERNEL);
if (!zone)
return NULL;
@@ -307,8 +301,10 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
kfree(zone);
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
logical >> PAGE_CACHE_SHIFT, 1);
- if (ret == 1)
+ if (ret == 1 && logical >= zone->start && logical <= zone->end)
kref_get(&zone->refcnt);
+ else
+ zone = NULL;
}
spin_unlock(&fs_info->reada_lock);
@@ -317,7 +313,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
static struct reada_extent *reada_find_extent(struct btrfs_root *root,
u64 logical,
- struct btrfs_key *top, int level)
+ struct btrfs_key *top)
{
int ret;
struct reada_extent *re = NULL;
@@ -330,9 +326,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
u64 length;
int real_stripes;
int nzones = 0;
- int i;
unsigned long index = logical >> PAGE_CACHE_SHIFT;
int dev_replace_is_ongoing;
+ int have_zone = 0;
spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index);
@@ -343,7 +339,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
if (re)
return re;
- re = kzalloc(sizeof(*re), GFP_NOFS);
+ re = kzalloc(sizeof(*re), GFP_KERNEL);
if (!re)
return NULL;
@@ -375,11 +371,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
struct reada_zone *zone;
dev = bbio->stripes[nzones].dev;
+
+ /* cannot read ahead on missing device. */
+ if (!dev->bdev)
+ continue;
+
zone = reada_find_zone(fs_info, dev, logical, bbio);
if (!zone)
- break;
+ continue;
- re->zones[nzones] = zone;
+ re->zones[re->nzones++] = zone;
spin_lock(&zone->lock);
if (!zone->elems)
kref_get(&zone->refcnt);
@@ -389,14 +390,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
kref_put(&zone->refcnt, reada_zone_release);
spin_unlock(&fs_info->reada_lock);
}
- re->nzones = nzones;
- if (nzones == 0) {
+ if (re->nzones == 0) {
/* not a single zone found, error and out */
goto error;
}
/* insert extent in reada_tree + all per-device trees, all or nothing */
- btrfs_dev_replace_lock(&fs_info->dev_replace);
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&fs_info->reada_tree, index, re);
if (ret == -EEXIST) {
@@ -404,19 +404,20 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
BUG_ON(!re_exist);
re_exist->refcnt++;
spin_unlock(&fs_info->reada_lock);
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
goto error;
}
if (ret) {
spin_unlock(&fs_info->reada_lock);
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
goto error;
}
prev_dev = NULL;
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
&fs_info->dev_replace);
- for (i = 0; i < nzones; ++i) {
- dev = bbio->stripes[i].dev;
+ for (nzones = 0; nzones < re->nzones; ++nzones) {
+ dev = re->zones[nzones]->device;
+
if (dev == prev_dev) {
/*
* in case of DUP, just add the first zone. As both
@@ -427,15 +428,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
*/
continue;
}
- if (!dev->bdev) {
- /*
- * cannot read ahead on missing device, but for RAID5/6,
- * REQ_GET_READ_MIRRORS return 1. So don't skip missing
- * device for such case.
- */
- if (nzones > 1)
- continue;
- }
+ if (!dev->bdev)
+ continue;
+
if (dev_replace_is_ongoing &&
dev == fs_info->dev_replace.tgtdev) {
/*
@@ -447,8 +442,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
prev_dev = dev;
ret = radix_tree_insert(&dev->reada_extents, index, re);
if (ret) {
- while (--i >= 0) {
- dev = bbio->stripes[i].dev;
+ while (--nzones >= 0) {
+ dev = re->zones[nzones]->device;
BUG_ON(dev == NULL);
/* ignore whether the entry was inserted */
radix_tree_delete(&dev->reada_extents, index);
@@ -456,21 +451,24 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
BUG_ON(fs_info == NULL);
radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock);
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
goto error;
}
+ have_zone = 1;
}
spin_unlock(&fs_info->reada_lock);
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+
+ if (!have_zone)
+ goto error;
btrfs_put_bbio(bbio);
return re;
error:
- while (nzones) {
+ for (nzones = 0; nzones < re->nzones; ++nzones) {
struct reada_zone *zone;
- --nzones;
zone = re->zones[nzones];
kref_get(&zone->refcnt);
spin_lock(&zone->lock);
@@ -531,8 +529,6 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
kref_put(&zone->refcnt, reada_zone_release);
spin_unlock(&fs_info->reada_lock);
}
- if (re->scheduled_for)
- atomic_dec(&re->scheduled_for->reada_in_flight);
kfree(re);
}
@@ -556,17 +552,17 @@ static void reada_control_release(struct kref *kref)
}
static int reada_add_block(struct reada_control *rc, u64 logical,
- struct btrfs_key *top, int level, u64 generation)
+ struct btrfs_key *top, u64 generation)
{
struct btrfs_root *root = rc->root;
struct reada_extent *re;
struct reada_extctl *rec;
- re = reada_find_extent(root, logical, top, level); /* takes one ref */
+ re = reada_find_extent(root, logical, top); /* takes one ref */
if (!re)
return -1;
- rec = kzalloc(sizeof(*rec), GFP_NOFS);
+ rec = kzalloc(sizeof(*rec), GFP_KERNEL);
if (!rec) {
reada_extent_put(root->fs_info, re);
return -ENOMEM;
@@ -662,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
u64 logical;
int ret;
int i;
- int need_kick = 0;
spin_lock(&fs_info->reada_lock);
if (dev->reada_curr_zone == NULL) {
@@ -679,7 +674,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
*/
ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
dev->reada_next >> PAGE_CACHE_SHIFT, 1);
- if (ret == 0 || re->logical >= dev->reada_curr_zone->end) {
+ if (ret == 0 || re->logical > dev->reada_curr_zone->end) {
ret = reada_pick_zone(dev);
if (!ret) {
spin_unlock(&fs_info->reada_lock);
@@ -698,6 +693,15 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
spin_unlock(&fs_info->reada_lock);
+ spin_lock(&re->lock);
+ if (re->scheduled || list_empty(&re->extctl)) {
+ spin_unlock(&re->lock);
+ reada_extent_put(fs_info, re);
+ return 0;
+ }
+ re->scheduled = 1;
+ spin_unlock(&re->lock);
+
/*
* find mirror num
*/
@@ -709,29 +713,20 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
}
logical = re->logical;
- spin_lock(&re->lock);
- if (re->scheduled_for == NULL) {
- re->scheduled_for = dev;
- need_kick = 1;
- }
- spin_unlock(&re->lock);
-
- reada_extent_put(fs_info, re);
-
- if (!need_kick)
- return 0;
-
atomic_inc(&dev->reada_in_flight);
ret = reada_tree_block_flagged(fs_info->extent_root, logical,
mirror_num, &eb);
if (ret)
- __readahead_hook(fs_info->extent_root, NULL, logical, ret);
+ __readahead_hook(fs_info, re, NULL, logical, ret);
else if (eb)
- __readahead_hook(fs_info->extent_root, eb, eb->start, ret);
+ __readahead_hook(fs_info, re, eb, eb->start, ret);
if (eb)
free_extent_buffer(eb);
+ atomic_dec(&dev->reada_in_flight);
+ reada_extent_put(fs_info, re);
+
return 1;
}
@@ -752,6 +747,8 @@ static void reada_start_machine_worker(struct btrfs_work *work)
set_task_ioprio(current, BTRFS_IOPRIO_READA);
__reada_start_machine(fs_info);
set_task_ioprio(current, old_ioprio);
+
+ atomic_dec(&fs_info->reada_works_cnt);
}
static void __reada_start_machine(struct btrfs_fs_info *fs_info)
@@ -783,15 +780,19 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
* enqueue to workers to finish it. This will distribute the load to
* the cores.
*/
- for (i = 0; i < 2; ++i)
+ for (i = 0; i < 2; ++i) {
reada_start_machine(fs_info);
+ if (atomic_read(&fs_info->reada_works_cnt) >
+ BTRFS_MAX_MIRRORS * 2)
+ break;
+ }
}
static void reada_start_machine(struct btrfs_fs_info *fs_info)
{
struct reada_machine_work *rmw;
- rmw = kzalloc(sizeof(*rmw), GFP_NOFS);
+ rmw = kzalloc(sizeof(*rmw), GFP_KERNEL);
if (!rmw) {
/* FIXME we cannot handle this properly right now */
BUG();
@@ -801,6 +802,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
rmw->fs_info = fs_info;
btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
+ atomic_inc(&fs_info->reada_works_cnt);
}
#ifdef DEBUG
@@ -848,10 +850,9 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
if (ret == 0)
break;
printk(KERN_DEBUG
- " re: logical %llu size %u empty %d for %lld",
+ " re: logical %llu size %u empty %d scheduled %d",
re->logical, fs_info->tree_root->nodesize,
- list_empty(&re->extctl), re->scheduled_for ?
- re->scheduled_for->devid : -1);
+ list_empty(&re->extctl), re->scheduled);
for (i = 0; i < re->nzones; ++i) {
printk(KERN_CONT " zone %llu-%llu devs",
@@ -878,27 +879,21 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
index, 1);
if (ret == 0)
break;
- if (!re->scheduled_for) {
+ if (!re->scheduled) {
index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
continue;
}
printk(KERN_DEBUG
- "re: logical %llu size %u list empty %d for %lld",
+ "re: logical %llu size %u list empty %d scheduled %d",
re->logical, fs_info->tree_root->nodesize,
- list_empty(&re->extctl),
- re->scheduled_for ? re->scheduled_for->devid : -1);
+ list_empty(&re->extctl), re->scheduled);
for (i = 0; i < re->nzones; ++i) {
printk(KERN_CONT " zone %llu-%llu devs",
re->zones[i]->start,
re->zones[i]->end);
- for (i = 0; i < re->nzones; ++i) {
- printk(KERN_CONT " zone %llu-%llu devs",
- re->zones[i]->start,
- re->zones[i]->end);
- for (j = 0; j < re->zones[i]->ndevs; ++j) {
- printk(KERN_CONT " %lld",
- re->zones[i]->devs[j]->devid);
- }
+ for (j = 0; j < re->zones[i]->ndevs; ++j) {
+ printk(KERN_CONT " %lld",
+ re->zones[i]->devs[j]->devid);
}
}
printk(KERN_CONT "\n");
@@ -917,7 +912,6 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
struct reada_control *rc;
u64 start;
u64 generation;
- int level;
int ret;
struct extent_buffer *node;
static struct btrfs_key max_key = {
@@ -926,7 +920,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
.offset = (u64)-1
};
- rc = kzalloc(sizeof(*rc), GFP_NOFS);
+ rc = kzalloc(sizeof(*rc), GFP_KERNEL);
if (!rc)
return ERR_PTR(-ENOMEM);
@@ -940,11 +934,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
node = btrfs_root_node(root);
start = node->start;
- level = btrfs_header_level(node);
generation = btrfs_header_generation(node);
free_extent_buffer(node);
- ret = reada_add_block(rc, start, &max_key, level, generation);
+ ret = reada_add_block(rc, start, &max_key, generation);
if (ret) {
kfree(rc);
return ERR_PTR(ret);
@@ -959,8 +952,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
int btrfs_reada_wait(void *handle)
{
struct reada_control *rc = handle;
+ struct btrfs_fs_info *fs_info = rc->root->fs_info;
while (atomic_read(&rc->elems)) {
+ if (!atomic_read(&fs_info->reada_works_cnt))
+ reada_start_machine(fs_info);
wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
5 * HZ);
dump_devs(rc->root->fs_info,
@@ -977,9 +973,13 @@ int btrfs_reada_wait(void *handle)
int btrfs_reada_wait(void *handle)
{
struct reada_control *rc = handle;
+ struct btrfs_fs_info *fs_info = rc->root->fs_info;
while (atomic_read(&rc->elems)) {
- wait_event(rc->wait, atomic_read(&rc->elems) == 0);
+ if (!atomic_read(&fs_info->reada_works_cnt))
+ reada_start_machine(fs_info);
+ wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
+ (HZ + 9) / 10);
}
kref_put(&rc->refcnt, reada_control_release);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 7cf8509deda7..a25f3b21491b 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -488,7 +488,7 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_root_item *item = &root->root_item;
- struct timespec ct = CURRENT_TIME;
+ struct timespec ct = current_fs_time(root->fs_info->sb);
spin_lock(&root->root_item_lock);
btrfs_set_root_ctransid(item, trans->transid);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 92bf5ee732fb..e42aa27c96e9 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -461,7 +461,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
int ret;
- sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
+ sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
if (!sctx)
goto nomem;
atomic_set(&sctx->refs, 1);
@@ -472,7 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
struct scrub_bio *sbio;
- sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
+ sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
if (!sbio)
goto nomem;
sctx->bios[i] = sbio;
@@ -1654,7 +1654,7 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
again:
if (!wr_ctx->wr_curr_bio) {
wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
- GFP_NOFS);
+ GFP_KERNEL);
if (!wr_ctx->wr_curr_bio) {
mutex_unlock(&wr_ctx->wr_lock);
return -ENOMEM;
@@ -1671,7 +1671,8 @@ again:
sbio->dev = wr_ctx->tgtdev;
bio = sbio->bio;
if (!bio) {
- bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
+ bio = btrfs_io_bio_alloc(GFP_KERNEL,
+ wr_ctx->pages_per_wr_bio);
if (!bio) {
mutex_unlock(&wr_ctx->wr_lock);
return -ENOMEM;
@@ -2076,7 +2077,8 @@ again:
sbio->dev = spage->dev;
bio = sbio->bio;
if (!bio) {
- bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
+ bio = btrfs_io_bio_alloc(GFP_KERNEL,
+ sctx->pages_per_rd_bio);
if (!bio)
return -ENOMEM;
sbio->bio = bio;
@@ -2241,7 +2243,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
struct scrub_block *sblock;
int index;
- sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
+ sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
if (!sblock) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
@@ -2259,7 +2261,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
struct scrub_page *spage;
u64 l = min_t(u64, len, PAGE_SIZE);
- spage = kzalloc(sizeof(*spage), GFP_NOFS);
+ spage = kzalloc(sizeof(*spage), GFP_KERNEL);
if (!spage) {
leave_nomem:
spin_lock(&sctx->stat_lock);
@@ -2286,7 +2288,7 @@ leave_nomem:
spage->have_csum = 0;
}
sblock->page_count++;
- spage->page = alloc_page(GFP_NOFS);
+ spage->page = alloc_page(GFP_KERNEL);
if (!spage->page)
goto leave_nomem;
len -= l;
@@ -2541,7 +2543,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
struct scrub_block *sblock;
int index;
- sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
+ sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
if (!sblock) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
@@ -2561,7 +2563,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
struct scrub_page *spage;
u64 l = min_t(u64, len, PAGE_SIZE);
- spage = kzalloc(sizeof(*spage), GFP_NOFS);
+ spage = kzalloc(sizeof(*spage), GFP_KERNEL);
if (!spage) {
leave_nomem:
spin_lock(&sctx->stat_lock);
@@ -2591,7 +2593,7 @@ leave_nomem:
spage->have_csum = 0;
}
sblock->page_count++;
- spage->page = alloc_page(GFP_NOFS);
+ spage->page = alloc_page(GFP_KERNEL);
if (!spage->page)
goto leave_nomem;
len -= l;
@@ -3857,16 +3859,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return -EIO;
}
- btrfs_dev_replace_lock(&fs_info->dev_replace);
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (dev->scrub_device ||
(!is_dev_replace &&
btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return -EINPROGRESS;
}
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
ret = scrub_workers_get(fs_info, is_dev_replace);
if (ret) {
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 63a6152be04b..d2e29925f1da 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -304,7 +304,7 @@ static struct fs_path *fs_path_alloc(void)
{
struct fs_path *p;
- p = kmalloc(sizeof(*p), GFP_NOFS);
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return NULL;
p->reversed = 0;
@@ -363,11 +363,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)
* First time the inline_buf does not suffice
*/
if (p->buf == p->inline_buf) {
- tmp_buf = kmalloc(len, GFP_NOFS);
+ tmp_buf = kmalloc(len, GFP_KERNEL);
if (tmp_buf)
memcpy(tmp_buf, p->buf, old_buf_len);
} else {
- tmp_buf = krealloc(p->buf, len, GFP_NOFS);
+ tmp_buf = krealloc(p->buf, len, GFP_KERNEL);
}
if (!tmp_buf)
return -ENOMEM;
@@ -995,7 +995,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
* values are small.
*/
buf_len = PATH_MAX;
- buf = kmalloc(buf_len, GFP_NOFS);
+ buf = kmalloc(buf_len, GFP_KERNEL);
if (!buf) {
ret = -ENOMEM;
goto out;
@@ -1042,7 +1042,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
buf = NULL;
} else {
char *tmp = krealloc(buf, buf_len,
- GFP_NOFS | __GFP_NOWARN);
+ GFP_KERNEL | __GFP_NOWARN);
if (!tmp)
kfree(buf);
@@ -1303,7 +1303,7 @@ static int find_extent_clone(struct send_ctx *sctx,
/* We only use this path under the commit sem */
tmp_path->need_commit_sem = 0;
- backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
+ backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_KERNEL);
if (!backref_ctx) {
ret = -ENOMEM;
goto out;
@@ -1984,7 +1984,7 @@ static int name_cache_insert(struct send_ctx *sctx,
nce_head = radix_tree_lookup(&sctx->name_cache,
(unsigned long)nce->ino);
if (!nce_head) {
- nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
+ nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
if (!nce_head) {
kfree(nce);
return -ENOMEM;
@@ -2179,7 +2179,7 @@ out_cache:
/*
* Store the result of the lookup in the name cache.
*/
- nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
+ nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);
if (!nce) {
ret = -ENOMEM;
goto out;
@@ -2315,7 +2315,7 @@ static int send_subvol_begin(struct send_ctx *sctx)
if (!path)
return -ENOMEM;
- name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS);
+ name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
if (!name) {
btrfs_free_path(path);
return -ENOMEM;
@@ -2730,7 +2730,7 @@ static int __record_ref(struct list_head *head, u64 dir,
{
struct recorded_ref *ref;
- ref = kmalloc(sizeof(*ref), GFP_NOFS);
+ ref = kmalloc(sizeof(*ref), GFP_KERNEL);
if (!ref)
return -ENOMEM;
@@ -2755,7 +2755,7 @@ static int dup_ref(struct recorded_ref *ref, struct list_head *list)
{
struct recorded_ref *new;
- new = kmalloc(sizeof(*ref), GFP_NOFS);
+ new = kmalloc(sizeof(*ref), GFP_KERNEL);
if (!new)
return -ENOMEM;
@@ -2818,7 +2818,7 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
struct rb_node *parent = NULL;
struct orphan_dir_info *entry, *odi;
- odi = kmalloc(sizeof(*odi), GFP_NOFS);
+ odi = kmalloc(sizeof(*odi), GFP_KERNEL);
if (!odi)
return ERR_PTR(-ENOMEM);
odi->ino = dir_ino;
@@ -2973,7 +2973,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
struct rb_node *parent = NULL;
struct waiting_dir_move *entry, *dm;
- dm = kmalloc(sizeof(*dm), GFP_NOFS);
+ dm = kmalloc(sizeof(*dm), GFP_KERNEL);
if (!dm)
return -ENOMEM;
dm->ino = ino;
@@ -3040,7 +3040,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,
int exists = 0;
int ret;
- pm = kmalloc(sizeof(*pm), GFP_NOFS);
+ pm = kmalloc(sizeof(*pm), GFP_KERNEL);
if (!pm)
return -ENOMEM;
pm->parent_ino = parent_ino;
@@ -4280,7 +4280,7 @@ static int __find_xattr(int num, struct btrfs_key *di_key,
strncmp(name, ctx->name, name_len) == 0) {
ctx->found_idx = num;
ctx->found_data_len = data_len;
- ctx->found_data = kmemdup(data, data_len, GFP_NOFS);
+ ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);
if (!ctx->found_data)
return -ENOMEM;
return 1;
@@ -4481,7 +4481,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
while (index <= last_index) {
unsigned cur_len = min_t(unsigned, len,
PAGE_CACHE_SIZE - pg_offset);
- page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+ page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
if (!page) {
ret = -ENOMEM;
break;
@@ -5989,7 +5989,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
goto out;
}
- sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS);
+ sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL);
if (!sctx) {
ret = -ENOMEM;
goto out;
@@ -5997,7 +5997,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
INIT_LIST_HEAD(&sctx->new_refs);
INIT_LIST_HEAD(&sctx->deleted_refs);
- INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS);
+ INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
INIT_LIST_HEAD(&sctx->name_cache_list);
sctx->flags = arg->flags;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d41e09fe8e38..bf75200c6f86 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -303,7 +303,8 @@ enum {
Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
- Opt_datasum, Opt_treelog, Opt_noinode_cache,
+ Opt_datasum, Opt_treelog, Opt_noinode_cache, Opt_usebackuproot,
+ Opt_nologreplay, Opt_norecovery,
#ifdef CONFIG_BTRFS_DEBUG
Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
#endif
@@ -335,6 +336,8 @@ static const match_table_t tokens = {
{Opt_noacl, "noacl"},
{Opt_notreelog, "notreelog"},
{Opt_treelog, "treelog"},
+ {Opt_nologreplay, "nologreplay"},
+ {Opt_norecovery, "norecovery"},
{Opt_flushoncommit, "flushoncommit"},
{Opt_noflushoncommit, "noflushoncommit"},
{Opt_ratio, "metadata_ratio=%d"},
@@ -352,7 +355,8 @@ static const match_table_t tokens = {
{Opt_inode_cache, "inode_cache"},
{Opt_noinode_cache, "noinode_cache"},
{Opt_no_space_cache, "nospace_cache"},
- {Opt_recovery, "recovery"},
+ {Opt_recovery, "recovery"}, /* deprecated */
+ {Opt_usebackuproot, "usebackuproot"},
{Opt_skip_balance, "skip_balance"},
{Opt_check_integrity, "check_int"},
{Opt_check_integrity_including_extent_data, "check_int_data"},
@@ -373,7 +377,8 @@ static const match_table_t tokens = {
* reading in a new superblock is parsed here.
* XXX JDM: This needs to be cleaned up for remount.
*/
-int btrfs_parse_options(struct btrfs_root *root, char *options)
+int btrfs_parse_options(struct btrfs_root *root, char *options,
+ unsigned long new_flags)
{
struct btrfs_fs_info *info = root->fs_info;
substring_t args[MAX_OPT_ARGS];
@@ -393,8 +398,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
else if (cache_gen)
btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+ /*
+ * Even the options are empty, we still need to do extra check
+ * against new flags
+ */
if (!options)
- goto out;
+ goto check;
/*
* strsep changes the string, duplicate it because parse_options
@@ -606,6 +615,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
btrfs_clear_and_info(root, NOTREELOG,
"enabling tree log");
break;
+ case Opt_norecovery:
+ case Opt_nologreplay:
+ btrfs_set_and_info(root, NOLOGREPLAY,
+ "disabling log replay at mount time");
+ break;
case Opt_flushoncommit:
btrfs_set_and_info(root, FLUSHONCOMMIT,
"turning on flush-on-commit");
@@ -696,8 +710,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
"disabling auto defrag");
break;
case Opt_recovery:
- btrfs_info(root->fs_info, "enabling auto recovery");
- btrfs_set_opt(info->mount_opt, RECOVERY);
+ btrfs_warn(root->fs_info,
+ "'recovery' is deprecated, use 'usebackuproot' instead");
+ case Opt_usebackuproot:
+ btrfs_info(root->fs_info,
+ "trying to use backup root at mount time");
+ btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
break;
case Opt_skip_balance:
btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
@@ -792,6 +810,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
break;
}
}
+check:
+ /*
+ * Extra check for current option against current flag
+ */
+ if (btrfs_test_opt(root, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) {
+ btrfs_err(root->fs_info,
+ "nologreplay must be used with ro mount option");
+ ret = -EINVAL;
+ }
out:
if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) &&
!btrfs_test_opt(root, FREE_SPACE_TREE) &&
@@ -1202,6 +1229,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",ssd");
if (btrfs_test_opt(root, NOTREELOG))
seq_puts(seq, ",notreelog");
+ if (btrfs_test_opt(root, NOLOGREPLAY))
+ seq_puts(seq, ",nologreplay");
if (btrfs_test_opt(root, FLUSHONCOMMIT))
seq_puts(seq, ",flushoncommit");
if (btrfs_test_opt(root, DISCARD))
@@ -1228,8 +1257,6 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",inode_cache");
if (btrfs_test_opt(root, SKIP_BALANCE))
seq_puts(seq, ",skip_balance");
- if (btrfs_test_opt(root, RECOVERY))
- seq_puts(seq, ",recovery");
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
seq_puts(seq, ",check_int_data");
@@ -1685,7 +1712,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
}
}
- ret = btrfs_parse_options(root, data);
+ ret = btrfs_parse_options(root, data, *flags);
if (ret) {
ret = -EINVAL;
goto restore;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index b6031ce474f7..43885e51b882 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -637,6 +637,8 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
trans->block_rsv = &root->fs_info->trans_block_rsv;
trans->bytes_reserved = num_bytes;
+ trace_btrfs_space_reservation(root->fs_info, "transaction",
+ trans->transid, num_bytes, 1);
return trans;
}
@@ -1333,7 +1335,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct dentry *dentry;
struct extent_buffer *tmp;
struct extent_buffer *old;
- struct timespec cur_time = CURRENT_TIME;
+ struct timespec cur_time;
int ret = 0;
u64 to_reserve = 0;
u64 index = 0;
@@ -1375,12 +1377,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
rsv = trans->block_rsv;
trans->block_rsv = &pending->block_rsv;
trans->bytes_reserved = trans->block_rsv->reserved;
-
+ trace_btrfs_space_reservation(root->fs_info, "transaction",
+ trans->transid,
+ trans->bytes_reserved, 1);
dentry = pending->dentry;
parent_inode = pending->dir;
parent_root = BTRFS_I(parent_inode)->root;
record_root_in_trans(trans, parent_root);
+ cur_time = current_fs_time(parent_inode->i_sb);
+
/*
* insert the directory item
*/
@@ -1523,7 +1529,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_i_size_write(parent_inode, parent_inode->i_size +
dentry->d_name.len * 2);
- parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
+ parent_inode->i_mtime = parent_inode->i_ctime =
+ current_fs_time(parent_inode->i_sb);
ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 366b335946fa..80857b4646c0 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -138,7 +138,7 @@ static struct btrfs_fs_devices *__alloc_fs_devices(void)
{
struct btrfs_fs_devices *fs_devs;
- fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS);
+ fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);
if (!fs_devs)
return ERR_PTR(-ENOMEM);
@@ -220,7 +220,7 @@ static struct btrfs_device *__alloc_device(void)
{
struct btrfs_device *dev;
- dev = kzalloc(sizeof(*dev), GFP_NOFS);
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return ERR_PTR(-ENOMEM);
@@ -733,7 +733,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
* uuid mutex so nothing we touch in here is going to disappear.
*/
if (orig_dev->name) {
- name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
+ name = rcu_string_strdup(orig_dev->name->str,
+ GFP_KERNEL);
if (!name) {
kfree(device);
goto error;
@@ -1714,12 +1715,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
} while (read_seqretry(&root->fs_info->profiles_lock, seq));
num_devices = root->fs_info->fs_devices->num_devices;
- btrfs_dev_replace_lock(&root->fs_info->dev_replace);
+ btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
WARN_ON(num_devices < 1);
num_devices--;
}
- btrfs_dev_replace_unlock(&root->fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0);
if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET;
@@ -2287,7 +2288,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
goto error;
}
- name = rcu_string_strdup(device_path, GFP_NOFS);
+ name = rcu_string_strdup(device_path, GFP_KERNEL);
if (!name) {
kfree(device);
ret = -ENOMEM;
@@ -2966,7 +2967,7 @@ static int insert_balance_item(struct btrfs_root *root,
}
key.objectid = BTRFS_BALANCE_OBJECTID;
- key.type = BTRFS_BALANCE_ITEM_KEY;
+ key.type = BTRFS_TEMPORARY_ITEM_KEY;
key.offset = 0;
ret = btrfs_insert_empty_item(trans, root, path, &key,
@@ -3015,7 +3016,7 @@ static int del_balance_item(struct btrfs_root *root)
}
key.objectid = BTRFS_BALANCE_OBJECTID;
- key.type = BTRFS_BALANCE_ITEM_KEY;
+ key.type = BTRFS_TEMPORARY_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
@@ -3686,12 +3687,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
}
num_devices = fs_info->fs_devices->num_devices;
- btrfs_dev_replace_lock(&fs_info->dev_replace);
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
BUG_ON(num_devices < 1);
num_devices--;
}
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (num_devices == 1)
allowed |= BTRFS_BLOCK_GROUP_DUP;
@@ -3867,7 +3868,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
return -ENOMEM;
key.objectid = BTRFS_BALANCE_OBJECTID;
- key.type = BTRFS_BALANCE_ITEM_KEY;
+ key.type = BTRFS_TEMPORARY_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
@@ -5062,10 +5063,10 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
ret = 1;
free_extent_map(em);
- btrfs_dev_replace_lock(&fs_info->dev_replace);
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
ret++;
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
return ret;
}
@@ -5325,10 +5326,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
if (!bbio_ret)
goto out;
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 0);
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
if (!dev_replace_is_ongoing)
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
+ else
+ btrfs_dev_replace_set_lock_blocking(dev_replace);
if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
@@ -5751,8 +5754,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
bbio->mirror_num = map->num_stripes + 1;
}
out:
- if (dev_replace_is_ongoing)
- btrfs_dev_replace_unlock(dev_replace);
+ if (dev_replace_is_ongoing) {
+ btrfs_dev_replace_clear_lock_blocking(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
+ }
free_extent_map(em);
return ret;
}
@@ -6705,8 +6710,8 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
int item_size;
struct btrfs_dev_stats_item *ptr;
- key.objectid = 0;
- key.type = BTRFS_DEV_STATS_KEY;
+ key.objectid = BTRFS_DEV_STATS_OBJECTID;
+ key.type = BTRFS_PERSISTENT_ITEM_KEY;
key.offset = device->devid;
ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
if (ret) {
@@ -6753,8 +6758,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
int ret;
int i;
- key.objectid = 0;
- key.type = BTRFS_DEV_STATS_KEY;
+ key.objectid = BTRFS_DEV_STATS_OBJECTID;
+ key.type = BTRFS_PERSISTENT_ITEM_KEY;
key.offset = device->devid;
path = btrfs_alloc_path();
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 6c68d6356197..f2a20d52b9db 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -249,7 +249,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
goto out;
inode_inc_iversion(inode);
- inode->i_ctime = CURRENT_TIME;
+ inode->i_ctime = current_fs_time(inode->i_sb);
set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 1f107fd51328..655f21f99160 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -575,6 +575,26 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
mutex_unlock(&allocated_ptys_lock);
}
+/*
+ * pty code needs to hold extra references in case of last /dev/tty close
+ */
+
+void devpts_add_ref(struct inode *ptmx_inode)
+{
+ struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+
+ atomic_inc(&sb->s_active);
+ ihold(ptmx_inode);
+}
+
+void devpts_del_ref(struct inode *ptmx_inode)
+{
+ struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+
+ iput(ptmx_inode);
+ deactivate_super(sb);
+}
+
/**
* devpts_pty_new -- create a new inode in /dev/pts/
* @ptmx_inode: inode of the master
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index da37beb76f6e..594f7e63b432 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4491,7 +4491,7 @@ xlog_recover_process(
* know precisely what failed.
*/
if (pass == XLOG_RECOVER_CRCPASS) {
- if (rhead->h_crc && crc != le32_to_cpu(rhead->h_crc))
+ if (rhead->h_crc && crc != rhead->h_crc)
return -EFSBADCRC;
return 0;
}
@@ -4502,7 +4502,7 @@ xlog_recover_process(
* zero CRC check prevents warnings from being emitted when upgrading
* the kernel from one that does not add CRCs by default.
*/
- if (crc != le32_to_cpu(rhead->h_crc)) {
+ if (crc != rhead->h_crc) {
if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
xfs_alert(log->l_mp,
"log record CRC mismatch: found 0x%x, expected 0x%x.",