diff options
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 659 |
1 files changed, 227 insertions, 432 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d94e3f68b9b1..53ca025655fc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -42,6 +42,8 @@ #include <linux/blkdev.h> #include <linux/posix_acl_xattr.h> #include <linux/uio.h> +#include <linux/magic.h> +#include <linux/iversion.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -67,7 +69,6 @@ struct btrfs_iget_args { }; struct btrfs_dio_data { - u64 outstanding_extents; u64 reserve; u64 unsubmitted_oe_range_start; u64 unsubmitted_oe_range_end; @@ -316,7 +317,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root, btrfs_free_path(path); return PTR_ERR(trans); } - trans->block_rsv = &fs_info->delalloc_block_rsv; + trans->block_rsv = &BTRFS_I(inode)->block_rsv; if (compressed_size && compressed_pages) extent_item_size = btrfs_file_extent_calc_inline_size( @@ -348,7 +349,6 @@ static noinline int cow_file_range_inline(struct btrfs_root *root, } set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); - btrfs_delalloc_release_metadata(BTRFS_I(inode), end + 1 - start); btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0); out: /* @@ -379,6 +379,7 @@ struct async_cow { struct page *locked_page; u64 start; u64 end; + unsigned int write_flags; struct list_head extents; struct btrfs_work work; }; @@ -458,7 +459,6 @@ static noinline void compress_file_range(struct inode *inode, { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; - u64 num_bytes; u64 blocksize = fs_info->sectorsize; u64 actual_end; u64 isize = i_size_read(inode); @@ -508,8 +508,6 @@ again: total_compressed = min_t(unsigned long, total_compressed, BTRFS_MAX_UNCOMPRESSED); - num_bytes = ALIGN(end - start + 1, blocksize); - num_bytes = max(blocksize, num_bytes); total_in = 0; ret = 0; @@ -539,10 +537,18 @@ again: * * If the compression fails for any reason, we set the pages * dirty again later on. + * + * Note that the remaining part is redirtied, the start pointer + * has moved, the end is the original one. */ - extent_range_clear_dirty_for_io(inode, start, end); - redirty = 1; - ret = btrfs_compress_pages(compress_type, + if (!redirty) { + extent_range_clear_dirty_for_io(inode, start, end); + redirty = 1; + } + + /* Compression level is applied here and only here */ + ret = btrfs_compress_pages( + compress_type | (fs_info->compress_level << 4), inode->i_mapping, start, pages, &nr_pages, @@ -570,7 +576,7 @@ again: cont: if (start == 0) { /* lets try to make an inline extent */ - if (ret || total_in < (actual_end - start)) { + if (ret || total_in < actual_end) { /* we didn't compress the entire range, try * to make an uncompressed inline extent. */ @@ -584,16 +590,21 @@ cont: } if (ret <= 0) { unsigned long clear_flags = EXTENT_DELALLOC | - EXTENT_DELALLOC_NEW | EXTENT_DEFRAG; + EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | + EXTENT_DO_ACCOUNTING; unsigned long page_error_op; - clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0; page_error_op = ret < 0 ? PAGE_SET_ERROR : 0; /* * inline extent creation worked or returned error, * we don't need to create any more async work items. * Unlock and free up our temp pages. + * + * We use DO_ACCOUNTING here because we need the + * delalloc_release_metadata to be done _after_ we drop + * our outstanding extent for clearing delalloc for this + * range. */ extent_clear_unlock_delalloc(inode, start, end, end, NULL, clear_flags, @@ -602,10 +613,6 @@ cont: PAGE_SET_WRITEBACK | page_error_op | PAGE_END_WRITEBACK); - if (ret == 0) - btrfs_free_reserved_data_space_noquota(inode, - start, - end - start + 1); goto free_pages_out; } } @@ -625,7 +632,6 @@ cont: */ total_in = ALIGN(total_in, PAGE_SIZE); if (total_compressed + blocksize <= total_in) { - num_bytes = total_in; *num_added += 1; /* @@ -633,12 +639,12 @@ cont: * allocation on disk for these compressed pages, and * will submit them to the elevator. */ - add_async_extent(async_cow, start, num_bytes, + add_async_extent(async_cow, start, total_in, total_compressed, pages, nr_pages, compress_type); - if (start + num_bytes < end) { - start += num_bytes; + if (start + total_in < end) { + start += total_in; pages = NULL; cond_resched(); goto again; @@ -765,11 +771,10 @@ retry: * all those pages down to the drive. */ if (!page_started && !ret) - extent_write_locked_range(io_tree, - inode, async_extent->start, + extent_write_locked_range(inode, + async_extent->start, async_extent->start + async_extent->ram_size - 1, - btrfs_get_extent, WB_SYNC_ALL); else if (ret) unlock_page(async_cow->locked_page); @@ -858,7 +863,8 @@ retry: async_extent->ram_size, ins.objectid, ins.offset, async_extent->pages, - async_extent->nr_pages)) { + async_extent->nr_pages, + async_cow->write_flags)) { struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct page *p = async_extent->pages[0]; const u64 start = async_extent->start; @@ -982,15 +988,19 @@ static noinline int cow_file_range(struct inode *inode, ret = cow_file_range_inline(root, inode, start, end, 0, BTRFS_COMPRESS_NONE, NULL); if (ret == 0) { + /* + * We use DO_ACCOUNTING here because we need the + * delalloc_release_metadata to be run _after_ we drop + * our outstanding extent for clearing delalloc for this + * range. + */ extent_clear_unlock_delalloc(inode, start, end, delalloc_end, NULL, EXTENT_LOCKED | EXTENT_DELALLOC | - EXTENT_DELALLOC_NEW | - EXTENT_DEFRAG, PAGE_UNLOCK | + EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | + EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); - btrfs_free_reserved_data_space_noquota(inode, start, - end - start + 1); *nr_written = *nr_written + (end - start + PAGE_SIZE) / PAGE_SIZE; *page_started = 1; @@ -1188,7 +1198,8 @@ static noinline void async_cow_free(struct btrfs_work *work) static int cow_file_range_async(struct inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, - unsigned long *nr_written) + unsigned long *nr_written, + unsigned int write_flags) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct async_cow *async_cow; @@ -1197,7 +1208,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, u64 cur_end; clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, - 1, 0, NULL, GFP_NOFS); + 1, 0, NULL); while (start < end) { async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); BUG_ON(!async_cow); /* -ENOMEM */ @@ -1205,6 +1216,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_cow->root = root; async_cow->locked_page = locked_page; async_cow->start = start; + async_cow->write_flags = write_flags; if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && !btrfs_test_opt(fs_info, FORCE_COMPRESS)) @@ -1226,13 +1238,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work); - while (atomic_read(&fs_info->async_submit_draining) && - atomic_read(&fs_info->async_delalloc_pages)) { - wait_event(fs_info->async_submit_wait, - (atomic_read(&fs_info->async_delalloc_pages) == - 0)); - } - *nr_written += nr_pages; start = cur_end + 1; } @@ -1581,11 +1586,13 @@ static inline int need_force_cow(struct inode *inode, u64 start, u64 end) */ static int run_delalloc_range(void *private_data, struct page *locked_page, u64 start, u64 end, int *page_started, - unsigned long *nr_written) + unsigned long *nr_written, + struct writeback_control *wbc) { struct inode *inode = private_data; int ret; int force_cow = need_force_cow(inode, start, end); + unsigned int write_flags = wbc_to_write_flags(wbc); if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) { ret = run_delalloc_nocow(inode, locked_page, start, end, @@ -1600,7 +1607,8 @@ static int run_delalloc_range(void *private_data, struct page *locked_page, set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &BTRFS_I(inode)->runtime_flags); ret = cow_file_range_async(inode, locked_page, start, end, - page_started, nr_written); + page_started, nr_written, + write_flags); } if (ret) btrfs_cleanup_ordered_extents(inode, start, end - start + 1); @@ -1635,7 +1643,7 @@ static void btrfs_split_extent_hook(void *private_data, } spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents++; + btrfs_mod_outstanding_extents(BTRFS_I(inode), 1); spin_unlock(&BTRFS_I(inode)->lock); } @@ -1665,7 +1673,7 @@ static void btrfs_merge_extent_hook(void *private_data, /* we're not bigger than the max, unreserve the space and go */ if (new_size <= BTRFS_MAX_EXTENT_SIZE) { spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents--; + btrfs_mod_outstanding_extents(BTRFS_I(inode), -1); spin_unlock(&BTRFS_I(inode)->lock); return; } @@ -1696,7 +1704,7 @@ static void btrfs_merge_extent_hook(void *private_data, return; spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents--; + btrfs_mod_outstanding_extents(BTRFS_I(inode), -1); spin_unlock(&BTRFS_I(inode)->lock); } @@ -1766,15 +1774,12 @@ static void btrfs_set_bit_hook(void *private_data, if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; + u32 num_extents = count_max_extents(len); bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode)); - if (*bits & EXTENT_FIRST_DELALLOC) { - *bits &= ~EXTENT_FIRST_DELALLOC; - } else { - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents++; - spin_unlock(&BTRFS_I(inode)->lock); - } + spin_lock(&BTRFS_I(inode)->lock); + btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents); + spin_unlock(&BTRFS_I(inode)->lock); /* For sanity tests */ if (btrfs_is_testing(fs_info)) @@ -1828,13 +1833,9 @@ static void btrfs_clear_bit_hook(void *private_data, struct btrfs_root *root = inode->root; bool do_list = !btrfs_is_free_space_inode(inode); - if (*bits & EXTENT_FIRST_DELALLOC) { - *bits &= ~EXTENT_FIRST_DELALLOC; - } else if (!(*bits & EXTENT_CLEAR_META_RESV)) { - spin_lock(&inode->lock); - inode->outstanding_extents -= num_extents; - spin_unlock(&inode->lock); - } + spin_lock(&inode->lock); + btrfs_mod_outstanding_extents(inode, -num_extents); + spin_unlock(&inode->lock); /* * We don't reserve metadata space for space cache inodes so we @@ -1955,7 +1956,21 @@ static blk_status_t __btrfs_submit_bio_done(void *private_data, struct bio *bio, /* * extent_io.c submission hook. This does the right thing for csum calculation - * on write, or reading the csums from the tree before a read + * on write, or reading the csums from the tree before a read. + * + * Rules about async/sync submit, + * a) read: sync submit + * + * b) write without checksum: sync submit + * + * c) write with checksum: + * c-1) if bio is issued by fsync: sync submit + * (sync_writers != 0) + * + * c-2) if root is reloc root: sync submit + * (only in case of buffered IO) + * + * c-3) otherwise: async submit */ static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio, int mirror_num, unsigned long bio_flags, @@ -2027,20 +2042,21 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum *sum; list_for_each_entry(sum, list, list) { - trans->adding_csums = 1; + trans->adding_csums = true; btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root->fs_info->csum_root, sum); - trans->adding_csums = 0; + trans->adding_csums = false; } return 0; } int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, + unsigned int extra_bits, struct extent_state **cached_state, int dedupe) { WARN_ON((end & (PAGE_SIZE - 1)) == 0); return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, - cached_state); + extra_bits, cached_state); } /* see btrfs_writepage_start_hook for details on why this is required */ @@ -2085,7 +2101,7 @@ again: PAGE_SIZE); if (ordered) { unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, - page_end, &cached_state, GFP_NOFS); + page_end, &cached_state); unlock_page(page); btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); @@ -2101,13 +2117,21 @@ again: goto out; } - btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state, - 0); + ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0, + &cached_state, 0); + if (ret) { + mapping_set_error(page->mapping, ret); + end_extent_writepage(page, ret, page_start, page_end); + ClearPageChecked(page); + goto out; + } + ClearPageChecked(page); set_page_dirty(page); + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, - &cached_state, GFP_NOFS); + &cached_state); out_page: unlock_page(page); put_page(page); @@ -2229,8 +2253,9 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, if (ret < 0) goto out; qg_released = ret; - ret = btrfs_alloc_reserved_file_extent(trans, root->root_key.objectid, - btrfs_ino(BTRFS_I(inode)), file_pos, qg_released, &ins); + ret = btrfs_alloc_reserved_file_extent(trans, root, + btrfs_ino(BTRFS_I(inode)), + file_pos, qg_released, &ins); out: btrfs_free_path(path); @@ -2464,7 +2489,7 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path, ret = iterate_inodes_from_logical(old->bytenr + old->extent_offset, fs_info, path, record_one_backref, - old); + old, false); if (ret < 0 && ret != -ENOENT) return false; @@ -2682,7 +2707,7 @@ again: inode_add_bytes(inode, len); btrfs_release_path(path); - ret = btrfs_inc_extent_ref(trans, fs_info, new->bytenr, + ret = btrfs_inc_extent_ref(trans, root, new->bytenr, new->disk_len, 0, backref->root_id, backref->inum, new->file_pos); /* start - extent_offset */ @@ -2698,7 +2723,7 @@ out_free_path: btrfs_end_transaction(trans); out_unlock: unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end, - &cached, GFP_NOFS); + &cached); iput(inode); return ret; } @@ -2964,7 +2989,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) trans = NULL; goto out; } - trans->block_rsv = &fs_info->delalloc_block_rsv; + trans->block_rsv = &BTRFS_I(inode)->block_rsv; ret = btrfs_update_inode_fallback(trans, root, inode); if (ret) /* -ENOMEM or corruption */ btrfs_abort_transaction(trans, ret); @@ -2987,7 +3012,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) clear_extent_bit(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, - EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS); + EXTENT_DEFRAG, 0, 0, &cached_state); } if (nolock) @@ -3000,12 +3025,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out; } - trans->block_rsv = &fs_info->delalloc_block_rsv; + trans->block_rsv = &BTRFS_I(inode)->block_rsv; if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) compress_type = ordered_extent->compress_type; if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { BUG_ON(compress_type); + btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset, + ordered_extent->len); ret = btrfs_mark_extent_written(trans, BTRFS_I(inode), ordered_extent->file_offset, ordered_extent->file_offset + @@ -3055,12 +3082,9 @@ out: ordered_extent->len - 1, clear_bits, (clear_bits & EXTENT_LOCKED) ? 1 : 0, - 0, &cached_state, GFP_NOFS); + 0, &cached_state); } - if (root != fs_info->tree_root) - btrfs_delalloc_release_metadata(BTRFS_I(inode), - ordered_extent->len); if (trans) btrfs_end_transaction(trans); @@ -3072,7 +3096,7 @@ out: else start = ordered_extent->file_offset; end = ordered_extent->file_offset + ordered_extent->len - 1; - clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS); + clear_extent_uptodate(io_tree, start, end, NULL); /* Drop the cache for the part of the extent we didn't write. */ btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0); @@ -3779,7 +3803,8 @@ static int btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item); - inode->i_version = btrfs_inode_sequence(leaf, inode_item); + inode_set_iversion_queried(inode, + btrfs_inode_sequence(leaf, inode_item)); inode->i_generation = BTRFS_I(inode)->generation; inode->i_rdev = 0; rdev = btrfs_inode_rdev(leaf, inode_item); @@ -3947,7 +3972,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, &token); btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation, &token); - btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token); + btrfs_set_token_inode_sequence(leaf, item, inode_peek_iversion(inode), + &token); btrfs_set_token_inode_transid(leaf, item, trans->transid, &token); btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token); btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token); @@ -4372,47 +4398,11 @@ static int truncate_space_check(struct btrfs_trans_handle *trans, } -static int truncate_inline_extent(struct inode *inode, - struct btrfs_path *path, - struct btrfs_key *found_key, - const u64 item_end, - const u64 new_size) -{ - struct extent_buffer *leaf = path->nodes[0]; - int slot = path->slots[0]; - struct btrfs_file_extent_item *fi; - u32 size = (u32)(new_size - found_key->offset); - struct btrfs_root *root = BTRFS_I(inode)->root; - - fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - - if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) { - loff_t offset = new_size; - loff_t page_end = ALIGN(offset, PAGE_SIZE); - - /* - * Zero out the remaining of the last page of our inline extent, - * instead of directly truncating our inline extent here - that - * would be much more complex (decompressing all the data, then - * compressing the truncated data, which might be bigger than - * the size of the inline extent, resize the extent, etc). - * We release the path because to get the page we might need to - * read the extent item from disk (data not in the page cache). - */ - btrfs_release_path(path); - return btrfs_truncate_block(inode, offset, page_end - offset, - 0); - } - - btrfs_set_file_extent_ram_bytes(leaf, fi, size); - size = btrfs_file_extent_calc_inline_size(size); - btrfs_truncate_item(root->fs_info, path, size, 1); - - if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) - inode_sub_bytes(inode, item_end + 1 - new_size); - - return 0; -} +/* + * Return this if we need to call truncate_block for the last bit of the + * truncate. + */ +#define NEED_TRUNCATE_BLOCK 1 /* * this can truncate away extent items, csum items and directory items. @@ -4451,9 +4441,9 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, int err = 0; u64 ino = btrfs_ino(BTRFS_I(inode)); u64 bytes_deleted = 0; - bool be_nice = 0; - bool should_throttle = 0; - bool should_end = 0; + bool be_nice = false; + bool should_throttle = false; + bool should_end = false; BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); @@ -4463,7 +4453,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, */ if (!btrfs_is_free_space_inode(BTRFS_I(inode)) && test_bit(BTRFS_ROOT_REF_COWS, &root->state)) - be_nice = 1; + be_nice = true; path = btrfs_alloc_path(); if (!path) @@ -4573,11 +4563,6 @@ search_again: if (found_type != BTRFS_EXTENT_DATA_KEY) goto delete; - if (del_item) - last_size = found_key.offset; - else - last_size = new_size; - if (extent_type != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); @@ -4619,40 +4604,30 @@ search_again: */ if (!del_item && btrfs_file_extent_encryption(leaf, fi) == 0 && - btrfs_file_extent_other_encoding(leaf, fi) == 0) { - + btrfs_file_extent_other_encoding(leaf, fi) == 0 && + btrfs_file_extent_compression(leaf, fi) == 0) { + u32 size = (u32)(new_size - found_key.offset); + + btrfs_set_file_extent_ram_bytes(leaf, fi, size); + size = btrfs_file_extent_calc_inline_size(size); + btrfs_truncate_item(root->fs_info, path, size, 1); + } else if (!del_item) { /* - * Need to release path in order to truncate a - * compressed extent. So delete any accumulated - * extent items so far. + * We have to bail so the last_size is set to + * just before this extent. */ - if (btrfs_file_extent_compression(leaf, fi) != - BTRFS_COMPRESS_NONE && pending_del_nr) { - err = btrfs_del_items(trans, root, path, - pending_del_slot, - pending_del_nr); - if (err) { - btrfs_abort_transaction(trans, - err); - goto error; - } - pending_del_nr = 0; - } + err = NEED_TRUNCATE_BLOCK; + break; + } - err = truncate_inline_extent(inode, path, - &found_key, - item_end, - new_size); - if (err) { - btrfs_abort_transaction(trans, err); - goto error; - } - } else if (test_bit(BTRFS_ROOT_REF_COWS, - &root->state)) { + if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) inode_sub_bytes(inode, item_end + 1 - new_size); - } } delete: + if (del_item) + last_size = found_key.offset; + else + last_size = new_size; if (del_item) { if (!pending_del_nr) { /* no pending yet, add ourselves */ @@ -4669,14 +4644,14 @@ delete: } else { break; } - should_throttle = 0; + should_throttle = false; if (found_extent && (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || root == fs_info->tree_root)) { btrfs_set_path_blocking(path); bytes_deleted += extent_num_bytes; - ret = btrfs_free_extent(trans, fs_info, extent_start, + ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, 0, btrfs_header_owner(leaf), ino, extent_offset); @@ -4688,11 +4663,11 @@ delete: if (be_nice) { if (truncate_space_check(trans, root, extent_num_bytes)) { - should_end = 1; + should_end = true; } if (btrfs_should_throttle_delayed_refs(trans, fs_info)) - should_throttle = 1; + should_throttle = true; } } @@ -4797,12 +4772,15 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, u64 block_start; u64 block_end; - if ((offset & (blocksize - 1)) == 0 && - (!len || ((len & (blocksize - 1)) == 0))) + if (IS_ALIGNED(offset, blocksize) && + (!len || IS_ALIGNED(len, blocksize))) goto out; + block_start = round_down(from, blocksize); + block_end = block_start + blocksize - 1; + ret = btrfs_delalloc_reserve_space(inode, &data_reserved, - round_down(from, blocksize), blocksize); + block_start, blocksize); if (ret) goto out; @@ -4810,15 +4788,12 @@ again: page = find_or_create_page(mapping, index, mask); if (!page) { btrfs_delalloc_release_space(inode, data_reserved, - round_down(from, blocksize), - blocksize); + block_start, blocksize); + btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); ret = -ENOMEM; goto out; } - block_start = round_down(from, blocksize); - block_end = block_start + blocksize - 1; - if (!PageUptodate(page)) { ret = btrfs_readpage(NULL, page); lock_page(page); @@ -4840,7 +4815,7 @@ again: ordered = btrfs_lookup_ordered_extent(inode, block_start); if (ordered) { unlock_extent_cached(io_tree, block_start, block_end, - &cached_state, GFP_NOFS); + &cached_state); unlock_page(page); put_page(page); btrfs_start_ordered_extent(inode, ordered, 1); @@ -4851,13 +4826,13 @@ again: clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, - 0, 0, &cached_state, GFP_NOFS); + 0, 0, &cached_state); - ret = btrfs_set_extent_delalloc(inode, block_start, block_end, + ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0, &cached_state, 0); if (ret) { unlock_extent_cached(io_tree, block_start, block_end, - &cached_state, GFP_NOFS); + &cached_state); goto out_unlock; } @@ -4876,13 +4851,13 @@ again: } ClearPageChecked(page); set_page_dirty(page); - unlock_extent_cached(io_tree, block_start, block_end, &cached_state, - GFP_NOFS); + unlock_extent_cached(io_tree, block_start, block_end, &cached_state); out_unlock: if (ret) btrfs_delalloc_release_space(inode, data_reserved, block_start, blocksize); + btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); unlock_page(page); put_page(page); out: @@ -4977,7 +4952,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) if (!ordered) break; unlock_extent_cached(io_tree, hole_start, block_end - 1, - &cached_state, GFP_NOFS); + &cached_state); btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); } @@ -5042,8 +5017,7 @@ next: break; } free_extent_map(em); - unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, - GFP_NOFS); + unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state); return err; } @@ -5286,8 +5260,7 @@ static void evict_inode_truncate_pages(struct inode *inode) clear_extent_bit(io_tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | - EXTENT_DEFRAG, 1, 1, - &cached_state, GFP_NOFS); + EXTENT_DEFRAG, 1, 1, &cached_state); cond_resched(); spin_lock(&io_tree->lock); @@ -5500,6 +5473,14 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, goto out_err; btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); + if (location->type != BTRFS_INODE_ITEM_KEY && + location->type != BTRFS_ROOT_ITEM_KEY) { + btrfs_warn(root->fs_info, +"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))", + __func__, name, btrfs_ino(BTRFS_I(dir)), + location->objectid, location->type, location->offset); + goto out_err; + } out: btrfs_free_path(path); return ret; @@ -5816,8 +5797,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) return inode; } - BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY); - index = srcu_read_lock(&fs_info->subvol_srcu); ret = fixup_tree_root_location(fs_info, dir, dentry, &location, &sub_root); @@ -5940,7 +5919,6 @@ static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx) static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_private *private = file->private_data; struct btrfs_dir_item *di; @@ -6008,9 +5986,6 @@ again: if (btrfs_should_delete_dir_index(&del_list, found_key.offset)) goto next; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); - if (verify_dir_item(fs_info, leaf, slot, di)) - goto next; - name_len = btrfs_dir_name_len(leaf, di); if ((total_len + sizeof(struct dir_entry) + name_len) >= PAGE_SIZE) { @@ -6150,19 +6125,20 @@ static int btrfs_update_time(struct inode *inode, struct timespec *now, int flags) { struct btrfs_root *root = BTRFS_I(inode)->root; + bool dirty = flags & ~S_VERSION; if (btrfs_root_readonly(root)) return -EROFS; if (flags & S_VERSION) - inode_inc_iversion(inode); + dirty |= inode_maybe_inc_iversion(inode, dirty); if (flags & S_CTIME) inode->i_ctime = *now; if (flags & S_MTIME) inode->i_mtime = *now; if (flags & S_ATIME) inode->i_atime = *now; - return btrfs_dirty_inode(inode); + return dirty ? btrfs_dirty_inode(inode) : 0; } /* @@ -6343,7 +6319,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, } /* * index_cnt is ignored for everything but a dir, - * btrfs_get_inode_index_count has an explanation for the magic + * btrfs_set_inode_index_count has an explanation for the magic * number */ BTRFS_I(inode)->index_cnt = 2; @@ -6606,7 +6582,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, out_unlock: btrfs_end_transaction(trans); - btrfs_balance_delayed_items(fs_info); btrfs_btree_balance_dirty(fs_info); if (drop_inode) { inode_dec_link_count(inode); @@ -6687,7 +6662,6 @@ out_unlock: inode_dec_link_count(inode); iput(inode); } - btrfs_balance_delayed_items(fs_info); btrfs_btree_balance_dirty(fs_info); return err; @@ -6762,7 +6736,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent); } - btrfs_balance_delayed_items(fs_info); fail: if (trans) btrfs_end_transaction(trans); @@ -6840,7 +6813,6 @@ out_fail: inode_dec_link_count(inode); iput(inode); } - btrfs_balance_delayed_items(fs_info); btrfs_btree_balance_dirty(fs_info); return err; @@ -6849,68 +6821,6 @@ out_fail_inode: goto out_fail; } -/* Find next extent map of a given extent map, caller needs to ensure locks */ -static struct extent_map *next_extent_map(struct extent_map *em) -{ - struct rb_node *next; - - next = rb_next(&em->rb_node); - if (!next) - return NULL; - return container_of(next, struct extent_map, rb_node); -} - -static struct extent_map *prev_extent_map(struct extent_map *em) -{ - struct rb_node *prev; - - prev = rb_prev(&em->rb_node); - if (!prev) - return NULL; - return container_of(prev, struct extent_map, rb_node); -} - -/* helper for btfs_get_extent. Given an existing extent in the tree, - * the existing extent is the nearest extent to map_start, - * and an extent that you want to insert, deal with overlap and insert - * the best fitted new extent into the tree. - */ -static int merge_extent_mapping(struct extent_map_tree *em_tree, - struct extent_map *existing, - struct extent_map *em, - u64 map_start) -{ - struct extent_map *prev; - struct extent_map *next; - u64 start; - u64 end; - u64 start_diff; - - BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); - - if (existing->start > map_start) { - next = existing; - prev = prev_extent_map(next); - } else { - prev = existing; - next = next_extent_map(prev); - } - - start = prev ? extent_map_end(prev) : em->start; - start = max_t(u64, start, em->start); - end = next ? next->start : extent_map_end(em); - end = min_t(u64, end, extent_map_end(em)); - start_diff = start - em->start; - em->start = start; - em->len = end - start; - if (em->block_start < EXTENT_MAP_LAST_BYTE && - !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { - em->block_start += start_diff; - em->block_len -= start_diff; - } - return add_extent_mapping(em_tree, em, 0); -} - static noinline int uncompress_inline(struct btrfs_path *path, struct page *page, size_t pg_offset, u64 extent_offset, @@ -6985,10 +6895,8 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, struct extent_map *em = NULL; struct extent_map_tree *em_tree = &inode->extent_tree; struct extent_io_tree *io_tree = &inode->io_tree; - struct btrfs_trans_handle *trans = NULL; const bool new_inline = !page || create; -again: read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); if (em) @@ -7027,8 +6935,7 @@ again: path->reada = READA_FORWARD; } - ret = btrfs_lookup_file_extent(trans, root, path, - objectid, start, trans != NULL); + ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0); if (ret < 0) { err = ret; goto out; @@ -7129,7 +7036,7 @@ next: em->orig_block_len = em->len; em->orig_start = em->start; ptr = btrfs_file_extent_inline_start(item) + extent_offset; - if (create == 0 && !PageUptodate(page)) { + if (!PageUptodate(page)) { if (btrfs_file_extent_compression(leaf, item) != BTRFS_COMPRESS_NONE) { ret = uncompress_inline(path, page, pg_offset, @@ -7150,25 +7057,6 @@ next: kunmap(page); } flush_dcache_page(page); - } else if (create && PageUptodate(page)) { - BUG(); - if (!trans) { - kunmap(page); - free_extent_map(em); - em = NULL; - - btrfs_release_path(path); - trans = btrfs_join_transaction(root); - - if (IS_ERR(trans)) - return ERR_CAST(trans); - goto again; - } - map = kmap(page); - write_extent_buffer(leaf, map + pg_offset, ptr, - copy_size); - kunmap(page); - btrfs_mark_buffer_dirty(leaf); } set_extent_uptodate(io_tree, em->start, extent_map_end(em) - 1, NULL, GFP_NOFS); @@ -7180,7 +7068,6 @@ not_found: em->len = len; not_found_em: em->block_start = EXTENT_MAP_HOLE; - set_bit(EXTENT_FLAG_VACANCY, &em->flags); insert: btrfs_release_path(path); if (em->start > start || extent_map_end(em) <= start) { @@ -7193,62 +7080,13 @@ insert: err = 0; write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em, 0); - /* it is possible that someone inserted the extent into the tree - * while we had the lock dropped. It is also possible that - * an overlapping map exists in the tree - */ - if (ret == -EEXIST) { - struct extent_map *existing; - - ret = 0; - - existing = search_extent_mapping(em_tree, start, len); - /* - * existing will always be non-NULL, since there must be - * extent causing the -EEXIST. - */ - if (existing->start == em->start && - extent_map_end(existing) >= extent_map_end(em) && - em->block_start == existing->block_start) { - /* - * The existing extent map already encompasses the - * entire extent map we tried to add. - */ - free_extent_map(em); - em = existing; - err = 0; - - } else if (start >= extent_map_end(existing) || - start <= existing->start) { - /* - * The existing extent map is the one nearest to - * the [start, start + len) range which overlaps - */ - err = merge_extent_mapping(em_tree, existing, - em, start); - free_extent_map(existing); - if (err) { - free_extent_map(em); - em = NULL; - } - } else { - free_extent_map(em); - em = existing; - err = 0; - } - } + err = btrfs_add_extent_mapping(em_tree, &em, start, len); write_unlock(&em_tree->lock); out: trace_btrfs_get_extent(root, inode, em); btrfs_free_path(path); - if (trans) { - ret = btrfs_end_transaction(trans); - if (!err) - err = ret; - } if (err) { free_extent_map(em); return ERR_PTR(err); @@ -7370,7 +7208,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, em->block_start = EXTENT_MAP_DELALLOC; em->block_len = found; } - } else if (hole_em) { + } else { return hole_em; } out: @@ -7687,7 +7525,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, break; unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, - cached_state, GFP_NOFS); + cached_state); if (ordered) { /* @@ -7797,33 +7635,6 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, return em; } -static void adjust_dio_outstanding_extents(struct inode *inode, - struct btrfs_dio_data *dio_data, - const u64 len) -{ - unsigned num_extents = count_max_extents(len); - - /* - * If we have an outstanding_extents count still set then we're - * within our reservation, otherwise we need to adjust our inode - * counter appropriately. - */ - if (dio_data->outstanding_extents >= num_extents) { - dio_data->outstanding_extents -= num_extents; - } else { - /* - * If dio write length has been split due to no large enough - * contiguous space, we need to compensate our inode counter - * appropriately. - */ - u64 num_needed = num_extents - dio_data->outstanding_extents; - - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents += num_needed; - spin_unlock(&BTRFS_I(inode)->lock); - } -} - static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -7985,7 +7796,6 @@ unlock: if (!dio_data->overwrite && start + len > i_size_read(inode)) i_size_write(inode, start + len); - adjust_dio_outstanding_extents(inode, dio_data, len); WARN_ON(dio_data->reserve < len); dio_data->reserve -= len; dio_data->unsubmitted_oe_range_end = start + len; @@ -8000,7 +7810,7 @@ unlock: if (lockstart < lockend) { clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, unlock_bits, 1, 0, - &cached_state, GFP_NOFS); + &cached_state); } else { free_extent_state(cached_state); } @@ -8011,18 +7821,10 @@ unlock: unlock_err: clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, - unlock_bits, 1, 0, &cached_state, GFP_NOFS); + unlock_bits, 1, 0, &cached_state); err: if (dio_data) current->journal_info = dio_data; - /* - * Compensate the delalloc release we do in btrfs_direct_IO() when we - * write less data then expected, so that we don't underflow our inode's - * outstanding extents counter. - */ - if (create && dio_data) - adjust_dio_outstanding_extents(inode, dio_data, len); - return ret; } @@ -8035,15 +7837,12 @@ static inline blk_status_t submit_dio_repair_bio(struct inode *inode, BUG_ON(bio_op(bio) == REQ_OP_WRITE); - bio_get(bio); - ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DIO_REPAIR); if (ret) - goto err; + return ret; ret = btrfs_map_bio(fs_info, bio, mirror_num, 0); -err: - bio_put(bio); + return ret; } @@ -8097,6 +7896,7 @@ static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, int segs; int ret; blk_status_t status; + struct bio_vec bvec; BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); @@ -8112,8 +7912,9 @@ static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, } segs = bio_segments(failed_bio); + bio_get_first_bvec(failed_bio, &bvec); if (segs > 1 || - (failed_bio->bi_io_vec->bv_len > btrfs_inode_sectorsize(inode))) + (bvec.bv_len > btrfs_inode_sectorsize(inode))) read_mode |= REQ_FAILFAST_DEV; isector = start - btrfs_io_bio(failed_bio)->logical; @@ -8156,7 +7957,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio) ASSERT(bio->bi_vcnt == 1); io_tree = &BTRFS_I(inode)->io_tree; failure_tree = &BTRFS_I(inode)->io_failure_tree; - ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(inode)); + ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(inode)); done->uptodate = 1; ASSERT(!bio_flagged(bio, BIO_CLONED)); @@ -8246,7 +8047,7 @@ static void btrfs_retry_endio(struct bio *bio) uptodate = 1; ASSERT(bio->bi_vcnt == 1); - ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(done->inode)); + ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(done->inode)); io_tree = &BTRFS_I(inode)->io_tree; failure_tree = &BTRFS_I(inode)->io_failure_tree; @@ -8495,7 +8296,7 @@ static void btrfs_end_dio_bio(struct bio *bio) if (dip->errors) { bio_io_error(dip->orig_bio); } else { - dip->dio_bio->bi_status = 0; + dip->dio_bio->bi_status = BLK_STS_OK; bio_endio(dip->orig_bio); } out: @@ -8542,11 +8343,10 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset, bool write = bio_op(bio) == REQ_OP_WRITE; blk_status_t ret; + /* Check btrfs_submit_bio_hook() for rules about async submit. */ if (async_submit) async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers); - bio_get(bio); - if (!write) { ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); if (ret) @@ -8577,9 +8377,8 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset, goto err; } map: - ret = btrfs_map_bio(fs_info, bio, 0, async_submit); + ret = btrfs_map_bio(fs_info, bio, 0, 0); err: - bio_put(bio); return ret; } @@ -8786,7 +8585,6 @@ free_ordered: } static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info, - struct kiocb *iocb, const struct iov_iter *iter, loff_t offset) { int seg; @@ -8833,7 +8631,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) bool relock = false; ssize_t ret; - if (check_direct_IO(fs_info, iocb, iter, offset)) + if (check_direct_IO(fs_info, iter, offset)) return 0; inode_dio_begin(inode); @@ -8868,7 +8666,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) offset, count); if (ret) goto out; - dio_data.outstanding_extents = count_max_extents(count); /* * We need to know how many extents we reserved so that we can @@ -8915,6 +8712,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } else if (ret >= 0 && (size_t)ret < count) btrfs_delalloc_release_space(inode, data_reserved, offset, count - (size_t)ret); + btrfs_delalloc_release_extents(BTRFS_I(inode), count); } out: if (wakeup) @@ -8937,7 +8735,7 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (ret) return ret; - return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap); + return extent_fiemap(inode, fieinfo, start, len); } int btrfs_readpage(struct file *file, struct page *page) @@ -8949,7 +8747,6 @@ int btrfs_readpage(struct file *file, struct page *page) static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { - struct extent_io_tree *tree; struct inode *inode = page->mapping->host; int ret; @@ -8968,8 +8765,7 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; } - tree = &BTRFS_I(page->mapping->host)->io_tree; - ret = extent_write_full_page(tree, page, btrfs_get_extent, wbc); + ret = extent_write_full_page(page, wbc); btrfs_add_delayed_iput(inode); return ret; } @@ -8980,7 +8776,7 @@ static int btrfs_writepages(struct address_space *mapping, struct extent_io_tree *tree; tree = &BTRFS_I(mapping->host)->io_tree; - return extent_writepages(tree, mapping, btrfs_get_extent, wbc); + return extent_writepages(tree, mapping, wbc); } static int @@ -8989,8 +8785,7 @@ btrfs_readpages(struct file *file, struct address_space *mapping, { struct extent_io_tree *tree; tree = &BTRFS_I(mapping->host)->io_tree; - return extent_readpages(tree, mapping, pages, nr_pages, - btrfs_get_extent); + return extent_readpages(tree, mapping, pages, nr_pages); } static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) { @@ -9061,8 +8856,7 @@ again: EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | - EXTENT_DEFRAG, 1, 0, &cached_state, - GFP_NOFS); + EXTENT_DEFRAG, 1, 0, &cached_state); /* * whoever cleared the private bit is responsible * for the finish_ordered_io @@ -9119,7 +8913,7 @@ again: EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1, - &cached_state, GFP_NOFS); + &cached_state); __btrfs_releasepage(page, GFP_NOFS); } @@ -9220,7 +9014,7 @@ again: PAGE_SIZE); if (ordered) { unlock_extent_cached(io_tree, page_start, page_end, - &cached_state, GFP_NOFS); + &cached_state); unlock_page(page); btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); @@ -9232,9 +9026,6 @@ again: fs_info->sectorsize); if (reserved_space < PAGE_SIZE) { end = page_start + reserved_space - 1; - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents++; - spin_unlock(&BTRFS_I(inode)->lock); btrfs_delalloc_release_space(inode, data_reserved, page_start, PAGE_SIZE - reserved_space); } @@ -9250,13 +9041,13 @@ again: clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, - 0, 0, &cached_state, GFP_NOFS); + 0, 0, &cached_state); - ret = btrfs_set_extent_delalloc(inode, page_start, end, + ret = btrfs_set_extent_delalloc(inode, page_start, end, 0, &cached_state, 0); if (ret) { unlock_extent_cached(io_tree, page_start, page_end, - &cached_state, GFP_NOFS); + &cached_state); ret = VM_FAULT_SIGBUS; goto out_unlock; } @@ -9282,16 +9073,18 @@ again: BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; - unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); + unlock_extent_cached(io_tree, page_start, page_end, &cached_state); out_unlock: if (!ret) { + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); sb_end_pagefault(inode->i_sb); extent_changeset_free(data_reserved); return VM_FAULT_LOCKED; } unlock_page(page); out: + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); btrfs_delalloc_release_space(inode, data_reserved, page_start, reserved_space); out_noreserve: @@ -9387,12 +9180,12 @@ static int btrfs_truncate(struct inode *inode) ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, BTRFS_EXTENT_DATA_KEY); + trans->block_rsv = &fs_info->trans_block_rsv; if (ret != -ENOSPC && ret != -EAGAIN) { err = ret; break; } - trans->block_rsv = &fs_info->trans_block_rsv; ret = btrfs_update_inode(trans, root, inode); if (ret) { err = ret; @@ -9416,6 +9209,27 @@ static int btrfs_truncate(struct inode *inode) trans->block_rsv = rsv; } + /* + * We can't call btrfs_truncate_block inside a trans handle as we could + * deadlock with freeze, if we got NEED_TRUNCATE_BLOCK then we know + * we've truncated everything except the last little bit, and can do + * btrfs_truncate_block and then update the disk_i_size. + */ + if (ret == NEED_TRUNCATE_BLOCK) { + btrfs_end_transaction(trans); + btrfs_btree_balance_dirty(fs_info); + + ret = btrfs_truncate_block(inode, inode->i_size, 0, 0); + if (ret) + goto out; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + btrfs_ordered_update_i_size(inode, inode->i_size, NULL); + } + if (ret == 0 && inode->i_nlink > 0) { trans->block_rsv = root->orphan_block_rsv; ret = btrfs_orphan_del(trans, BTRFS_I(inode)); @@ -9480,10 +9294,11 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, struct inode *btrfs_alloc_inode(struct super_block *sb) { + struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_inode *ei; struct inode *inode; - ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); + ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; @@ -9506,8 +9321,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) spin_lock_init(&ei->lock); ei->outstanding_extents = 0; - ei->reserved_extents = 0; - + if (sb->s_magic != BTRFS_TEST_MAGIC) + btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv, + BTRFS_BLOCK_RSV_DELALLOC); ei->runtime_flags = 0; ei->prop_compress = BTRFS_COMPRESS_NONE; ei->defrag_compress = BTRFS_COMPRESS_NONE; @@ -9557,8 +9373,9 @@ void btrfs_destroy_inode(struct inode *inode) WARN_ON(!hlist_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); + WARN_ON(BTRFS_I(inode)->block_rsv.reserved); + WARN_ON(BTRFS_I(inode)->block_rsv.size); WARN_ON(BTRFS_I(inode)->outstanding_extents); - WARN_ON(BTRFS_I(inode)->reserved_extents); WARN_ON(BTRFS_I(inode)->delalloc_bytes); WARN_ON(BTRFS_I(inode)->new_delalloc_bytes); WARN_ON(BTRFS_I(inode)->csum_bytes); @@ -9633,7 +9450,7 @@ void btrfs_destroy_cachep(void) kmem_cache_destroy(btrfs_free_space_cachep); } -int btrfs_init_cachep(void) +int __init btrfs_init_cachep(void) { btrfs_inode_cachep = kmem_cache_create("btrfs_inode", sizeof(struct btrfs_inode), 0, @@ -10337,19 +10154,6 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) ret = __start_delalloc_inodes(root, delay_iput, -1); if (ret > 0) ret = 0; - /* - * the filemap_flush will queue IO into the worker threads, but - * we have to make sure the IO is actually started and that - * ordered extents get created before we return - */ - atomic_inc(&fs_info->async_submit_draining); - while (atomic_read(&fs_info->nr_async_submits) || - atomic_read(&fs_info->async_delalloc_pages)) { - wait_event(fs_info->async_submit_wait, - (atomic_read(&fs_info->nr_async_submits) == 0 && - atomic_read(&fs_info->async_delalloc_pages) == 0)); - } - atomic_dec(&fs_info->async_submit_draining); return ret; } @@ -10391,14 +10195,6 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, spin_unlock(&fs_info->delalloc_root_lock); ret = 0; - atomic_inc(&fs_info->async_submit_draining); - while (atomic_read(&fs_info->nr_async_submits) || - atomic_read(&fs_info->async_delalloc_pages)) { - wait_event(fs_info->async_submit_wait, - (atomic_read(&fs_info->nr_async_submits) == 0 && - atomic_read(&fs_info->async_delalloc_pages) == 0)); - } - atomic_dec(&fs_info->async_submit_draining); out: if (!list_empty_careful(&splice)) { spin_lock(&fs_info->delalloc_root_lock); @@ -10769,7 +10565,6 @@ out: btrfs_end_transaction(trans); if (ret) iput(inode); - btrfs_balance_delayed_items(fs_info); btrfs_btree_balance_dirty(fs_info); return ret; |