diff options
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 1160 |
1 files changed, 806 insertions, 354 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e9b76bcd1c12..02bb099845fd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -62,7 +62,7 @@ static const struct inode_operations btrfs_special_inode_operations; static const struct inode_operations btrfs_file_inode_operations; static const struct address_space_operations btrfs_aops; static const struct address_space_operations btrfs_symlink_aops; -static struct file_operations btrfs_dir_file_operations; +static const struct file_operations btrfs_dir_file_operations; static struct extent_io_ops btrfs_extent_io_ops; static struct kmem_cache *btrfs_inode_cachep; @@ -88,13 +88,14 @@ static noinline int cow_file_range(struct inode *inode, u64 start, u64 end, int *page_started, unsigned long *nr_written, int unlock); -static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) +static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) { int err; - err = btrfs_init_acl(inode, dir); + err = btrfs_init_acl(trans, inode, dir); if (!err) - err = btrfs_xattr_security_init(inode, dir); + err = btrfs_xattr_security_init(trans, inode, dir); return err; } @@ -188,8 +189,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); + /* + * we're an inline extent, so nobody can + * extend the file past i_size without locking + * a page we already have locked. + * + * We must do any isize and inode updates + * before we unlock the pages. Otherwise we + * could end up racing with unlink. + */ BTRFS_I(inode)->disk_i_size = inode->i_size; btrfs_update_inode(trans, root, inode); + return 0; fail: btrfs_free_path(path); @@ -230,8 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, return 1; } - ret = btrfs_drop_extents(trans, root, inode, start, - aligned_end, aligned_end, start, + ret = btrfs_drop_extents(trans, inode, start, aligned_end, &hint_byte, 1); BUG_ON(ret); @@ -369,7 +379,8 @@ again: * change at any time if we discover bad compression ratios. */ if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && - btrfs_test_opt(root, COMPRESS)) { + (btrfs_test_opt(root, COMPRESS) || + (BTRFS_I(inode)->force_compress))) { WARN_ON(pages); pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); @@ -416,7 +427,6 @@ again: start, end, total_compressed, pages); } - btrfs_end_transaction(trans, root); if (ret == 0) { /* * inline extent creation worked, we don't need @@ -424,12 +434,17 @@ again: * and free up our temp pages. */ extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, NULL, 1, 0, - 0, 1, 1, 1, 0); - ret = 0; + &BTRFS_I(inode)->io_tree, + start, end, NULL, + EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | + EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_ACCOUNTING | + EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); + + btrfs_end_transaction(trans, root); goto free_pages_out; } + btrfs_end_transaction(trans, root); } if (will_compress) { @@ -469,7 +484,10 @@ again: nr_pages_ret = 0; /* flag the file so we don't compress in the future */ - BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; + if (!btrfs_test_opt(root, FORCE_COMPRESS) && + !(BTRFS_I(inode)->force_compress)) { + BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; + } } if (will_compress) { *num_added += 1; @@ -535,12 +553,11 @@ static noinline int submit_compressed_extents(struct inode *inode, struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_io_tree *io_tree; - int ret; + int ret = 0; if (list_empty(&async_cow->extents)) return 0; - trans = btrfs_join_transaction(root, 1); while (!list_empty(&async_cow->extents)) { async_extent = list_entry(async_cow->extents.next, @@ -549,21 +566,22 @@ static noinline int submit_compressed_extents(struct inode *inode, io_tree = &BTRFS_I(inode)->io_tree; +retry: /* did the compression code fall back to uncompressed IO? */ if (!async_extent->pages) { int page_started = 0; unsigned long nr_written = 0; lock_extent(io_tree, async_extent->start, - async_extent->start + - async_extent->ram_size - 1, GFP_NOFS); + async_extent->start + + async_extent->ram_size - 1, GFP_NOFS); /* allocate blocks */ - cow_file_range(inode, async_cow->locked_page, - async_extent->start, - async_extent->start + - async_extent->ram_size - 1, - &page_started, &nr_written, 0); + ret = cow_file_range(inode, async_cow->locked_page, + async_extent->start, + async_extent->start + + async_extent->ram_size - 1, + &page_started, &nr_written, 0); /* * if page_started, cow_file_range inserted an @@ -571,7 +589,7 @@ static noinline int submit_compressed_extents(struct inode *inode, * and IO for us. Otherwise, we need to submit * all those pages down to the drive. */ - if (!page_started) + if (!page_started && !ret) extent_write_locked_range(io_tree, inode, async_extent->start, async_extent->start + @@ -586,6 +604,30 @@ static noinline int submit_compressed_extents(struct inode *inode, lock_extent(io_tree, async_extent->start, async_extent->start + async_extent->ram_size - 1, GFP_NOFS); + + trans = btrfs_join_transaction(root, 1); + ret = btrfs_reserve_extent(trans, root, + async_extent->compressed_size, + async_extent->compressed_size, + 0, alloc_hint, + (u64)-1, &ins, 1); + btrfs_end_transaction(trans, root); + + if (ret) { + int i; + for (i = 0; i < async_extent->nr_pages; i++) { + WARN_ON(async_extent->pages[i]->mapping); + page_cache_release(async_extent->pages[i]); + } + kfree(async_extent->pages); + async_extent->nr_pages = 0; + async_extent->pages = NULL; + unlock_extent(io_tree, async_extent->start, + async_extent->start + + async_extent->ram_size - 1, GFP_NOFS); + goto retry; + } + /* * here we're doing allocation and writeback of the * compressed pages @@ -594,12 +636,6 @@ static noinline int submit_compressed_extents(struct inode *inode, async_extent->start + async_extent->ram_size - 1, 0); - ret = btrfs_reserve_extent(trans, root, - async_extent->compressed_size, - async_extent->compressed_size, - 0, alloc_hint, - (u64)-1, &ins, 1); - BUG_ON(ret); em = alloc_extent_map(GFP_NOFS); em->start = async_extent->start; em->len = async_extent->ram_size; @@ -631,17 +667,18 @@ static noinline int submit_compressed_extents(struct inode *inode, BTRFS_ORDERED_COMPRESSED); BUG_ON(ret); - btrfs_end_transaction(trans, root); - /* * clear dirty, set writeback and unlock the pages. */ extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - async_extent->start, - async_extent->start + - async_extent->ram_size - 1, - NULL, 1, 1, 0, 1, 1, 0, 0); + &BTRFS_I(inode)->io_tree, + async_extent->start, + async_extent->start + + async_extent->ram_size - 1, + NULL, EXTENT_CLEAR_UNLOCK_PAGE | + EXTENT_CLEAR_UNLOCK | + EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK); ret = btrfs_submit_compressed_write(inode, async_extent->start, @@ -651,13 +688,11 @@ static noinline int submit_compressed_extents(struct inode *inode, async_extent->nr_pages); BUG_ON(ret); - trans = btrfs_join_transaction(root, 1); alloc_hint = ins.objectid + ins.offset; kfree(async_extent); cond_resched(); } - btrfs_end_transaction(trans, root); return 0; } @@ -712,9 +747,16 @@ static noinline int cow_file_range(struct inode *inode, start, end, 0, NULL); if (ret == 0) { extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, NULL, 1, 1, - 1, 1, 1, 1, 0); + &BTRFS_I(inode)->io_tree, + start, end, NULL, + EXTENT_CLEAR_UNLOCK_PAGE | + EXTENT_CLEAR_UNLOCK | + EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_ACCOUNTING | + EXTENT_CLEAR_DIRTY | + EXTENT_SET_WRITEBACK | + EXTENT_END_WRITEBACK); + *nr_written = *nr_written + (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; *page_started = 1; @@ -731,13 +773,29 @@ static noinline int cow_file_range(struct inode *inode, em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, start, num_bytes); if (em) { - alloc_hint = em->block_start; - free_extent_map(em); + /* + * if block start isn't an actual block number then find the + * first block in this inode and use that as a hint. If that + * block is also bogus then just don't worry about it. + */ + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + free_extent_map(em); + em = search_extent_mapping(em_tree, 0, 0); + if (em && em->block_start < EXTENT_MAP_LAST_BYTE) + alloc_hint = em->block_start; + if (em) + free_extent_map(em); + } else { + alloc_hint = em->block_start; + free_extent_map(em); + } } read_unlock(&BTRFS_I(inode)->extent_tree.lock); btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); while (disk_num_bytes > 0) { + unsigned long op; + cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); ret = btrfs_reserve_extent(trans, root, cur_alloc_size, root->sectorsize, 0, alloc_hint, @@ -789,10 +847,13 @@ static noinline int cow_file_range(struct inode *inode, * Do set the Private2 bit so we know this page was properly * setup for writepage */ + op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0; + op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | + EXTENT_SET_PRIVATE2; + extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, start, start + ram_size - 1, - locked_page, unlock, 1, - 1, 0, 0, 0, 1); + locked_page, op); disk_num_bytes -= cur_alloc_size; num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; @@ -864,8 +925,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, u64 cur_end; int limit = 10 * 1024 * 1042; - clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | - EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS); + clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, + 1, 0, NULL, GFP_NOFS); while (start < end) { async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); async_cow->inode = inode; @@ -1006,6 +1067,7 @@ next_slot: if (found_key.offset > cur_offset) { extent_end = found_key.offset; + extent_type = 0; goto out_check; } @@ -1112,8 +1174,10 @@ out_check: BUG_ON(ret); extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - cur_offset, cur_offset + num_bytes - 1, - locked_page, 1, 1, 1, 0, 0, 0, 1); + cur_offset, cur_offset + num_bytes - 1, + locked_page, EXTENT_CLEAR_UNLOCK_PAGE | + EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | + EXTENT_SET_PRIVATE2); cur_offset = extent_end; if (cur_offset > end) break; @@ -1150,7 +1214,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); - else if (!btrfs_test_opt(root, COMPRESS)) + else if (!btrfs_test_opt(root, COMPRESS) && + !(BTRFS_I(inode)->force_compress)) ret = cow_file_range(inode, locked_page, start, end, page_started, nr_written, 1); else @@ -1159,6 +1224,89 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, return ret; } +static int btrfs_split_extent_hook(struct inode *inode, + struct extent_state *orig, u64 split) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 size; + + if (!(orig->state & EXTENT_DELALLOC)) + return 0; + + size = orig->end - orig->start + 1; + if (size > root->fs_info->max_extent) { + u64 num_extents; + u64 new_size; + + new_size = orig->end - split + 1; + num_extents = div64_u64(size + root->fs_info->max_extent - 1, + root->fs_info->max_extent); + + /* + * if we break a large extent up then leave oustanding_extents + * be, since we've already accounted for the large extent. + */ + if (div64_u64(new_size + root->fs_info->max_extent - 1, + root->fs_info->max_extent) < num_extents) + return 0; + } + + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->accounting_lock); + + return 0; +} + +/* + * extent_io.c merge_extent_hook, used to track merged delayed allocation + * extents so we can keep track of new extents that are just merged onto old + * extents, such as when we are doing sequential writes, so we can properly + * account for the metadata space we'll need. + */ +static int btrfs_merge_extent_hook(struct inode *inode, + struct extent_state *new, + struct extent_state *other) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 new_size, old_size; + u64 num_extents; + + /* not delalloc, ignore it */ + if (!(other->state & EXTENT_DELALLOC)) + return 0; + + old_size = other->end - other->start + 1; + if (new->start < other->start) + new_size = other->end - new->start + 1; + else + new_size = new->end - other->start + 1; + + /* we're not bigger than the max, unreserve the space and go */ + if (new_size <= root->fs_info->max_extent) { + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->accounting_lock); + return 0; + } + + /* + * If we grew by another max_extent, just return, we want to keep that + * reserved amount. + */ + num_extents = div64_u64(old_size + root->fs_info->max_extent - 1, + root->fs_info->max_extent); + if (div64_u64(new_size + root->fs_info->max_extent - 1, + root->fs_info->max_extent) > num_extents) + return 0; + + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->accounting_lock); + + return 0; +} + /* * extent_io.c set_bit_hook, used to track delayed allocation * bytes in this file, and to maintain the list of inodes that @@ -1167,6 +1315,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits) { + /* * set_bit and clear bit hooks normally require _irqsave/restore * but in this case, we are only testeing for the DELALLOC @@ -1174,6 +1323,10 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, */ if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; + + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->accounting_lock); btrfs_delalloc_reserve_space(root, inode, end - start + 1); spin_lock(&root->fs_info->delalloc_lock); BTRFS_I(inode)->delalloc_bytes += end - start + 1; @@ -1190,22 +1343,31 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, /* * extent_io.c clear_bit_hook, see set_bit_hook for why */ -static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, - unsigned long old, unsigned long bits) +static int btrfs_clear_bit_hook(struct inode *inode, + struct extent_state *state, unsigned long bits) { /* * set_bit and clear bit hooks normally require _irqsave/restore * but in this case, we are only testeing for the DELALLOC * bit, which is only set or cleared with irqs on */ - if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { + if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; + if (bits & EXTENT_DO_ACCOUNTING) { + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->accounting_lock); + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); + } + spin_lock(&root->fs_info->delalloc_lock); - if (end - start + 1 > root->fs_info->delalloc_bytes) { + if (state->end - state->start + 1 > + root->fs_info->delalloc_bytes) { printk(KERN_INFO "btrfs warning: delalloc account " "%llu %llu\n", - (unsigned long long)end - start + 1, + (unsigned long long) + state->end - state->start + 1, (unsigned long long) root->fs_info->delalloc_bytes); btrfs_delalloc_free_space(root, inode, (u64)-1); @@ -1213,9 +1375,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, BTRFS_I(inode)->delalloc_bytes = 0; } else { btrfs_delalloc_free_space(root, inode, - end - start + 1); - root->fs_info->delalloc_bytes -= end - start + 1; - BTRFS_I(inode)->delalloc_bytes -= end - start + 1; + state->end - + state->start + 1); + root->fs_info->delalloc_bytes -= state->end - + state->start + 1; + BTRFS_I(inode)->delalloc_bytes -= state->end - + state->start + 1; } if (BTRFS_I(inode)->delalloc_bytes == 0 && !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { @@ -1347,12 +1512,13 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, return 0; } -int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) +int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, + struct extent_state **cached_state) { if ((end & (PAGE_CACHE_SIZE - 1)) == 0) WARN_ON(1); return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, - GFP_NOFS); + cached_state, GFP_NOFS); } /* see btrfs_writepage_start_hook for details on why this is required */ @@ -1365,6 +1531,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) { struct btrfs_writepage_fixup *fixup; struct btrfs_ordered_extent *ordered; + struct extent_state *cached_state = NULL; struct page *page; struct inode *inode; u64 page_start; @@ -1383,7 +1550,8 @@ again: page_start = page_offset(page); page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; - lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); + lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0, + &cached_state, GFP_NOFS); /* already ordered? We're done */ if (PagePrivate2(page)) @@ -1391,17 +1559,18 @@ again: ordered = btrfs_lookup_ordered_extent(inode, page_start); if (ordered) { - unlock_extent(&BTRFS_I(inode)->io_tree, page_start, - page_end, GFP_NOFS); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, + page_end, &cached_state, GFP_NOFS); unlock_page(page); btrfs_start_ordered_extent(inode, ordered, 1); goto again; } - btrfs_set_extent_delalloc(inode, page_start, page_end); + btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); ClearPageChecked(page); out: - unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, + &cached_state, GFP_NOFS); out_page: unlock_page(page); page_cache_release(page); @@ -1447,7 +1616,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, struct inode *inode, u64 file_pos, u64 disk_bytenr, u64 disk_num_bytes, u64 num_bytes, u64 ram_bytes, - u64 locked_end, u8 compression, u8 encryption, u16 other_encoding, int extent_type) { @@ -1473,9 +1641,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, * the caller is expected to unpin it and allow it to be merged * with the others. */ - ret = btrfs_drop_extents(trans, root, inode, file_pos, - file_pos + num_bytes, locked_end, - file_pos, &hint, 0); + ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes, + &hint, 0); BUG_ON(ret); ins.objectid = inode->i_ino; @@ -1522,24 +1689,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, * before we start the transaction. It limits the amount of btree * reads required while inside the transaction. */ -static noinline void reada_csum(struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_ordered_extent *ordered_extent) -{ - struct btrfs_ordered_sum *sum; - u64 bytenr; - - sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum, - list); - bytenr = sum->sums[0].bytenr; - - /* - * we don't care about the results, the point of this search is - * just to get the btree leaves into ram - */ - btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0); -} - /* as ordered data IO finishes, this gets called so we can finish * an ordered extent if the range of bytes in the file it covers are * fully written. @@ -1550,54 +1699,39 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered_extent = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct btrfs_path *path; + struct extent_state *cached_state = NULL; int compressed = 0; int ret; - ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); + ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, + end - start + 1); if (!ret) return 0; + BUG_ON(!ordered_extent); - /* - * before we join the transaction, try to do some of our IO. - * This will limit the amount of IO that we have to do with - * the transaction running. We're unlikely to need to do any - * IO if the file extents are new, the disk_i_size checks - * covers the most common case. - */ - if (start < BTRFS_I(inode)->disk_i_size) { - path = btrfs_alloc_path(); - if (path) { - ret = btrfs_lookup_file_extent(NULL, root, path, - inode->i_ino, - start, 0); - ordered_extent = btrfs_lookup_ordered_extent(inode, - start); - if (!list_empty(&ordered_extent->list)) { - btrfs_release_path(root, path); - reada_csum(root, path, ordered_extent); - } - btrfs_free_path(path); + if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { + BUG_ON(!list_empty(&ordered_extent->list)); + ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); + if (!ret) { + trans = btrfs_join_transaction(root, 1); + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + btrfs_end_transaction(trans, root); } + goto out; } - trans = btrfs_join_transaction(root, 1); - - if (!ordered_extent) - ordered_extent = btrfs_lookup_ordered_extent(inode, start); - BUG_ON(!ordered_extent); - if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) - goto nocow; + lock_extent_bits(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + ordered_extent->len - 1, + 0, &cached_state, GFP_NOFS); - lock_extent(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + ordered_extent->len - 1, - GFP_NOFS); + trans = btrfs_join_transaction(root, 1); if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) compressed = 1; if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { BUG_ON(compressed); - ret = btrfs_mark_extent_written(trans, root, inode, + ret = btrfs_mark_extent_written(trans, inode, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len); @@ -1609,8 +1743,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->disk_len, ordered_extent->len, ordered_extent->len, - ordered_extent->file_offset + - ordered_extent->len, compressed, 0, 0, BTRFS_FILE_EXTENT_REG); unpin_extent_cache(&BTRFS_I(inode)->extent_tree, @@ -1618,25 +1750,24 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->len); BUG_ON(ret); } - unlock_extent(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + ordered_extent->len - 1, - GFP_NOFS); -nocow: + unlock_extent_cached(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len - 1, &cached_state, GFP_NOFS); + add_pending_csums(trans, inode, ordered_extent->file_offset, &ordered_extent->list); - mutex_lock(&BTRFS_I(inode)->extent_mutex); - btrfs_ordered_update_i_size(inode, ordered_extent); - btrfs_update_inode(trans, root, inode); - btrfs_remove_ordered_extent(inode, ordered_extent); - mutex_unlock(&BTRFS_I(inode)->extent_mutex); - + /* this also removes the ordered extent from the tree */ + btrfs_ordered_update_i_size(inode, 0, ordered_extent); + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + btrfs_end_transaction(trans, root); +out: /* once for us */ btrfs_put_ordered_extent(ordered_extent); /* once for the tree */ btrfs_put_ordered_extent(ordered_extent); - btrfs_end_transaction(trans, root); return 0; } @@ -1859,6 +1990,54 @@ zeroit: return -EIO; } +struct delayed_iput { + struct list_head list; + struct inode *inode; +}; + +void btrfs_add_delayed_iput(struct inode *inode) +{ + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct delayed_iput *delayed; + + if (atomic_add_unless(&inode->i_count, -1, 1)) + return; + + delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL); + delayed->inode = inode; + + spin_lock(&fs_info->delayed_iput_lock); + list_add_tail(&delayed->list, &fs_info->delayed_iputs); + spin_unlock(&fs_info->delayed_iput_lock); +} + +void btrfs_run_delayed_iputs(struct btrfs_root *root) +{ + LIST_HEAD(list); + struct btrfs_fs_info *fs_info = root->fs_info; + struct delayed_iput *delayed; + int empty; + + spin_lock(&fs_info->delayed_iput_lock); + empty = list_empty(&fs_info->delayed_iputs); + spin_unlock(&fs_info->delayed_iput_lock); + if (empty) + return; + + down_read(&root->fs_info->cleanup_work_sem); + spin_lock(&fs_info->delayed_iput_lock); + list_splice_init(&fs_info->delayed_iputs, &list); + spin_unlock(&fs_info->delayed_iput_lock); + + while (!list_empty(&list)) { + delayed = list_entry(list.next, struct delayed_iput, list); + list_del(&delayed->list); + iput(delayed->inode); + kfree(delayed); + } + up_read(&root->fs_info->cleanup_work_sem); +} + /* * This creates an orphan entry for the given inode in case something goes * wrong in the middle of an unlink/truncate. @@ -1931,16 +2110,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) struct inode *inode; int ret = 0, nr_unlink = 0, nr_truncate = 0; - path = btrfs_alloc_path(); - if (!path) + if (!xchg(&root->clean_orphans, 0)) return; + + path = btrfs_alloc_path(); + BUG_ON(!path); path->reada = -1; key.objectid = BTRFS_ORPHAN_OBJECTID; btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); key.offset = (u64)-1; - while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) { @@ -1982,7 +2162,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) found_key.objectid = found_key.offset; found_key.type = BTRFS_INODE_ITEM_KEY; found_key.offset = 0; - inode = btrfs_iget(root->fs_info->sb, &found_key, root); + inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); if (IS_ERR(inode)) break; @@ -2354,7 +2534,19 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) root = BTRFS_I(dir)->root; + /* + * 5 items for unlink inode + * 1 for orphan + */ + ret = btrfs_reserve_metadata_space(root, 6); + if (ret) + return ret; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + btrfs_unreserve_metadata_space(root, 6); + return PTR_ERR(trans); + } btrfs_set_trans_block_group(trans, dir); @@ -2369,6 +2561,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); + btrfs_unreserve_metadata_space(root, 6); btrfs_btree_balance_dirty(root, nr); return ret; } @@ -2449,7 +2642,16 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) return -ENOTEMPTY; + ret = btrfs_reserve_metadata_space(root, 5); + if (ret) + return ret; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + btrfs_unreserve_metadata_space(root, 5); + return PTR_ERR(trans); + } + btrfs_set_trans_block_group(trans, dir); if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { @@ -2472,6 +2674,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) out: nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); + btrfs_unreserve_metadata_space(root, 5); btrfs_btree_balance_dirty(root, nr); if (ret && !err) @@ -2662,37 +2865,40 @@ out: * min_type is the minimum key type to truncate down to. If set to 0, this * will kill all the items on this inode, including the INODE_ITEM_KEY. */ -noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, - u64 new_size, u32 min_type) +int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + u64 new_size, u32 min_type) { - int ret; struct btrfs_path *path; - struct btrfs_key key; - struct btrfs_key found_key; - u32 found_type = (u8)-1; struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; + struct btrfs_key key; + struct btrfs_key found_key; u64 extent_start = 0; u64 extent_num_bytes = 0; u64 extent_offset = 0; u64 item_end = 0; + u64 mask = root->sectorsize - 1; + u32 found_type = (u8)-1; int found_extent; int del_item; int pending_del_nr = 0; int pending_del_slot = 0; int extent_type = -1; int encoding; - u64 mask = root->sectorsize - 1; + int ret; + int err = 0; + + BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); if (root->ref_cows) btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); + path = btrfs_alloc_path(); BUG_ON(!path); path->reada = -1; - /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; key.type = (u8)-1; @@ -2700,17 +2906,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, search_again: path->leave_spinning = 1; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) - goto error; + if (ret < 0) { + err = ret; + goto out; + } if (ret > 0) { /* there are no items in the tree for us to truncate, we're * done */ - if (path->slots[0] == 0) { - ret = 0; - goto error; - } + if (path->slots[0] == 0) + goto out; path->slots[0]--; } @@ -2745,28 +2951,17 @@ search_again: } item_end--; } - if (item_end < new_size) { - if (found_type == BTRFS_DIR_ITEM_KEY) - found_type = BTRFS_INODE_ITEM_KEY; - else if (found_type == BTRFS_EXTENT_ITEM_KEY) - found_type = BTRFS_EXTENT_DATA_KEY; - else if (found_type == BTRFS_EXTENT_DATA_KEY) - found_type = BTRFS_XATTR_ITEM_KEY; - else if (found_type == BTRFS_XATTR_ITEM_KEY) - found_type = BTRFS_INODE_REF_KEY; - else if (found_type) - found_type--; - else + if (found_type > min_type) { + del_item = 1; + } else { + if (item_end < new_size) break; - btrfs_set_key_type(&key, found_type); - goto next; + if (found_key.offset >= new_size) + del_item = 1; + else + del_item = 0; } - if (found_key.offset >= new_size) - del_item = 1; - else - del_item = 0; found_extent = 0; - /* FIXME, shrink the extent if the ref count is only 1 */ if (found_type != BTRFS_EXTENT_DATA_KEY) goto delete; @@ -2853,42 +3048,36 @@ delete: inode->i_ino, extent_offset); BUG_ON(ret); } -next: - if (path->slots[0] == 0) { - if (pending_del_nr) - goto del_pending; - btrfs_release_path(root, path); - if (found_type == BTRFS_INODE_ITEM_KEY) - break; - goto search_again; - } - path->slots[0]--; - if (pending_del_nr && - path->slots[0] + 1 != pending_del_slot) { - struct btrfs_key debug; -del_pending: - btrfs_item_key_to_cpu(path->nodes[0], &debug, - pending_del_slot); - ret = btrfs_del_items(trans, root, path, - pending_del_slot, - pending_del_nr); - BUG_ON(ret); - pending_del_nr = 0; + if (found_type == BTRFS_INODE_ITEM_KEY) + break; + + if (path->slots[0] == 0 || + path->slots[0] != pending_del_slot) { + if (root->ref_cows) { + err = -EAGAIN; + goto out; + } + if (pending_del_nr) { + ret = btrfs_del_items(trans, root, path, + pending_del_slot, + pending_del_nr); + BUG_ON(ret); + pending_del_nr = 0; + } btrfs_release_path(root, path); - if (found_type == BTRFS_INODE_ITEM_KEY) - break; goto search_again; + } else { + path->slots[0]--; } } - ret = 0; -error: +out: if (pending_del_nr) { ret = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr); } btrfs_free_path(path); - return ret; + return err; } /* @@ -2901,6 +3090,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_ordered_extent *ordered; + struct extent_state *cached_state = NULL; char *kaddr; u32 blocksize = root->sectorsize; pgoff_t index = from >> PAGE_CACHE_SHIFT; @@ -2912,12 +3102,22 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) if ((offset & (blocksize - 1)) == 0) goto out; + ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); + if (ret) + goto out; + + ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); + if (ret) + goto out; ret = -ENOMEM; again: page = grab_cache_page(mapping, index); - if (!page) + if (!page) { + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); goto out; + } page_start = page_offset(page); page_end = page_start + PAGE_CACHE_SIZE - 1; @@ -2937,12 +3137,14 @@ again: } wait_on_page_writeback(page); - lock_extent(io_tree, page_start, page_end, GFP_NOFS); + lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state, + GFP_NOFS); set_page_extent_mapped(page); ordered = btrfs_lookup_ordered_extent(inode, page_start); if (ordered) { - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_extent_cached(io_tree, page_start, page_end, + &cached_state, GFP_NOFS); unlock_page(page); page_cache_release(page); btrfs_start_ordered_extent(inode, ordered, 1); @@ -2950,7 +3152,18 @@ again: goto again; } - btrfs_set_extent_delalloc(inode, page_start, page_end); + clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, + EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, + 0, 0, &cached_state, GFP_NOFS); + + ret = btrfs_set_extent_delalloc(inode, page_start, page_end, + &cached_state); + if (ret) { + unlock_extent_cached(io_tree, page_start, page_end, + &cached_state, GFP_NOFS); + goto out_unlock; + } + ret = 0; if (offset != PAGE_CACHE_SIZE) { kaddr = kmap(page); @@ -2960,9 +3173,13 @@ again: } ClearPageChecked(page); set_page_dirty(page); - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_extent_cached(io_tree, page_start, page_end, &cached_state, + GFP_NOFS); out_unlock: + if (ret) + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); unlock_page(page); page_cache_release(page); out: @@ -2975,38 +3192,32 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_map *em; + struct extent_state *cached_state = NULL; u64 mask = root->sectorsize - 1; u64 hole_start = (inode->i_size + mask) & ~mask; u64 block_end = (size + mask) & ~mask; u64 last_byte; u64 cur_offset; u64 hole_size; - int err; + int err = 0; if (size <= hole_start) return 0; - err = btrfs_check_metadata_free_space(root); - if (err) - return err; - - btrfs_truncate_page(inode->i_mapping, inode->i_size); - while (1) { struct btrfs_ordered_extent *ordered; btrfs_wait_ordered_range(inode, hole_start, block_end - hole_start); - lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); + lock_extent_bits(io_tree, hole_start, block_end - 1, 0, + &cached_state, GFP_NOFS); ordered = btrfs_lookup_ordered_extent(inode, hole_start); if (!ordered) break; - unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); + unlock_extent_cached(io_tree, hole_start, block_end - 1, + &cached_state, GFP_NOFS); btrfs_put_ordered_extent(ordered); } - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - cur_offset = hole_start; while (1) { em = btrfs_get_extent(inode, NULL, 0, cur_offset, @@ -3014,34 +3225,121 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) BUG_ON(IS_ERR(em) || !em); last_byte = min(extent_map_end(em), block_end); last_byte = (last_byte + mask) & ~mask; - if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { + if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { u64 hint_byte = 0; hole_size = last_byte - cur_offset; - err = btrfs_drop_extents(trans, root, inode, - cur_offset, - cur_offset + hole_size, - block_end, - cur_offset, &hint_byte, 1); + + err = btrfs_reserve_metadata_space(root, 2); if (err) break; + + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + err = btrfs_drop_extents(trans, inode, cur_offset, + cur_offset + hole_size, + &hint_byte, 1); + BUG_ON(err); + err = btrfs_insert_file_extent(trans, root, inode->i_ino, cur_offset, 0, 0, hole_size, 0, hole_size, 0, 0, 0); + BUG_ON(err); + btrfs_drop_extent_cache(inode, hole_start, last_byte - 1, 0); + + btrfs_end_transaction(trans, root); + btrfs_unreserve_metadata_space(root, 2); } free_extent_map(em); cur_offset = last_byte; - if (err || cur_offset >= block_end) + if (cur_offset >= block_end) break; } - btrfs_end_transaction(trans, root); - unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); + unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, + GFP_NOFS); return err; } +static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + unsigned long nr; + int ret; + + if (attr->ia_size == inode->i_size) + return 0; + + if (attr->ia_size > inode->i_size) { + unsigned long limit; + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + if (attr->ia_size > inode->i_sb->s_maxbytes) + return -EFBIG; + if (limit != RLIM_INFINITY && attr->ia_size > limit) { + send_sig(SIGXFSZ, current, 0); + return -EFBIG; + } + } + + ret = btrfs_reserve_metadata_space(root, 1); + if (ret) + return ret; + + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + ret = btrfs_orphan_add(trans, inode); + BUG_ON(ret); + + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + btrfs_unreserve_metadata_space(root, 1); + btrfs_btree_balance_dirty(root, nr); + + if (attr->ia_size > inode->i_size) { + ret = btrfs_cont_expand(inode, attr->ia_size); + if (ret) { + btrfs_truncate(inode); + return ret; + } + + i_size_write(inode, attr->ia_size); + btrfs_ordered_update_i_size(inode, inode->i_size, NULL); + + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + if (inode->i_nlink > 0) { + ret = btrfs_orphan_del(trans, inode); + BUG_ON(ret); + } + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root, nr); + return 0; + } + + /* + * We're truncating a file that used to have good data down to + * zero. Make sure it gets into the ordered flush list so that + * any new writes get down to disk quickly. + */ + if (attr->ia_size == 0) + BTRFS_I(inode)->ordered_data_close = 1; + + /* we don't support swapfiles, so vmtruncate shouldn't fail */ + ret = vmtruncate(inode, attr->ia_size); + BUG_ON(ret); + + return 0; +} + static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; @@ -3052,23 +3350,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { - if (attr->ia_size > inode->i_size) { - err = btrfs_cont_expand(inode, attr->ia_size); - if (err) - return err; - } else if (inode->i_size > 0 && - attr->ia_size == 0) { - - /* we're truncating a file that used to have good - * data down to zero. Make sure it gets into - * the ordered flush list so that any new writes - * get down to disk quickly. - */ - BTRFS_I(inode)->ordered_data_close = 1; - } + err = btrfs_setattr_size(inode, attr); + if (err) + return err; } + attr->ia_valid &= ~ATTR_SIZE; - err = inode_setattr(inode, attr); + if (attr->ia_valid) + err = inode_setattr(inode, attr); if (!err && ((attr->ia_valid & ATTR_MODE))) err = btrfs_acl_chmod(inode); @@ -3089,36 +3378,43 @@ void btrfs_delete_inode(struct inode *inode) } btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (root->fs_info->log_root_recovering) { + BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); + goto no_delete; + } + if (inode->i_nlink > 0) { BUG_ON(btrfs_root_refs(&root->root_item) != 0); goto no_delete; } btrfs_i_size_write(inode, 0); - trans = btrfs_join_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); - if (ret) { - btrfs_orphan_del(NULL, inode); - goto no_delete_lock; - } + while (1) { + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); - btrfs_orphan_del(trans, inode); + if (ret != -EAGAIN) + break; - nr = trans->blocks_used; - clear_inode(inode); + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + trans = NULL; + btrfs_btree_balance_dirty(root, nr); + } - btrfs_end_transaction(trans, root); - btrfs_btree_balance_dirty(root, nr); - return; + if (ret == 0) { + ret = btrfs_orphan_del(trans, inode); + BUG_ON(ret); + } -no_delete_lock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); no_delete: clear_inode(inode); + return; } /* @@ -3353,6 +3649,7 @@ static noinline void init_btrfs_i(struct inode *inode) bi->generation = 0; bi->sequence = 0; bi->last_trans = 0; + bi->last_sub_trans = 0; bi->logged_trans = 0; bi->delalloc_bytes = 0; bi->reserved_bytes = 0; @@ -3361,6 +3658,7 @@ static noinline void init_btrfs_i(struct inode *inode) bi->index_cnt = (u64)-1; bi->last_unlink_trans = 0; bi->ordered_data_close = 0; + bi->force_compress = 0; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); @@ -3370,7 +3668,6 @@ static noinline void init_btrfs_i(struct inode *inode) INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); - mutex_init(&BTRFS_I(inode)->extent_mutex); mutex_init(&BTRFS_I(inode)->log_mutex); } @@ -3410,7 +3707,7 @@ static struct inode *btrfs_iget_locked(struct super_block *s, * Returns in *is_new if the inode was read from disk */ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, - struct btrfs_root *root) + struct btrfs_root *root, int *new) { struct inode *inode; @@ -3425,6 +3722,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, inode_tree_add(inode); unlock_new_inode(inode); + if (new) + *new = 1; } return inode; @@ -3477,7 +3776,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) return NULL; if (location.type == BTRFS_INODE_ITEM_KEY) { - inode = btrfs_iget(dir->i_sb, &location, root); + inode = btrfs_iget(dir->i_sb, &location, root, NULL); return inode; } @@ -3492,10 +3791,17 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) else inode = new_simple_dir(dir->i_sb, &location, sub_root); } else { - inode = btrfs_iget(dir->i_sb, &location, sub_root); + inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL); } srcu_read_unlock(&root->fs_info->subvol_srcu, index); + if (root != sub_root) { + down_read(&root->fs_info->cleanup_work_sem); + if (!(inode->i_sb->s_flags & MS_RDONLY)) + btrfs_orphan_cleanup(sub_root); + up_read(&root->fs_info->cleanup_work_sem); + } + return inode; } @@ -3503,12 +3809,14 @@ static int btrfs_dentry_delete(struct dentry *dentry) { struct btrfs_root *root; - if (!dentry->d_inode) - return 0; + if (!dentry->d_inode && !IS_ROOT(dentry)) + dentry = dentry->d_parent; - root = BTRFS_I(dentry->d_inode)->root; - if (btrfs_root_refs(&root->root_item) == 0) - return 1; + if (dentry->d_inode) { + root = BTRFS_I(dentry->d_inode)->root; + if (btrfs_root_refs(&root->root_item) == 0) + return 1; + } return 0; } @@ -3668,7 +3976,11 @@ skip: /* Reached end of directory/root. Bump pos past the last item. */ if (key_type == BTRFS_DIR_INDEX_KEY) - filp->f_pos = INT_LIMIT(off_t); + /* + * 32-bit glibc will use getdents64, but then strtol - + * so the last number we can serve is this. + */ + filp->f_pos = 0x7fffffff; else filp->f_pos++; nopos: @@ -3678,7 +3990,7 @@ err: return ret; } -int btrfs_write_inode(struct inode *inode, int wait) +int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; @@ -3687,7 +3999,7 @@ int btrfs_write_inode(struct inode *inode, int wait) if (root->fs_info->btree_inode == inode) return 0; - if (wait) { + if (wbc->sync_mode == WB_SYNC_ALL) { trans = btrfs_join_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_commit_transaction(trans, root); @@ -3990,11 +4302,18 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; - err = btrfs_check_metadata_free_space(root); + /* + * 2 for inode item and ref + * 2 for dir items + * 1 for xattr if selinux is on + */ + err = btrfs_reserve_metadata_space(root, 5); if (err) - goto fail; + return err; trans = btrfs_start_transaction(root, 1); + if (!trans) + goto fail; btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); @@ -4011,7 +4330,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -4032,6 +4351,7 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); fail: + btrfs_unreserve_metadata_space(root, 5); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -4052,10 +4372,18 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, u64 objectid; u64 index = 0; - err = btrfs_check_metadata_free_space(root); + /* + * 2 for inode item and ref + * 2 for dir items + * 1 for xattr if selinux is on + */ + err = btrfs_reserve_metadata_space(root, 5); if (err) - goto fail; + return err; + trans = btrfs_start_transaction(root, 1); + if (!trans) + goto fail; btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); @@ -4073,7 +4401,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -4096,6 +4424,7 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); fail: + btrfs_unreserve_metadata_space(root, 5); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -4118,10 +4447,20 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == 0) return -ENOENT; - btrfs_inc_nlink(inode); - err = btrfs_check_metadata_free_space(root); + /* do not allow sys_link's with other subvols of the same device */ + if (root->objectid != BTRFS_I(inode)->root->objectid) + return -EPERM; + + /* + * 1 item for inode ref + * 2 items for dir items + */ + err = btrfs_reserve_metadata_space(root, 3); if (err) - goto fail; + return err; + + btrfs_inc_nlink(inode); + err = btrfs_set_inode_index(dir, &index); if (err) goto fail; @@ -4145,6 +4484,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); fail: + btrfs_unreserve_metadata_space(root, 3); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -4164,22 +4504,26 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) u64 index = 0; unsigned long nr = 1; - err = btrfs_check_metadata_free_space(root); + /* + * 2 items for inode and ref + * 2 items for dir items + * 1 for xattr if selinux is on + */ + err = btrfs_reserve_metadata_space(root, 5); if (err) - goto out_unlock; + return err; trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - - if (IS_ERR(trans)) { - err = PTR_ERR(trans); + if (!trans) { + err = -ENOMEM; goto out_unlock; } + btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); if (err) { err = -ENOSPC; - goto out_unlock; + goto out_fail; } inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, @@ -4194,7 +4538,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) drop_on_err = 1; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) goto out_fail; @@ -4223,6 +4567,7 @@ out_fail: btrfs_end_transaction_throttle(trans, root); out_unlock: + btrfs_unreserve_metadata_space(root, 5); if (drop_on_err) iput(inode); btrfs_btree_balance_dirty(root, nr); @@ -4656,6 +5001,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) { struct extent_io_tree *tree; struct btrfs_ordered_extent *ordered; + struct extent_state *cached_state = NULL; u64 page_start = page_offset(page); u64 page_end = page_start + PAGE_CACHE_SIZE - 1; @@ -4674,7 +5020,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) btrfs_releasepage(page, GFP_NOFS); return; } - lock_extent(tree, page_start, page_end, GFP_NOFS); + lock_extent_bits(tree, page_start, page_end, 0, &cached_state, + GFP_NOFS); ordered = btrfs_lookup_ordered_extent(page->mapping->host, page_offset(page)); if (ordered) { @@ -4684,7 +5031,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) */ clear_extent_bit(tree, page_start, page_end, EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS); + EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, + &cached_state, GFP_NOFS); /* * whoever cleared the private bit is responsible * for the finish_ordered_io @@ -4694,11 +5042,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) page_start, page_end); } btrfs_put_ordered_extent(ordered); - lock_extent(tree, page_start, page_end, GFP_NOFS); + cached_state = NULL; + lock_extent_bits(tree, page_start, page_end, 0, &cached_state, + GFP_NOFS); } clear_extent_bit(tree, page_start, page_end, - EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, - 1, 1, NULL, GFP_NOFS); + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS); __btrfs_releasepage(page, GFP_NOFS); ClearPageChecked(page); @@ -4731,6 +5081,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_ordered_extent *ordered; + struct extent_state *cached_state = NULL; char *kaddr; unsigned long zero_start; loff_t size; @@ -4747,6 +5098,13 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out; } + ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); + if (ret) { + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); + ret = VM_FAULT_SIGBUS; + goto out; + } + ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ again: lock_page(page); @@ -4762,7 +5120,8 @@ again: } wait_on_page_writeback(page); - lock_extent(io_tree, page_start, page_end, GFP_NOFS); + lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state, + GFP_NOFS); set_page_extent_mapped(page); /* @@ -4771,14 +5130,34 @@ again: */ ordered = btrfs_lookup_ordered_extent(inode, page_start); if (ordered) { - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_extent_cached(io_tree, page_start, page_end, + &cached_state, GFP_NOFS); unlock_page(page); btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); goto again; } - btrfs_set_extent_delalloc(inode, page_start, page_end); + /* + * XXX - page_mkwrite gets called every time the page is dirtied, even + * if it was already dirty, so for space accounting reasons we need to + * clear any delalloc bits for the range we are fixing to save. There + * is probably a better way to do this, but for now keep consistent with + * prepare_pages in the normal write path. + */ + clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, + EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, + 0, 0, &cached_state, GFP_NOFS); + + ret = btrfs_set_extent_delalloc(inode, page_start, page_end, + &cached_state); + if (ret) { + unlock_extent_cached(io_tree, page_start, page_end, + &cached_state, GFP_NOFS); + ret = VM_FAULT_SIGBUS; + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); + goto out_unlock; + } ret = 0; /* page is wholly or partially inside EOF */ @@ -4797,10 +5176,13 @@ again: set_page_dirty(page); SetPageUptodate(page); - BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + BTRFS_I(inode)->last_trans = root->fs_info->generation; + BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; + + unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); out_unlock: + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); if (!ret) return VM_FAULT_LOCKED; unlock_page(page); @@ -4816,15 +5198,20 @@ static void btrfs_truncate(struct inode *inode) unsigned long nr; u64 mask = root->sectorsize - 1; - if (!S_ISREG(inode->i_mode)) + if (!S_ISREG(inode->i_mode)) { + WARN_ON(1); return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + } + + ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); + if (ret) return; - btrfs_truncate_page(inode->i_mapping, inode->i_size); btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); + btrfs_ordered_update_i_size(inode, inode->i_size, NULL); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); /* * setattr is responsible for setting the ordered_data_close flag, @@ -4846,21 +5233,32 @@ static void btrfs_truncate(struct inode *inode) if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) btrfs_add_ordered_operation(trans, root, inode); - btrfs_set_trans_block_group(trans, inode); - btrfs_i_size_write(inode, inode->i_size); + while (1) { + ret = btrfs_truncate_inode_items(trans, root, inode, + inode->i_size, + BTRFS_EXTENT_DATA_KEY); + if (ret != -EAGAIN) + break; - ret = btrfs_orphan_add(trans, inode); - if (ret) - goto out; - /* FIXME, add redo link to tree so we don't leak on crash */ - ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, - BTRFS_EXTENT_DATA_KEY); - btrfs_update_inode(trans, root, inode); + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root, nr); + + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + } + + if (ret == 0 && inode->i_nlink > 0) { + ret = btrfs_orphan_del(trans, inode); + BUG_ON(ret); + } - ret = btrfs_orphan_del(trans, inode); + ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); -out: nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); BUG_ON(ret); @@ -4916,7 +5314,12 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) if (!ei) return NULL; ei->last_trans = 0; + ei->last_sub_trans = 0; ei->logged_trans = 0; + ei->outstanding_extents = 0; + ei->reserved_extents = 0; + ei->root = NULL; + spin_lock_init(&ei->accounting_lock); btrfs_ordered_inode_tree_init(&ei->ordered_tree); INIT_LIST_HEAD(&ei->i_orphan); INIT_LIST_HEAD(&ei->ordered_operations); @@ -4932,6 +5335,14 @@ void btrfs_destroy_inode(struct inode *inode) WARN_ON(inode->i_data.nrpages); /* + * This can happen where we create an inode, but somebody else also + * created the same inode and we need to destroy the one we already + * created. + */ + if (!root) + goto free; + + /* * Make sure we're properly removed from the ordered operation * lists. */ @@ -4944,9 +5355,9 @@ void btrfs_destroy_inode(struct inode *inode) spin_lock(&root->list_lock); if (!list_empty(&BTRFS_I(inode)->i_orphan)) { - printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" - " list\n", inode->i_ino); - dump_stack(); + printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", + inode->i_ino); + list_del_init(&BTRFS_I(inode)->i_orphan); } spin_unlock(&root->list_lock); @@ -4966,6 +5377,7 @@ void btrfs_destroy_inode(struct inode *inode) } inode_tree_del(inode); btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); +free: kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } @@ -5070,7 +5482,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; - ret = btrfs_check_metadata_free_space(root); + /* + * We want to reserve the absolute worst case amount of items. So if + * both inodes are subvols and we need to unlink them then that would + * require 4 item modifications, but if they are both normal inodes it + * would require 5 item modifications, so we'll assume their normal + * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items + * should cover the worst case number of items we'll modify. + */ + ret = btrfs_reserve_metadata_space(root, 11); if (ret) return ret; @@ -5185,6 +5605,8 @@ out_fail: if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&root->fs_info->subvol_sem); + + btrfs_unreserve_metadata_space(root, 11); return ret; } @@ -5192,7 +5614,7 @@ out_fail: * some fairly slow code that needs optimization. This walks the list * of all the inodes with pending delalloc and forces them to disk. */ -int btrfs_start_delalloc_inodes(struct btrfs_root *root) +int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) { struct list_head *head = &root->fs_info->delalloc_inodes; struct btrfs_inode *binode; @@ -5211,7 +5633,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) spin_unlock(&root->fs_info->delalloc_lock); if (inode) { filemap_flush(inode->i_mapping); - iput(inode); + if (delay_iput) + btrfs_add_delayed_iput(inode); + else + iput(inode); } cond_resched(); spin_lock(&root->fs_info->delalloc_lock); @@ -5256,11 +5681,18 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) return -ENAMETOOLONG; - err = btrfs_check_metadata_free_space(root); + /* + * 2 items for inode item and ref + * 2 items for dir items + * 1 item for xattr if selinux is on + */ + err = btrfs_reserve_metadata_space(root, 5); if (err) - goto out_fail; + return err; trans = btrfs_start_transaction(root, 1); + if (!trans) + goto out_fail; btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); @@ -5278,7 +5710,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -5341,6 +5773,7 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); out_fail: + btrfs_unreserve_metadata_space(root, 5); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -5349,56 +5782,83 @@ out_fail: return err; } -static int prealloc_file_range(struct btrfs_trans_handle *trans, - struct inode *inode, u64 start, u64 end, - u64 locked_end, u64 alloc_hint, int mode) +static int prealloc_file_range(struct inode *inode, u64 start, u64 end, + u64 alloc_hint, int mode, loff_t actual_len) { + struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 alloc_size; u64 cur_offset = start; u64 num_bytes = end - start; int ret = 0; + u64 i_size; while (num_bytes > 0) { alloc_size = min(num_bytes, root->fs_info->max_extent); + + trans = btrfs_start_transaction(root, 1); + ret = btrfs_reserve_extent(trans, root, alloc_size, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { WARN_ON(1); - goto out; + goto stop_trans; } + + ret = btrfs_reserve_metadata_space(root, 3); + if (ret) { + btrfs_free_reserved_extent(root, ins.objectid, + ins.offset); + goto stop_trans; + } + ret = insert_reserved_file_extent(trans, inode, cur_offset, ins.objectid, ins.offset, ins.offset, - ins.offset, locked_end, - 0, 0, 0, + ins.offset, 0, 0, 0, BTRFS_FILE_EXTENT_PREALLOC); BUG_ON(ret); btrfs_drop_extent_cache(inode, cur_offset, cur_offset + ins.offset -1, 0); + num_bytes -= ins.offset; cur_offset += ins.offset; alloc_hint = ins.objectid + ins.offset; - } -out: - if (cur_offset > start) { + inode->i_ctime = CURRENT_TIME; BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && - cur_offset > i_size_read(inode)) - btrfs_i_size_write(inode, cur_offset); + (actual_len > inode->i_size) && + (cur_offset > inode->i_size)) { + + if (cur_offset > actual_len) + i_size = actual_len; + else + i_size = cur_offset; + i_size_write(inode, i_size); + btrfs_ordered_update_i_size(inode, i_size, NULL); + } + ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); + + btrfs_end_transaction(trans, root); + btrfs_unreserve_metadata_space(root, 3); } + return ret; +stop_trans: + btrfs_end_transaction(trans, root); return ret; + } static long btrfs_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) { + struct extent_state *cached_state = NULL; u64 cur_offset; u64 last_byte; u64 alloc_start; @@ -5407,8 +5867,6 @@ static long btrfs_fallocate(struct inode *inode, int mode, u64 locked_end; u64 mask = BTRFS_I(inode)->root->sectorsize - 1; struct extent_map *em; - struct btrfs_trans_handle *trans; - struct btrfs_root *root; int ret; alloc_start = offset & ~mask; @@ -5427,9 +5885,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, goto out; } - root = BTRFS_I(inode)->root; - - ret = btrfs_check_data_free_space(root, inode, + ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode, alloc_end - alloc_start); if (ret) goto out; @@ -5438,27 +5894,20 @@ static long btrfs_fallocate(struct inode *inode, int mode, while (1) { struct btrfs_ordered_extent *ordered; - trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); - if (!trans) { - ret = -EIO; - goto out_free; - } - /* the extent lock is ordered inside the running * transaction */ - lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, - GFP_NOFS); + lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, + locked_end, 0, &cached_state, GFP_NOFS); ordered = btrfs_lookup_first_ordered_extent(inode, alloc_end - 1); if (ordered && ordered->file_offset + ordered->len > alloc_start && ordered->file_offset < alloc_end) { btrfs_put_ordered_extent(ordered); - unlock_extent(&BTRFS_I(inode)->io_tree, - alloc_start, locked_end, GFP_NOFS); - btrfs_end_transaction(trans, BTRFS_I(inode)->root); - + unlock_extent_cached(&BTRFS_I(inode)->io_tree, + alloc_start, locked_end, + &cached_state, GFP_NOFS); /* * we can't wait on the range with the transaction * running or with the extent lock held @@ -5479,10 +5928,12 @@ static long btrfs_fallocate(struct inode *inode, int mode, BUG_ON(IS_ERR(em) || !em); last_byte = min(extent_map_end(em), alloc_end); last_byte = (last_byte + mask) & ~mask; - if (em->block_start == EXTENT_MAP_HOLE) { - ret = prealloc_file_range(trans, inode, cur_offset, - last_byte, locked_end + 1, - alloc_hint, mode); + if (em->block_start == EXTENT_MAP_HOLE || + (cur_offset >= inode->i_size && + !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { + ret = prealloc_file_range(inode, + cur_offset, last_byte, + alloc_hint, mode, offset+len); if (ret < 0) { free_extent_map(em); break; @@ -5498,12 +5949,11 @@ static long btrfs_fallocate(struct inode *inode, int mode, break; } } - unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, - GFP_NOFS); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, + &cached_state, GFP_NOFS); - btrfs_end_transaction(trans, BTRFS_I(inode)->root); -out_free: - btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start); + btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, + alloc_end - alloc_start); out: mutex_unlock(&inode->i_mutex); return ret; @@ -5544,7 +5994,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { .permission = btrfs_permission, }; -static struct file_operations btrfs_dir_file_operations = { +static const struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = btrfs_real_readdir, @@ -5566,6 +6016,8 @@ static struct extent_io_ops btrfs_extent_io_ops = { .readpage_io_failed_hook = btrfs_io_failed_hook, .set_bit_hook = btrfs_set_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook, + .merge_extent_hook = btrfs_merge_extent_hook, + .split_extent_hook = btrfs_split_extent_hook, }; /* @@ -5632,6 +6084,6 @@ static const struct inode_operations btrfs_symlink_inode_operations = { .removexattr = btrfs_removexattr, }; -struct dentry_operations btrfs_dentry_operations = { +const struct dentry_operations btrfs_dentry_operations = { .d_delete = btrfs_dentry_delete, }; |