summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c208
1 files changed, 150 insertions, 58 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ae6af072b635..1f2b99cb55ea 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -40,6 +40,7 @@
#include "tree-log.h"
#include "locking.h"
#include "volumes.h"
+#include "qgroup.h"
static struct kmem_cache *btrfs_inode_defrag_cachep;
/*
@@ -447,7 +448,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
write_bytes -= copied;
total_copied += copied;
- /* Return to btrfs_file_aio_write to fault page */
+ /* Return to btrfs_file_write_iter to fault page */
if (unlikely(copied == 0))
break;
@@ -470,11 +471,12 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
for (i = 0; i < num_pages; i++) {
/* page checked is some magic around finding pages that
* have been modified without going through btrfs_set_page_dirty
- * clear it here
+ * clear it here. There should be no need to mark the pages
+ * accessed as prepare_pages should have marked them accessed
+ * in prepare_pages via find_or_create_page()
*/
ClearPageChecked(pages[i]);
unlock_page(pages[i]);
- mark_page_accessed(pages[i]);
page_cache_release(pages[i]);
}
}
@@ -714,7 +716,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
int recow;
int ret;
int modify_tree = -1;
- int update_refs = (root->ref_cows || root == root->fs_info->tree_root);
+ int update_refs;
int found = 0;
int leafs_visited = 0;
@@ -724,6 +726,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
modify_tree = 0;
+ update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
+ root == root->fs_info->tree_root);
while (1) {
recow = 0;
ret = btrfs_lookup_file_extent(trans, root, path, ino,
@@ -780,6 +784,18 @@ next_slot:
extent_end = search_start;
}
+ /*
+ * Don't skip extent items representing 0 byte lengths. They
+ * used to be created (bug) if while punching holes we hit
+ * -ENOSPC condition. So if we find one here, just ensure we
+ * delete it, otherwise we would insert a new file extent item
+ * with the same key (offset) as that 0 bytes length file
+ * extent item in the call to setup_items_for_insert() later
+ * in this function.
+ */
+ if (extent_end == key.offset && extent_end >= search_start)
+ goto delete_extent_item;
+
if (extent_end <= search_start) {
path->slots[0]++;
goto next_slot;
@@ -835,7 +851,7 @@ next_slot:
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
new_key.objectid,
- start - extent_offset, 0);
+ start - extent_offset, 1);
BUG_ON(ret); /* -ENOMEM */
}
key.offset = start;
@@ -893,6 +909,7 @@ next_slot:
* | ------ extent ------ |
*/
if (start <= key.offset && end >= extent_end) {
+delete_extent_item:
if (del_nr == 0) {
del_slot = path->slots[0];
del_nr = 1;
@@ -1191,7 +1208,7 @@ again:
ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
root->root_key.objectid,
- ino, orig_offset, 0);
+ ino, orig_offset, 1);
BUG_ON(ret); /* -ENOMEM */
if (split == start) {
@@ -1658,27 +1675,22 @@ again:
}
static ssize_t __btrfs_direct_write(struct kiocb *iocb,
- const struct iovec *iov,
- unsigned long nr_segs, loff_t pos,
- size_t count, size_t ocount)
+ struct iov_iter *from,
+ loff_t pos)
{
struct file *file = iocb->ki_filp;
- struct iov_iter i;
ssize_t written;
ssize_t written_buffered;
loff_t endbyte;
int err;
- written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
- count, ocount);
+ written = generic_file_direct_write(iocb, from, pos);
- if (written < 0 || written == count)
+ if (written < 0 || !iov_iter_count(from))
return written;
pos += written;
- count -= written;
- iov_iter_init(&i, iov, nr_segs, count, written);
- written_buffered = __btrfs_buffered_write(file, &i, pos);
+ written_buffered = __btrfs_buffered_write(file, from, pos);
if (written_buffered < 0) {
err = written_buffered;
goto out;
@@ -1713,9 +1725,8 @@ static void update_time_for_write(struct inode *inode)
inode_inc_iversion(inode);
}
-static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
- const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
+ struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
@@ -1724,18 +1735,12 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
u64 end_pos;
ssize_t num_written = 0;
ssize_t err = 0;
- size_t count, ocount;
+ size_t count = iov_iter_count(from);
bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
+ loff_t pos = iocb->ki_pos;
mutex_lock(&inode->i_mutex);
- err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
- if (err) {
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
- count = ocount;
-
current->backing_dev_info = inode->i_mapping->backing_dev_info;
err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
if (err) {
@@ -1748,6 +1753,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
goto out;
}
+ iov_iter_truncate(from, count);
+
err = file_remove_suid(file);
if (err) {
mutex_unlock(&inode->i_mutex);
@@ -1789,14 +1796,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
atomic_inc(&BTRFS_I(inode)->sync_writers);
if (unlikely(file->f_flags & O_DIRECT)) {
- num_written = __btrfs_direct_write(iocb, iov, nr_segs,
- pos, count, ocount);
+ num_written = __btrfs_direct_write(iocb, from, pos);
} else {
- struct iov_iter i;
-
- iov_iter_init(&i, iov, nr_segs, count, num_written);
-
- num_written = __btrfs_buffered_write(file, &i, pos);
+ num_written = __btrfs_buffered_write(file, from, pos);
if (num_written > 0)
iocb->ki_pos = pos + num_written;
}
@@ -2009,8 +2011,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (!full_sync) {
ret = btrfs_wait_ordered_range(inode, start,
end - start + 1);
- if (ret)
+ if (ret) {
+ btrfs_end_transaction(trans, root);
goto out;
+ }
}
ret = btrfs_commit_transaction(trans, root);
} else {
@@ -2168,6 +2172,37 @@ out:
return 0;
}
+/*
+ * Find a hole extent on given inode and change start/len to the end of hole
+ * extent.(hole/vacuum extent whose em->start <= start &&
+ * em->start + em->len > start)
+ * When a hole extent is found, return 1 and modify start/len.
+ */
+static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
+{
+ struct extent_map *em;
+ int ret = 0;
+
+ em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0);
+ if (IS_ERR_OR_NULL(em)) {
+ if (!em)
+ ret = -ENOMEM;
+ else
+ ret = PTR_ERR(em);
+ return ret;
+ }
+
+ /* Hole or vacuum extent(only exists in no-hole mode) */
+ if (em->block_start == EXTENT_MAP_HOLE) {
+ ret = 1;
+ *len = em->start + em->len > *start + *len ?
+ 0 : *start + *len - em->start - em->len;
+ *start = em->start + em->len;
+ }
+ free_extent_map(em);
+ return ret;
+}
+
static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -2175,25 +2210,42 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
struct btrfs_path *path;
struct btrfs_block_rsv *rsv;
struct btrfs_trans_handle *trans;
- u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
- u64 lockend = round_down(offset + len,
- BTRFS_I(inode)->root->sectorsize) - 1;
- u64 cur_offset = lockstart;
+ u64 lockstart;
+ u64 lockend;
+ u64 tail_start;
+ u64 tail_len;
+ u64 orig_start = offset;
+ u64 cur_offset;
u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
u64 drop_end;
int ret = 0;
int err = 0;
int rsv_count;
- bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
- ((offset + len - 1) >> PAGE_CACHE_SHIFT));
+ bool same_page;
bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
- u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
+ u64 ino_size;
ret = btrfs_wait_ordered_range(inode, offset, len);
if (ret)
return ret;
mutex_lock(&inode->i_mutex);
+ ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
+ ret = find_first_non_hole(inode, &offset, &len);
+ if (ret < 0)
+ goto out_only_mutex;
+ if (ret && !len) {
+ /* Already in a large hole */
+ ret = 0;
+ goto out_only_mutex;
+ }
+
+ lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize);
+ lockend = round_down(offset + len,
+ BTRFS_I(inode)->root->sectorsize) - 1;
+ same_page = ((offset >> PAGE_CACHE_SHIFT) ==
+ ((offset + len - 1) >> PAGE_CACHE_SHIFT));
+
/*
* We needn't truncate any page which is beyond the end of the file
* because we are sure there is no data there.
@@ -2205,8 +2257,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (same_page && len < PAGE_CACHE_SIZE) {
if (offset < ino_size)
ret = btrfs_truncate_page(inode, offset, len, 0);
- mutex_unlock(&inode->i_mutex);
- return ret;
+ goto out_only_mutex;
}
/* zero back part of the first page */
@@ -2218,12 +2269,39 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
}
}
- /* zero the front end of the last page */
- if (offset + len < ino_size) {
- ret = btrfs_truncate_page(inode, offset + len, 0, 1);
- if (ret) {
- mutex_unlock(&inode->i_mutex);
- return ret;
+ /* Check the aligned pages after the first unaligned page,
+ * if offset != orig_start, which means the first unaligned page
+ * including serveral following pages are already in holes,
+ * the extra check can be skipped */
+ if (offset == orig_start) {
+ /* after truncate page, check hole again */
+ len = offset + len - lockstart;
+ offset = lockstart;
+ ret = find_first_non_hole(inode, &offset, &len);
+ if (ret < 0)
+ goto out_only_mutex;
+ if (ret && !len) {
+ ret = 0;
+ goto out_only_mutex;
+ }
+ lockstart = offset;
+ }
+
+ /* Check the tail unaligned part is in a hole */
+ tail_start = lockend + 1;
+ tail_len = offset + len - tail_start;
+ if (tail_len) {
+ ret = find_first_non_hole(inode, &tail_start, &tail_len);
+ if (unlikely(ret < 0))
+ goto out_only_mutex;
+ if (!ret) {
+ /* zero the front end of the last page */
+ if (tail_start + tail_len < ino_size) {
+ ret = btrfs_truncate_page(inode,
+ tail_start + tail_len, 0, 1);
+ if (ret)
+ goto out_only_mutex;
+ }
}
}
@@ -2249,9 +2327,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if ((!ordered ||
(ordered->file_offset + ordered->len <= lockstart ||
ordered->file_offset > lockend)) &&
- !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
- lockend, EXTENT_UPTODATE, 0,
- cached_state)) {
+ !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
if (ordered)
btrfs_put_ordered_extent(ordered);
break;
@@ -2299,6 +2375,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
BUG_ON(ret);
trans->block_rsv = rsv;
+ cur_offset = lockstart;
+ len = lockend - cur_offset;
while (cur_offset < lockend) {
ret = __btrfs_drop_extents(trans, root, inode, path,
cur_offset, lockend + 1,
@@ -2339,6 +2417,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
rsv, min_size);
BUG_ON(ret); /* shouldn't happen */
trans->block_rsv = rsv;
+
+ ret = find_first_non_hole(inode, &cur_offset, &len);
+ if (unlikely(ret < 0))
+ break;
+ if (ret && !len) {
+ ret = 0;
+ break;
+ }
}
if (ret) {
@@ -2347,7 +2433,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
}
trans->block_rsv = &root->fs_info->trans_block_rsv;
- if (cur_offset < ino_size) {
+ /*
+ * Don't insert file hole extent item if it's for a range beyond eof
+ * (because it's useless) or if it represents a 0 bytes range (when
+ * cur_offset == drop_end).
+ */
+ if (cur_offset < ino_size && cur_offset < drop_end) {
ret = fill_holes(trans, inode, path, cur_offset, drop_end);
if (ret) {
err = ret;
@@ -2372,6 +2463,7 @@ out_free:
out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS);
+out_only_mutex:
mutex_unlock(&inode->i_mutex);
if (ret && !err)
err = ret;
@@ -2633,11 +2725,11 @@ out:
const struct file_operations btrfs_file_operations = {
.llseek = btrfs_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = generic_file_aio_read,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = generic_file_read_iter,
.splice_read = generic_file_splice_read,
- .aio_write = btrfs_file_aio_write,
+ .write_iter = btrfs_file_write_iter,
.mmap = btrfs_file_mmap,
.open = generic_file_open,
.release = btrfs_release_file,