summaryrefslogtreecommitdiffstats
path: root/fs/ext4/extents.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r--fs/ext4/extents.c810
1 files changed, 695 insertions, 115 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7a3832577923..94c8ee81f5e1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -195,8 +195,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
if (S_ISREG(inode->i_mode))
block_group++;
}
- bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
- le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
+ bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
/*
@@ -296,29 +295,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
* to allocate @blocks
* Worse case is one block per extent
*/
-int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
+int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
{
- int lcap, icap, rcap, leafs, idxs, num;
- int newextents = blocks;
-
- rcap = ext4_ext_space_root_idx(inode, 0);
- lcap = ext4_ext_space_block(inode, 0);
- icap = ext4_ext_space_block_idx(inode, 0);
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ int idxs, num = 0;
- /* number of new leaf blocks needed */
- num = leafs = (newextents + lcap - 1) / lcap;
+ idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
+ / sizeof(struct ext4_extent_idx));
/*
- * Worse case, we need separate index block(s)
- * to link all new leaf blocks
+ * If the new delayed allocation block is contiguous with the
+ * previous da block, it can share index blocks with the
+ * previous block, so we only need to allocate a new index
+ * block every idxs leaf blocks. At ldxs**2 blocks, we need
+ * an additional index block, and at ldxs**3 blocks, yet
+ * another index blocks.
*/
- idxs = (leafs + icap - 1) / icap;
- do {
- num += idxs;
- idxs = (idxs + icap - 1) / icap;
- } while (idxs > rcap);
+ if (ei->i_da_metadata_calc_len &&
+ ei->i_da_metadata_calc_last_lblock+1 == lblock) {
+ if ((ei->i_da_metadata_calc_len % idxs) == 0)
+ num++;
+ if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
+ num++;
+ if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
+ num++;
+ ei->i_da_metadata_calc_len = 0;
+ } else
+ ei->i_da_metadata_calc_len++;
+ ei->i_da_metadata_calc_last_lblock++;
+ return num;
+ }
- return num;
+ /*
+ * In the worst case we need a new set of index blocks at
+ * every level of the inode's extent tree.
+ */
+ ei->i_da_metadata_calc_len = 1;
+ ei->i_da_metadata_calc_last_lblock = lblock;
+ return ext_depth(inode) + 1;
}
static int
@@ -425,7 +439,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode,
return 0;
corrupted:
- ext4_error(inode->i_sb, function,
+ __ext4_error(inode->i_sb, function,
"bad header/extent in inode #%lu: %s - magic %x, "
"entries %u, max %u(%u), depth %u(%u)",
inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
@@ -688,7 +702,12 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
}
eh = ext_block_hdr(bh);
ppos++;
- BUG_ON(ppos > depth);
+ if (unlikely(ppos > depth)) {
+ put_bh(bh);
+ EXT4_ERROR_INODE(inode,
+ "ppos %d > depth %d", ppos, depth);
+ goto err;
+ }
path[ppos].p_bh = bh;
path[ppos].p_hdr = eh;
i--;
@@ -723,7 +742,7 @@ err:
* insert new index [@logical;@ptr] into the block at @curp;
* check where to insert: before @curp or after @curp
*/
-static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
+int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
struct ext4_ext_path *curp,
int logical, ext4_fsblk_t ptr)
{
@@ -734,7 +753,12 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
if (err)
return err;
- BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block));
+ if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
+ EXT4_ERROR_INODE(inode,
+ "logical %d == ei_block %d!",
+ logical, le32_to_cpu(curp->p_idx->ei_block));
+ return -EIO;
+ }
len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
/* insert after */
@@ -764,9 +788,17 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
ext4_idx_store_pblock(ix, ptr);
le16_add_cpu(&curp->p_hdr->eh_entries, 1);
- BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
- > le16_to_cpu(curp->p_hdr->eh_max));
- BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr));
+ if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
+ > le16_to_cpu(curp->p_hdr->eh_max))) {
+ EXT4_ERROR_INODE(inode,
+ "logical %d == ei_block %d!",
+ logical, le32_to_cpu(curp->p_idx->ei_block));
+ return -EIO;
+ }
+ if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
+ EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
+ return -EIO;
+ }
err = ext4_ext_dirty(handle, inode, curp);
ext4_std_error(inode->i_sb, err);
@@ -804,7 +836,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
/* if current leaf will be split, then we should use
* border from split point */
- BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr));
+ if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
+ EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
+ return -EIO;
+ }
if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
border = path[depth].p_ext[1].ee_block;
ext_debug("leaf will be split."
@@ -845,7 +880,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
/* initialize new leaf */
newblock = ablocks[--a];
- BUG_ON(newblock == 0);
+ if (unlikely(newblock == 0)) {
+ EXT4_ERROR_INODE(inode, "newblock == 0!");
+ err = -EIO;
+ goto cleanup;
+ }
bh = sb_getblk(inode->i_sb, newblock);
if (!bh) {
err = -EIO;
@@ -865,7 +904,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
ex = EXT_FIRST_EXTENT(neh);
/* move remainder of path[depth] to the new leaf */
- BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max);
+ if (unlikely(path[depth].p_hdr->eh_entries !=
+ path[depth].p_hdr->eh_max)) {
+ EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
+ path[depth].p_hdr->eh_entries,
+ path[depth].p_hdr->eh_max);
+ err = -EIO;
+ goto cleanup;
+ }
/* start copy from next extent */
/* TODO: we could do it by single memmove */
m = 0;
@@ -912,7 +958,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
/* create intermediate indexes */
k = depth - at - 1;
- BUG_ON(k < 0);
+ if (unlikely(k < 0)) {
+ EXT4_ERROR_INODE(inode, "k %d < 0!", k);
+ err = -EIO;
+ goto cleanup;
+ }
if (k)
ext_debug("create %d intermediate indices\n", k);
/* insert new index into current index block */
@@ -949,8 +999,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
EXT_MAX_INDEX(path[i].p_hdr));
- BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) !=
- EXT_LAST_INDEX(path[i].p_hdr));
+ if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
+ EXT_LAST_INDEX(path[i].p_hdr))) {
+ EXT4_ERROR_INODE(inode,
+ "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
+ le32_to_cpu(path[i].p_ext->ee_block));
+ err = -EIO;
+ goto cleanup;
+ }
while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
ext_debug("%d: move %d:%llu in new index %llu\n", i,
le32_to_cpu(path[i].p_idx->ei_block),
@@ -1007,7 +1063,8 @@ cleanup:
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
- ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
+ ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+ EXT4_FREE_BLOCKS_METADATA);
}
}
kfree(ablocks);
@@ -1187,7 +1244,10 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
struct ext4_extent *ex;
int depth, ee_len;
- BUG_ON(path == NULL);
+ if (unlikely(path == NULL)) {
+ EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
+ return -EIO;
+ }
depth = path->p_depth;
*phys = 0;
@@ -1201,15 +1261,33 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
ex = path[depth].p_ext;
ee_len = ext4_ext_get_actual_len(ex);
if (*logical < le32_to_cpu(ex->ee_block)) {
- BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
+ if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
+ EXT4_ERROR_INODE(inode,
+ "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
+ *logical, le32_to_cpu(ex->ee_block));
+ return -EIO;
+ }
while (--depth >= 0) {
ix = path[depth].p_idx;
- BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
+ if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
+ EXT4_ERROR_INODE(inode,
+ "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
+ ix != NULL ? ix->ei_block : 0,
+ EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
+ EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0,
+ depth);
+ return -EIO;
+ }
}
return 0;
}
- BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));
+ if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+ EXT4_ERROR_INODE(inode,
+ "logical %d < ee_block %d + ee_len %d!",
+ *logical, le32_to_cpu(ex->ee_block), ee_len);
+ return -EIO;
+ }
*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
*phys = ext_pblock(ex) + ee_len - 1;
@@ -1235,7 +1313,10 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
int depth; /* Note, NOT eh_depth; depth from top of tree */
int ee_len;
- BUG_ON(path == NULL);
+ if (unlikely(path == NULL)) {
+ EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
+ return -EIO;
+ }
depth = path->p_depth;
*phys = 0;
@@ -1249,17 +1330,32 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
ex = path[depth].p_ext;
ee_len = ext4_ext_get_actual_len(ex);
if (*logical < le32_to_cpu(ex->ee_block)) {
- BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
+ if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
+ EXT4_ERROR_INODE(inode,
+ "first_extent(path[%d].p_hdr) != ex",
+ depth);
+ return -EIO;
+ }
while (--depth >= 0) {
ix = path[depth].p_idx;
- BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
+ if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
+ EXT4_ERROR_INODE(inode,
+ "ix != EXT_FIRST_INDEX *logical %d!",
+ *logical);
+ return -EIO;
+ }
}
*logical = le32_to_cpu(ex->ee_block);
*phys = ext_pblock(ex);
return 0;
}
- BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));
+ if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+ EXT4_ERROR_INODE(inode,
+ "logical %d < ee_block %d + ee_len %d!",
+ *logical, le32_to_cpu(ex->ee_block), ee_len);
+ return -EIO;
+ }
if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
/* next allocated block in this leaf */
@@ -1398,8 +1494,12 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
- BUG_ON(ex == NULL);
- BUG_ON(eh == NULL);
+
+ if (unlikely(ex == NULL || eh == NULL)) {
+ EXT4_ERROR_INODE(inode,
+ "ex %p == NULL or eh %p == NULL", ex, eh);
+ return -EIO;
+ }
if (depth == 0) {
/* there is no tree at all */
@@ -1522,8 +1622,9 @@ int ext4_ext_try_to_merge(struct inode *inode,
merge_done = 1;
WARN_ON(eh->eh_entries == 0);
if (!eh->eh_entries)
- ext4_error(inode->i_sb, "ext4_ext_try_to_merge",
- "inode#%lu, eh->eh_entries = 0!", inode->i_ino);
+ ext4_error(inode->i_sb,
+ "inode#%lu, eh->eh_entries = 0!",
+ inode->i_ino);
}
return merge_done;
@@ -1586,7 +1687,7 @@ out:
*/
int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
- struct ext4_extent *newext)
+ struct ext4_extent *newext, int flag)
{
struct ext4_extent_header *eh;
struct ext4_extent *ex, *fex;
@@ -1596,13 +1697,20 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
ext4_lblk_t next;
unsigned uninitialized = 0;
- BUG_ON(ext4_ext_get_actual_len(newext) == 0);
+ if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
+ EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
+ return -EIO;
+ }
depth = ext_depth(inode);
ex = path[depth].p_ext;
- BUG_ON(path[depth].p_hdr == NULL);
+ if (unlikely(path[depth].p_hdr == NULL)) {
+ EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+ return -EIO;
+ }
/* try to insert block into found extent and return */
- if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
+ if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
+ && ext4_can_extents_be_merged(inode, ex, newext)) {
ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext),
@@ -1722,7 +1830,8 @@ has_space:
merge:
/* try to merge extents to the right */
- ext4_ext_try_to_merge(inode, path, nearex);
+ if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
+ ext4_ext_try_to_merge(inode, path, nearex);
/* try to merge extents to the left */
@@ -1759,7 +1868,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
while (block < last && block != EXT_MAX_BLOCK) {
num = last - block;
/* find extent for this block */
+ down_read(&EXT4_I(inode)->i_data_sem);
path = ext4_ext_find_extent(inode, block, path);
+ up_read(&EXT4_I(inode)->i_data_sem);
if (IS_ERR(path)) {
err = PTR_ERR(path);
path = NULL;
@@ -1767,7 +1878,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
}
depth = ext_depth(inode);
- BUG_ON(path[depth].p_hdr == NULL);
+ if (unlikely(path[depth].p_hdr == NULL)) {
+ EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+ err = -EIO;
+ break;
+ }
ex = path[depth].p_ext;
next = ext4_ext_next_allocated_block(path);
@@ -1818,7 +1933,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
}
- BUG_ON(cbex.ec_len == 0);
+ if (unlikely(cbex.ec_len == 0)) {
+ EXT4_ERROR_INODE(inode, "cbex.ec_len == 0");
+ err = -EIO;
+ break;
+ }
err = func(inode, path, &cbex, ex, cbdata);
ext4_ext_drop_refs(path);
@@ -1932,7 +2051,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
cex->ec_type != EXT4_EXT_CACHE_EXTENT);
- if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) {
+ if (in_range(block, cex->ec_block, cex->ec_len)) {
ex->ee_block = cpu_to_le32(cex->ec_block);
ext4_ext_store_pblock(ex, cex->ec_start);
ex->ee_len = cpu_to_le16(cex->ec_len);
@@ -1955,14 +2074,16 @@ errout:
static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path)
{
- struct buffer_head *bh;
int err;
ext4_fsblk_t leaf;
/* free index block */
path--;
leaf = idx_pblock(path->p_idx);
- BUG_ON(path->p_hdr->eh_entries == 0);
+ if (unlikely(path->p_hdr->eh_entries == 0)) {
+ EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
+ return -EIO;
+ }
err = ext4_ext_get_access(handle, inode, path);
if (err)
return err;
@@ -1971,9 +2092,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
if (err)
return err;
ext_debug("index is empty, remove it, free block %llu\n", leaf);
- bh = sb_find_get_block(inode->i_sb, leaf);
- ext4_forget(handle, 1, inode, bh, leaf);
- ext4_free_blocks(handle, inode, leaf, 1, 1);
+ ext4_free_blocks(handle, inode, 0, leaf, 1,
+ EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
return err;
}
@@ -2040,12 +2160,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
struct ext4_extent *ex,
ext4_lblk_t from, ext4_lblk_t to)
{
- struct buffer_head *bh;
unsigned short ee_len = ext4_ext_get_actual_len(ex);
- int i, metadata = 0;
+ int flags = EXT4_FREE_BLOCKS_FORGET;
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- metadata = 1;
+ flags |= EXT4_FREE_BLOCKS_METADATA;
#ifdef EXTENTS_STATS
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2070,11 +2189,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
num = le32_to_cpu(ex->ee_block) + ee_len - from;
start = ext_pblock(ex) + ee_len - num;
ext_debug("free last %u blocks starting %llu\n", num, start);
- for (i = 0; i < num; i++) {
- bh = sb_find_get_block(inode->i_sb, start + i);
- ext4_forget(handle, 0, inode, bh, start + i);
- }
- ext4_free_blocks(handle, inode, start, num, metadata);
+ ext4_free_blocks(handle, inode, 0, start, num, flags);
} else if (from == le32_to_cpu(ex->ee_block)
&& to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -2106,8 +2221,10 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
if (!path[depth].p_hdr)
path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
eh = path[depth].p_hdr;
- BUG_ON(eh == NULL);
-
+ if (unlikely(path[depth].p_hdr == NULL)) {
+ EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+ return -EIO;
+ }
/* find where to start removing */
ex = EXT_LAST_EXTENT(eh);
@@ -2165,7 +2282,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
correct_index = 1;
credits += (ext_depth(inode)) + 1;
}
- credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
+ credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
err = ext4_ext_truncate_extend_restart(handle, inode, credits);
if (err)
@@ -2378,6 +2495,7 @@ void ext4_ext_init(struct super_block *sb)
*/
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
+#if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
printk(KERN_INFO "EXT4-fs: file extents enabled");
#ifdef AGGRESSIVE_TEST
printk(", aggressive tests");
@@ -2389,6 +2507,7 @@ void ext4_ext_init(struct super_block *sb)
printk(", stats");
#endif
printk("\n");
+#endif
#ifdef EXTENTS_STATS
spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
EXT4_SB(sb)->s_ext_min = 1 << 30;
@@ -2490,7 +2609,6 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
}
#define EXT4_EXT_ZERO_LEN 7
-
/*
* This function is called by ext4_ext_get_blocks() if someone tries to write
* to an uninitialized extent. It may result in splitting the uninitialized
@@ -2583,7 +2701,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex3->ee_block = cpu_to_le32(iblock);
ext4_ext_store_pblock(ex3, newblock);
ex3->ee_len = cpu_to_le16(allocated);
- err = ext4_ext_insert_extent(handle, inode, path, ex3);
+ err = ext4_ext_insert_extent(handle, inode, path,
+ ex3, 0);
if (err == -ENOSPC) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
@@ -2639,7 +2758,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ext4_ext_store_pblock(ex3, newblock + max_blocks);
ex3->ee_len = cpu_to_le16(allocated - max_blocks);
ext4_ext_mark_uninitialized(ex3);
- err = ext4_ext_insert_extent(handle, inode, path, ex3);
+ err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
if (err == -ENOSPC) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
@@ -2757,7 +2876,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
err = ext4_ext_dirty(handle, inode, path + depth);
goto out;
insert:
- err = ext4_ext_insert_extent(handle, inode, path, &newex);
+ err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
if (err == -ENOSPC) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
@@ -2785,6 +2904,369 @@ fix_extent_len:
}
/*
+ * This function is called by ext4_ext_get_blocks() from
+ * ext4_get_blocks_dio_write() when DIO to write
+ * to an uninitialized extent.
+ *
+ * Writing to an uninitized extent may result in splitting the uninitialized
+ * extent into multiple /intialized unintialized extents (up to three)
+ * There are three possibilities:
+ * a> There is no split required: Entire extent should be uninitialized
+ * b> Splits in two extents: Write is happening at either end of the extent
+ * c> Splits in three extents: Somone is writing in middle of the extent
+ *
+ * One of more index blocks maybe needed if the extent tree grow after
+ * the unintialized extent split. To prevent ENOSPC occur at the IO
+ * complete, we need to split the uninitialized extent before DIO submit
+ * the IO. The uninitilized extent called at this time will be split
+ * into three uninitialized extent(at most). After IO complete, the part
+ * being filled will be convert to initialized by the end_io callback function
+ * via ext4_convert_unwritten_extents().
+ *
+ * Returns the size of uninitialized extent to be written on success.
+ */
+static int ext4_split_unwritten_extents(handle_t *handle,
+ struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t iblock,
+ unsigned int max_blocks,
+ int flags)
+{
+ struct ext4_extent *ex, newex, orig_ex;
+ struct ext4_extent *ex1 = NULL;
+ struct ext4_extent *ex2 = NULL;
+ struct ext4_extent *ex3 = NULL;
+ struct ext4_extent_header *eh;
+ ext4_lblk_t ee_block;
+ unsigned int allocated, ee_len, depth;
+ ext4_fsblk_t newblock;
+ int err = 0;
+
+ ext_debug("ext4_split_unwritten_extents: inode %lu,"
+ "iblock %llu, max_blocks %u\n", inode->i_ino,
+ (unsigned long long)iblock, max_blocks);
+ depth = ext_depth(inode);
+ eh = path[depth].p_hdr;
+ ex = path[depth].p_ext;
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_len = ext4_ext_get_actual_len(ex);
+ allocated = ee_len - (iblock - ee_block);
+ newblock = iblock - ee_block + ext_pblock(ex);
+ ex2 = ex;
+ orig_ex.ee_block = ex->ee_block;
+ orig_ex.ee_len = cpu_to_le16(ee_len);
+ ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+
+ /*
+ * If the uninitialized extent begins at the same logical
+ * block where the write begins, and the write completely
+ * covers the extent, then we don't need to split it.
+ */
+ if ((iblock == ee_block) && (allocated <= max_blocks))
+ return allocated;
+
+ err = ext4_ext_get_access(handle, inode, path + depth);
+ if (err)
+ goto out;
+ /* ex1: ee_block to iblock - 1 : uninitialized */
+ if (iblock > ee_block) {
+ ex1 = ex;
+ ex1->ee_len = cpu_to_le16(iblock - ee_block);
+ ext4_ext_mark_uninitialized(ex1);
+ ex2 = &newex;
+ }
+ /*
+ * for sanity, update the length of the ex2 extent before
+ * we insert ex3, if ex1 is NULL. This is to avoid temporary
+ * overlap of blocks.
+ */
+ if (!ex1 && allocated > max_blocks)
+ ex2->ee_len = cpu_to_le16(max_blocks);
+ /* ex3: to ee_block + ee_len : uninitialised */
+ if (allocated > max_blocks) {
+ unsigned int newdepth;
+ ex3 = &newex;
+ ex3->ee_block = cpu_to_le32(iblock + max_blocks);
+ ext4_ext_store_pblock(ex3, newblock + max_blocks);
+ ex3->ee_len = cpu_to_le16(allocated - max_blocks);
+ ext4_ext_mark_uninitialized(ex3);
+ err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
+ if (err == -ENOSPC) {
+ err = ext4_ext_zeroout(inode, &orig_ex);
+ if (err)
+ goto fix_extent_len;
+ /* update the extent length and mark as initialized */
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_dirty(handle, inode, path + depth);
+ /* zeroed the full extent */
+ /* blocks available from iblock */
+ return allocated;
+
+ } else if (err)
+ goto fix_extent_len;
+ /*
+ * The depth, and hence eh & ex might change
+ * as part of the insert above.
+ */
+ newdepth = ext_depth(inode);
+ /*
+ * update the extent length after successful insert of the
+ * split extent
+ */
+ orig_ex.ee_len = cpu_to_le16(ee_len -
+ ext4_ext_get_actual_len(ex3));
+ depth = newdepth;
+ ext4_ext_drop_refs(path);
+ path = ext4_ext_find_extent(inode, iblock, path);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ goto out;
+ }
+ eh = path[depth].p_hdr;
+ ex = path[depth].p_ext;
+ if (ex2 != &newex)
+ ex2 = ex;
+
+ err = ext4_ext_get_access(handle, inode, path + depth);
+ if (err)
+ goto out;
+
+ allocated = max_blocks;
+ }
+ /*
+ * If there was a change of depth as part of the
+ * insertion of ex3 above, we need to update the length
+ * of the ex1 extent again here
+ */
+ if (ex1 && ex1 != ex) {
+ ex1 = ex;
+ ex1->ee_len = cpu_to_le16(iblock - ee_block);
+ ext4_ext_mark_uninitialized(ex1);
+ ex2 = &newex;
+ }
+ /*
+ * ex2: iblock to iblock + maxblocks-1 : to be direct IO written,
+ * uninitialised still.
+ */
+ ex2->ee_block = cpu_to_le32(iblock);
+ ext4_ext_store_pblock(ex2, newblock);
+ ex2->ee_len = cpu_to_le16(allocated);
+ ext4_ext_mark_uninitialized(ex2);
+ if (ex2 != ex)
+ goto insert;
+ /* Mark modified extent as dirty */
+ err = ext4_ext_dirty(handle, inode, path + depth);
+ ext_debug("out here\n");
+ goto out;
+insert:
+ err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
+ if (err == -ENOSPC) {
+ err = ext4_ext_zeroout(inode, &orig_ex);
+ if (err)
+ goto fix_extent_len;
+ /* update the extent length and mark as initialized */
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_dirty(handle, inode, path + depth);
+ /* zero out the first half */
+ return allocated;
+ } else if (err)
+ goto fix_extent_len;
+out:
+ ext4_ext_show_leaf(inode, path);
+ return err ? err : allocated;
+
+fix_extent_len:
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_mark_uninitialized(ex);
+ ext4_ext_dirty(handle, inode, path + depth);
+ return err;
+}
+static int ext4_convert_unwritten_extents_endio(handle_t *handle,
+ struct inode *inode,
+ struct ext4_ext_path *path)
+{
+ struct ext4_extent *ex;
+ struct ext4_extent_header *eh;
+ int depth;
+ int err = 0;
+ int ret = 0;
+
+ depth = ext_depth(inode);
+ eh = path[depth].p_hdr;
+ ex = path[depth].p_ext;
+
+ err = ext4_ext_get_access(handle, inode, path + depth);
+ if (err)
+ goto out;
+ /* first mark the extent as initialized */
+ ext4_ext_mark_initialized(ex);
+
+ /*
+ * We have to see if it can be merged with the extent
+ * on the left.
+ */
+ if (ex > EXT_FIRST_EXTENT(eh)) {
+ /*
+ * To merge left, pass "ex - 1" to try_to_merge(),
+ * since it merges towards right _only_.
+ */
+ ret = ext4_ext_try_to_merge(inode, path, ex - 1);
+ if (ret) {
+ err = ext4_ext_correct_indexes(handle, inode, path);
+ if (err)
+ goto out;
+ depth = ext_depth(inode);
+ ex--;
+ }
+ }
+ /*
+ * Try to Merge towards right.
+ */
+ ret = ext4_ext_try_to_merge(inode, path, ex);
+ if (ret) {
+ err = ext4_ext_correct_indexes(handle, inode, path);
+ if (err)
+ goto out;
+ depth = ext_depth(inode);
+ }
+ /* Mark modified extent as dirty */
+ err = ext4_ext_dirty(handle, inode, path + depth);
+out:
+ ext4_ext_show_leaf(inode, path);
+ return err;
+}
+
+static void unmap_underlying_metadata_blocks(struct block_device *bdev,
+ sector_t block, int count)
+{
+ int i;
+ for (i = 0; i < count; i++)
+ unmap_underlying_metadata(bdev, block + i);
+}
+
+static int
+ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
+ ext4_lblk_t iblock, unsigned int max_blocks,
+ struct ext4_ext_path *path, int flags,
+ unsigned int allocated, struct buffer_head *bh_result,
+ ext4_fsblk_t newblock)
+{
+ int ret = 0;
+ int err = 0;
+ ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
+
+ ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical"
+ "block %llu, max_blocks %u, flags %d, allocated %u",
+ inode->i_ino, (unsigned long long)iblock, max_blocks,
+ flags, allocated);
+ ext4_ext_show_leaf(inode, path);
+
+ /* get_block() before submit the IO, split the extent */
+ if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
+ ret = ext4_split_unwritten_extents(handle,
+ inode, path, iblock,
+ max_blocks, flags);
+ /*
+ * Flag the inode(non aio case) or end_io struct (aio case)
+ * that this IO needs to convertion to written when IO is
+ * completed
+ */
+ if (io)
+ io->flag = EXT4_IO_UNWRITTEN;
+ else
+ ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
+ if (ext4_should_dioread_nolock(inode))
+ set_buffer_uninit(bh_result);
+ goto out;
+ }
+ /* IO end_io complete, convert the filled extent to written */
+ if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
+ ret = ext4_convert_unwritten_extents_endio(handle, inode,
+ path);
+ if (ret >= 0)
+ ext4_update_inode_fsync_trans(handle, inode, 1);
+ goto out2;
+ }
+ /* buffered IO case */
+ /*
+ * repeat fallocate creation request
+ * we already have an unwritten extent
+ */
+ if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
+ goto map_out;
+
+ /* buffered READ or buffered write_begin() lookup */
+ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
+ /*
+ * We have blocks reserved already. We
+ * return allocated blocks so that delalloc
+ * won't do block reservation for us. But
+ * the buffer head will be unmapped so that
+ * a read from the block returns 0s.
+ */
+ set_buffer_unwritten(bh_result);
+ goto out1;
+ }
+
+ /* buffered write, writepage time, convert*/
+ ret = ext4_ext_convert_to_initialized(handle, inode,
+ path, iblock,
+ max_blocks);
+ if (ret >= 0)
+ ext4_update_inode_fsync_trans(handle, inode, 1);
+out:
+ if (ret <= 0) {
+ err = ret;
+ goto out2;
+ } else
+ allocated = ret;
+ set_buffer_new(bh_result);
+ /*
+ * if we allocated more blocks than requested
+ * we need to make sure we unmap the extra block
+ * allocated. The actual needed block will get
+ * unmapped later when we find the buffer_head marked
+ * new.
+ */
+ if (allocated > max_blocks) {
+ unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
+ newblock + max_blocks,
+ allocated - max_blocks);
+ allocated = max_blocks;
+ }
+
+ /*
+ * If we have done fallocate with the offset that is already
+ * delayed allocated, we would have block reservation
+ * and quota reservation done in the delayed write path.
+ * But fallocate would have already updated quota and block
+ * count for this offset. So cancel these reservation
+ */
+ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+ ext4_da_update_reserve_space(inode, allocated, 0);
+
+map_out:
+ set_buffer_mapped(bh_result);
+out1:
+ if (allocated > max_blocks)
+ allocated = max_blocks;
+ ext4_ext_show_leaf(inode, path);
+ bh_result->b_bdev = inode->i_sb->s_bdev;
+ bh_result->b_blocknr = newblock;
+out2:
+ if (path) {
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ }
+ return err ? err : allocated;
+}
+/*
* Block allocation/map/preallocation routine for extents based files
*
*
@@ -2809,11 +3291,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
{
struct ext4_ext_path *path = NULL;
struct ext4_extent_header *eh;
- struct ext4_extent newex, *ex;
+ struct ext4_extent newex, *ex, *last_ex;
ext4_fsblk_t newblock;
int err = 0, depth, ret, cache_type;
unsigned int allocated = 0;
struct ext4_allocation_request ar;
+ ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
__clear_bit(BH_New, &bh_result->b_state);
ext_debug("blocks %u/%u requested for inode %lu\n",
@@ -2860,7 +3343,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
* this situation is possible, though, _during_ tree modification;
* this is why assert can't be put in ext4_ext_find_extent()
*/
- BUG_ON(path[depth].p_ext == NULL && depth != 0);
+ if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
+ EXT4_ERROR_INODE(inode, "bad extent address "
+ "iblock: %d, depth: %d pblock %lld",
+ iblock, depth, path[depth].p_block);
+ err = -EIO;
+ goto out2;
+ }
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
@@ -2875,7 +3364,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
*/
ee_len = ext4_ext_get_actual_len(ex);
/* if found extent covers block, simply return it */
- if (iblock >= ee_block && iblock < ee_block + ee_len) {
+ if (in_range(iblock, ee_block, ee_len)) {
newblock = iblock - ee_block + ee_start;
/* number of remaining blocks in the extent */
allocated = ee_len - (iblock - ee_block);
@@ -2889,33 +3378,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
EXT4_EXT_CACHE_EXTENT);
goto out;
}
- if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
- goto out;
- if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
- if (allocated > max_blocks)
- allocated = max_blocks;
- /*
- * We have blocks reserved already. We
- * return allocated blocks so that delalloc
- * won't do block reservation for us. But
- * the buffer head will be unmapped so that
- * a read from the block returns 0s.
- */
- set_buffer_unwritten(bh_result);
- bh_result->b_bdev = inode->i_sb->s_bdev;
- bh_result->b_blocknr = newblock;
- goto out2;
- }
-
- ret = ext4_ext_convert_to_initialized(handle, inode,
- path, iblock,
- max_blocks);
- if (ret <= 0) {
- err = ret;
- goto out2;
- } else
- allocated = ret;
- goto outnew;
+ ret = ext4_ext_handle_uninitialized_extents(handle,
+ inode, iblock, max_blocks, path,
+ flags, allocated, bh_result, newblock);
+ return ret;
}
}
@@ -2986,29 +3452,75 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
/* try to insert new extent into found leaf and return */
ext4_ext_store_pblock(&newex, newblock);
newex.ee_len = cpu_to_le16(ar.len);
- if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) /* Mark uninitialized */
+ /* Mark uninitialized */
+ if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
ext4_ext_mark_uninitialized(&newex);
- err = ext4_ext_insert_extent(handle, inode, path, &newex);
+ /*
+ * io_end structure was created for every IO write to an
+ * uninitialized extent. To avoid unecessary conversion,
+ * here we flag the IO that really needs the conversion.
+ * For non asycn direct IO case, flag the inode state
+ * that we need to perform convertion when IO is done.
+ */
+ if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
+ if (io)
+ io->flag = EXT4_IO_UNWRITTEN;
+ else
+ ext4_set_inode_state(inode,
+ EXT4_STATE_DIO_UNWRITTEN);
+ }
+ if (ext4_should_dioread_nolock(inode))
+ set_buffer_uninit(bh_result);
+ }
+
+ if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) {
+ if (unlikely(!eh->eh_entries)) {
+ EXT4_ERROR_INODE(inode,
+ "eh->eh_entries == 0 ee_block %d",
+ ex->ee_block);
+ err = -EIO;
+ goto out2;
+ }
+ last_ex = EXT_LAST_EXTENT(eh);
+ if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
+ + ext4_ext_get_actual_len(last_ex))
+ EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
+ }
+ err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (err) {
/* free data blocks we just allocated */
/* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free() */
ext4_discard_preallocations(inode);
- ext4_free_blocks(handle, inode, ext_pblock(&newex),
- ext4_ext_get_actual_len(&newex), 0);
+ ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
+ ext4_ext_get_actual_len(&newex), 0);
goto out2;
}
/* previous routine could use block we allocated */
newblock = ext_pblock(&newex);
allocated = ext4_ext_get_actual_len(&newex);
-outnew:
+ if (allocated > max_blocks)
+ allocated = max_blocks;
set_buffer_new(bh_result);
- /* Cache only when it is _not_ an uninitialized extent */
- if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
+ /*
+ * Update reserved blocks/metadata blocks after successful
+ * block allocation which had been deferred till now.
+ */
+ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+ ext4_da_update_reserve_space(inode, allocated, 1);
+
+ /*
+ * Cache the extent and update transaction to commit on fdatasync only
+ * when it is _not_ an uninitialized extent.
+ */
+ if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
EXT4_EXT_CACHE_EXTENT);
+ ext4_update_inode_fsync_trans(handle, inode, 1);
+ } else
+ ext4_update_inode_fsync_trans(handle, inode, 0);
out:
if (allocated > max_blocks)
allocated = max_blocks;
@@ -3107,6 +3619,13 @@ static void ext4_falloc_update_inode(struct inode *inode,
i_size_write(inode, new_size);
if (new_size > EXT4_I(inode)->i_disksize)
ext4_update_i_disksize(inode, new_size);
+ } else {
+ /*
+ * Mark that we allocate beyond EOF so the subsequent truncate
+ * can proceed even if the new size is the same as i_size.
+ */
+ if (new_size > i_size_read(inode))
+ EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL;
}
}
@@ -3201,6 +3720,64 @@ retry:
}
/*
+ * This function convert a range of blocks to written extents
+ * The caller of this function will pass the start offset and the size.
+ * all unwritten extents within this range will be converted to
+ * written extents.
+ *
+ * This function is called from the direct IO end io call back
+ * function, to convert the fallocated extents after IO is completed.
+ * Returns 0 on success.
+ */
+int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
+ ssize_t len)
+{
+ handle_t *handle;
+ ext4_lblk_t block;
+ unsigned int max_blocks;
+ int ret = 0;
+ int ret2 = 0;
+ struct buffer_head map_bh;
+ unsigned int credits, blkbits = inode->i_blkbits;
+
+ block = offset >> blkbits;
+ /*
+ * We can't just convert len to max_blocks because
+ * If blocksize = 4096 offset = 3072 and len = 2048
+ */
+ max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
+ - block;
+ /*
+ * credits to insert 1 extent into extent tree
+ */
+ credits = ext4_chunk_trans_blocks(inode, max_blocks);
+ while (ret >= 0 && ret < max_blocks) {
+ block = block + ret;
+ max_blocks = max_blocks - ret;
+ handle = ext4_journal_start(inode, credits);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ break;
+ }
+ map_bh.b_state = 0;
+ ret = ext4_get_blocks(handle, inode, block,
+ max_blocks, &map_bh,
+ EXT4_GET_BLOCKS_IO_CONVERT_EXT);
+ if (ret <= 0) {
+ WARN_ON(ret <= 0);
+ printk(KERN_ERR "%s: ext4_ext_get_blocks "
+ "returned error inode#%lu, block=%u, "
+ "max_blocks=%u", __func__,
+ inode->i_ino, block, max_blocks);
+ }
+ ext4_mark_inode_dirty(handle, inode);
+ ret2 = ext4_journal_stop(handle);
+ if (ret <= 0 || ret2 )
+ break;
+ }
+ return ret > 0 ? ret2 : ret;
+}
+/*
* Callback function called for each extent to gather FIEMAP information.
*/
static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
@@ -3289,7 +3866,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
int error = 0;
/* in-inode? */
- if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) {
+ if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
struct ext4_iloc iloc;
int offset; /* offset of xattr in inode */
@@ -3317,7 +3894,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
ext4_lblk_t start_blk;
- ext4_lblk_t len_blks;
int error = 0;
/* fallback to generic here if not in extents fmt */
@@ -3331,17 +3907,21 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
error = ext4_xattr_fiemap(inode, fieinfo);
} else {
+ ext4_lblk_t len_blks;
+ __u64 last_blk;
+
start_blk = start >> inode->i_sb->s_blocksize_bits;
- len_blks = len >> inode->i_sb->s_blocksize_bits;
+ last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
+ if (last_blk >= EXT_MAX_BLOCK)
+ last_blk = EXT_MAX_BLOCK-1;
+ len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
/*
* Walk the extent tree gathering extent information.
* ext4_ext_fiemap_cb will push extents back to user.
*/
- down_read(&EXT4_I(inode)->i_data_sem);
error = ext4_ext_walk_space(inode, start_blk, len_blks,
ext4_ext_fiemap_cb, fieinfo);
- up_read(&EXT4_I(inode)->i_data_sem);
}
return error;