summaryrefslogtreecommitdiffstats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
author: Eric Whitney, 2018-10-01 20:19:37 +0200
committer: Theodore Ts'o, 2018-10-01 20:19:37 +0200
commit: 0b02f4c0d6d9e2c611dfbdd4317193e9dca740e6 (patch)
tree: a07c3e6aaad97caccc4e91214e8779b0a06bd901 /fs/ext4/inode.c
parent: ext4: add new pending reservation mechanism (diff)
download: kernel-qcow2-linux-0b02f4c0d6d9e2c611dfbdd4317193e9dca740e6.tar.gz
kernel-qcow2-linux-0b02f4c0d6d9e2c611dfbdd4317193e9dca740e6.tar.xz
kernel-qcow2-linux-0b02f4c0d6d9e2c611dfbdd4317193e9dca740e6.zip
ext4: fix reserved cluster accounting at delayed write time
The code in ext4_da_map_blocks sometimes reserves space for more delayed allocated clusters than it should, resulting in premature ENOSPC, exceeded quota, and inaccurate free space reporting. Fix this by checking for written and unwritten blocks shared in the same cluster with the newly delayed allocated block. A cluster reservation should not be made for a cluster for which physical space has already been allocated.

Signed-off-by: Eric Whitney <enwlinux@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- fs/ext4/inode.c 79
1 files changed, 61 insertions, 18 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b83bf3308b5e..57c6dd38f071 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1781,6 +1781,65 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
}
/*
+ * ext4_insert_delayed_block - adds a delayed block to the extents status
+ * tree, incrementing the reserved cluster/block
+ * count or making a pending reservation
+ * where needed
+ *
+ * @inode - file containing the newly added block
+ * @lblk - logical block to be added
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int ret;
+ bool allocated = false; /* set when lblk's cluster is found to be mapped; passed to ext4_es_insert_delayed_block() */
+
+ /*
+ * If the cluster containing lblk is shared with a delayed,
+ * written, or unwritten extent in a bigalloc file system, it's
+ * already been accounted for and does not need to be reserved.
+ * A pending reservation must be made for the cluster if it's
+ * shared with a written or unwritten extent and doesn't already
+ * have one. Written and unwritten extents can be purged from the
+ * extents status tree if the system is under memory pressure, so
+ * it's necessary to examine the extent tree if a search of the
+ * extents status tree doesn't get a match.
+ */
+ if (sbi->s_cluster_ratio == 1) { /* non-bigalloc: always reserve for the new block */
+ ret = ext4_da_reserve_space(inode);
+ if (ret != 0) /* ENOSPC */
+ goto errout;
+ } else { /* bigalloc */
+ if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) {
+ if (!ext4_es_scan_clu(inode,
+ &ext4_es_is_mapped, lblk)) {
+ ret = ext4_clu_mapped(inode,
+ EXT4_B2C(sbi, lblk));
+ if (ret < 0) /* lookup failed; propagate the error */
+ goto errout;
+ if (ret == 0) { /* neither scan nor lookup matched: reserve */
+ ret = ext4_da_reserve_space(inode);
+ if (ret != 0) /* ENOSPC */
+ goto errout;
+ } else { /* positive result from ext4_clu_mapped(): no reservation */
+ allocated = true;
+ }
+ } else { /* ext4_es_is_mapped scan matched: no reservation */
+ allocated = true;
+ }
+ }
+ }
+
+ ret = ext4_es_insert_delayed_block(inode, lblk, allocated);
+
+errout: /* reached on both success and failure; ret holds the result */
+ return ret;
+}
+
+/*
* This function is grabs code from the very beginning of
* ext4_map_blocks, but assumes that the caller is from delayed write
* time. This function looks up the requested blocks and sets the
@@ -1864,25 +1923,9 @@ add_delayed:
* XXX: __block_prepare_write() unmaps passed block,
* is it OK?
*/
- /*
- * If the block was allocated from previously allocated cluster,
- * then we don't need to reserve it again. However we still need
- * to reserve metadata for every block we're going to write.
- */
- if (EXT4_SB(inode->i_sb)->s_cluster_ratio == 1 ||
- !ext4_es_scan_clu(inode,
- &ext4_es_is_delayed, map->m_lblk)) {
- ret = ext4_da_reserve_space(inode);
- if (ret) {
- /* not enough space to reserve */
- retval = ret;
- goto out_unlock;
- }
- }
- ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
- ~0, EXTENT_STATUS_DELAYED);
- if (ret) {
+ ret = ext4_insert_delayed_block(inode, map->m_lblk);
+ if (ret != 0) {
retval = ret;
goto out_unlock;
}