summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c60
1 files changed, 43 insertions, 17 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 6c47f6ed3e94..6c8297bcfeb7 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3110,6 +3110,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
log->log_transid = root->log_transid;
root->log_start_pid = 0;
/*
+ * Update or create log root item under the root's log_mutex to prevent
+ * races with concurrent log syncs that can lead to failure to update
+ * log root item because it was not created yet.
+ */
+ ret = update_log_root(trans, log);
+ /*
* IO has been started, blocks of the log tree have WRITTEN flag set
* in their headers. new modifications of the log will be written to
* new positions. so it's safe to allow log writers to go in.
@@ -3128,8 +3134,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
mutex_unlock(&log_root_tree->log_mutex);
- ret = update_log_root(trans, log);
-
mutex_lock(&log_root_tree->log_mutex);
if (atomic_dec_and_test(&log_root_tree->log_writers)) {
/* atomic_dec_and_test implies a barrier */
@@ -3319,6 +3323,30 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
}
/*
+ * Check if an inode was logged in the current transaction. We can't always rely
+ * on an inode's logged_trans value, because it's an in-memory only field and
+ * therefore not persisted. This means that its value is lost if the inode gets
+ * evicted and loaded again from disk (in which case it has a value of 0, and
+ * certainly it is smaller then any possible transaction ID), when that happens
+ * the full_sync flag is set in the inode's runtime flags, so on that case we
+ * assume eviction happened and ignore the logged_trans value, assuming the
+ * worst case, that the inode was logged before in the current transaction.
+ */
+static bool inode_logged(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode)
+{
+ if (inode->logged_trans == trans->transid)
+ return true;
+
+ if (inode->last_trans == trans->transid &&
+ test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) &&
+ !test_bit(BTRFS_FS_LOG_RECOVERING, &trans->fs_info->flags))
+ return true;
+
+ return false;
+}
+
+/*
* If both a file and directory are logged, and unlinks or renames are
* mixed in, we have a few interesting corners:
*
@@ -3352,7 +3380,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
int bytes_del = 0;
u64 dir_ino = btrfs_ino(dir);
- if (dir->logged_trans < trans->transid)
+ if (!inode_logged(trans, dir))
return 0;
ret = join_running_log_trans(root);
@@ -3456,7 +3484,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
u64 index;
int ret;
- if (inode->logged_trans < trans->transid)
+ if (!inode_logged(trans, inode))
return 0;
ret = join_running_log_trans(root);
@@ -5416,9 +5444,19 @@ log_extents:
}
}
+ /*
+ * Don't update last_log_commit if we logged that an inode exists after
+ * it was loaded to memory (full_sync bit set).
+ * This is to prevent data loss when we do a write to the inode, then
+ * the inode gets evicted after all delalloc was flushed, then we log
+ * it exists (due to a rename for example) and then fsync it. This last
+ * fsync would do nothing (not logging the extents previously written).
+ */
spin_lock(&inode->lock);
inode->logged_trans = trans->transid;
- inode->last_log_commit = inode->last_sub_trans;
+ if (inode_only != LOG_INODE_EXISTS ||
+ !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
+ inode->last_log_commit = inode->last_sub_trans;
spin_unlock(&inode->lock);
out_unlock:
mutex_unlock(&inode->log_mutex);
@@ -5478,7 +5516,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
{
int ret = 0;
struct dentry *old_parent = NULL;
- struct btrfs_inode *orig_inode = inode;
/*
* for regular files, if its inode is already on disk, we don't
@@ -5498,16 +5535,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
}
while (1) {
- /*
- * If we are logging a directory then we start with our inode,
- * not our parent's inode, so we need to skip setting the
- * logged_trans so that further down in the log code we don't
- * think this inode has already been logged.
- */
- if (inode != orig_inode)
- inode->logged_trans = trans->transid;
- smp_mb();
-
if (btrfs_must_commit_transaction(trans, inode)) {
ret = 1;
break;
@@ -6384,7 +6411,6 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
* if this directory was already logged any new
* names for this file/dir will get recorded
*/
- smp_mb();
if (dir->logged_trans == trans->transid)
return;