summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/volumes.c
diff options
context:
space:
mode:
authorLiu Bo2015-07-17 10:49:19 +0200
committerDavid Sterba2016-02-23 13:10:10 +0100
commit73beece9ca07c003e0e4f4825b12be167334d4ad (patch)
treeb0575466fc5e0c7a493826963e7a4ffa49e31210 /fs/btrfs/volumes.c
parentLinux 4.5-rc4 (diff)
downloadkernel-qcow2-linux-73beece9ca07c003e0e4f4825b12be167334d4ad.tar.gz
kernel-qcow2-linux-73beece9ca07c003e0e4f4825b12be167334d4ad.tar.xz
kernel-qcow2-linux-73beece9ca07c003e0e4f4825b12be167334d4ad.zip
Btrfs: fix lockdep deadlock warning due to dev_replace
Xfstests btrfs/011 complains about a deadlock warning, [ 1226.649039] ========================================================= [ 1226.649039] [ INFO: possible irq lock inversion dependency detected ] [ 1226.649039] 4.1.0+ #270 Not tainted [ 1226.649039] --------------------------------------------------------- [ 1226.652955] kswapd0/46 just changed the state of lock: [ 1226.652955] (&delayed_node->mutex){+.+.-.}, at: [<ffffffff81458735>] __btrfs_release_delayed_node+0x45/0x1d0 [ 1226.652955] but this lock took another, RECLAIM_FS-unsafe lock in the past: [ 1226.652955] (&fs_info->dev_replace.lock){+.+.+.} and interrupts could create inverse lock ordering between them. [ 1226.652955] other info that might help us debug this: [ 1226.652955] Chain exists of: &delayed_node->mutex --> &found->groups_sem --> &fs_info->dev_replace.lock [ 1226.652955] Possible interrupt unsafe locking scenario: [ 1226.652955] CPU0 CPU1 [ 1226.652955] ---- ---- [ 1226.652955] lock(&fs_info->dev_replace.lock); [ 1226.652955] local_irq_disable(); [ 1226.652955] lock(&delayed_node->mutex); [ 1226.652955] lock(&found->groups_sem); [ 1226.652955] <Interrupt> [ 1226.652955] lock(&delayed_node->mutex); [ 1226.652955] *** DEADLOCK *** Commit 084b6e7c7607 ("btrfs: Fix a lockdep warning when running xfstest.") tried to fix a similar one that has the exactly same warning, but with that, we still run to this. The above lock chain comes from btrfs_commit_transaction ->btrfs_run_delayed_items ... ->__btrfs_update_delayed_inode ... ->__btrfs_cow_block ... ->find_free_extent ->cache_block_group ->load_free_space_cache ->btrfs_readpages ->submit_one_bio ... ->__btrfs_map_block ->btrfs_dev_replace_lock However, with high memory pressure, tasks which hold dev_replace.lock can be interrupted by kswapd and then kswapd is intended to release memory occupied by superblock, inodes and dentries, where we may call evict_inode, and it comes to [ 1226.652955] [<ffffffff81458735>] __btrfs_release_delayed_node+0x45/0x1d0 [ 1226.652955] [<ffffffff81459e74>] btrfs_remove_delayed_node+0x24/0x30 [ 1226.652955] [<ffffffff8140c5fe>] btrfs_evict_inode+0x34e/0x700 delayed_node->mutex may be acquired in __btrfs_release_delayed_node(), and it leads to a ABBA deadlock. To fix this, we can use "blocking rwlock" used in the case of extent_buffer, but things are simpler here since we only needs read's spinlock to blocking lock. With this, btrfs/011 no more produces warnings in dmesg. Signed-off-by: Liu Bo <bo.li.liu@oracle.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r--fs/btrfs/volumes.c24
1 files changed, 14 insertions, 10 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 366b335946fa..858a2e489977 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1714,12 +1714,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
} while (read_seqretry(&root->fs_info->profiles_lock, seq));
num_devices = root->fs_info->fs_devices->num_devices;
- btrfs_dev_replace_lock(&root->fs_info->dev_replace);
+ btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
WARN_ON(num_devices < 1);
num_devices--;
}
- btrfs_dev_replace_unlock(&root->fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0);
if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET;
@@ -3686,12 +3686,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
}
num_devices = fs_info->fs_devices->num_devices;
- btrfs_dev_replace_lock(&fs_info->dev_replace);
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
BUG_ON(num_devices < 1);
num_devices--;
}
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (num_devices == 1)
allowed |= BTRFS_BLOCK_GROUP_DUP;
@@ -5062,10 +5062,10 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
ret = 1;
free_extent_map(em);
- btrfs_dev_replace_lock(&fs_info->dev_replace);
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
ret++;
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
return ret;
}
@@ -5325,10 +5325,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
if (!bbio_ret)
goto out;
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 0);
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
if (!dev_replace_is_ongoing)
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
+ else
+ btrfs_dev_replace_set_lock_blocking(dev_replace);
if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
@@ -5751,8 +5753,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
bbio->mirror_num = map->num_stripes + 1;
}
out:
- if (dev_replace_is_ongoing)
- btrfs_dev_replace_unlock(dev_replace);
+ if (dev_replace_is_ongoing) {
+ btrfs_dev_replace_clear_lock_blocking(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
+ }
free_extent_map(em);
return ret;
}