From 88f306b68cbb36e500da4b9601b2e3d13dd683c4 Mon Sep 17 00:00:00 2001 From: Kirill A. Shutemov Date: Fri, 15 Jan 2016 16:57:31 -0800 Subject: mm: fix locking order in mm_take_all_locks() Dmitry Vyukov has reported[1] possible deadlock (triggered by his syzkaller fuzzer): Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&hugetlbfs_i_mmap_rwsem_key); lock(&mapping->i_mmap_rwsem); lock(&hugetlbfs_i_mmap_rwsem_key); lock(&mapping->i_mmap_rwsem); Both traces points to mm_take_all_locks() as a source of the problem. It doesn't take care about ordering or hugetlbfs_i_mmap_rwsem_key (aka mapping->i_mmap_rwsem for hugetlb mapping) vs. i_mmap_rwsem. huge_pmd_share() does memory allocation under hugetlbfs_i_mmap_rwsem_key and allocator can take i_mmap_rwsem if it hit reclaim. So we need to take i_mmap_rwsem from all hugetlb VMAs before taking i_mmap_rwsem from rest of VMAs. The patch also documents locking order for hugetlbfs_i_mmap_rwsem_key. [1] http://lkml.kernel.org/r/CACT4Y+Zu95tBs-0EvdiAKzUOsb4tczRRfCRTpLr4bg_OP9HuVg@mail.gmail.com Signed-off-by: Kirill A. Shutemov Reported-by: Dmitry Vyukov Reviewed-by: Michal Hocko Cc: Peter Zijlstra Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index b3f00b616b81..84b12624ceb0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3184,10 +3184,16 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) * mapping->flags avoid to take the same lock twice, if more than one * vma in this mm is backed by the same anon_vma or address_space. * - * We can take all the locks in random order because the VM code - * taking i_mmap_rwsem or anon_vma->rwsem outside the mmap_sem never - * takes more than one of them in a row. Secondly we're protected - * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex. + * We take locks in following order, accordingly to comment at beginning + * of mm/rmap.c: + * - all hugetlbfs_i_mmap_rwsem_key locks (aka mapping->i_mmap_rwsem for + * hugetlb mapping); + * - all i_mmap_rwsem locks; + * - all anon_vma->rwseml + * + * We can take all locks within these types randomly because the VM code + * doesn't nest them and we protected from parallel mm_take_all_locks() by + * mm_all_locks_mutex. * * mm_take_all_locks() and mm_drop_all_locks are expensive operations * that may have to take thousand of locks. @@ -3206,7 +3212,16 @@ int mm_take_all_locks(struct mm_struct *mm) for (vma = mm->mmap; vma; vma = vma->vm_next) { if (signal_pending(current)) goto out_unlock; - if (vma->vm_file && vma->vm_file->f_mapping) + if (vma->vm_file && vma->vm_file->f_mapping && + is_vm_hugetlb_page(vma)) + vm_lock_mapping(mm, vma->vm_file->f_mapping); + } + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (signal_pending(current)) + goto out_unlock; + if (vma->vm_file && vma->vm_file->f_mapping && + !is_vm_hugetlb_page(vma)) vm_lock_mapping(mm, vma->vm_file->f_mapping); } -- cgit v1.2.3-55-g7522