From c0b124054f9e42eb6da545a10fe9122a7d7c3f72 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:11:27 -0700 Subject: mm/hmm/mirror: mirror process address space on device with HMM helpers This is a heterogeneous memory management (HMM) process address space mirroring. In a nutshell this provide an API to mirror process address space on a device. This boils down to keeping CPU and device page table synchronize (we assume that both device and CPU are cache coherent like PCIe device can be). This patch provide a simple API for device driver to achieve address space mirroring thus avoiding each device driver to grow its own CPU page table walker and its own CPU page table synchronization mechanism. This is useful for NVidia GPU >= Pascal, Mellanox IB >= mlx5 and more hardware in the future. [jglisse@redhat.com: fix hmm for "mmu_notifier kill invalidate_page callback"] Link: http://lkml.kernel.org/r/20170830231955.GD9445@redhat.com Link: http://lkml.kernel.org/r/20170817000548.32038-4-jglisse@redhat.com Signed-off-by: Jérôme Glisse Signed-off-by: Evgeny Baskakov Signed-off-by: John Hubbard Signed-off-by: Mark Hairgrove Signed-off-by: Sherry Cheung Signed-off-by: Subhash Gutti Cc: Aneesh Kumar Cc: Balbir Singh Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: David Nellans Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Paul E. McKenney Cc: Ross Zwisler Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hmm.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 138 insertions(+), 15 deletions(-) (limited to 'mm/hmm.c') diff --git a/mm/hmm.c b/mm/hmm.c index de032ff9e576..d37daf9edcd3 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -21,16 +21,27 @@ #include #include #include +#include #ifdef CONFIG_HMM +static const struct mmu_notifier_ops hmm_mmu_notifier_ops; + /* * struct hmm - HMM per mm struct * * @mm: mm struct this HMM struct is bound to + * @sequence: we track updates to the CPU page table with a sequence number + * @mirrors: list of mirrors for this mm + * @mmu_notifier: mmu notifier to track updates to CPU page table + * @mirrors_sem: read/write semaphore protecting the mirrors list */ struct hmm { struct mm_struct *mm; + atomic_t sequence; + struct list_head mirrors; + struct mmu_notifier mmu_notifier; + struct rw_semaphore mirrors_sem; }; /* @@ -43,27 +54,48 @@ struct hmm { */ static struct hmm *hmm_register(struct mm_struct *mm) { - if (!mm->hmm) { - struct hmm *hmm = NULL; - - hmm = kmalloc(sizeof(*hmm), GFP_KERNEL); - if (!hmm) - return NULL; - hmm->mm = mm; - - spin_lock(&mm->page_table_lock); - if (!mm->hmm) - mm->hmm = hmm; - else - kfree(hmm); - spin_unlock(&mm->page_table_lock); - } + struct hmm *hmm = READ_ONCE(mm->hmm); + bool cleanup = false; /* * The hmm struct can only be freed once the mm_struct goes away, * hence we should always have pre-allocated an new hmm struct * above. */ + if (hmm) + return hmm; + + hmm = kmalloc(sizeof(*hmm), GFP_KERNEL); + if (!hmm) + return NULL; + INIT_LIST_HEAD(&hmm->mirrors); + init_rwsem(&hmm->mirrors_sem); + atomic_set(&hmm->sequence, 0); + hmm->mmu_notifier.ops = NULL; + hmm->mm = mm; + + /* + * We should only get here if hold the mmap_sem in write mode ie on + * registration of first mirror through hmm_mirror_register() + */ + hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops; + if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) { + kfree(hmm); + return NULL; + } + + spin_lock(&mm->page_table_lock); + if (!mm->hmm) + mm->hmm = hmm; + else + cleanup = true; + spin_unlock(&mm->page_table_lock); + + if (cleanup) { + mmu_notifier_unregister(&hmm->mmu_notifier, mm); + kfree(hmm); + } + return mm->hmm; } @@ -72,3 +104,94 @@ void hmm_mm_destroy(struct mm_struct *mm) kfree(mm->hmm); } #endif /* CONFIG_HMM */ + +#if IS_ENABLED(CONFIG_HMM_MIRROR) +static void hmm_invalidate_range(struct hmm *hmm, + enum hmm_update_type action, + unsigned long start, + unsigned long end) +{ + struct hmm_mirror *mirror; + + down_read(&hmm->mirrors_sem); + list_for_each_entry(mirror, &hmm->mirrors, list) + mirror->ops->sync_cpu_device_pagetables(mirror, action, + start, end); + up_read(&hmm->mirrors_sem); +} + +static void hmm_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct hmm *hmm = mm->hmm; + + VM_BUG_ON(!hmm); + + atomic_inc(&hmm->sequence); +} + +static void hmm_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct hmm *hmm = mm->hmm; + + VM_BUG_ON(!hmm); + + hmm_invalidate_range(mm->hmm, HMM_UPDATE_INVALIDATE, start, end); +} + +static const struct mmu_notifier_ops hmm_mmu_notifier_ops = { + .invalidate_range_start = hmm_invalidate_range_start, + .invalidate_range_end = hmm_invalidate_range_end, +}; + +/* + * hmm_mirror_register() - register a mirror against an mm + * + * @mirror: new mirror struct to register + * @mm: mm to register against + * + * To start mirroring a process address space, the device driver must register + * an HMM mirror struct. + * + * THE mm->mmap_sem MUST BE HELD IN WRITE MODE ! + */ +int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm) +{ + /* Sanity check */ + if (!mm || !mirror || !mirror->ops) + return -EINVAL; + + mirror->hmm = hmm_register(mm); + if (!mirror->hmm) + return -ENOMEM; + + down_write(&mirror->hmm->mirrors_sem); + list_add(&mirror->list, &mirror->hmm->mirrors); + up_write(&mirror->hmm->mirrors_sem); + + return 0; +} +EXPORT_SYMBOL(hmm_mirror_register); + +/* + * hmm_mirror_unregister() - unregister a mirror + * + * @mirror: new mirror struct to register + * + * Stop mirroring a process address space, and cleanup. + */ +void hmm_mirror_unregister(struct hmm_mirror *mirror) +{ + struct hmm *hmm = mirror->hmm; + + down_write(&hmm->mirrors_sem); + list_del(&mirror->list); + up_write(&hmm->mirrors_sem); +} +EXPORT_SYMBOL(hmm_mirror_unregister); +#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -- cgit v1.2.3-55-g7522