Message ID | 20190523153436.19102-5-jgg@ziepe.ca (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | mm/hmm: Various revisions from a locking/code review | expand |
On 5/23/19 8:34 AM, Jason Gunthorpe wrote: > From: Jason Gunthorpe <jgg@mellanox.com> > > As coded this function can false-fail in various racy situations. Make it > reliable by running only under the write side of the mmap_sem and avoiding > the false-failing compare/exchange pattern. > > Also make the locking very easy to understand by only ever reading or > writing mm->hmm while holding the write side of the mmap_sem. > > Signed-off-by: Jason Gunthorpe <jgg@mellanox.com> > --- > mm/hmm.c | 75 ++++++++++++++++++++------------------------------------ > 1 file changed, 27 insertions(+), 48 deletions(-) > > diff --git a/mm/hmm.c b/mm/hmm.c > index e27058e92508b9..ec54be54d81135 100644 > --- a/mm/hmm.c > +++ b/mm/hmm.c > @@ -40,16 +40,6 @@ > #if IS_ENABLED(CONFIG_HMM_MIRROR) > static const struct mmu_notifier_ops hmm_mmu_notifier_ops; > > -static inline struct hmm *mm_get_hmm(struct mm_struct *mm) > -{ > - struct hmm *hmm = READ_ONCE(mm->hmm); > - > - if (hmm && kref_get_unless_zero(&hmm->kref)) > - return hmm; > - > - return NULL; > -} > - > /** > * hmm_get_or_create - register HMM against an mm (HMM internal) > * > @@ -64,11 +54,20 @@ static inline struct hmm *mm_get_hmm(struct mm_struct *mm) > */ > static struct hmm *hmm_get_or_create(struct mm_struct *mm) > { > - struct hmm *hmm = mm_get_hmm(mm); > - bool cleanup = false; > + struct hmm *hmm; > > - if (hmm) > - return hmm; > + lockdep_assert_held_exclusive(mm->mmap_sem); > + > + if (mm->hmm) { > + if (kref_get_unless_zero(&mm->hmm->kref)) > + return mm->hmm; > + /* > + * The hmm is being freed by some other CPU and is pending a > + * RCU grace period, but this CPU can NULL now it since we > + * have the mmap_sem. > + */ > + mm->hmm = NULL; Shouldn't there be a "return NULL;" here so it doesn't fall through and allocate a struct hmm below? > + } > > hmm = kmalloc(sizeof(*hmm), GFP_KERNEL); > if (!hmm) > @@ -85,54 +84,34 @@ static struct hmm *hmm_get_or_create(struct mm_struct *mm) > hmm->mm = mm; > mmgrab(hmm->mm); > > - spin_lock(&mm->page_table_lock); > - if (!mm->hmm) > - mm->hmm = hmm; > - else > - cleanup = true; > - spin_unlock(&mm->page_table_lock); > - > - if (cleanup) > - goto error; > - > - /* > - * We should only get here if hold the mmap_sem in write mode ie on > - * registration of first mirror through hmm_mirror_register() > - */ > hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops; > - if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) > - goto error_mm; > + if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) { > + kfree(hmm); > + return NULL; > + } > > + mm->hmm = hmm; > return hmm; > - > -error_mm: > - spin_lock(&mm->page_table_lock); > - if (mm->hmm == hmm) > - mm->hmm = NULL; > - spin_unlock(&mm->page_table_lock); > -error: > - kfree(hmm); > - return NULL; > } > > static void hmm_fee_rcu(struct rcu_head *rcu) I see Jerome already saw and named this hmm_free_rcu() which I agree with. > { > + struct hmm *hmm = container_of(rcu, struct hmm, rcu); > + > + down_write(&hmm->mm->mmap_sem); > + if (hmm->mm->hmm == hmm) > + hmm->mm->hmm = NULL; > + up_write(&hmm->mm->mmap_sem); > + mmdrop(hmm->mm); > + > kfree(container_of(rcu, struct hmm, rcu)); > } > > static void hmm_free(struct kref *kref) > { > struct hmm *hmm = container_of(kref, struct hmm, kref); > - struct mm_struct *mm = hmm->mm; > - > - mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm); > > - spin_lock(&mm->page_table_lock); > - if (mm->hmm == hmm) > - mm->hmm = NULL; > - spin_unlock(&mm->page_table_lock); > - > - mmdrop(hmm->mm); > + mmu_notifier_unregister_no_release(&hmm->mmu_notifier, hmm->mm); > mmu_notifier_call_srcu(&hmm->rcu, hmm_fee_rcu); > } > > ----------------------------------------------------------------------------------- This email message is for the sole use of the intended recipient(s) and may contain confidential information. Any unauthorized review, use, disclosure or distribution is prohibited. If you are not the intended recipient, please contact the sender by reply email and destroy all copies of the original message. -----------------------------------------------------------------------------------
On Thu, May 23, 2019 at 04:38:28PM -0700, Ralph Campbell wrote: > > On 5/23/19 8:34 AM, Jason Gunthorpe wrote: > > From: Jason Gunthorpe <jgg@mellanox.com> > > > > As coded this function can false-fail in various racy situations. Make it > > reliable by running only under the write side of the mmap_sem and avoiding > > the false-failing compare/exchange pattern. > > > > Also make the locking very easy to understand by only ever reading or > > writing mm->hmm while holding the write side of the mmap_sem. > > > > Signed-off-by: Jason Gunthorpe <jgg@mellanox.com> > > mm/hmm.c | 75 ++++++++++++++++++++------------------------------------ > > 1 file changed, 27 insertions(+), 48 deletions(-) > > > > diff --git a/mm/hmm.c b/mm/hmm.c > > index e27058e92508b9..ec54be54d81135 100644 > > +++ b/mm/hmm.c > > @@ -40,16 +40,6 @@ > > #if IS_ENABLED(CONFIG_HMM_MIRROR) > > static const struct mmu_notifier_ops hmm_mmu_notifier_ops; > > -static inline struct hmm *mm_get_hmm(struct mm_struct *mm) > > -{ > > - struct hmm *hmm = READ_ONCE(mm->hmm); > > - > > - if (hmm && kref_get_unless_zero(&hmm->kref)) > > - return hmm; > > - > > - return NULL; > > -} > > - > > /** > > * hmm_get_or_create - register HMM against an mm (HMM internal) > > * > > @@ -64,11 +54,20 @@ static inline struct hmm *mm_get_hmm(struct mm_struct *mm) > > */ > > static struct hmm *hmm_get_or_create(struct mm_struct *mm) > > { > > - struct hmm *hmm = mm_get_hmm(mm); > > - bool cleanup = false; > > + struct hmm *hmm; > > - if (hmm) > > - return hmm; > > + lockdep_assert_held_exclusive(mm->mmap_sem); > > + > > + if (mm->hmm) { > > + if (kref_get_unless_zero(&mm->hmm->kref)) > > + return mm->hmm; > > + /* > > + * The hmm is being freed by some other CPU and is pending a > > + * RCU grace period, but this CPU can NULL now it since we > > + * have the mmap_sem. > > + */ > > + mm->hmm = NULL; > > Shouldn't there be a "return NULL;" here so it doesn't fall through and > allocate a struct hmm below? No, this function should only return NULL on memory allocation failure. In this case another thread is busy freeing the hmm but wasn't able to update mm->hmm to null due to a locking constraint. So we make it null on behalf of the other thread and allocate a fresh new hmm that is valid. The freeing thread will complete the free and do nothing with mm->hmm. > > static void hmm_fee_rcu(struct rcu_head *rcu) > > I see Jerome already saw and named this hmm_free_rcu() > which I agree with. I do love my typos :) > > { > > + struct hmm *hmm = container_of(rcu, struct hmm, rcu); > > + > > + down_write(&hmm->mm->mmap_sem); > > + if (hmm->mm->hmm == hmm) > > + hmm->mm->hmm = NULL; > > + up_write(&hmm->mm->mmap_sem); > > + mmdrop(hmm->mm); > > + > > kfree(container_of(rcu, struct hmm, rcu)); > > } > > static void hmm_free(struct kref *kref) > > { > > struct hmm *hmm = container_of(kref, struct hmm, kref); > > - struct mm_struct *mm = hmm->mm; > > - > > - mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm); > > - spin_lock(&mm->page_table_lock); > > - if (mm->hmm == hmm) > > - mm->hmm = NULL; > > - spin_unlock(&mm->page_table_lock); > > - > > - mmdrop(hmm->mm); > > + mmu_notifier_unregister_no_release(&hmm->mmu_notifier, hmm->mm); > > mmu_notifier_call_srcu(&hmm->rcu, hmm_fee_rcu); > > } > > > > This email message is for the sole use of the intended recipient(s) and may contain > confidential information. Any unauthorized review, use, disclosure or distribution > is prohibited. If you are not the intended recipient, please contact the sender by > reply email and destroy all copies of the original message. Ah, you should not send this trailer to the public mailing lists. Thanks, Jason
On 5/23/19 6:23 PM, Jason Gunthorpe wrote: > On Thu, May 23, 2019 at 04:38:28PM -0700, Ralph Campbell wrote: >> >> On 5/23/19 8:34 AM, Jason Gunthorpe wrote: >>> From: Jason Gunthorpe <jgg@mellanox.com> >>> >>> As coded this function can false-fail in various racy situations. Make it >>> reliable by running only under the write side of the mmap_sem and avoiding >>> the false-failing compare/exchange pattern. >>> >>> Also make the locking very easy to understand by only ever reading or >>> writing mm->hmm while holding the write side of the mmap_sem. >>> >>> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com> >>> mm/hmm.c | 75 ++++++++++++++++++++------------------------------------ >>> 1 file changed, 27 insertions(+), 48 deletions(-) >>> >>> diff --git a/mm/hmm.c b/mm/hmm.c >>> index e27058e92508b9..ec54be54d81135 100644 >>> +++ b/mm/hmm.c >>> @@ -40,16 +40,6 @@ >>> #if IS_ENABLED(CONFIG_HMM_MIRROR) >>> static const struct mmu_notifier_ops hmm_mmu_notifier_ops; >>> -static inline struct hmm *mm_get_hmm(struct mm_struct *mm) >>> -{ >>> - struct hmm *hmm = READ_ONCE(mm->hmm); >>> - >>> - if (hmm && kref_get_unless_zero(&hmm->kref)) >>> - return hmm; >>> - >>> - return NULL; >>> -} >>> - >>> /** >>> * hmm_get_or_create - register HMM against an mm (HMM internal) >>> * >>> @@ -64,11 +54,20 @@ static inline struct hmm *mm_get_hmm(struct mm_struct *mm) >>> */ >>> static struct hmm *hmm_get_or_create(struct mm_struct *mm) >>> { >>> - struct hmm *hmm = mm_get_hmm(mm); >>> - bool cleanup = false; >>> + struct hmm *hmm; >>> - if (hmm) >>> - return hmm; >>> + lockdep_assert_held_exclusive(mm->mmap_sem); >>> + >>> + if (mm->hmm) { >>> + if (kref_get_unless_zero(&mm->hmm->kref)) >>> + return mm->hmm; >>> + /* >>> + * The hmm is being freed by some other CPU and is pending a >>> + * RCU grace period, but this CPU can NULL now it since we >>> + * have the mmap_sem. >>> + */ >>> + mm->hmm = NULL; >> >> Shouldn't there be a "return NULL;" here so it doesn't fall through and >> allocate a struct hmm below? > > No, this function should only return NULL on memory allocation > failure. > > In this case another thread is busy freeing the hmm but wasn't able to > update mm->hmm to null due to a locking constraint. So we make it null > on behalf of the other thread and allocate a fresh new hmm that is > valid. The freeing thread will complete the free and do nothing with > mm->hmm. > >>> static void hmm_fee_rcu(struct rcu_head *rcu) >> >> I see Jerome already saw and named this hmm_free_rcu() >> which I agree with. > > I do love my typos :) > >>> { >>> + struct hmm *hmm = container_of(rcu, struct hmm, rcu); >>> + >>> + down_write(&hmm->mm->mmap_sem); >>> + if (hmm->mm->hmm == hmm) >>> + hmm->mm->hmm = NULL; >>> + up_write(&hmm->mm->mmap_sem); >>> + mmdrop(hmm->mm); >>> + >>> kfree(container_of(rcu, struct hmm, rcu)); >>> } >>> static void hmm_free(struct kref *kref) >>> { >>> struct hmm *hmm = container_of(kref, struct hmm, kref); >>> - struct mm_struct *mm = hmm->mm; >>> - >>> - mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm); >>> - spin_lock(&mm->page_table_lock); >>> - if (mm->hmm == hmm) >>> - mm->hmm = NULL; >>> - spin_unlock(&mm->page_table_lock); >>> - >>> - mmdrop(hmm->mm); >>> + mmu_notifier_unregister_no_release(&hmm->mmu_notifier, hmm->mm); >>> mmu_notifier_call_srcu(&hmm->rcu, hmm_fee_rcu); >>> } >>> >> >> This email message is for the sole use of the intended recipient(s) and may contain >> confidential information. Any unauthorized review, use, disclosure or distribution >> is prohibited. If you are not the intended recipient, please contact the sender by >> reply email and destroy all copies of the original message. > > Ah, you should not send this trailer to the public mailing lists. > > Thanks, > Jason > Sorry, I have to apply the "magic" header to suppress this each time I send email to a public list.
diff --git a/mm/hmm.c b/mm/hmm.c index e27058e92508b9..ec54be54d81135 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -40,16 +40,6 @@ #if IS_ENABLED(CONFIG_HMM_MIRROR) static const struct mmu_notifier_ops hmm_mmu_notifier_ops; -static inline struct hmm *mm_get_hmm(struct mm_struct *mm) -{ - struct hmm *hmm = READ_ONCE(mm->hmm); - - if (hmm && kref_get_unless_zero(&hmm->kref)) - return hmm; - - return NULL; -} - /** * hmm_get_or_create - register HMM against an mm (HMM internal) * @@ -64,11 +54,20 @@ static inline struct hmm *mm_get_hmm(struct mm_struct *mm) */ static struct hmm *hmm_get_or_create(struct mm_struct *mm) { - struct hmm *hmm = mm_get_hmm(mm); - bool cleanup = false; + struct hmm *hmm; - if (hmm) - return hmm; + lockdep_assert_held_exclusive(mm->mmap_sem); + + if (mm->hmm) { + if (kref_get_unless_zero(&mm->hmm->kref)) + return mm->hmm; + /* + * The hmm is being freed by some other CPU and is pending a + * RCU grace period, but this CPU can NULL now it since we + * have the mmap_sem. + */ + mm->hmm = NULL; + } hmm = kmalloc(sizeof(*hmm), GFP_KERNEL); if (!hmm) @@ -85,54 +84,34 @@ static struct hmm *hmm_get_or_create(struct mm_struct *mm) hmm->mm = mm; mmgrab(hmm->mm); - spin_lock(&mm->page_table_lock); - if (!mm->hmm) - mm->hmm = hmm; - else - cleanup = true; - spin_unlock(&mm->page_table_lock); - - if (cleanup) - goto error; - - /* - * We should only get here if hold the mmap_sem in write mode ie on - * registration of first mirror through hmm_mirror_register() - */ hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops; - if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) - goto error_mm; + if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) { + kfree(hmm); + return NULL; + } + mm->hmm = hmm; return hmm; - -error_mm: - spin_lock(&mm->page_table_lock); - if (mm->hmm == hmm) - mm->hmm = NULL; - spin_unlock(&mm->page_table_lock); -error: - kfree(hmm); - return NULL; } static void hmm_fee_rcu(struct rcu_head *rcu) { + struct hmm *hmm = container_of(rcu, struct hmm, rcu); + + down_write(&hmm->mm->mmap_sem); + if (hmm->mm->hmm == hmm) + hmm->mm->hmm = NULL; + up_write(&hmm->mm->mmap_sem); + mmdrop(hmm->mm); + kfree(container_of(rcu, struct hmm, rcu)); } static void hmm_free(struct kref *kref) { struct hmm *hmm = container_of(kref, struct hmm, kref); - struct mm_struct *mm = hmm->mm; - - mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm); - spin_lock(&mm->page_table_lock); - if (mm->hmm == hmm) - mm->hmm = NULL; - spin_unlock(&mm->page_table_lock); - - mmdrop(hmm->mm); + mmu_notifier_unregister_no_release(&hmm->mmu_notifier, hmm->mm); mmu_notifier_call_srcu(&hmm->rcu, hmm_fee_rcu); }