Message ID | 20190325144011.10560-3-jglisse@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Improve HMM driver API v2 | expand |
On Mon, Mar 25, 2019 at 10:40:02AM -0400, Jerome Glisse wrote: > From: Jérôme Glisse <jglisse@redhat.com> > > Every time i read the code to check that the HMM structure does not > vanish before it should thanks to the many lock protecting its removal > i get a headache. Switch to reference counting instead it is much > easier to follow and harder to break. This also remove some code that > is no longer needed with refcounting. > > Changes since v1: > - removed bunch of useless check (if API is use with bogus argument > better to fail loudly so user fix their code) > - s/hmm_get/mm_get_hmm/ > > Signed-off-by: Jérôme Glisse <jglisse@redhat.com> > Reviewed-by: Ralph Campbell <rcampbell@nvidia.com> > Cc: John Hubbard <jhubbard@nvidia.com> > Cc: Andrew Morton <akpm@linux-foundation.org> > Cc: Dan Williams <dan.j.williams@intel.com> > --- > include/linux/hmm.h | 2 + > mm/hmm.c | 170 ++++++++++++++++++++++++++++---------------- > 2 files changed, 112 insertions(+), 60 deletions(-) > > diff --git a/include/linux/hmm.h b/include/linux/hmm.h > index ad50b7b4f141..716fc61fa6d4 100644 > --- a/include/linux/hmm.h > +++ b/include/linux/hmm.h > @@ -131,6 +131,7 @@ enum hmm_pfn_value_e { > /* > * struct hmm_range - track invalidation lock on virtual address range > * > + * @hmm: the core HMM structure this range is active against > * @vma: the vm area struct for the range > * @list: all range lock are on a list > * @start: range virtual start address (inclusive) > @@ -142,6 +143,7 @@ enum hmm_pfn_value_e { > * @valid: pfns array did not change since it has been fill by an HMM function > */ > struct hmm_range { > + struct hmm *hmm; > struct vm_area_struct *vma; > struct list_head list; > unsigned long start; > diff --git a/mm/hmm.c b/mm/hmm.c > index fe1cd87e49ac..306e57f7cded 100644 > --- a/mm/hmm.c > +++ b/mm/hmm.c > @@ -50,6 +50,7 @@ static const struct mmu_notifier_ops hmm_mmu_notifier_ops; > */ > struct hmm { > struct mm_struct *mm; > + struct kref kref; > spinlock_t lock; > struct list_head ranges; > struct list_head mirrors; > @@ -57,6 +58,16 @@ struct hmm { > struct rw_semaphore mirrors_sem; > }; > > +static inline struct hmm *mm_get_hmm(struct mm_struct *mm) > +{ > + struct hmm *hmm = READ_ONCE(mm->hmm); > + > + if (hmm && kref_get_unless_zero(&hmm->kref)) > + return hmm; > + > + return NULL; > +} > + > /* > * hmm_register - register HMM against an mm (HMM internal) > * > @@ -67,14 +78,9 @@ struct hmm { > */ > static struct hmm *hmm_register(struct mm_struct *mm) > { > - struct hmm *hmm = READ_ONCE(mm->hmm); > + struct hmm *hmm = mm_get_hmm(mm); FWIW: having hmm_register == "hmm get" is a bit confusing... Ira > bool cleanup = false; > > - /* > - * The hmm struct can only be freed once the mm_struct goes away, > - * hence we should always have pre-allocated an new hmm struct > - * above. > - */ > if (hmm) > return hmm; > > @@ -86,6 +92,7 @@ static struct hmm *hmm_register(struct mm_struct *mm) > hmm->mmu_notifier.ops = NULL; > INIT_LIST_HEAD(&hmm->ranges); > spin_lock_init(&hmm->lock); > + kref_init(&hmm->kref); > hmm->mm = mm; > > spin_lock(&mm->page_table_lock); > @@ -106,7 +113,7 @@ static struct hmm *hmm_register(struct mm_struct *mm) > if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) > goto error_mm; > > - return mm->hmm; > + return hmm; > > error_mm: > spin_lock(&mm->page_table_lock); > @@ -118,9 +125,41 @@ static struct hmm *hmm_register(struct mm_struct *mm) > return NULL; > } > > +static void hmm_free(struct kref *kref) > +{ > + struct hmm *hmm = container_of(kref, struct hmm, kref); > + struct mm_struct *mm = hmm->mm; > + > + mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm); > + > + spin_lock(&mm->page_table_lock); > + if (mm->hmm == hmm) > + mm->hmm = NULL; > + spin_unlock(&mm->page_table_lock); > + > + kfree(hmm); > +} > + > +static inline void hmm_put(struct hmm *hmm) > +{ > + kref_put(&hmm->kref, hmm_free); > +} > + > void hmm_mm_destroy(struct mm_struct *mm) > { > - kfree(mm->hmm); > + struct hmm *hmm; > + > + spin_lock(&mm->page_table_lock); > + hmm = mm_get_hmm(mm); > + mm->hmm = NULL; > + if (hmm) { > + hmm->mm = NULL; > + spin_unlock(&mm->page_table_lock); > + hmm_put(hmm); > + return; > + } > + > + spin_unlock(&mm->page_table_lock); > } > > static int hmm_invalidate_range(struct hmm *hmm, bool device, > @@ -165,7 +204,7 @@ static int hmm_invalidate_range(struct hmm *hmm, bool device, > static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) > { > struct hmm_mirror *mirror; > - struct hmm *hmm = mm->hmm; > + struct hmm *hmm = mm_get_hmm(mm); > > down_write(&hmm->mirrors_sem); > mirror = list_first_entry_or_null(&hmm->mirrors, struct hmm_mirror, > @@ -186,13 +225,16 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) > struct hmm_mirror, list); > } > up_write(&hmm->mirrors_sem); > + > + hmm_put(hmm); > } > > static int hmm_invalidate_range_start(struct mmu_notifier *mn, > const struct mmu_notifier_range *range) > { > + struct hmm *hmm = mm_get_hmm(range->mm); > struct hmm_update update; > - struct hmm *hmm = range->mm->hmm; > + int ret; > > VM_BUG_ON(!hmm); > > @@ -200,14 +242,16 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn, > update.end = range->end; > update.event = HMM_UPDATE_INVALIDATE; > update.blockable = range->blockable; > - return hmm_invalidate_range(hmm, true, &update); > + ret = hmm_invalidate_range(hmm, true, &update); > + hmm_put(hmm); > + return ret; > } > > static void hmm_invalidate_range_end(struct mmu_notifier *mn, > const struct mmu_notifier_range *range) > { > + struct hmm *hmm = mm_get_hmm(range->mm); > struct hmm_update update; > - struct hmm *hmm = range->mm->hmm; > > VM_BUG_ON(!hmm); > > @@ -216,6 +260,7 @@ static void hmm_invalidate_range_end(struct mmu_notifier *mn, > update.event = HMM_UPDATE_INVALIDATE; > update.blockable = true; > hmm_invalidate_range(hmm, false, &update); > + hmm_put(hmm); > } > > static const struct mmu_notifier_ops hmm_mmu_notifier_ops = { > @@ -241,24 +286,13 @@ int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm) > if (!mm || !mirror || !mirror->ops) > return -EINVAL; > > -again: > mirror->hmm = hmm_register(mm); > if (!mirror->hmm) > return -ENOMEM; > > down_write(&mirror->hmm->mirrors_sem); > - if (mirror->hmm->mm == NULL) { > - /* > - * A racing hmm_mirror_unregister() is about to destroy the hmm > - * struct. Try again to allocate a new one. > - */ > - up_write(&mirror->hmm->mirrors_sem); > - mirror->hmm = NULL; > - goto again; > - } else { > - list_add(&mirror->list, &mirror->hmm->mirrors); > - up_write(&mirror->hmm->mirrors_sem); > - } > + list_add(&mirror->list, &mirror->hmm->mirrors); > + up_write(&mirror->hmm->mirrors_sem); > > return 0; > } > @@ -273,33 +307,18 @@ EXPORT_SYMBOL(hmm_mirror_register); > */ > void hmm_mirror_unregister(struct hmm_mirror *mirror) > { > - bool should_unregister = false; > - struct mm_struct *mm; > - struct hmm *hmm; > + struct hmm *hmm = READ_ONCE(mirror->hmm); > > - if (mirror->hmm == NULL) > + if (hmm == NULL) > return; > > - hmm = mirror->hmm; > down_write(&hmm->mirrors_sem); > list_del_init(&mirror->list); > - should_unregister = list_empty(&hmm->mirrors); > + /* To protect us against double unregister ... */ > mirror->hmm = NULL; > - mm = hmm->mm; > - hmm->mm = NULL; > up_write(&hmm->mirrors_sem); > > - if (!should_unregister || mm == NULL) > - return; > - > - mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm); > - > - spin_lock(&mm->page_table_lock); > - if (mm->hmm == hmm) > - mm->hmm = NULL; > - spin_unlock(&mm->page_table_lock); > - > - kfree(hmm); > + hmm_put(hmm); > } > EXPORT_SYMBOL(hmm_mirror_unregister); > > @@ -708,6 +727,8 @@ int hmm_vma_get_pfns(struct hmm_range *range) > struct mm_walk mm_walk; > struct hmm *hmm; > > + range->hmm = NULL; > + > /* Sanity check, this really should not happen ! */ > if (range->start < vma->vm_start || range->start >= vma->vm_end) > return -EINVAL; > @@ -717,14 +738,18 @@ int hmm_vma_get_pfns(struct hmm_range *range) > hmm = hmm_register(vma->vm_mm); > if (!hmm) > return -ENOMEM; > - /* Caller must have registered a mirror, via hmm_mirror_register() ! */ > - if (!hmm->mmu_notifier.ops) > + > + /* Check if hmm_mm_destroy() was call. */ > + if (hmm->mm == NULL) { > + hmm_put(hmm); > return -EINVAL; > + } > > /* FIXME support hugetlb fs */ > if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) || > vma_is_dax(vma)) { > hmm_pfns_special(range); > + hmm_put(hmm); > return -EINVAL; > } > > @@ -736,6 +761,7 @@ int hmm_vma_get_pfns(struct hmm_range *range) > * operations such has atomic access would not work. > */ > hmm_pfns_clear(range, range->pfns, range->start, range->end); > + hmm_put(hmm); > return -EPERM; > } > > @@ -758,6 +784,12 @@ int hmm_vma_get_pfns(struct hmm_range *range) > mm_walk.pte_hole = hmm_vma_walk_hole; > > walk_page_range(range->start, range->end, &mm_walk); > + /* > + * Transfer hmm reference to the range struct it will be drop inside > + * the hmm_vma_range_done() function (which _must_ be call if this > + * function return 0). > + */ > + range->hmm = hmm; > return 0; > } > EXPORT_SYMBOL(hmm_vma_get_pfns); > @@ -802,25 +834,27 @@ EXPORT_SYMBOL(hmm_vma_get_pfns); > */ > bool hmm_vma_range_done(struct hmm_range *range) > { > - unsigned long npages = (range->end - range->start) >> PAGE_SHIFT; > - struct hmm *hmm; > + bool ret = false; > > - if (range->end <= range->start) { > + /* Sanity check this really should not happen. */ > + if (range->hmm == NULL || range->end <= range->start) { > BUG(); > return false; > } > > - hmm = hmm_register(range->vma->vm_mm); > - if (!hmm) { > - memset(range->pfns, 0, sizeof(*range->pfns) * npages); > - return false; > - } > - > - spin_lock(&hmm->lock); > + spin_lock(&range->hmm->lock); > list_del_rcu(&range->list); > - spin_unlock(&hmm->lock); > + ret = range->valid; > + spin_unlock(&range->hmm->lock); > > - return range->valid; > + /* Is the mm still alive ? */ > + if (range->hmm->mm == NULL) > + ret = false; > + > + /* Drop reference taken by hmm_vma_fault() or hmm_vma_get_pfns() */ > + hmm_put(range->hmm); > + range->hmm = NULL; > + return ret; > } > EXPORT_SYMBOL(hmm_vma_range_done); > > @@ -880,6 +914,8 @@ int hmm_vma_fault(struct hmm_range *range, bool block) > struct hmm *hmm; > int ret; > > + range->hmm = NULL; > + > /* Sanity check, this really should not happen ! */ > if (range->start < vma->vm_start || range->start >= vma->vm_end) > return -EINVAL; > @@ -891,14 +927,18 @@ int hmm_vma_fault(struct hmm_range *range, bool block) > hmm_pfns_clear(range, range->pfns, range->start, range->end); > return -ENOMEM; > } > - /* Caller must have registered a mirror using hmm_mirror_register() */ > - if (!hmm->mmu_notifier.ops) > + > + /* Check if hmm_mm_destroy() was call. */ > + if (hmm->mm == NULL) { > + hmm_put(hmm); > return -EINVAL; > + } > > /* FIXME support hugetlb fs */ > if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) || > vma_is_dax(vma)) { > hmm_pfns_special(range); > + hmm_put(hmm); > return -EINVAL; > } > > @@ -910,6 +950,7 @@ int hmm_vma_fault(struct hmm_range *range, bool block) > * operations such has atomic access would not work. > */ > hmm_pfns_clear(range, range->pfns, range->start, range->end); > + hmm_put(hmm); > return -EPERM; > } > > @@ -945,7 +986,16 @@ int hmm_vma_fault(struct hmm_range *range, bool block) > hmm_pfns_clear(range, &range->pfns[i], hmm_vma_walk.last, > range->end); > hmm_vma_range_done(range); > + hmm_put(hmm); > + } else { > + /* > + * Transfer hmm reference to the range struct it will be drop > + * inside the hmm_vma_range_done() function (which _must_ be > + * call if this function return 0). > + */ > + range->hmm = hmm; > } > + > return ret; > } > EXPORT_SYMBOL(hmm_vma_fault); > -- > 2.17.2 >
On Thu, Mar 28, 2019 at 05:39:26PM -0700, John Hubbard wrote: > On 3/28/19 2:21 PM, Jerome Glisse wrote: > > On Thu, Mar 28, 2019 at 01:43:13PM -0700, John Hubbard wrote: > >> On 3/28/19 12:11 PM, Jerome Glisse wrote: > >>> On Thu, Mar 28, 2019 at 04:07:20AM -0700, Ira Weiny wrote: > >>>> On Mon, Mar 25, 2019 at 10:40:02AM -0400, Jerome Glisse wrote: > >>>>> From: Jérôme Glisse <jglisse@redhat.com> > [...] > >>>>> @@ -67,14 +78,9 @@ struct hmm { > >>>>> */ > >>>>> static struct hmm *hmm_register(struct mm_struct *mm) > >>>>> { > >>>>> - struct hmm *hmm = READ_ONCE(mm->hmm); > >>>>> + struct hmm *hmm = mm_get_hmm(mm); > >>>> > >>>> FWIW: having hmm_register == "hmm get" is a bit confusing... > >>> > >>> The thing is that you want only one hmm struct per process and thus > >>> if there is already one and it is not being destroy then you want to > >>> reuse it. > >>> > >>> Also this is all internal to HMM code and so it should not confuse > >>> anyone. > >>> > >> > >> Well, it has repeatedly come up, and I'd claim that it is quite > >> counter-intuitive. So if there is an easy way to make this internal > >> HMM code clearer or better named, I would really love that to happen. > >> > >> And we shouldn't ever dismiss feedback based on "this is just internal > >> xxx subsystem code, no need for it to be as clear as other parts of the > >> kernel", right? > > > > Yes but i have not seen any better alternative that present code. If > > there is please submit patch. > > > > Ira, do you have any patch you're working on, or a more detailed suggestion there? > If not, then I might (later, as it's not urgent) propose a small cleanup patch > I had in mind for the hmm_register code. But I don't want to duplicate effort > if you're already thinking about it. No I don't have anything. I was just really digging into these this time around and I was about to comment on the lack of "get's" for some "puts" when I realized that "hmm_register" _was_ the get... :-( Ira > > > thanks, > -- > John Hubbard > NVIDIA > >
On Thu, Mar 28, 2019 at 09:50:03PM -0400, Jerome Glisse wrote: > On Thu, Mar 28, 2019 at 06:18:35PM -0700, John Hubbard wrote: > > On 3/28/19 6:00 PM, Jerome Glisse wrote: > > > On Thu, Mar 28, 2019 at 09:57:09AM -0700, Ira Weiny wrote: > > >> On Thu, Mar 28, 2019 at 05:39:26PM -0700, John Hubbard wrote: > > >>> On 3/28/19 2:21 PM, Jerome Glisse wrote: > > >>>> On Thu, Mar 28, 2019 at 01:43:13PM -0700, John Hubbard wrote: > > >>>>> On 3/28/19 12:11 PM, Jerome Glisse wrote: > > >>>>>> On Thu, Mar 28, 2019 at 04:07:20AM -0700, Ira Weiny wrote: > > >>>>>>> On Mon, Mar 25, 2019 at 10:40:02AM -0400, Jerome Glisse wrote: > > >>>>>>>> From: Jérôme Glisse <jglisse@redhat.com> > > >>> [...] > > >>>>>>>> @@ -67,14 +78,9 @@ struct hmm { > > >>>>>>>> */ > > >>>>>>>> static struct hmm *hmm_register(struct mm_struct *mm) > > >>>>>>>> { > > >>>>>>>> - struct hmm *hmm = READ_ONCE(mm->hmm); > > >>>>>>>> + struct hmm *hmm = mm_get_hmm(mm); > > >>>>>>> > > >>>>>>> FWIW: having hmm_register == "hmm get" is a bit confusing... > > >>>>>> > > >>>>>> The thing is that you want only one hmm struct per process and thus > > >>>>>> if there is already one and it is not being destroy then you want to > > >>>>>> reuse it. > > >>>>>> > > >>>>>> Also this is all internal to HMM code and so it should not confuse > > >>>>>> anyone. > > >>>>>> > > >>>>> > > >>>>> Well, it has repeatedly come up, and I'd claim that it is quite > > >>>>> counter-intuitive. So if there is an easy way to make this internal > > >>>>> HMM code clearer or better named, I would really love that to happen. > > >>>>> > > >>>>> And we shouldn't ever dismiss feedback based on "this is just internal > > >>>>> xxx subsystem code, no need for it to be as clear as other parts of the > > >>>>> kernel", right? > > >>>> > > >>>> Yes but i have not seen any better alternative that present code. If > > >>>> there is please submit patch. > > >>>> > > >>> > > >>> Ira, do you have any patch you're working on, or a more detailed suggestion there? > > >>> If not, then I might (later, as it's not urgent) propose a small cleanup patch > > >>> I had in mind for the hmm_register code. But I don't want to duplicate effort > > >>> if you're already thinking about it. > > >> > > >> No I don't have anything. > > >> > > >> I was just really digging into these this time around and I was about to > > >> comment on the lack of "get's" for some "puts" when I realized that > > >> "hmm_register" _was_ the get... > > >> > > >> :-( > > >> > > > > > > The get is mm_get_hmm() were you get a reference on HMM from a mm struct. > > > John in previous posting complained about me naming that function hmm_get() > > > and thus in this version i renamed it to mm_get_hmm() as we are getting > > > a reference on hmm from a mm struct. > > > > Well, that's not what I recommended, though. The actual conversation went like > > this [1]: > > > > --------------------------------------------------------------- > > >> So for this, hmm_get() really ought to be symmetric with > > >> hmm_put(), by taking a struct hmm*. And the null check is > > >> not helping here, so let's just go with this smaller version: > > >> > > >> static inline struct hmm *hmm_get(struct hmm *hmm) > > >> { > > >> if (kref_get_unless_zero(&hmm->kref)) > > >> return hmm; > > >> > > >> return NULL; > > >> } > > >> > > >> ...and change the few callers accordingly. > > >> > > > > > > What about renaning hmm_get() to mm_get_hmm() instead ? > > > > > > > For a get/put pair of functions, it would be ideal to pass > > the same argument type to each. It looks like we are passing > > around hmm*, and hmm retains a reference count on hmm->mm, > > so I think you have a choice of using either mm* or hmm* as > > the argument. I'm not sure that one is better than the other > > here, as the lifetimes appear to be linked pretty tightly. > > > > Whichever one is used, I think it would be best to use it > > in both the _get() and _put() calls. > > --------------------------------------------------------------- > > > > Your response was to change the name to mm_get_hmm(), but that's not > > what I recommended. > > Because i can not do that, hmm_put() can _only_ take hmm struct as > input while hmm_get() can _only_ get mm struct as input. > > hmm_put() can only take hmm because the hmm we are un-referencing > might no longer be associated with any mm struct and thus i do not > have a mm struct to use. > > hmm_get() can only get mm as input as we need to be careful when > accessing the hmm field within the mm struct and thus it is better > to have that code within a function than open coded and duplicated > all over the place. The input value is not the problem. The problem is in the naming. obj = get_obj( various parameters ); put_obj(obj); The problem is that the function is named hmm_register() either "gets" a reference to _or_ creates and gets a reference to the hmm object. What John is probably ready to submit is something like. struct hmm *get_create_hmm(struct mm *mm); void put_hmm(struct hmm *hmm); So when you are reading the code you see... foo(...) { struct hmm *hmm = get_create_hmm(mm); if (!hmm) error... do stuff... put_hmm(hmm); } Here I can see a very clear get/put pair. The name also shows that the hmm is created if need be as well as getting a reference. Ira > > > > > > > > > The hmm_put() is just releasing the reference on the hmm struct. > > > > > > Here i feel i am getting contradicting requirement from different people. > > > I don't think there is a way to please everyone here. > > > > > > > That's not a true conflict: you're comparing your actual implementation > > to Ira's request, rather than comparing my request to Ira's request. > > > > I think there's a way forward. Ira and I are actually both asking for the > > same thing: > > > > a) clear, concise get/put routines > > > > b) avoiding odd side effects in functions that have one name, but do > > additional surprising things. > > Please show me code because i do not see any other way to do it then > how i did. > > Cheers, > Jérôme
On Thu, Mar 28, 2019 at 04:07:20AM -0700, Ira Weiny wrote: > On Mon, Mar 25, 2019 at 10:40:02AM -0400, Jerome Glisse wrote: > > From: Jérôme Glisse <jglisse@redhat.com> > > > > Every time i read the code to check that the HMM structure does not > > vanish before it should thanks to the many lock protecting its removal > > i get a headache. Switch to reference counting instead it is much > > easier to follow and harder to break. This also remove some code that > > is no longer needed with refcounting. > > > > Changes since v1: > > - removed bunch of useless check (if API is use with bogus argument > > better to fail loudly so user fix their code) > > - s/hmm_get/mm_get_hmm/ > > > > Signed-off-by: Jérôme Glisse <jglisse@redhat.com> > > Reviewed-by: Ralph Campbell <rcampbell@nvidia.com> > > Cc: John Hubbard <jhubbard@nvidia.com> > > Cc: Andrew Morton <akpm@linux-foundation.org> > > Cc: Dan Williams <dan.j.williams@intel.com> > > --- > > include/linux/hmm.h | 2 + > > mm/hmm.c | 170 ++++++++++++++++++++++++++++---------------- > > 2 files changed, 112 insertions(+), 60 deletions(-) > > > > diff --git a/include/linux/hmm.h b/include/linux/hmm.h > > index ad50b7b4f141..716fc61fa6d4 100644 > > --- a/include/linux/hmm.h > > +++ b/include/linux/hmm.h > > @@ -131,6 +131,7 @@ enum hmm_pfn_value_e { > > /* > > * struct hmm_range - track invalidation lock on virtual address range > > * > > + * @hmm: the core HMM structure this range is active against > > * @vma: the vm area struct for the range > > * @list: all range lock are on a list > > * @start: range virtual start address (inclusive) > > @@ -142,6 +143,7 @@ enum hmm_pfn_value_e { > > * @valid: pfns array did not change since it has been fill by an HMM function > > */ > > struct hmm_range { > > + struct hmm *hmm; > > struct vm_area_struct *vma; > > struct list_head list; > > unsigned long start; > > diff --git a/mm/hmm.c b/mm/hmm.c > > index fe1cd87e49ac..306e57f7cded 100644 > > --- a/mm/hmm.c > > +++ b/mm/hmm.c > > @@ -50,6 +50,7 @@ static const struct mmu_notifier_ops hmm_mmu_notifier_ops; > > */ > > struct hmm { > > struct mm_struct *mm; > > + struct kref kref; > > spinlock_t lock; > > struct list_head ranges; > > struct list_head mirrors; > > @@ -57,6 +58,16 @@ struct hmm { > > struct rw_semaphore mirrors_sem; > > }; > > > > +static inline struct hmm *mm_get_hmm(struct mm_struct *mm) > > +{ > > + struct hmm *hmm = READ_ONCE(mm->hmm); > > + > > + if (hmm && kref_get_unless_zero(&hmm->kref)) > > + return hmm; > > + > > + return NULL; > > +} > > + > > /* > > * hmm_register - register HMM against an mm (HMM internal) > > * > > @@ -67,14 +78,9 @@ struct hmm { > > */ > > static struct hmm *hmm_register(struct mm_struct *mm) > > { > > - struct hmm *hmm = READ_ONCE(mm->hmm); > > + struct hmm *hmm = mm_get_hmm(mm); > > FWIW: having hmm_register == "hmm get" is a bit confusing... The thing is that you want only one hmm struct per process and thus if there is already one and it is not being destroy then you want to reuse it. Also this is all internal to HMM code and so it should not confuse anyone. Cheers, Jérôme
On 3/28/19 12:11 PM, Jerome Glisse wrote: > On Thu, Mar 28, 2019 at 04:07:20AM -0700, Ira Weiny wrote: >> On Mon, Mar 25, 2019 at 10:40:02AM -0400, Jerome Glisse wrote: >>> From: Jérôme Glisse <jglisse@redhat.com> >>> >>> Every time i read the code to check that the HMM structure does not >>> vanish before it should thanks to the many lock protecting its removal >>> i get a headache. Switch to reference counting instead it is much >>> easier to follow and harder to break. This also remove some code that >>> is no longer needed with refcounting. >>> >>> Changes since v1: >>> - removed bunch of useless check (if API is use with bogus argument >>> better to fail loudly so user fix their code) >>> - s/hmm_get/mm_get_hmm/ >>> >>> Signed-off-by: Jérôme Glisse <jglisse@redhat.com> >>> Reviewed-by: Ralph Campbell <rcampbell@nvidia.com> >>> Cc: John Hubbard <jhubbard@nvidia.com> >>> Cc: Andrew Morton <akpm@linux-foundation.org> >>> Cc: Dan Williams <dan.j.williams@intel.com> >>> --- >>> include/linux/hmm.h | 2 + >>> mm/hmm.c | 170 ++++++++++++++++++++++++++++---------------- >>> 2 files changed, 112 insertions(+), 60 deletions(-) >>> >>> diff --git a/include/linux/hmm.h b/include/linux/hmm.h >>> index ad50b7b4f141..716fc61fa6d4 100644 >>> --- a/include/linux/hmm.h >>> +++ b/include/linux/hmm.h >>> @@ -131,6 +131,7 @@ enum hmm_pfn_value_e { >>> /* >>> * struct hmm_range - track invalidation lock on virtual address range >>> * >>> + * @hmm: the core HMM structure this range is active against >>> * @vma: the vm area struct for the range >>> * @list: all range lock are on a list >>> * @start: range virtual start address (inclusive) >>> @@ -142,6 +143,7 @@ enum hmm_pfn_value_e { >>> * @valid: pfns array did not change since it has been fill by an HMM function >>> */ >>> struct hmm_range { >>> + struct hmm *hmm; >>> struct vm_area_struct *vma; >>> struct list_head list; >>> unsigned long start; >>> diff --git a/mm/hmm.c b/mm/hmm.c >>> index fe1cd87e49ac..306e57f7cded 100644 >>> --- a/mm/hmm.c >>> +++ b/mm/hmm.c >>> @@ -50,6 +50,7 @@ static const struct mmu_notifier_ops hmm_mmu_notifier_ops; >>> */ >>> struct hmm { >>> struct mm_struct *mm; >>> + struct kref kref; >>> spinlock_t lock; >>> struct list_head ranges; >>> struct list_head mirrors; >>> @@ -57,6 +58,16 @@ struct hmm { >>> struct rw_semaphore mirrors_sem; >>> }; >>> >>> +static inline struct hmm *mm_get_hmm(struct mm_struct *mm) >>> +{ >>> + struct hmm *hmm = READ_ONCE(mm->hmm); >>> + >>> + if (hmm && kref_get_unless_zero(&hmm->kref)) >>> + return hmm; >>> + >>> + return NULL; >>> +} >>> + >>> /* >>> * hmm_register - register HMM against an mm (HMM internal) >>> * >>> @@ -67,14 +78,9 @@ struct hmm { >>> */ >>> static struct hmm *hmm_register(struct mm_struct *mm) >>> { >>> - struct hmm *hmm = READ_ONCE(mm->hmm); >>> + struct hmm *hmm = mm_get_hmm(mm); >> >> FWIW: having hmm_register == "hmm get" is a bit confusing... > > The thing is that you want only one hmm struct per process and thus > if there is already one and it is not being destroy then you want to > reuse it. > > Also this is all internal to HMM code and so it should not confuse > anyone. > Well, it has repeatedly come up, and I'd claim that it is quite counter-intuitive. So if there is an easy way to make this internal HMM code clearer or better named, I would really love that to happen. And we shouldn't ever dismiss feedback based on "this is just internal xxx subsystem code, no need for it to be as clear as other parts of the kernel", right? thanks,
On Thu, Mar 28, 2019 at 01:43:13PM -0700, John Hubbard wrote: > On 3/28/19 12:11 PM, Jerome Glisse wrote: > > On Thu, Mar 28, 2019 at 04:07:20AM -0700, Ira Weiny wrote: > >> On Mon, Mar 25, 2019 at 10:40:02AM -0400, Jerome Glisse wrote: > >>> From: Jérôme Glisse <jglisse@redhat.com> > >>> > >>> Every time i read the code to check that the HMM structure does not > >>> vanish before it should thanks to the many lock protecting its removal > >>> i get a headache. Switch to reference counting instead it is much > >>> easier to follow and harder to break. This also remove some code that > >>> is no longer needed with refcounting. > >>> > >>> Changes since v1: > >>> - removed bunch of useless check (if API is use with bogus argument > >>> better to fail loudly so user fix their code) > >>> - s/hmm_get/mm_get_hmm/ > >>> > >>> Signed-off-by: Jérôme Glisse <jglisse@redhat.com> > >>> Reviewed-by: Ralph Campbell <rcampbell@nvidia.com> > >>> Cc: John Hubbard <jhubbard@nvidia.com> > >>> Cc: Andrew Morton <akpm@linux-foundation.org> > >>> Cc: Dan Williams <dan.j.williams@intel.com> > >>> --- > >>> include/linux/hmm.h | 2 + > >>> mm/hmm.c | 170 ++++++++++++++++++++++++++++---------------- > >>> 2 files changed, 112 insertions(+), 60 deletions(-) > >>> > >>> diff --git a/include/linux/hmm.h b/include/linux/hmm.h > >>> index ad50b7b4f141..716fc61fa6d4 100644 > >>> --- a/include/linux/hmm.h > >>> +++ b/include/linux/hmm.h > >>> @@ -131,6 +131,7 @@ enum hmm_pfn_value_e { > >>> /* > >>> * struct hmm_range - track invalidation lock on virtual address range > >>> * > >>> + * @hmm: the core HMM structure this range is active against > >>> * @vma: the vm area struct for the range > >>> * @list: all range lock are on a list > >>> * @start: range virtual start address (inclusive) > >>> @@ -142,6 +143,7 @@ enum hmm_pfn_value_e { > >>> * @valid: pfns array did not change since it has been fill by an HMM function > >>> */ > >>> struct hmm_range { > >>> + struct hmm *hmm; > >>> struct vm_area_struct *vma; > >>> struct list_head list; > >>> unsigned long start; > >>> diff --git a/mm/hmm.c b/mm/hmm.c > >>> index fe1cd87e49ac..306e57f7cded 100644 > >>> --- a/mm/hmm.c > >>> +++ b/mm/hmm.c > >>> @@ -50,6 +50,7 @@ static const struct mmu_notifier_ops hmm_mmu_notifier_ops; > >>> */ > >>> struct hmm { > >>> struct mm_struct *mm; > >>> + struct kref kref; > >>> spinlock_t lock; > >>> struct list_head ranges; > >>> struct list_head mirrors; > >>> @@ -57,6 +58,16 @@ struct hmm { > >>> struct rw_semaphore mirrors_sem; > >>> }; > >>> > >>> +static inline struct hmm *mm_get_hmm(struct mm_struct *mm) > >>> +{ > >>> + struct hmm *hmm = READ_ONCE(mm->hmm); > >>> + > >>> + if (hmm && kref_get_unless_zero(&hmm->kref)) > >>> + return hmm; > >>> + > >>> + return NULL; > >>> +} > >>> + > >>> /* > >>> * hmm_register - register HMM against an mm (HMM internal) > >>> * > >>> @@ -67,14 +78,9 @@ struct hmm { > >>> */ > >>> static struct hmm *hmm_register(struct mm_struct *mm) > >>> { > >>> - struct hmm *hmm = READ_ONCE(mm->hmm); > >>> + struct hmm *hmm = mm_get_hmm(mm); > >> > >> FWIW: having hmm_register == "hmm get" is a bit confusing... > > > > The thing is that you want only one hmm struct per process and thus > > if there is already one and it is not being destroy then you want to > > reuse it. > > > > Also this is all internal to HMM code and so it should not confuse > > anyone. > > > > Well, it has repeatedly come up, and I'd claim that it is quite > counter-intuitive. So if there is an easy way to make this internal > HMM code clearer or better named, I would really love that to happen. > > And we shouldn't ever dismiss feedback based on "this is just internal > xxx subsystem code, no need for it to be as clear as other parts of the > kernel", right? Yes but i have not seen any better alternative that present code. If there is please submit patch. Cheers, Jérôme
On 3/28/19 2:21 PM, Jerome Glisse wrote: > On Thu, Mar 28, 2019 at 01:43:13PM -0700, John Hubbard wrote: >> On 3/28/19 12:11 PM, Jerome Glisse wrote: >>> On Thu, Mar 28, 2019 at 04:07:20AM -0700, Ira Weiny wrote: >>>> On Mon, Mar 25, 2019 at 10:40:02AM -0400, Jerome Glisse wrote: >>>>> From: Jérôme Glisse <jglisse@redhat.com> [...] >>>>> @@ -67,14 +78,9 @@ struct hmm { >>>>> */ >>>>> static struct hmm *hmm_register(struct mm_struct *mm) >>>>> { >>>>> - struct hmm *hmm = READ_ONCE(mm->hmm); >>>>> + struct hmm *hmm = mm_get_hmm(mm); >>>> >>>> FWIW: having hmm_register == "hmm get" is a bit confusing... >>> >>> The thing is that you want only one hmm struct per process and thus >>> if there is already one and it is not being destroy then you want to >>> reuse it. >>> >>> Also this is all internal to HMM code and so it should not confuse >>> anyone. >>> >> >> Well, it has repeatedly come up, and I'd claim that it is quite >> counter-intuitive. So if there is an easy way to make this internal >> HMM code clearer or better named, I would really love that to happen. >> >> And we shouldn't ever dismiss feedback based on "this is just internal >> xxx subsystem code, no need for it to be as clear as other parts of the >> kernel", right? > > Yes but i have not seen any better alternative that present code. If > there is please submit patch. > Ira, do you have any patch you're working on, or a more detailed suggestion there? If not, then I might (later, as it's not urgent) propose a small cleanup patch I had in mind for the hmm_register code. But I don't want to duplicate effort if you're already thinking about it. thanks,
On Thu, Mar 28, 2019 at 09:57:09AM -0700, Ira Weiny wrote: > On Thu, Mar 28, 2019 at 05:39:26PM -0700, John Hubbard wrote: > > On 3/28/19 2:21 PM, Jerome Glisse wrote: > > > On Thu, Mar 28, 2019 at 01:43:13PM -0700, John Hubbard wrote: > > >> On 3/28/19 12:11 PM, Jerome Glisse wrote: > > >>> On Thu, Mar 28, 2019 at 04:07:20AM -0700, Ira Weiny wrote: > > >>>> On Mon, Mar 25, 2019 at 10:40:02AM -0400, Jerome Glisse wrote: > > >>>>> From: Jérôme Glisse <jglisse@redhat.com> > > [...] > > >>>>> @@ -67,14 +78,9 @@ struct hmm { > > >>>>> */ > > >>>>> static struct hmm *hmm_register(struct mm_struct *mm) > > >>>>> { > > >>>>> - struct hmm *hmm = READ_ONCE(mm->hmm); > > >>>>> + struct hmm *hmm = mm_get_hmm(mm); > > >>>> > > >>>> FWIW: having hmm_register == "hmm get" is a bit confusing... > > >>> > > >>> The thing is that you want only one hmm struct per process and thus > > >>> if there is already one and it is not being destroy then you want to > > >>> reuse it. > > >>> > > >>> Also this is all internal to HMM code and so it should not confuse > > >>> anyone. > > >>> > > >> > > >> Well, it has repeatedly come up, and I'd claim that it is quite > > >> counter-intuitive. So if there is an easy way to make this internal > > >> HMM code clearer or better named, I would really love that to happen. > > >> > > >> And we shouldn't ever dismiss feedback based on "this is just internal > > >> xxx subsystem code, no need for it to be as clear as other parts of the > > >> kernel", right? > > > > > > Yes but i have not seen any better alternative that present code. If > > > there is please submit patch. > > > > > > > Ira, do you have any patch you're working on, or a more detailed suggestion there? > > If not, then I might (later, as it's not urgent) propose a small cleanup patch > > I had in mind for the hmm_register code. But I don't want to duplicate effort > > if you're already thinking about it. > > No I don't have anything. > > I was just really digging into these this time around and I was about to > comment on the lack of "get's" for some "puts" when I realized that > "hmm_register" _was_ the get... > > :-( > The get is mm_get_hmm() were you get a reference on HMM from a mm struct. John in previous posting complained about me naming that function hmm_get() and thus in this version i renamed it to mm_get_hmm() as we are getting a reference on hmm from a mm struct. The hmm_put() is just releasing the reference on the hmm struct. Here i feel i am getting contradicting requirement from different people. I don't think there is a way to please everyone here. Cheers, Jérôme
On 3/28/19 6:00 PM, Jerome Glisse wrote: > On Thu, Mar 28, 2019 at 09:57:09AM -0700, Ira Weiny wrote: >> On Thu, Mar 28, 2019 at 05:39:26PM -0700, John Hubbard wrote: >>> On 3/28/19 2:21 PM, Jerome Glisse wrote: >>>> On Thu, Mar 28, 2019 at 01:43:13PM -0700, John Hubbard wrote: >>>>> On 3/28/19 12:11 PM, Jerome Glisse wrote: >>>>>> On Thu, Mar 28, 2019 at 04:07:20AM -0700, Ira Weiny wrote: >>>>>>> On Mon, Mar 25, 2019 at 10:40:02AM -0400, Jerome Glisse wrote: >>>>>>>> From: Jérôme Glisse <jglisse@redhat.com> >>> [...] >>>>>>>> @@ -67,14 +78,9 @@ struct hmm { >>>>>>>> */ >>>>>>>> static struct hmm *hmm_register(struct mm_struct *mm) >>>>>>>> { >>>>>>>> - struct hmm *hmm = READ_ONCE(mm->hmm); >>>>>>>> + struct hmm *hmm = mm_get_hmm(mm); >>>>>>> >>>>>>> FWIW: having hmm_register == "hmm get" is a bit confusing... >>>>>> >>>>>> The thing is that you want only one hmm struct per process and thus >>>>>> if there is already one and it is not being destroy then you want to >>>>>> reuse it. >>>>>> >>>>>> Also this is all internal to HMM code and so it should not confuse >>>>>> anyone. >>>>>> >>>>> >>>>> Well, it has repeatedly come up, and I'd claim that it is quite >>>>> counter-intuitive. So if there is an easy way to make this internal >>>>> HMM code clearer or better named, I would really love that to happen. >>>>> >>>>> And we shouldn't ever dismiss feedback based on "this is just internal >>>>> xxx subsystem code, no need for it to be as clear as other parts of the >>>>> kernel", right? >>>> >>>> Yes but i have not seen any better alternative that present code. If >>>> there is please submit patch. >>>> >>> >>> Ira, do you have any patch you're working on, or a more detailed suggestion there? >>> If not, then I might (later, as it's not urgent) propose a small cleanup patch >>> I had in mind for the hmm_register code. But I don't want to duplicate effort >>> if you're already thinking about it. >> >> No I don't have anything. >> >> I was just really digging into these this time around and I was about to >> comment on the lack of "get's" for some "puts" when I realized that >> "hmm_register" _was_ the get... >> >> :-( >> > > The get is mm_get_hmm() were you get a reference on HMM from a mm struct. > John in previous posting complained about me naming that function hmm_get() > and thus in this version i renamed it to mm_get_hmm() as we are getting > a reference on hmm from a mm struct. Well, that's not what I recommended, though. The actual conversation went like this [1]: --------------------------------------------------------------- >> So for this, hmm_get() really ought to be symmetric with >> hmm_put(), by taking a struct hmm*. And the null check is >> not helping here, so let's just go with this smaller version: >> >> static inline struct hmm *hmm_get(struct hmm *hmm) >> { >> if (kref_get_unless_zero(&hmm->kref)) >> return hmm; >> >> return NULL; >> } >> >> ...and change the few callers accordingly. >> > > What about renaning hmm_get() to mm_get_hmm() instead ? > For a get/put pair of functions, it would be ideal to pass the same argument type to each. It looks like we are passing around hmm*, and hmm retains a reference count on hmm->mm, so I think you have a choice of using either mm* or hmm* as the argument. I'm not sure that one is better than the other here, as the lifetimes appear to be linked pretty tightly. Whichever one is used, I think it would be best to use it in both the _get() and _put() calls. --------------------------------------------------------------- Your response was to change the name to mm_get_hmm(), but that's not what I recommended. > > The hmm_put() is just releasing the reference on the hmm struct. > > Here i feel i am getting contradicting requirement from different people. > I don't think there is a way to please everyone here. > That's not a true conflict: you're comparing your actual implementation to Ira's request, rather than comparing my request to Ira's request. I think there's a way forward. Ira and I are actually both asking for the same thing: a) clear, concise get/put routines b) avoiding odd side effects in functions that have one name, but do additional surprising things. [1] https://lore.kernel.org/r/1ccab0d3-7e90-8e39-074d-02ffbfc68480@nvidia.com thanks,
On Thu, Mar 28, 2019 at 06:18:35PM -0700, John Hubbard wrote: > On 3/28/19 6:00 PM, Jerome Glisse wrote: > > On Thu, Mar 28, 2019 at 09:57:09AM -0700, Ira Weiny wrote: > >> On Thu, Mar 28, 2019 at 05:39:26PM -0700, John Hubbard wrote: > >>> On 3/28/19 2:21 PM, Jerome Glisse wrote: > >>>> On Thu, Mar 28, 2019 at 01:43:13PM -0700, John Hubbard wrote: > >>>>> On 3/28/19 12:11 PM, Jerome Glisse wrote: > >>>>>> On Thu, Mar 28, 2019 at 04:07:20AM -0700, Ira Weiny wrote: > >>>>>>> On Mon, Mar 25, 2019 at 10:40:02AM -0400, Jerome Glisse wrote: > >>>>>>>> From: Jérôme Glisse <jglisse@redhat.com> > >>> [...] > >>>>>>>> @@ -67,14 +78,9 @@ struct hmm { > >>>>>>>> */ > >>>>>>>> static struct hmm *hmm_register(struct mm_struct *mm) > >>>>>>>> { > >>>>>>>> - struct hmm *hmm = READ_ONCE(mm->hmm); > >>>>>>>> + struct hmm *hmm = mm_get_hmm(mm); > >>>>>>> > >>>>>>> FWIW: having hmm_register == "hmm get" is a bit confusing... > >>>>>> > >>>>>> The thing is that you want only one hmm struct per process and thus > >>>>>> if there is already one and it is not being destroy then you want to > >>>>>> reuse it. > >>>>>> > >>>>>> Also this is all internal to HMM code and so it should not confuse > >>>>>> anyone. > >>>>>> > >>>>> > >>>>> Well, it has repeatedly come up, and I'd claim that it is quite > >>>>> counter-intuitive. So if there is an easy way to make this internal > >>>>> HMM code clearer or better named, I would really love that to happen. > >>>>> > >>>>> And we shouldn't ever dismiss feedback based on "this is just internal > >>>>> xxx subsystem code, no need for it to be as clear as other parts of the > >>>>> kernel", right? > >>>> > >>>> Yes but i have not seen any better alternative that present code. If > >>>> there is please submit patch. > >>>> > >>> > >>> Ira, do you have any patch you're working on, or a more detailed suggestion there? > >>> If not, then I might (later, as it's not urgent) propose a small cleanup patch > >>> I had in mind for the hmm_register code. But I don't want to duplicate effort > >>> if you're already thinking about it. > >> > >> No I don't have anything. > >> > >> I was just really digging into these this time around and I was about to > >> comment on the lack of "get's" for some "puts" when I realized that > >> "hmm_register" _was_ the get... > >> > >> :-( > >> > > > > The get is mm_get_hmm() were you get a reference on HMM from a mm struct. > > John in previous posting complained about me naming that function hmm_get() > > and thus in this version i renamed it to mm_get_hmm() as we are getting > > a reference on hmm from a mm struct. > > Well, that's not what I recommended, though. The actual conversation went like > this [1]: > > --------------------------------------------------------------- > >> So for this, hmm_get() really ought to be symmetric with > >> hmm_put(), by taking a struct hmm*. And the null check is > >> not helping here, so let's just go with this smaller version: > >> > >> static inline struct hmm *hmm_get(struct hmm *hmm) > >> { > >> if (kref_get_unless_zero(&hmm->kref)) > >> return hmm; > >> > >> return NULL; > >> } > >> > >> ...and change the few callers accordingly. > >> > > > > What about renaning hmm_get() to mm_get_hmm() instead ? > > > > For a get/put pair of functions, it would be ideal to pass > the same argument type to each. It looks like we are passing > around hmm*, and hmm retains a reference count on hmm->mm, > so I think you have a choice of using either mm* or hmm* as > the argument. I'm not sure that one is better than the other > here, as the lifetimes appear to be linked pretty tightly. > > Whichever one is used, I think it would be best to use it > in both the _get() and _put() calls. > --------------------------------------------------------------- > > Your response was to change the name to mm_get_hmm(), but that's not > what I recommended. Because i can not do that, hmm_put() can _only_ take hmm struct as input while hmm_get() can _only_ get mm struct as input. hmm_put() can only take hmm because the hmm we are un-referencing might no longer be associated with any mm struct and thus i do not have a mm struct to use. hmm_get() can only get mm as input as we need to be careful when accessing the hmm field within the mm struct and thus it is better to have that code within a function than open coded and duplicated all over the place. > > > > > The hmm_put() is just releasing the reference on the hmm struct. > > > > Here i feel i am getting contradicting requirement from different people. > > I don't think there is a way to please everyone here. > > > > That's not a true conflict: you're comparing your actual implementation > to Ira's request, rather than comparing my request to Ira's request. > > I think there's a way forward. Ira and I are actually both asking for the > same thing: > > a) clear, concise get/put routines > > b) avoiding odd side effects in functions that have one name, but do > additional surprising things. Please show me code because i do not see any other way to do it then how i did. Cheers, Jérôme
On 3/28/19 6:50 PM, Jerome Glisse wrote: [...] >>> >>> The hmm_put() is just releasing the reference on the hmm struct. >>> >>> Here i feel i am getting contradicting requirement from different people. >>> I don't think there is a way to please everyone here. >>> >> >> That's not a true conflict: you're comparing your actual implementation >> to Ira's request, rather than comparing my request to Ira's request. >> >> I think there's a way forward. Ira and I are actually both asking for the >> same thing: >> >> a) clear, concise get/put routines >> >> b) avoiding odd side effects in functions that have one name, but do >> additional surprising things. > > Please show me code because i do not see any other way to do it then > how i did. > Sure, I'll take a run at it. I've driven you crazy enough with the naming today, it's time to back it up with actual code. :) I hope this is not one of those "we must also change Nouveau in N+M steps" situations, though. I'm starting to despair about reviewing code that basically can't be changed... thanks,
On Thu, Mar 28, 2019 at 07:11:17PM -0700, John Hubbard wrote: > On 3/28/19 6:50 PM, Jerome Glisse wrote: > [...] > >>> > >>> The hmm_put() is just releasing the reference on the hmm struct. > >>> > >>> Here i feel i am getting contradicting requirement from different people. > >>> I don't think there is a way to please everyone here. > >>> > >> > >> That's not a true conflict: you're comparing your actual implementation > >> to Ira's request, rather than comparing my request to Ira's request. > >> > >> I think there's a way forward. Ira and I are actually both asking for the > >> same thing: > >> > >> a) clear, concise get/put routines > >> > >> b) avoiding odd side effects in functions that have one name, but do > >> additional surprising things. > > > > Please show me code because i do not see any other way to do it then > > how i did. > > > > Sure, I'll take a run at it. I've driven you crazy enough with the naming > today, it's time to back it up with actual code. :) Note that every single line in mm_get_hmm() do matter. > I hope this is not one of those "we must also change Nouveau in N+M steps" > situations, though. I'm starting to despair about reviewing code that > basically can't be changed... It can be change but i rather not do too many in one go, each change is like a tango with one partner and having tango with multiple partner at once is painful much more likely to step on each other foot. Cheers, Jérôme
On Thu, Mar 28, 2019 at 11:21:00AM -0700, Ira Weiny wrote: > On Thu, Mar 28, 2019 at 09:50:03PM -0400, Jerome Glisse wrote: > > On Thu, Mar 28, 2019 at 06:18:35PM -0700, John Hubbard wrote: > > > On 3/28/19 6:00 PM, Jerome Glisse wrote: > > > > On Thu, Mar 28, 2019 at 09:57:09AM -0700, Ira Weiny wrote: > > > >> On Thu, Mar 28, 2019 at 05:39:26PM -0700, John Hubbard wrote: > > > >>> On 3/28/19 2:21 PM, Jerome Glisse wrote: > > > >>>> On Thu, Mar 28, 2019 at 01:43:13PM -0700, John Hubbard wrote: > > > >>>>> On 3/28/19 12:11 PM, Jerome Glisse wrote: > > > >>>>>> On Thu, Mar 28, 2019 at 04:07:20AM -0700, Ira Weiny wrote: > > > >>>>>>> On Mon, Mar 25, 2019 at 10:40:02AM -0400, Jerome Glisse wrote: > > > >>>>>>>> From: Jérôme Glisse <jglisse@redhat.com> > > > >>> [...] > > > >>>>>>>> @@ -67,14 +78,9 @@ struct hmm { > > > >>>>>>>> */ > > > >>>>>>>> static struct hmm *hmm_register(struct mm_struct *mm) > > > >>>>>>>> { > > > >>>>>>>> - struct hmm *hmm = READ_ONCE(mm->hmm); > > > >>>>>>>> + struct hmm *hmm = mm_get_hmm(mm); > > > >>>>>>> > > > >>>>>>> FWIW: having hmm_register == "hmm get" is a bit confusing... > > > >>>>>> > > > >>>>>> The thing is that you want only one hmm struct per process and thus > > > >>>>>> if there is already one and it is not being destroy then you want to > > > >>>>>> reuse it. > > > >>>>>> > > > >>>>>> Also this is all internal to HMM code and so it should not confuse > > > >>>>>> anyone. > > > >>>>>> > > > >>>>> > > > >>>>> Well, it has repeatedly come up, and I'd claim that it is quite > > > >>>>> counter-intuitive. So if there is an easy way to make this internal > > > >>>>> HMM code clearer or better named, I would really love that to happen. > > > >>>>> > > > >>>>> And we shouldn't ever dismiss feedback based on "this is just internal > > > >>>>> xxx subsystem code, no need for it to be as clear as other parts of the > > > >>>>> kernel", right? > > > >>>> > > > >>>> Yes but i have not seen any better alternative that present code. If > > > >>>> there is please submit patch. > > > >>>> > > > >>> > > > >>> Ira, do you have any patch you're working on, or a more detailed suggestion there? > > > >>> If not, then I might (later, as it's not urgent) propose a small cleanup patch > > > >>> I had in mind for the hmm_register code. But I don't want to duplicate effort > > > >>> if you're already thinking about it. > > > >> > > > >> No I don't have anything. > > > >> > > > >> I was just really digging into these this time around and I was about to > > > >> comment on the lack of "get's" for some "puts" when I realized that > > > >> "hmm_register" _was_ the get... > > > >> > > > >> :-( > > > >> > > > > > > > > The get is mm_get_hmm() were you get a reference on HMM from a mm struct. > > > > John in previous posting complained about me naming that function hmm_get() > > > > and thus in this version i renamed it to mm_get_hmm() as we are getting > > > > a reference on hmm from a mm struct. > > > > > > Well, that's not what I recommended, though. The actual conversation went like > > > this [1]: > > > > > > --------------------------------------------------------------- > > > >> So for this, hmm_get() really ought to be symmetric with > > > >> hmm_put(), by taking a struct hmm*. And the null check is > > > >> not helping here, so let's just go with this smaller version: > > > >> > > > >> static inline struct hmm *hmm_get(struct hmm *hmm) > > > >> { > > > >> if (kref_get_unless_zero(&hmm->kref)) > > > >> return hmm; > > > >> > > > >> return NULL; > > > >> } > > > >> > > > >> ...and change the few callers accordingly. > > > >> > > > > > > > > What about renaning hmm_get() to mm_get_hmm() instead ? > > > > > > > > > > For a get/put pair of functions, it would be ideal to pass > > > the same argument type to each. It looks like we are passing > > > around hmm*, and hmm retains a reference count on hmm->mm, > > > so I think you have a choice of using either mm* or hmm* as > > > the argument. I'm not sure that one is better than the other > > > here, as the lifetimes appear to be linked pretty tightly. > > > > > > Whichever one is used, I think it would be best to use it > > > in both the _get() and _put() calls. > > > --------------------------------------------------------------- > > > > > > Your response was to change the name to mm_get_hmm(), but that's not > > > what I recommended. > > > > Because i can not do that, hmm_put() can _only_ take hmm struct as > > input while hmm_get() can _only_ get mm struct as input. > > > > hmm_put() can only take hmm because the hmm we are un-referencing > > might no longer be associated with any mm struct and thus i do not > > have a mm struct to use. > > > > hmm_get() can only get mm as input as we need to be careful when > > accessing the hmm field within the mm struct and thus it is better > > to have that code within a function than open coded and duplicated > > all over the place. > > The input value is not the problem. The problem is in the naming. > > obj = get_obj( various parameters ); > put_obj(obj); > > > The problem is that the function is named hmm_register() either "gets" a > reference to _or_ creates and gets a reference to the hmm object. > > What John is probably ready to submit is something like. > > struct hmm *get_create_hmm(struct mm *mm); > void put_hmm(struct hmm *hmm); > > > So when you are reading the code you see... > > foo(...) { > struct hmm *hmm = get_create_hmm(mm); > > if (!hmm) > error... > > do stuff... > > put_hmm(hmm); > } > > Here I can see a very clear get/put pair. The name also shows that the hmm is > created if need be as well as getting a reference. > You only need to create HMM when you either register a mirror or register a range. So they two pattern: average_foo() { struct hmm *hmm = mm_get_hmm(mm); ... hmm_put(hmm); } register_foo() { struct hmm *hmm = hmm_register(mm); ... return 0; error: ... hmm_put(hmm); } Cheers, Jérôme
On 3/28/19 7:25 PM, Jerome Glisse wrote: [...] >> The input value is not the problem. The problem is in the naming. >> >> obj = get_obj( various parameters ); >> put_obj(obj); >> >> >> The problem is that the function is named hmm_register() either "gets" a >> reference to _or_ creates and gets a reference to the hmm object. >> >> What John is probably ready to submit is something like. >> >> struct hmm *get_create_hmm(struct mm *mm); >> void put_hmm(struct hmm *hmm); >> >> >> So when you are reading the code you see... >> >> foo(...) { >> struct hmm *hmm = get_create_hmm(mm); >> >> if (!hmm) >> error... >> >> do stuff... >> >> put_hmm(hmm); >> } >> >> Here I can see a very clear get/put pair. The name also shows that the hmm is >> created if need be as well as getting a reference. >> > > You only need to create HMM when you either register a mirror or > register a range. So they two pattern: > > average_foo() { > struct hmm *hmm = mm_get_hmm(mm); > ... > hmm_put(hmm); > } > > register_foo() { > struct hmm *hmm = hmm_register(mm); > ... > return 0; > error: > ... > hmm_put(hmm); > } > 1. Looking at this fresh this morning, Ira's idea of just a single rename actually clarifies things a lot more than I expected. I think the following tiny patch would suffice here (I've updated documentation to match, and added a missing "@Return:" line too): iff --git a/mm/hmm.c b/mm/hmm.c index fd143251b157..37b1c5803f1e 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -50,14 +50,17 @@ static inline struct hmm *mm_get_hmm(struct mm_struct *mm) } /* - * hmm_register - register HMM against an mm (HMM internal) + * hmm_get_create - returns an HMM object, either by referencing the existing + * (per-process) object, or by creating a new one. * - * @mm: mm struct to attach to + * @mm: the mm_struct to attach to + * @Return: a pointer to the HMM object, or NULL upon failure. This pointer must + * be released, when done, via hmm_put(). * - * This is not intended to be used directly by device drivers. It allocates an - * HMM struct if mm does not have one, and initializes it. + * This is an internal HMM function, and is not intended to be used directly by + * device drivers. */ -static struct hmm *hmm_register(struct mm_struct *mm) +static struct hmm *hmm_get_create(struct mm_struct *mm) { struct hmm *hmm = mm_get_hmm(mm); bool cleanup = false; @@ -288,7 +291,7 @@ int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm) if (!mm || !mirror || !mirror->ops) return -EINVAL; - mirror->hmm = hmm_register(mm); + mirror->hmm = hmm_get_create(mm); if (!mirror->hmm) return -ENOMEM; @@ -915,7 +918,7 @@ int hmm_range_register(struct hmm_range *range, range->start = start; range->end = end; - range->hmm = hmm_register(mm); + range->hmm = hmm_get_create(mm); if (!range->hmm) return -EFAULT; 2. A not directly related point: did you see my minor comment on patch 0001? I think it might have been missed in all the threads yesterday. thanks,
diff --git a/include/linux/hmm.h b/include/linux/hmm.h index ad50b7b4f141..716fc61fa6d4 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -131,6 +131,7 @@ enum hmm_pfn_value_e { /* * struct hmm_range - track invalidation lock on virtual address range * + * @hmm: the core HMM structure this range is active against * @vma: the vm area struct for the range * @list: all range lock are on a list * @start: range virtual start address (inclusive) @@ -142,6 +143,7 @@ enum hmm_pfn_value_e { * @valid: pfns array did not change since it has been fill by an HMM function */ struct hmm_range { + struct hmm *hmm; struct vm_area_struct *vma; struct list_head list; unsigned long start; diff --git a/mm/hmm.c b/mm/hmm.c index fe1cd87e49ac..306e57f7cded 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -50,6 +50,7 @@ static const struct mmu_notifier_ops hmm_mmu_notifier_ops; */ struct hmm { struct mm_struct *mm; + struct kref kref; spinlock_t lock; struct list_head ranges; struct list_head mirrors; @@ -57,6 +58,16 @@ struct hmm { struct rw_semaphore mirrors_sem; }; +static inline struct hmm *mm_get_hmm(struct mm_struct *mm) +{ + struct hmm *hmm = READ_ONCE(mm->hmm); + + if (hmm && kref_get_unless_zero(&hmm->kref)) + return hmm; + + return NULL; +} + /* * hmm_register - register HMM against an mm (HMM internal) * @@ -67,14 +78,9 @@ struct hmm { */ static struct hmm *hmm_register(struct mm_struct *mm) { - struct hmm *hmm = READ_ONCE(mm->hmm); + struct hmm *hmm = mm_get_hmm(mm); bool cleanup = false; - /* - * The hmm struct can only be freed once the mm_struct goes away, - * hence we should always have pre-allocated an new hmm struct - * above. - */ if (hmm) return hmm; @@ -86,6 +92,7 @@ static struct hmm *hmm_register(struct mm_struct *mm) hmm->mmu_notifier.ops = NULL; INIT_LIST_HEAD(&hmm->ranges); spin_lock_init(&hmm->lock); + kref_init(&hmm->kref); hmm->mm = mm; spin_lock(&mm->page_table_lock); @@ -106,7 +113,7 @@ static struct hmm *hmm_register(struct mm_struct *mm) if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) goto error_mm; - return mm->hmm; + return hmm; error_mm: spin_lock(&mm->page_table_lock); @@ -118,9 +125,41 @@ static struct hmm *hmm_register(struct mm_struct *mm) return NULL; } +static void hmm_free(struct kref *kref) +{ + struct hmm *hmm = container_of(kref, struct hmm, kref); + struct mm_struct *mm = hmm->mm; + + mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm); + + spin_lock(&mm->page_table_lock); + if (mm->hmm == hmm) + mm->hmm = NULL; + spin_unlock(&mm->page_table_lock); + + kfree(hmm); +} + +static inline void hmm_put(struct hmm *hmm) +{ + kref_put(&hmm->kref, hmm_free); +} + void hmm_mm_destroy(struct mm_struct *mm) { - kfree(mm->hmm); + struct hmm *hmm; + + spin_lock(&mm->page_table_lock); + hmm = mm_get_hmm(mm); + mm->hmm = NULL; + if (hmm) { + hmm->mm = NULL; + spin_unlock(&mm->page_table_lock); + hmm_put(hmm); + return; + } + + spin_unlock(&mm->page_table_lock); } static int hmm_invalidate_range(struct hmm *hmm, bool device, @@ -165,7 +204,7 @@ static int hmm_invalidate_range(struct hmm *hmm, bool device, static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct hmm_mirror *mirror; - struct hmm *hmm = mm->hmm; + struct hmm *hmm = mm_get_hmm(mm); down_write(&hmm->mirrors_sem); mirror = list_first_entry_or_null(&hmm->mirrors, struct hmm_mirror, @@ -186,13 +225,16 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) struct hmm_mirror, list); } up_write(&hmm->mirrors_sem); + + hmm_put(hmm); } static int hmm_invalidate_range_start(struct mmu_notifier *mn, const struct mmu_notifier_range *range) { + struct hmm *hmm = mm_get_hmm(range->mm); struct hmm_update update; - struct hmm *hmm = range->mm->hmm; + int ret; VM_BUG_ON(!hmm); @@ -200,14 +242,16 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn, update.end = range->end; update.event = HMM_UPDATE_INVALIDATE; update.blockable = range->blockable; - return hmm_invalidate_range(hmm, true, &update); + ret = hmm_invalidate_range(hmm, true, &update); + hmm_put(hmm); + return ret; } static void hmm_invalidate_range_end(struct mmu_notifier *mn, const struct mmu_notifier_range *range) { + struct hmm *hmm = mm_get_hmm(range->mm); struct hmm_update update; - struct hmm *hmm = range->mm->hmm; VM_BUG_ON(!hmm); @@ -216,6 +260,7 @@ static void hmm_invalidate_range_end(struct mmu_notifier *mn, update.event = HMM_UPDATE_INVALIDATE; update.blockable = true; hmm_invalidate_range(hmm, false, &update); + hmm_put(hmm); } static const struct mmu_notifier_ops hmm_mmu_notifier_ops = { @@ -241,24 +286,13 @@ int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm) if (!mm || !mirror || !mirror->ops) return -EINVAL; -again: mirror->hmm = hmm_register(mm); if (!mirror->hmm) return -ENOMEM; down_write(&mirror->hmm->mirrors_sem); - if (mirror->hmm->mm == NULL) { - /* - * A racing hmm_mirror_unregister() is about to destroy the hmm - * struct. Try again to allocate a new one. - */ - up_write(&mirror->hmm->mirrors_sem); - mirror->hmm = NULL; - goto again; - } else { - list_add(&mirror->list, &mirror->hmm->mirrors); - up_write(&mirror->hmm->mirrors_sem); - } + list_add(&mirror->list, &mirror->hmm->mirrors); + up_write(&mirror->hmm->mirrors_sem); return 0; } @@ -273,33 +307,18 @@ EXPORT_SYMBOL(hmm_mirror_register); */ void hmm_mirror_unregister(struct hmm_mirror *mirror) { - bool should_unregister = false; - struct mm_struct *mm; - struct hmm *hmm; + struct hmm *hmm = READ_ONCE(mirror->hmm); - if (mirror->hmm == NULL) + if (hmm == NULL) return; - hmm = mirror->hmm; down_write(&hmm->mirrors_sem); list_del_init(&mirror->list); - should_unregister = list_empty(&hmm->mirrors); + /* To protect us against double unregister ... */ mirror->hmm = NULL; - mm = hmm->mm; - hmm->mm = NULL; up_write(&hmm->mirrors_sem); - if (!should_unregister || mm == NULL) - return; - - mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm); - - spin_lock(&mm->page_table_lock); - if (mm->hmm == hmm) - mm->hmm = NULL; - spin_unlock(&mm->page_table_lock); - - kfree(hmm); + hmm_put(hmm); } EXPORT_SYMBOL(hmm_mirror_unregister); @@ -708,6 +727,8 @@ int hmm_vma_get_pfns(struct hmm_range *range) struct mm_walk mm_walk; struct hmm *hmm; + range->hmm = NULL; + /* Sanity check, this really should not happen ! */ if (range->start < vma->vm_start || range->start >= vma->vm_end) return -EINVAL; @@ -717,14 +738,18 @@ int hmm_vma_get_pfns(struct hmm_range *range) hmm = hmm_register(vma->vm_mm); if (!hmm) return -ENOMEM; - /* Caller must have registered a mirror, via hmm_mirror_register() ! */ - if (!hmm->mmu_notifier.ops) + + /* Check if hmm_mm_destroy() was call. */ + if (hmm->mm == NULL) { + hmm_put(hmm); return -EINVAL; + } /* FIXME support hugetlb fs */ if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) || vma_is_dax(vma)) { hmm_pfns_special(range); + hmm_put(hmm); return -EINVAL; } @@ -736,6 +761,7 @@ int hmm_vma_get_pfns(struct hmm_range *range) * operations such has atomic access would not work. */ hmm_pfns_clear(range, range->pfns, range->start, range->end); + hmm_put(hmm); return -EPERM; } @@ -758,6 +784,12 @@ int hmm_vma_get_pfns(struct hmm_range *range) mm_walk.pte_hole = hmm_vma_walk_hole; walk_page_range(range->start, range->end, &mm_walk); + /* + * Transfer hmm reference to the range struct it will be drop inside + * the hmm_vma_range_done() function (which _must_ be call if this + * function return 0). + */ + range->hmm = hmm; return 0; } EXPORT_SYMBOL(hmm_vma_get_pfns); @@ -802,25 +834,27 @@ EXPORT_SYMBOL(hmm_vma_get_pfns); */ bool hmm_vma_range_done(struct hmm_range *range) { - unsigned long npages = (range->end - range->start) >> PAGE_SHIFT; - struct hmm *hmm; + bool ret = false; - if (range->end <= range->start) { + /* Sanity check this really should not happen. */ + if (range->hmm == NULL || range->end <= range->start) { BUG(); return false; } - hmm = hmm_register(range->vma->vm_mm); - if (!hmm) { - memset(range->pfns, 0, sizeof(*range->pfns) * npages); - return false; - } - - spin_lock(&hmm->lock); + spin_lock(&range->hmm->lock); list_del_rcu(&range->list); - spin_unlock(&hmm->lock); + ret = range->valid; + spin_unlock(&range->hmm->lock); - return range->valid; + /* Is the mm still alive ? */ + if (range->hmm->mm == NULL) + ret = false; + + /* Drop reference taken by hmm_vma_fault() or hmm_vma_get_pfns() */ + hmm_put(range->hmm); + range->hmm = NULL; + return ret; } EXPORT_SYMBOL(hmm_vma_range_done); @@ -880,6 +914,8 @@ int hmm_vma_fault(struct hmm_range *range, bool block) struct hmm *hmm; int ret; + range->hmm = NULL; + /* Sanity check, this really should not happen ! */ if (range->start < vma->vm_start || range->start >= vma->vm_end) return -EINVAL; @@ -891,14 +927,18 @@ int hmm_vma_fault(struct hmm_range *range, bool block) hmm_pfns_clear(range, range->pfns, range->start, range->end); return -ENOMEM; } - /* Caller must have registered a mirror using hmm_mirror_register() */ - if (!hmm->mmu_notifier.ops) + + /* Check if hmm_mm_destroy() was call. */ + if (hmm->mm == NULL) { + hmm_put(hmm); return -EINVAL; + } /* FIXME support hugetlb fs */ if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) || vma_is_dax(vma)) { hmm_pfns_special(range); + hmm_put(hmm); return -EINVAL; } @@ -910,6 +950,7 @@ int hmm_vma_fault(struct hmm_range *range, bool block) * operations such has atomic access would not work. */ hmm_pfns_clear(range, range->pfns, range->start, range->end); + hmm_put(hmm); return -EPERM; } @@ -945,7 +986,16 @@ int hmm_vma_fault(struct hmm_range *range, bool block) hmm_pfns_clear(range, &range->pfns[i], hmm_vma_walk.last, range->end); hmm_vma_range_done(range); + hmm_put(hmm); + } else { + /* + * Transfer hmm reference to the range struct it will be drop + * inside the hmm_vma_range_done() function (which _must_ be + * call if this function return 0). + */ + range->hmm = hmm; } + return ret; } EXPORT_SYMBOL(hmm_vma_fault);