Message ID | 20191216222537.491123-5-jhubbard@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | mm/gup: track dma-pinned pages: FOLL_PIN | expand |
On Mon, Dec 16, 2019 at 02:25:16PM -0800, John Hubbard wrote: > An upcoming patch changes and complicates the refcounting and > especially the "put page" aspects of it. In order to keep > everything clean, refactor the devmap page release routines: > > * Rename put_devmap_managed_page() to page_is_devmap_managed(), > and limit the functionality to "read only": return a bool, > with no side effects. > > * Add a new routine, put_devmap_managed_page(), to handle checking > what kind of page it is, and what kind of refcount handling it > requires. > > * Rename __put_devmap_managed_page() to free_devmap_managed_page(), > and limit the functionality to unconditionally freeing a devmap > page. What the reason to separate put_devmap_managed_page() from free_devmap_managed_page() if free_devmap_managed_page() has exacly one caller? Is it preparation for the next patches? > This is originally based on a separate patch by Ira Weiny, which > applied to an early version of the put_user_page() experiments. > Since then, Jérôme Glisse suggested the refactoring described above. 
> > Cc: Christoph Hellwig <hch@lst.de> > Suggested-by: Jérôme Glisse <jglisse@redhat.com> > Reviewed-by: Dan Williams <dan.j.williams@intel.com> > Reviewed-by: Jan Kara <jack@suse.cz> > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > Signed-off-by: John Hubbard <jhubbard@nvidia.com> > --- > include/linux/mm.h | 17 +++++++++++++---- > mm/memremap.c | 16 ++-------------- > mm/swap.c | 24 ++++++++++++++++++++++++ > 3 files changed, 39 insertions(+), 18 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index c97ea3b694e6..77a4df06c8a7 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -952,9 +952,10 @@ static inline bool is_zone_device_page(const struct page *page) > #endif > > #ifdef CONFIG_DEV_PAGEMAP_OPS > -void __put_devmap_managed_page(struct page *page); > +void free_devmap_managed_page(struct page *page); > DECLARE_STATIC_KEY_FALSE(devmap_managed_key); > -static inline bool put_devmap_managed_page(struct page *page) > + > +static inline bool page_is_devmap_managed(struct page *page) > { > if (!static_branch_unlikely(&devmap_managed_key)) > return false; > @@ -963,7 +964,6 @@ static inline bool put_devmap_managed_page(struct page *page) > switch (page->pgmap->type) { > case MEMORY_DEVICE_PRIVATE: > case MEMORY_DEVICE_FS_DAX: > - __put_devmap_managed_page(page); > return true; > default: > break; > @@ -971,7 +971,14 @@ static inline bool put_devmap_managed_page(struct page *page) > return false; > } > > +bool put_devmap_managed_page(struct page *page); > + > #else /* CONFIG_DEV_PAGEMAP_OPS */ > +static inline bool page_is_devmap_managed(struct page *page) > +{ > + return false; > +} > + > static inline bool put_devmap_managed_page(struct page *page) > { > return false; > @@ -1028,8 +1035,10 @@ static inline void put_page(struct page *page) > * need to inform the device driver through callback. See > * include/linux/memremap.h and HMM for details. 
> */ > - if (put_devmap_managed_page(page)) > + if (page_is_devmap_managed(page)) { > + put_devmap_managed_page(page); put_devmap_managed_page() has yet another page_is_devmap_managed() check inside. It looks strange. > return; > + } > > if (put_page_testzero(page)) > __put_page(page); > diff --git a/mm/memremap.c b/mm/memremap.c > index e899fa876a62..2ba773859031 100644 > --- a/mm/memremap.c > +++ b/mm/memremap.c > @@ -411,20 +411,8 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, > EXPORT_SYMBOL_GPL(get_dev_pagemap); > > #ifdef CONFIG_DEV_PAGEMAP_OPS > -void __put_devmap_managed_page(struct page *page) > +void free_devmap_managed_page(struct page *page) > { > - int count = page_ref_dec_return(page); > - > - /* still busy */ > - if (count > 1) > - return; > - > - /* only triggered by the dev_pagemap shutdown path */ > - if (count == 0) { > - __put_page(page); > - return; > - } > - > /* notify page idle for dax */ > if (!is_device_private_page(page)) { > wake_up_var(&page->_refcount); > @@ -461,5 +449,5 @@ void __put_devmap_managed_page(struct page *page) > page->mapping = NULL; > page->pgmap->ops->page_free(page); > } > -EXPORT_SYMBOL(__put_devmap_managed_page); > +EXPORT_SYMBOL(free_devmap_managed_page); > #endif /* CONFIG_DEV_PAGEMAP_OPS */ > diff --git a/mm/swap.c b/mm/swap.c > index 5341ae93861f..49f7c2eea0ba 100644 > --- a/mm/swap.c > +++ b/mm/swap.c > @@ -1102,3 +1102,27 @@ void __init swap_setup(void) > * _really_ don't want to cluster much more > */ > } > + > +#ifdef CONFIG_DEV_PAGEMAP_OPS > +bool put_devmap_managed_page(struct page *page) > +{ > + bool is_devmap = page_is_devmap_managed(page); > + > + if (is_devmap) { Reversing the condition would save you an indentation level. > + int count = page_ref_dec_return(page); > + > + /* > + * devmap page refcounts are 1-based, rather than 0-based: if > + * refcount is 1, then the page is free and the refcount is > + * stable because nobody holds a reference on the page. 
> + */ > + if (count == 1) > + free_devmap_managed_page(page); > + else if (!count) > + __put_page(page); > + } > + > + return is_devmap; > +} > +EXPORT_SYMBOL(put_devmap_managed_page); > +#endif > -- > 2.24.1 > >
On 12/18/19 8:04 AM, Kirill A. Shutemov wrote: > On Mon, Dec 16, 2019 at 02:25:16PM -0800, John Hubbard wrote: >> An upcoming patch changes and complicates the refcounting and >> especially the "put page" aspects of it. In order to keep >> everything clean, refactor the devmap page release routines: >> >> * Rename put_devmap_managed_page() to page_is_devmap_managed(), >> and limit the functionality to "read only": return a bool, >> with no side effects. >> >> * Add a new routine, put_devmap_managed_page(), to handle checking >> what kind of page it is, and what kind of refcount handling it >> requires. >> >> * Rename __put_devmap_managed_page() to free_devmap_managed_page(), >> and limit the functionality to unconditionally freeing a devmap >> page. > > What the reason to separate put_devmap_managed_page() from > free_devmap_managed_page() if free_devmap_managed_page() has exacly one > caller? Is it preparation for the next patches? Yes. A later patch, #23, adds another caller: __unpin_devmap_managed_user_page(). ... >> @@ -971,7 +971,14 @@ static inline bool put_devmap_managed_page(struct page *page) >> return false; >> } >> >> +bool put_devmap_managed_page(struct page *page); >> + >> #else /* CONFIG_DEV_PAGEMAP_OPS */ >> +static inline bool page_is_devmap_managed(struct page *page) >> +{ >> + return false; >> +} >> + >> static inline bool put_devmap_managed_page(struct page *page) >> { >> return false; >> @@ -1028,8 +1035,10 @@ static inline void put_page(struct page *page) >> * need to inform the device driver through callback. See >> * include/linux/memremap.h and HMM for details. >> */ >> - if (put_devmap_managed_page(page)) >> + if (page_is_devmap_managed(page)) { >> + put_devmap_managed_page(page); > > put_devmap_managed_page() has yet another page_is_devmap_managed() check > inside. It looks strange. > Good point, it's an extra unnecessary check. 
So to clean it up, I'll note that the "if" check is required here in put_page(), in order to stay out of non-inlined function calls in the hot path (put_page()). So I'll do the following: * Leave the above code as it is here * Simplify put_devmap_managed_page(), it was trying to do two separate things, and those two things have different requirements. So change it to a void function, with a WARN_ON_ONCE to assert that page_is_devmap_managed() is true, * And change the other caller (release_pages()) to do that check. ... >> @@ -1102,3 +1102,27 @@ void __init swap_setup(void) >> * _really_ don't want to cluster much more >> */ >> } >> + >> +#ifdef CONFIG_DEV_PAGEMAP_OPS >> +bool put_devmap_managed_page(struct page *page) >> +{ >> + bool is_devmap = page_is_devmap_managed(page); >> + >> + if (is_devmap) { > > Reversing the condition would save you an indentation level. Yes. Done. I'll also git-reply with an updated patch so you can see what it looks like. thanks,
On Mon, Dec 16, 2019 at 2:26 PM John Hubbard <jhubbard@nvidia.com> wrote: > > An upcoming patch changes and complicates the refcounting and > especially the "put page" aspects of it. In order to keep > everything clean, refactor the devmap page release routines: > > * Rename put_devmap_managed_page() to page_is_devmap_managed(), > and limit the functionality to "read only": return a bool, > with no side effects. > > * Add a new routine, put_devmap_managed_page(), to handle checking > what kind of page it is, and what kind of refcount handling it > requires. > > * Rename __put_devmap_managed_page() to free_devmap_managed_page(), > and limit the functionality to unconditionally freeing a devmap > page. > > This is originally based on a separate patch by Ira Weiny, which > applied to an early version of the put_user_page() experiments. > Since then, Jérôme Glisse suggested the refactoring described above. > > Cc: Christoph Hellwig <hch@lst.de> > Suggested-by: Jérôme Glisse <jglisse@redhat.com> > Reviewed-by: Dan Williams <dan.j.williams@intel.com> > Reviewed-by: Jan Kara <jack@suse.cz> > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > Signed-off-by: John Hubbard <jhubbard@nvidia.com> > --- > include/linux/mm.h | 17 +++++++++++++---- > mm/memremap.c | 16 ++-------------- > mm/swap.c | 24 ++++++++++++++++++++++++ > 3 files changed, 39 insertions(+), 18 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index c97ea3b694e6..77a4df06c8a7 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -952,9 +952,10 @@ static inline bool is_zone_device_page(const struct page *page) > #endif > > #ifdef CONFIG_DEV_PAGEMAP_OPS > -void __put_devmap_managed_page(struct page *page); > +void free_devmap_managed_page(struct page *page); > DECLARE_STATIC_KEY_FALSE(devmap_managed_key); > -static inline bool put_devmap_managed_page(struct page *page) > + > +static inline bool page_is_devmap_managed(struct page *page) > { > if 
(!static_branch_unlikely(&devmap_managed_key)) > return false; > @@ -963,7 +964,6 @@ static inline bool put_devmap_managed_page(struct page *page) > switch (page->pgmap->type) { > case MEMORY_DEVICE_PRIVATE: > case MEMORY_DEVICE_FS_DAX: > - __put_devmap_managed_page(page); > return true; > default: > break; > @@ -971,7 +971,14 @@ static inline bool put_devmap_managed_page(struct page *page) > return false; > } > > +bool put_devmap_managed_page(struct page *page); > + > #else /* CONFIG_DEV_PAGEMAP_OPS */ > +static inline bool page_is_devmap_managed(struct page *page) > +{ > + return false; > +} > + > static inline bool put_devmap_managed_page(struct page *page) > { > return false; > @@ -1028,8 +1035,10 @@ static inline void put_page(struct page *page) > * need to inform the device driver through callback. See > * include/linux/memremap.h and HMM for details. > */ > - if (put_devmap_managed_page(page)) > + if (page_is_devmap_managed(page)) { > + put_devmap_managed_page(page); > return; > + } > > if (put_page_testzero(page)) > __put_page(page); > diff --git a/mm/memremap.c b/mm/memremap.c > index e899fa876a62..2ba773859031 100644 > --- a/mm/memremap.c > +++ b/mm/memremap.c > @@ -411,20 +411,8 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, > EXPORT_SYMBOL_GPL(get_dev_pagemap); > > #ifdef CONFIG_DEV_PAGEMAP_OPS > -void __put_devmap_managed_page(struct page *page) > +void free_devmap_managed_page(struct page *page) > { > - int count = page_ref_dec_return(page); > - > - /* still busy */ > - if (count > 1) > - return; > - > - /* only triggered by the dev_pagemap shutdown path */ > - if (count == 0) { > - __put_page(page); > - return; > - } > - > /* notify page idle for dax */ > if (!is_device_private_page(page)) { > wake_up_var(&page->_refcount); > @@ -461,5 +449,5 @@ void __put_devmap_managed_page(struct page *page) > page->mapping = NULL; > page->pgmap->ops->page_free(page); > } > -EXPORT_SYMBOL(__put_devmap_managed_page); > 
+EXPORT_SYMBOL(free_devmap_managed_page); This patch does not have a module consumer for free_devmap_managed_page(), so the export should move to the patch that needs the new export. Also the only reason that put_devmap_managed_page() is EXPORT_SYMBOL instead of EXPORT_SYMBOL_GPL is that there was no practical way to hide the devmap details from every module in the kernel that did put_page(). I would expect free_devmap_managed_page() to be EXPORT_SYMBOL_GPL if it is not inlined into an existing exported static inline api.
On 12/18/19 9:27 PM, Dan Williams wrote: ... >> @@ -461,5 +449,5 @@ void __put_devmap_managed_page(struct page *page) >> page->mapping = NULL; >> page->pgmap->ops->page_free(page); >> } >> -EXPORT_SYMBOL(__put_devmap_managed_page); >> +EXPORT_SYMBOL(free_devmap_managed_page); > > This patch does not have a module consumer for > free_devmap_managed_page(), so the export should move to the patch > that needs the new export. Hi Dan, OK, I know that's a policy--although it seems quite pointless here given that this is definitely going to need an EXPORT. At the moment, the series doesn't use it in any module at all, so I'll just delete the EXPORT for now. > > Also the only reason that put_devmap_managed_page() is EXPORT_SYMBOL > instead of EXPORT_SYMBOL_GPL is that there was no practical way to > hide the devmap details from evey module in the kernel that did > put_page(). I would expect free_devmap_managed_page() to > EXPORT_SYMBOL_GPL if it is not inlined into an existing exported > static inline api. > Sure, I'll change it to EXPORT_SYMBOL_GPL when the time comes. We do have to be careful that we don't shut out normal put_page() types of callers, but...glancing through the current callers, that doesn't look to be a problem. Good. So it should be OK to do EXPORT_SYMBOL_GPL here. Are you *sure* you don't want to just pre-emptively EXPORT now, and save looking at it again? thanks,
On Wed, Dec 18, 2019 at 9:51 PM John Hubbard <jhubbard@nvidia.com> wrote: > > On 12/18/19 9:27 PM, Dan Williams wrote: > ... > >> @@ -461,5 +449,5 @@ void __put_devmap_managed_page(struct page *page) > >> page->mapping = NULL; > >> page->pgmap->ops->page_free(page); > >> } > >> -EXPORT_SYMBOL(__put_devmap_managed_page); > >> +EXPORT_SYMBOL(free_devmap_managed_page); > > > > This patch does not have a module consumer for > > free_devmap_managed_page(), so the export should move to the patch > > that needs the new export. > > Hi Dan, > > OK, I know that's a policy--although it seems quite pointless here given > that this is definitely going to need an EXPORT. > > At the moment, the series doesn't use it in any module at all, so I'll just > delete the EXPORT for now. > > > > > Also the only reason that put_devmap_managed_page() is EXPORT_SYMBOL > > instead of EXPORT_SYMBOL_GPL is that there was no practical way to > > hide the devmap details from evey module in the kernel that did > > put_page(). I would expect free_devmap_managed_page() to > > EXPORT_SYMBOL_GPL if it is not inlined into an existing exported > > static inline api. > > > > Sure, I'll change it to EXPORT_SYMBOL_GPL when the time comes. We do have > to be careful that we don't shut out normal put_page() types of callers, > but...glancing through the current callers, that doesn't look to be a problem. > Good. So it should be OK to do EXPORT_SYMBOL_GPL here. > > Are you *sure* you don't want to just pre-emptively EXPORT now, and save > looking at it again? I'm positive. There is enough history for "trust me the consumer is coming" turning out not to be true to justify the hassle in my mind. I do trust you, but things happen.
On 12/18/19 10:52 PM, Dan Williams wrote: > On Wed, Dec 18, 2019 at 9:51 PM John Hubbard <jhubbard@nvidia.com> wrote: >> >> On 12/18/19 9:27 PM, Dan Williams wrote: >> ... >>>> @@ -461,5 +449,5 @@ void __put_devmap_managed_page(struct page *page) >>>> page->mapping = NULL; >>>> page->pgmap->ops->page_free(page); >>>> } >>>> -EXPORT_SYMBOL(__put_devmap_managed_page); >>>> +EXPORT_SYMBOL(free_devmap_managed_page); >>> >>> This patch does not have a module consumer for >>> free_devmap_managed_page(), so the export should move to the patch >>> that needs the new export. >> >> Hi Dan, >> >> OK, I know that's a policy--although it seems quite pointless here given >> that this is definitely going to need an EXPORT. >> >> At the moment, the series doesn't use it in any module at all, so I'll just >> delete the EXPORT for now. >> >>> >>> Also the only reason that put_devmap_managed_page() is EXPORT_SYMBOL >>> instead of EXPORT_SYMBOL_GPL is that there was no practical way to >>> hide the devmap details from evey module in the kernel that did >>> put_page(). I would expect free_devmap_managed_page() to >>> EXPORT_SYMBOL_GPL if it is not inlined into an existing exported >>> static inline api. >>> >> >> Sure, I'll change it to EXPORT_SYMBOL_GPL when the time comes. We do have >> to be careful that we don't shut out normal put_page() types of callers, >> but...glancing through the current callers, that doesn't look to be a problem. >> Good. So it should be OK to do EXPORT_SYMBOL_GPL here. >> >> Are you *sure* you don't want to just pre-emptively EXPORT now, and save >> looking at it again? > > I'm positive. There is enough history for "trust me the consumer is > coming" turning out not to be true to justify the hassle in my mind. I > do trust you, but things happen. > OK, it's deleted locally. Thanks for looking at the patch. I'll post a v12 series that includes the change, once it looks like reviews are slowing down. thanks,
diff --git a/include/linux/mm.h b/include/linux/mm.h index c97ea3b694e6..77a4df06c8a7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -952,9 +952,10 @@ static inline bool is_zone_device_page(const struct page *page) #endif #ifdef CONFIG_DEV_PAGEMAP_OPS -void __put_devmap_managed_page(struct page *page); +void free_devmap_managed_page(struct page *page); DECLARE_STATIC_KEY_FALSE(devmap_managed_key); -static inline bool put_devmap_managed_page(struct page *page) + +static inline bool page_is_devmap_managed(struct page *page) { if (!static_branch_unlikely(&devmap_managed_key)) return false; @@ -963,7 +964,6 @@ static inline bool put_devmap_managed_page(struct page *page) switch (page->pgmap->type) { case MEMORY_DEVICE_PRIVATE: case MEMORY_DEVICE_FS_DAX: - __put_devmap_managed_page(page); return true; default: break; @@ -971,7 +971,14 @@ static inline bool put_devmap_managed_page(struct page *page) return false; } +bool put_devmap_managed_page(struct page *page); + #else /* CONFIG_DEV_PAGEMAP_OPS */ +static inline bool page_is_devmap_managed(struct page *page) +{ + return false; +} + static inline bool put_devmap_managed_page(struct page *page) { return false; @@ -1028,8 +1035,10 @@ static inline void put_page(struct page *page) * need to inform the device driver through callback. See * include/linux/memremap.h and HMM for details. 
*/ - if (put_devmap_managed_page(page)) + if (page_is_devmap_managed(page)) { + put_devmap_managed_page(page); return; + } if (put_page_testzero(page)) __put_page(page); diff --git a/mm/memremap.c b/mm/memremap.c index e899fa876a62..2ba773859031 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -411,20 +411,8 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, EXPORT_SYMBOL_GPL(get_dev_pagemap); #ifdef CONFIG_DEV_PAGEMAP_OPS -void __put_devmap_managed_page(struct page *page) +void free_devmap_managed_page(struct page *page) { - int count = page_ref_dec_return(page); - - /* still busy */ - if (count > 1) - return; - - /* only triggered by the dev_pagemap shutdown path */ - if (count == 0) { - __put_page(page); - return; - } - /* notify page idle for dax */ if (!is_device_private_page(page)) { wake_up_var(&page->_refcount); @@ -461,5 +449,5 @@ void __put_devmap_managed_page(struct page *page) page->mapping = NULL; page->pgmap->ops->page_free(page); } -EXPORT_SYMBOL(__put_devmap_managed_page); +EXPORT_SYMBOL(free_devmap_managed_page); #endif /* CONFIG_DEV_PAGEMAP_OPS */ diff --git a/mm/swap.c b/mm/swap.c index 5341ae93861f..49f7c2eea0ba 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -1102,3 +1102,27 @@ void __init swap_setup(void) * _really_ don't want to cluster much more */ } + +#ifdef CONFIG_DEV_PAGEMAP_OPS +bool put_devmap_managed_page(struct page *page) +{ + bool is_devmap = page_is_devmap_managed(page); + + if (is_devmap) { + int count = page_ref_dec_return(page); + + /* + * devmap page refcounts are 1-based, rather than 0-based: if + * refcount is 1, then the page is free and the refcount is + * stable because nobody holds a reference on the page. + */ + if (count == 1) + free_devmap_managed_page(page); + else if (!count) + __put_page(page); + } + + return is_devmap; +} +EXPORT_SYMBOL(put_devmap_managed_page); +#endif