Message ID | alpine.DEB.2.21.2002181828070.108053@chino.kir.corp.google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/2] mm, shmem: add thp fault alloc and fallback stats | expand |
On Tue, Feb 18, 2020 at 6:29 PM David Rientjes <rientjes@google.com> wrote: > > The thp_fault_alloc and thp_fault_fallback vmstats are incremented when a > hugepage is successfully or unsuccessfully allocated, respectively, during > a page fault for anonymous memory. > > Extend this to shmem as well. Note that care is taken to increment > thp_fault_alloc only when the fault succeeds; this is the same behavior as > anonymous thp. > > Signed-off-by: David Rientjes <rientjes@google.com> > --- > mm/shmem.c | 23 ++++++++++++++--------- > 1 file changed, 14 insertions(+), 9 deletions(-) > > diff --git a/mm/shmem.c b/mm/shmem.c > --- a/mm/shmem.c > +++ b/mm/shmem.c > @@ -1502,9 +1502,8 @@ static struct page *shmem_alloc_page(gfp_t gfp, > return page; > } > > -static struct page *shmem_alloc_and_acct_page(gfp_t gfp, > - struct inode *inode, > - pgoff_t index, bool huge) > +static struct page *shmem_alloc_and_acct_page(gfp_t gfp, struct inode *inode, > + pgoff_t index, bool fault, bool huge) > { > struct shmem_inode_info *info = SHMEM_I(inode); > struct page *page; > @@ -1518,9 +1517,11 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp, > if (!shmem_inode_acct_block(inode, nr)) > goto failed; > > - if (huge) > + if (huge) { > page = shmem_alloc_hugepage(gfp, info, index); > - else > + if (!page && fault) > + count_vm_event(THP_FAULT_FALLBACK); > + } else > page = shmem_alloc_page(gfp, info, index); > if (page) { > __SetPageLocked(page); > @@ -1832,11 +1833,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > } > > alloc_huge: > - page = shmem_alloc_and_acct_page(gfp, inode, index, true); > + page = shmem_alloc_and_acct_page(gfp, inode, index, vmf, true); > if (IS_ERR(page)) { > alloc_nohuge: > - page = shmem_alloc_and_acct_page(gfp, inode, > - index, false); > + page = shmem_alloc_and_acct_page(gfp, inode, index, vmf, false); > } > if (IS_ERR(page)) { > int retry = 5; > @@ -1871,8 +1871,11 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > > error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg, > PageTransHuge(page)); > - if (error) > + if (error) { > + if (vmf && PageTransHuge(page)) > + count_vm_event(THP_FAULT_FALLBACK); > goto unacct; > + } > error = shmem_add_to_page_cache(page, mapping, hindex, > NULL, gfp & GFP_RECLAIM_MASK); > if (error) { > @@ -1883,6 +1886,8 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > mem_cgroup_commit_charge(page, memcg, false, > PageTransHuge(page)); > lru_cache_add_anon(page); > + if (vmf && PageTransHuge(page)) > + count_vm_event(THP_FAULT_ALLOC); I think shmem THP alloc is accounted to THP_FILE_ALLOC. And it has been accounted by shmem_add_to_page_cache(). So, it sounds like a double count. > > spin_lock_irq(&info->lock); > info->alloced += compound_nr(page); >
On Tue, 18 Feb 2020, Yang Shi wrote: > > diff --git a/mm/shmem.c b/mm/shmem.c > > --- a/mm/shmem.c > > +++ b/mm/shmem.c > > @@ -1502,9 +1502,8 @@ static struct page *shmem_alloc_page(gfp_t gfp, > > return page; > > } > > > > -static struct page *shmem_alloc_and_acct_page(gfp_t gfp, > > - struct inode *inode, > > - pgoff_t index, bool huge) > > +static struct page *shmem_alloc_and_acct_page(gfp_t gfp, struct inode *inode, > > + pgoff_t index, bool fault, bool huge) > > { > > struct shmem_inode_info *info = SHMEM_I(inode); > > struct page *page; > > @@ -1518,9 +1517,11 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp, > > if (!shmem_inode_acct_block(inode, nr)) > > goto failed; > > > > - if (huge) > > + if (huge) { > > page = shmem_alloc_hugepage(gfp, info, index); > > - else > > + if (!page && fault) > > + count_vm_event(THP_FAULT_FALLBACK); > > + } else > > page = shmem_alloc_page(gfp, info, index); > > if (page) { > > __SetPageLocked(page); > > @@ -1832,11 +1833,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > > } > > > > alloc_huge: > > - page = shmem_alloc_and_acct_page(gfp, inode, index, true); > > + page = shmem_alloc_and_acct_page(gfp, inode, index, vmf, true); > > if (IS_ERR(page)) { > > alloc_nohuge: > > - page = shmem_alloc_and_acct_page(gfp, inode, > > - index, false); > > + page = shmem_alloc_and_acct_page(gfp, inode, index, vmf, false); > > } > > if (IS_ERR(page)) { > > int retry = 5; > > @@ -1871,8 +1871,11 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > > > > error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg, > > PageTransHuge(page)); > > - if (error) > > + if (error) { > > + if (vmf && PageTransHuge(page)) > > + count_vm_event(THP_FAULT_FALLBACK); > > goto unacct; > > + } > > error = shmem_add_to_page_cache(page, mapping, hindex, > > NULL, gfp & GFP_RECLAIM_MASK); > > if (error) { > > @@ -1883,6 +1886,8 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > > mem_cgroup_commit_charge(page, memcg, false, > > PageTransHuge(page)); > > lru_cache_add_anon(page); > > + if (vmf && PageTransHuge(page)) > > + count_vm_event(THP_FAULT_ALLOC); > > I think shmem THP alloc is accounted to THP_FILE_ALLOC. And it has > been accounted by shmem_add_to_page_cache(). So, it sounds like a > double count. > I think we can choose to either include file allocations into both thp_fault_alloc and thp_fault_fallback or we can exclude them from both of them. I don't think we can account for only one of them. > > > > spin_lock_irq(&info->lock); > > info->alloced += compound_nr(page); > > >
On Tue, Feb 18, 2020 at 7:44 PM David Rientjes <rientjes@google.com> wrote: > > On Tue, 18 Feb 2020, Yang Shi wrote: > > > > diff --git a/mm/shmem.c b/mm/shmem.c > > > --- a/mm/shmem.c > > > +++ b/mm/shmem.c > > > @@ -1502,9 +1502,8 @@ static struct page *shmem_alloc_page(gfp_t gfp, > > > return page; > > > } > > > > > > -static struct page *shmem_alloc_and_acct_page(gfp_t gfp, > > > - struct inode *inode, > > > - pgoff_t index, bool huge) > > > +static struct page *shmem_alloc_and_acct_page(gfp_t gfp, struct inode *inode, > > > + pgoff_t index, bool fault, bool huge) > > > { > > > struct shmem_inode_info *info = SHMEM_I(inode); > > > struct page *page; > > > @@ -1518,9 +1517,11 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp, > > > if (!shmem_inode_acct_block(inode, nr)) > > > goto failed; > > > > > > - if (huge) > > > + if (huge) { > > > page = shmem_alloc_hugepage(gfp, info, index); > > > - else > > > + if (!page && fault) > > > + count_vm_event(THP_FAULT_FALLBACK); > > > + } else > > > page = shmem_alloc_page(gfp, info, index); > > > if (page) { > > > __SetPageLocked(page); > > > @@ -1832,11 +1833,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > > > } > > > > > > alloc_huge: > > > - page = shmem_alloc_and_acct_page(gfp, inode, index, true); > > > + page = shmem_alloc_and_acct_page(gfp, inode, index, vmf, true); > > > if (IS_ERR(page)) { > > > alloc_nohuge: > > > - page = shmem_alloc_and_acct_page(gfp, inode, > > > - index, false); > > > + page = shmem_alloc_and_acct_page(gfp, inode, index, vmf, false); > > > } > > > if (IS_ERR(page)) { > > > int retry = 5; > > > @@ -1871,8 +1871,11 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > > > > > > error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg, > > > PageTransHuge(page)); > > > - if (error) > > > + if (error) { > > > + if (vmf && PageTransHuge(page)) > > > + count_vm_event(THP_FAULT_FALLBACK); > > > goto unacct; > > > + } > > > error = shmem_add_to_page_cache(page, mapping, hindex, > > > NULL, gfp & GFP_RECLAIM_MASK); > > > if (error) { > > > @@ -1883,6 +1886,8 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > > > mem_cgroup_commit_charge(page, memcg, false, > > > PageTransHuge(page)); > > > lru_cache_add_anon(page); > > > + if (vmf && PageTransHuge(page)) > > > + count_vm_event(THP_FAULT_ALLOC); > > > > I think shmem THP alloc is accounted to THP_FILE_ALLOC. And it has > > been accounted by shmem_add_to_page_cache(). So, it sounds like a > > double count. > > > > I think we can choose to either include file allocations into both > thp_fault_alloc and thp_fault_fallback or we can exclude them from both of > them. I don't think we can account for only one of them. How's about the 3rd option, adding THP_FILE_FALLBACK. According to the past discussion with Hugh and Kirill, basically shmem/file THP is treated differently and separately from anonymous THP, and they have separate enabling knobs (/sys/kernel/mm/transparent_hugepage/enabled just enables anonymous THP). Since we already have THP_FILE_ALLOC for shmem THP allocation, IMHO it makes more sense to have dedicated FALLBACK counter. And, this won't change the old behavior either. > > > > > > > spin_lock_irq(&info->lock); > > > info->alloced += compound_nr(page); > > > > >
On Wed, Feb 19, 2020 at 09:01:23AM -0800, Yang Shi wrote: > On Tue, Feb 18, 2020 at 7:44 PM David Rientjes <rientjes@google.com> wrote: > > > > On Tue, 18 Feb 2020, Yang Shi wrote: > > > > > > diff --git a/mm/shmem.c b/mm/shmem.c > > > > --- a/mm/shmem.c > > > > +++ b/mm/shmem.c > > > > @@ -1502,9 +1502,8 @@ static struct page *shmem_alloc_page(gfp_t gfp, > > > > return page; > > > > } > > > > > > > > -static struct page *shmem_alloc_and_acct_page(gfp_t gfp, > > > > - struct inode *inode, > > > > - pgoff_t index, bool huge) > > > > +static struct page *shmem_alloc_and_acct_page(gfp_t gfp, struct inode *inode, > > > > + pgoff_t index, bool fault, bool huge) > > > > { > > > > struct shmem_inode_info *info = SHMEM_I(inode); > > > > struct page *page; > > > > @@ -1518,9 +1517,11 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp, > > > > if (!shmem_inode_acct_block(inode, nr)) > > > > goto failed; > > > > > > > > - if (huge) > > > > + if (huge) { > > > > page = shmem_alloc_hugepage(gfp, info, index); > > > > - else > > > > + if (!page && fault) > > > > + count_vm_event(THP_FAULT_FALLBACK); > > > > + } else > > > > page = shmem_alloc_page(gfp, info, index); > > > > if (page) { > > > > __SetPageLocked(page); > > > > @@ -1832,11 +1833,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > > > > } > > > > > > > > alloc_huge: > > > > - page = shmem_alloc_and_acct_page(gfp, inode, index, true); > > > > + page = shmem_alloc_and_acct_page(gfp, inode, index, vmf, true); > > > > if (IS_ERR(page)) { > > > > alloc_nohuge: > > > > - page = shmem_alloc_and_acct_page(gfp, inode, > > > > - index, false); > > > > + page = shmem_alloc_and_acct_page(gfp, inode, index, vmf, false); > > > > } > > > > if (IS_ERR(page)) { > > > > int retry = 5; > > > > @@ -1871,8 +1871,11 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > > > > > > > > error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg, > > > > PageTransHuge(page)); > > > > - if (error) > > > > + if (error) { > > > > + if (vmf && PageTransHuge(page)) > > > > + count_vm_event(THP_FAULT_FALLBACK); > > > > goto unacct; > > > > + } > > > > error = shmem_add_to_page_cache(page, mapping, hindex, > > > > NULL, gfp & GFP_RECLAIM_MASK); > > > > if (error) { > > > > @@ -1883,6 +1886,8 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > > > > mem_cgroup_commit_charge(page, memcg, false, > > > > PageTransHuge(page)); > > > > lru_cache_add_anon(page); > > > > + if (vmf && PageTransHuge(page)) > > > > + count_vm_event(THP_FAULT_ALLOC); > > > > > > I think shmem THP alloc is accounted to THP_FILE_ALLOC. And it has > > > been accounted by shmem_add_to_page_cache(). So, it sounds like a > > > double count. > > > > > > > I think we can choose to either include file allocations into both > > thp_fault_alloc and thp_fault_fallback or we can exclude them from both of > > them. I don't think we can account for only one of them. > > How's about the 3rd option, adding THP_FILE_FALLBACK. I like this option. Problem with THP_FAULT_* is that shmem_getpage_gfp() is called not only from fault path, but also from syscalls.
On Thu, Feb 20, 2020 at 5:11 AM Kirill A. Shutemov <kirill@shutemov.name> wrote: > > On Wed, Feb 19, 2020 at 09:01:23AM -0800, Yang Shi wrote: > > On Tue, Feb 18, 2020 at 7:44 PM David Rientjes <rientjes@google.com> wrote: > > > > > > On Tue, 18 Feb 2020, Yang Shi wrote: > > > > > > > > diff --git a/mm/shmem.c b/mm/shmem.c > > > > > --- a/mm/shmem.c > > > > > +++ b/mm/shmem.c > > > > > @@ -1502,9 +1502,8 @@ static struct page *shmem_alloc_page(gfp_t gfp, > > > > > return page; > > > > > } > > > > > > > > > > -static struct page *shmem_alloc_and_acct_page(gfp_t gfp, > > > > > - struct inode *inode, > > > > > - pgoff_t index, bool huge) > > > > > +static struct page *shmem_alloc_and_acct_page(gfp_t gfp, struct inode *inode, > > > > > + pgoff_t index, bool fault, bool huge) > > > > > { > > > > > struct shmem_inode_info *info = SHMEM_I(inode); > > > > > struct page *page; > > > > > @@ -1518,9 +1517,11 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp, > > > > > if (!shmem_inode_acct_block(inode, nr)) > > > > > goto failed; > > > > > > > > > > - if (huge) > > > > > + if (huge) { > > > > > page = shmem_alloc_hugepage(gfp, info, index); > > > > > - else > > > > > + if (!page && fault) > > > > > + count_vm_event(THP_FAULT_FALLBACK); > > > > > + } else > > > > > page = shmem_alloc_page(gfp, info, index); > > > > > if (page) { > > > > > __SetPageLocked(page); > > > > > @@ -1832,11 +1833,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > > > > > } > > > > > > > > > > alloc_huge: > > > > > - page = shmem_alloc_and_acct_page(gfp, inode, index, true); > > > > > + page = shmem_alloc_and_acct_page(gfp, inode, index, vmf, true); > > > > > if (IS_ERR(page)) { > > > > > alloc_nohuge: > > > > > - page = shmem_alloc_and_acct_page(gfp, inode, > > > > > - index, false); > > > > > + page = shmem_alloc_and_acct_page(gfp, inode, index, vmf, false); > > > > > } > > > > > if (IS_ERR(page)) { > > > > > int retry = 5; > > > > > @@ -1871,8 +1871,11 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > > > > > > > > > > error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg, > > > > > PageTransHuge(page)); > > > > > - if (error) > > > > > + if (error) { > > > > > + if (vmf && PageTransHuge(page)) > > > > > + count_vm_event(THP_FAULT_FALLBACK); > > > > > goto unacct; > > > > > + } > > > > > error = shmem_add_to_page_cache(page, mapping, hindex, > > > > > NULL, gfp & GFP_RECLAIM_MASK); > > > > > if (error) { > > > > > @@ -1883,6 +1886,8 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > > > > > mem_cgroup_commit_charge(page, memcg, false, > > > > > PageTransHuge(page)); > > > > > lru_cache_add_anon(page); > > > > > + if (vmf && PageTransHuge(page)) > > > > > + count_vm_event(THP_FAULT_ALLOC); > > > > > > > > I think shmem THP alloc is accounted to THP_FILE_ALLOC. And it has > > > > been accounted by shmem_add_to_page_cache(). So, it sounds like a > > > > double count. > > > > > > > > > > I think we can choose to either include file allocations into both > > > thp_fault_alloc and thp_fault_fallback or we can exclude them from both of > > > them. I don't think we can account for only one of them. > > > > How's about the 3rd option, adding THP_FILE_FALLBACK. > > I like this option. > > Problem with THP_FAULT_* is that shmem_getpage_gfp() is called not only > from fault path, but also from syscalls. I found another usecase for THP_FILE_FALLBACK. I wanted to measure file THP allocation success rate in our uecase. It looks nr_file_alloc / (nr_file_alloc + nr_file_fallback) is the most simple way. David, are you still working on this patch? > > -- > Kirill A. Shutemov
On Fri, 6 Mar 2020, Yang Shi wrote: > > > > I think we can choose to either include file allocations into both > > > > thp_fault_alloc and thp_fault_fallback or we can exclude them from both of > > > > them. I don't think we can account for only one of them. > > > > > > How's about the 3rd option, adding THP_FILE_FALLBACK. > > > > I like this option. > > > > Problem with THP_FAULT_* is that shmem_getpage_gfp() is called not only > > from fault path, but also from syscalls. > > I found another usecase for THP_FILE_FALLBACK. I wanted to measure > file THP allocation success rate in our uecase. It looks nr_file_alloc > / (nr_file_alloc + nr_file_fallback) is the most simple way. > > David, are you still working on this patch? > Yes, I have a refresh to send out. I don't enable CONFIG_FS_DAX but the THP_FAULT_FALLBACK there seems somewhat out of place. It's not necessarily within the scope of my patchset but thought I'd mention it if someone had strong feelings about whether the DAX cases should be separated out as well.
diff --git a/mm/shmem.c b/mm/shmem.c --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1502,9 +1502,8 @@ static struct page *shmem_alloc_page(gfp_t gfp, return page; } -static struct page *shmem_alloc_and_acct_page(gfp_t gfp, - struct inode *inode, - pgoff_t index, bool huge) +static struct page *shmem_alloc_and_acct_page(gfp_t gfp, struct inode *inode, + pgoff_t index, bool fault, bool huge) { struct shmem_inode_info *info = SHMEM_I(inode); struct page *page; @@ -1518,9 +1517,11 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp, if (!shmem_inode_acct_block(inode, nr)) goto failed; - if (huge) + if (huge) { page = shmem_alloc_hugepage(gfp, info, index); - else + if (!page && fault) + count_vm_event(THP_FAULT_FALLBACK); + } else page = shmem_alloc_page(gfp, info, index); if (page) { __SetPageLocked(page); @@ -1832,11 +1833,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, } alloc_huge: - page = shmem_alloc_and_acct_page(gfp, inode, index, true); + page = shmem_alloc_and_acct_page(gfp, inode, index, vmf, true); if (IS_ERR(page)) { alloc_nohuge: - page = shmem_alloc_and_acct_page(gfp, inode, - index, false); + page = shmem_alloc_and_acct_page(gfp, inode, index, vmf, false); } if (IS_ERR(page)) { int retry = 5; @@ -1871,8 +1871,11 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg, PageTransHuge(page)); - if (error) + if (error) { + if (vmf && PageTransHuge(page)) + count_vm_event(THP_FAULT_FALLBACK); goto unacct; + } error = shmem_add_to_page_cache(page, mapping, hindex, NULL, gfp & GFP_RECLAIM_MASK); if (error) { @@ -1883,6 +1886,8 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, mem_cgroup_commit_charge(page, memcg, false, PageTransHuge(page)); lru_cache_add_anon(page); + if (vmf && PageTransHuge(page)) + count_vm_event(THP_FAULT_ALLOC); spin_lock_irq(&info->lock); info->alloced += compound_nr(page);
The thp_fault_alloc and thp_fault_fallback vmstats are incremented when a hugepage is successfully or unsuccessfully allocated, respectively, during a page fault for anonymous memory. Extend this to shmem as well. Note that care is taken to increment thp_fault_alloc only when the fault succeeds; this is the same behavior as anonymous thp. Signed-off-by: David Rientjes <rientjes@google.com> --- mm/shmem.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-)