diff mbox series

[RFC,5/5] mm: shmem: add anonymous share mTHP counters

Message ID 05d0096e4ec3e572d1d52d33a31a661321ac1551.1713755580.git.baolin.wang@linux.alibaba.com (mailing list archive)
State New
Headers show
Series add mTHP support for anonymous share pages | expand

Commit Message

Baolin Wang April 22, 2024, 7:02 a.m. UTC
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
 include/linux/huge_mm.h | 2 ++
 mm/huge_memory.c        | 4 ++++
 mm/shmem.c              | 5 ++++-
 3 files changed, 10 insertions(+), 1 deletion(-)

Comments

Barry Song April 23, 2024, 1:17 a.m. UTC | #1
On Mon, Apr 22, 2024 at 3:03 PM Baolin Wang
<baolin.wang@linux.alibaba.com> wrote:
>
> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> ---
>  include/linux/huge_mm.h | 2 ++
>  mm/huge_memory.c        | 4 ++++
>  mm/shmem.c              | 5 ++++-
>  3 files changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 26b6fa98d8ac..67b9c1acad31 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -270,6 +270,8 @@ enum mthp_stat_item {
>         MTHP_STAT_ANON_SWPOUT,
>         MTHP_STAT_ANON_SWPOUT_FALLBACK,
>         MTHP_STAT_ANON_SWPIN_REFAULT,
> +       MTHP_STAT_SHMEM_ANON_ALLOC,
> +       MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK,

Not quite sure about this. For 2MB pmd-mapped THP shmem, we count them
as FILE_THP.
Here we are counting them as SHMEM_ANON. To me, SHMEM_ANON is more correct, but
it doesn't align with pmd-mapped THP. David, Ryan, what do you think?


>         __MTHP_STAT_COUNT
>  };
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 9e52c0db7580..dc15240c1ab3 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -557,6 +557,8 @@ DEFINE_MTHP_STAT_ATTR(anon_alloc_fallback, MTHP_STAT_ANON_ALLOC_FALLBACK);
>  DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
>  DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
>  DEFINE_MTHP_STAT_ATTR(anon_swpin_refault, MTHP_STAT_ANON_SWPIN_REFAULT);
> +DEFINE_MTHP_STAT_ATTR(shmem_anon_alloc, MTHP_STAT_SHMEM_ANON_ALLOC);
> +DEFINE_MTHP_STAT_ATTR(shmem_anon_alloc_fallback, MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK);
>
>  static struct attribute *stats_attrs[] = {
>         &anon_alloc_attr.attr,
> @@ -564,6 +566,8 @@ static struct attribute *stats_attrs[] = {
>         &anon_swpout_attr.attr,
>         &anon_swpout_fallback_attr.attr,
>         &anon_swpin_refault_attr.attr,
> +       &shmem_anon_alloc_attr.attr,
> +       &shmem_anon_alloc_fallback_attr.attr,
>         NULL,
>  };
>
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 8b009e7040b2..4a0aa75ab29c 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1706,11 +1706,14 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
>                         pages = 1 << order;
>                         index = round_down(index, pages);
>                         folio = shmem_alloc_hugefolio(gfp, info, index, order);
> -                       if (folio)
> +                       if (folio) {
> +                               count_mthp_stat(order, MTHP_STAT_SHMEM_ANON_ALLOC);
>                                 goto allocated;
> +                       }
>
>                         if (pages == HPAGE_PMD_NR)
>                                 count_vm_event(THP_FILE_FALLBACK);
> +                       count_mthp_stat(order, MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK);
>                         order = next_order(&orders, order);
>                 }
>         } else {
> --
> 2.39.3
>

Thanks
Barry
Baolin Wang April 23, 2024, 1:46 a.m. UTC | #2
On 2024/4/23 09:17, Barry Song wrote:
> On Mon, Apr 22, 2024 at 3:03 PM Baolin Wang
> <baolin.wang@linux.alibaba.com> wrote:
>>
>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>> ---
>>   include/linux/huge_mm.h | 2 ++
>>   mm/huge_memory.c        | 4 ++++
>>   mm/shmem.c              | 5 ++++-
>>   3 files changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>> index 26b6fa98d8ac..67b9c1acad31 100644
>> --- a/include/linux/huge_mm.h
>> +++ b/include/linux/huge_mm.h
>> @@ -270,6 +270,8 @@ enum mthp_stat_item {
>>          MTHP_STAT_ANON_SWPOUT,
>>          MTHP_STAT_ANON_SWPOUT_FALLBACK,
>>          MTHP_STAT_ANON_SWPIN_REFAULT,
>> +       MTHP_STAT_SHMEM_ANON_ALLOC,
>> +       MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK,
> 
> not quite sure about this. for 2MB pmd-mapped THP shmem, we count them
> as FILE_THP.
> here we are counting as SHMEM_ANON. To me, SHMEM_ANON is more correct but
> it doesn't align with pmd-mapped THP. David, Ryan, what do you think?

Thanks for reviewing.

IMO, both approaches are acceptable, which also reflects the 
dual nature of anonymous shared pages: on the one hand they are 
anonymous pages, and on the other hand, they are backed by a pseudo 
file. From the user's perspective, I prefer to use the term "anonymous 
shmem", which can be distinguished from the real file-backed THP.

Anyway, let's see what others think.

>>          __MTHP_STAT_COUNT
>>   };
>>
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index 9e52c0db7580..dc15240c1ab3 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -557,6 +557,8 @@ DEFINE_MTHP_STAT_ATTR(anon_alloc_fallback, MTHP_STAT_ANON_ALLOC_FALLBACK);
>>   DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
>>   DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
>>   DEFINE_MTHP_STAT_ATTR(anon_swpin_refault, MTHP_STAT_ANON_SWPIN_REFAULT);
>> +DEFINE_MTHP_STAT_ATTR(shmem_anon_alloc, MTHP_STAT_SHMEM_ANON_ALLOC);
>> +DEFINE_MTHP_STAT_ATTR(shmem_anon_alloc_fallback, MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK);
>>
>>   static struct attribute *stats_attrs[] = {
>>          &anon_alloc_attr.attr,
>> @@ -564,6 +566,8 @@ static struct attribute *stats_attrs[] = {
>>          &anon_swpout_attr.attr,
>>          &anon_swpout_fallback_attr.attr,
>>          &anon_swpin_refault_attr.attr,
>> +       &shmem_anon_alloc_attr.attr,
>> +       &shmem_anon_alloc_fallback_attr.attr,
>>          NULL,
>>   };
>>
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index 8b009e7040b2..4a0aa75ab29c 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -1706,11 +1706,14 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
>>                          pages = 1 << order;
>>                          index = round_down(index, pages);
>>                          folio = shmem_alloc_hugefolio(gfp, info, index, order);
>> -                       if (folio)
>> +                       if (folio) {
>> +                               count_mthp_stat(order, MTHP_STAT_SHMEM_ANON_ALLOC);
>>                                  goto allocated;
>> +                       }
>>
>>                          if (pages == HPAGE_PMD_NR)
>>                                  count_vm_event(THP_FILE_FALLBACK);
>> +                       count_mthp_stat(order, MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK);
>>                          order = next_order(&orders, order);
>>                  }
>>          } else {
>> --
>> 2.39.3
>>
> 
> Thanks
> Barry
Lance Yang April 23, 2024, 9:45 a.m. UTC | #3
On 2024/4/23 09:17, Barry Song wrote:
[...]
>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>> index 26b6fa98d8ac..67b9c1acad31 100644
>> --- a/include/linux/huge_mm.h
>> +++ b/include/linux/huge_mm.h
>> @@ -270,6 +270,8 @@ enum mthp_stat_item {
>>          MTHP_STAT_ANON_SWPOUT,
>>          MTHP_STAT_ANON_SWPOUT_FALLBACK,
>>          MTHP_STAT_ANON_SWPIN_REFAULT,
>> +       MTHP_STAT_SHMEM_ANON_ALLOC,
>> +       MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK,
> 
> not quite sure about this. for 2MB pmd-mapped THP shmem, we count them
> as FILE_THP.
> here we are counting as SHMEM_ANON. To me, SHMEM_ANON is more correct but
> it doesn't align with pmd-mapped THP. David, Ryan, what do you think?

+1

IMO, shmem isn't actually file-backed, but it has file-backed-like
characteristics :)

FWIW, perhaps MTHP_STAT_FILE_ALLOC and MTHP_STAT_FILE_ALLOC_FALLBACK
would better align with PMD/PTE-mapped THP.

Thanks,
Lance
Lance Yang April 23, 2024, 11:22 a.m. UTC | #4
On Tue, Apr 23, 2024 at 5:46 PM Lance Yang <ioworker0@gmail.com> wrote:
>
> On 2024/4/23 09:17, Barry Song wrote:
> [...]
> >> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> >> index 26b6fa98d8ac..67b9c1acad31 100644
> >> --- a/include/linux/huge_mm.h
> >> +++ b/include/linux/huge_mm.h
> >> @@ -270,6 +270,8 @@ enum mthp_stat_item {
> >>          MTHP_STAT_ANON_SWPOUT,
> >>          MTHP_STAT_ANON_SWPOUT_FALLBACK,
> >>          MTHP_STAT_ANON_SWPIN_REFAULT,
> >> +       MTHP_STAT_SHMEM_ANON_ALLOC,
> >> +       MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK,

Seems like you forgot to add the FILE_FALLBACK_CHARGE counter
in this patch :)

IIUC, you've excluded the THP_FILE_FALLBACK_CHARGE counter
for PTE-mapped mTHP whose size is < PMD in patch 3.

Thanks,
Lance
David Hildenbrand April 23, 2024, 11:37 a.m. UTC | #5
On 23.04.24 03:17, Barry Song wrote:
> On Mon, Apr 22, 2024 at 3:03 PM Baolin Wang
> <baolin.wang@linux.alibaba.com> wrote:
>>
>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>> ---
>>   include/linux/huge_mm.h | 2 ++
>>   mm/huge_memory.c        | 4 ++++
>>   mm/shmem.c              | 5 ++++-
>>   3 files changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>> index 26b6fa98d8ac..67b9c1acad31 100644
>> --- a/include/linux/huge_mm.h
>> +++ b/include/linux/huge_mm.h
>> @@ -270,6 +270,8 @@ enum mthp_stat_item {
>>          MTHP_STAT_ANON_SWPOUT,
>>          MTHP_STAT_ANON_SWPOUT_FALLBACK,
>>          MTHP_STAT_ANON_SWPIN_REFAULT,
>> +       MTHP_STAT_SHMEM_ANON_ALLOC,
>> +       MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK,
> 
> not quite sure about this. for 2MB pmd-mapped THP shmem, we count them
> as FILE_THP.
> here we are counting as SHMEM_ANON. To me, SHMEM_ANON is more correct but
> it doesn't align with pmd-mapped THP. David, Ryan, what do you think?

The term "anonymous share" in the patch subject is weird to begin with 
;) Easy to confuse with anonymous cow-shared memory. Let's just call it 
"anonymous shmem", which it is under the hood.

... regarding the question: if we add FILE_ALLOC and friends, at least 
initially, we wouldn't account other large pagecache folios.

... likely we should add that then as well so the counter matches the 
actual name?

If we later realize that we need separate FILE vs. SHMEM vs. WHATEVER 
counters, we can always add more fine-grained counters later. Doing it 
consistently w.r.t. traditional THPs first sounds reasonable.
Ryan Roberts April 23, 2024, 11:39 a.m. UTC | #6
On 23/04/2024 02:46, Baolin Wang wrote:
> 
> 
> On 2024/4/23 09:17, Barry Song wrote:
>> On Mon, Apr 22, 2024 at 3:03 PM Baolin Wang
>> <baolin.wang@linux.alibaba.com> wrote:
>>>
>>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>>> ---
>>>   include/linux/huge_mm.h | 2 ++
>>>   mm/huge_memory.c        | 4 ++++
>>>   mm/shmem.c              | 5 ++++-
>>>   3 files changed, 10 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>> index 26b6fa98d8ac..67b9c1acad31 100644
>>> --- a/include/linux/huge_mm.h
>>> +++ b/include/linux/huge_mm.h
>>> @@ -270,6 +270,8 @@ enum mthp_stat_item {
>>>          MTHP_STAT_ANON_SWPOUT,
>>>          MTHP_STAT_ANON_SWPOUT_FALLBACK,
>>>          MTHP_STAT_ANON_SWPIN_REFAULT,
>>> +       MTHP_STAT_SHMEM_ANON_ALLOC,
>>> +       MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK,
>>
>> not quite sure about this. for 2MB pmd-mapped THP shmem, we count them
>> as FILE_THP.
>> here we are counting as SHMEM_ANON. To me, SHMEM_ANON is more correct but
>> it doesn't align with pmd-mapped THP. David, Ryan, what do you think?
> 
> Thanks for reviewing.
> 
> IMO, I think both approaches are acceptable, which also reflects the dual nature
> of anonymous shared pages: on the one hand they are anonymous pages, and on the
> other hand, they are backed by a pseudo file. From the user's perspective, I
> prefer to use the term "anonymous shmem", which can be distinguished from the
> real file-backed THP.
> 
> Anyway, let's see what others think.

From a quick look at the code, it looks like the shmem alloc/fallback/charge
events are all lumped in with FILE_THP. But the instantaneous "how many are
allocated" and "how many are mapped" have their own NR_SHMEM_THPS and
NR_SHMEM_PMDMAPPED counters? So it's a bit inconsistent today.

My preference would be to add these to be consistent with the anon stats:

MTHP_STAT_SHMEM_FAULT_ALLOC,
MTHP_STAT_SHMEM_FAULT_FALLBACK,
MTHP_STAT_SHMEM_FAULT_FALLBACK_CHARGE,

But it looks like these aren't always allocated due to faults? So perhaps:

MTHP_STAT_SHMEM_ALLOC,
MTHP_STAT_SHMEM_FALLBACK,
MTHP_STAT_SHMEM_FALLBACK_CHARGE,

If I've understood the code correctly (I know nothing about shmem), the
allocation can be for both mmap(SHARED|ANON) and for tmpfs? So "SHMEM_ANON"
probably isn't quite right?


> 
>>>          __MTHP_STAT_COUNT
>>>   };
>>>
>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>> index 9e52c0db7580..dc15240c1ab3 100644
>>> --- a/mm/huge_memory.c
>>> +++ b/mm/huge_memory.c
>>> @@ -557,6 +557,8 @@ DEFINE_MTHP_STAT_ATTR(anon_alloc_fallback,
>>> MTHP_STAT_ANON_ALLOC_FALLBACK);
>>>   DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
>>>   DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
>>>   DEFINE_MTHP_STAT_ATTR(anon_swpin_refault, MTHP_STAT_ANON_SWPIN_REFAULT);
>>> +DEFINE_MTHP_STAT_ATTR(shmem_anon_alloc, MTHP_STAT_SHMEM_ANON_ALLOC);
>>> +DEFINE_MTHP_STAT_ATTR(shmem_anon_alloc_fallback,
>>> MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK);
>>>
>>>   static struct attribute *stats_attrs[] = {
>>>          &anon_alloc_attr.attr,
>>> @@ -564,6 +566,8 @@ static struct attribute *stats_attrs[] = {
>>>          &anon_swpout_attr.attr,
>>>          &anon_swpout_fallback_attr.attr,
>>>          &anon_swpin_refault_attr.attr,
>>> +       &shmem_anon_alloc_attr.attr,
>>> +       &shmem_anon_alloc_fallback_attr.attr,
>>>          NULL,
>>>   };
>>>
>>> diff --git a/mm/shmem.c b/mm/shmem.c
>>> index 8b009e7040b2..4a0aa75ab29c 100644
>>> --- a/mm/shmem.c
>>> +++ b/mm/shmem.c
>>> @@ -1706,11 +1706,14 @@ static struct folio *shmem_alloc_and_add_folio(struct
>>> vm_fault *vmf,
>>>                          pages = 1 << order;
>>>                          index = round_down(index, pages);
>>>                          folio = shmem_alloc_hugefolio(gfp, info, index, order);
>>> -                       if (folio)
>>> +                       if (folio) {
>>> +                               count_mthp_stat(order,
>>> MTHP_STAT_SHMEM_ANON_ALLOC);

is there any reason why this can't go next to the existing PMD-size stat?

>>>                                  goto allocated;
>>> +                       }
>>>
>>>                          if (pages == HPAGE_PMD_NR)
>>>                                  count_vm_event(THP_FILE_FALLBACK);
>>> +                       count_mthp_stat(order,
>>> MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK);
>>>                          order = next_order(&orders, order);
>>>                  }
>>>          } else {
>>> -- 
>>> 2.39.3
>>>
>>
>> Thanks
>> Barry
Baolin Wang April 24, 2024, 3:48 a.m. UTC | #7
On 2024/4/23 19:39, Ryan Roberts wrote:
> On 23/04/2024 02:46, Baolin Wang wrote:
>>
>>
>> On 2024/4/23 09:17, Barry Song wrote:
>>> On Mon, Apr 22, 2024 at 3:03 PM Baolin Wang
>>> <baolin.wang@linux.alibaba.com> wrote:
>>>>
>>>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>>>> ---
>>>>    include/linux/huge_mm.h | 2 ++
>>>>    mm/huge_memory.c        | 4 ++++
>>>>    mm/shmem.c              | 5 ++++-
>>>>    3 files changed, 10 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>>> index 26b6fa98d8ac..67b9c1acad31 100644
>>>> --- a/include/linux/huge_mm.h
>>>> +++ b/include/linux/huge_mm.h
>>>> @@ -270,6 +270,8 @@ enum mthp_stat_item {
>>>>           MTHP_STAT_ANON_SWPOUT,
>>>>           MTHP_STAT_ANON_SWPOUT_FALLBACK,
>>>>           MTHP_STAT_ANON_SWPIN_REFAULT,
>>>> +       MTHP_STAT_SHMEM_ANON_ALLOC,
>>>> +       MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK,
>>>
>>> not quite sure about this. for 2MB pmd-mapped THP shmem, we count them
>>> as FILE_THP.
>>> here we are counting as SHMEM_ANON. To me, SHMEM_ANON is more correct but
>>> it doesn't align with pmd-mapped THP. David, Ryan, what do you think?
>>
>> Thanks for reviewing.
>>
>> IMO, I think both approaches are acceptable, which also reflects the dual nature
>> of anonymous shared pages: on the one hand they are anonymous pages, and on the
>> other hand, they are backed by a pseudo file. From the user's perspective, I
>> prefer to use the term "anonymous shmem", which can be distinguished from the
>> real file-backed THP.
>>
>> Anyway, let's see what others think.
> 
>  From a quick look at the code, it looks like the shmem alloc/fallback/charge
> events are all lumped in with FILE_THP. But the instantaneous "how many are
> allocated" and "how many are mapped" have their own NR_SHMEM_THPS and
> NR_SHMEM_PMDMAPPED counters? So its a bit inconsistent today.
> 
> My preference would be to add these to be consistent with the anon stats:
> 
> MTHP_STAT_SHMEM_FAULT_ALLOC,
> MTHP_STAT_SHMEM_FAULT_FALLBACK,
> MTHP_STAT_SHMEM_FAULT_FALLBACK_CHARGE,
> 
> But it looks like these aren't always allocated due to faults? So perhaps:
> 
> MTHP_STAT_SHMEM_ALLOC,
> MTHP_STAT_SHMEM_FALLBACK,
> MTHP_STAT_SHMEM_FALLBACK_CHARGE,

This looks good to me.

> If I've understood the code correctly (I know nothing about shmem), the
> allocation can be for both mmap(SHARED|ANON) and for tmpfs? So "SHMEM_ANON"

This is allowed, but the 'fd' for tmpfs will be ignored (see 
ksys_mmap_pgoff()), which is the same as for anonymous shmem.

> probably isn't quite right?
> 
> 
>>
>>>>           __MTHP_STAT_COUNT
>>>>    };
>>>>
>>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>>> index 9e52c0db7580..dc15240c1ab3 100644
>>>> --- a/mm/huge_memory.c
>>>> +++ b/mm/huge_memory.c
>>>> @@ -557,6 +557,8 @@ DEFINE_MTHP_STAT_ATTR(anon_alloc_fallback,
>>>> MTHP_STAT_ANON_ALLOC_FALLBACK);
>>>>    DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
>>>>    DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
>>>>    DEFINE_MTHP_STAT_ATTR(anon_swpin_refault, MTHP_STAT_ANON_SWPIN_REFAULT);
>>>> +DEFINE_MTHP_STAT_ATTR(shmem_anon_alloc, MTHP_STAT_SHMEM_ANON_ALLOC);
>>>> +DEFINE_MTHP_STAT_ATTR(shmem_anon_alloc_fallback,
>>>> MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK);
>>>>
>>>>    static struct attribute *stats_attrs[] = {
>>>>           &anon_alloc_attr.attr,
>>>> @@ -564,6 +566,8 @@ static struct attribute *stats_attrs[] = {
>>>>           &anon_swpout_attr.attr,
>>>>           &anon_swpout_fallback_attr.attr,
>>>>           &anon_swpin_refault_attr.attr,
>>>> +       &shmem_anon_alloc_attr.attr,
>>>> +       &shmem_anon_alloc_fallback_attr.attr,
>>>>           NULL,
>>>>    };
>>>>
>>>> diff --git a/mm/shmem.c b/mm/shmem.c
>>>> index 8b009e7040b2..4a0aa75ab29c 100644
>>>> --- a/mm/shmem.c
>>>> +++ b/mm/shmem.c
>>>> @@ -1706,11 +1706,14 @@ static struct folio *shmem_alloc_and_add_folio(struct
>>>> vm_fault *vmf,
>>>>                           pages = 1 << order;
>>>>                           index = round_down(index, pages);
>>>>                           folio = shmem_alloc_hugefolio(gfp, info, index, order);
>>>> -                       if (folio)
>>>> +                       if (folio) {
>>>> +                               count_mthp_stat(order,
>>>> MTHP_STAT_SHMEM_ANON_ALLOC);
> 
> is there any reason why this can't go next to the existing PMD-size stat?

No, will move to the existing PMD-size stat.
Baolin Wang April 24, 2024, 3:49 a.m. UTC | #8
On 2024/4/23 19:22, Lance Yang wrote:
> On Tue, Apr 23, 2024 at 5:46 PM Lance Yang <ioworker0@gmail.com> wrote:
>>
>> On 2024/4/23 09:17, Barry Song wrote:
>> [...]
>>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>>> index 26b6fa98d8ac..67b9c1acad31 100644
>>>> --- a/include/linux/huge_mm.h
>>>> +++ b/include/linux/huge_mm.h
>>>> @@ -270,6 +270,8 @@ enum mthp_stat_item {
>>>>           MTHP_STAT_ANON_SWPOUT,
>>>>           MTHP_STAT_ANON_SWPOUT_FALLBACK,
>>>>           MTHP_STAT_ANON_SWPIN_REFAULT,
>>>> +       MTHP_STAT_SHMEM_ANON_ALLOC,
>>>> +       MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK,
> 
> Seems like you forgot to add the FILE_FALLBACK_CHARGE counter
> in this patch :)
> 
> IIUC, you've excluded the THP_FILE_FALLBACK_CHARGE counter
> for PTE-mapped mTHP that size < PMD in patch3.

Yes, will add in next version.
Baolin Wang April 24, 2024, 6:10 a.m. UTC | #9
On 2024/4/23 19:37, David Hildenbrand wrote:
> On 23.04.24 03:17, Barry Song wrote:
>> On Mon, Apr 22, 2024 at 3:03 PM Baolin Wang
>> <baolin.wang@linux.alibaba.com> wrote:
>>>
>>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>>> ---
>>>   include/linux/huge_mm.h | 2 ++
>>>   mm/huge_memory.c        | 4 ++++
>>>   mm/shmem.c              | 5 ++++-
>>>   3 files changed, 10 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>> index 26b6fa98d8ac..67b9c1acad31 100644
>>> --- a/include/linux/huge_mm.h
>>> +++ b/include/linux/huge_mm.h
>>> @@ -270,6 +270,8 @@ enum mthp_stat_item {
>>>          MTHP_STAT_ANON_SWPOUT,
>>>          MTHP_STAT_ANON_SWPOUT_FALLBACK,
>>>          MTHP_STAT_ANON_SWPIN_REFAULT,
>>> +       MTHP_STAT_SHMEM_ANON_ALLOC,
>>> +       MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK,
>>
>> not quite sure about this. for 2MB pmd-mapped THP shmem, we count them
>> as FILE_THP.
>> here we are counting as SHMEM_ANON. To me, SHMEM_ANON is more correct but
>> it doesn't align with pmd-mapped THP. David, Ryan, what do you think?
> 
> The term "anonymous share" in the patch subject is weird to begin with 
> ;) Easy to confuse with anonymous cow-shared memory. Let's just call it 
> "anonymous shmem", which it is under the hood.

Sure.

> ... regarding the question: if we add FILE_ALLOC and friends, at least 
> initially, we wouldn't account other large pagecache folios.
> 
> ... likely we should add that then as well so the counter matches the 
> actual name?
> 
> If we later realize that we need separate FILE vs. SHMEM vs. WHATEVER 
> counters, we can always add more fine-grained counters later. Doing it 
> consistently w.r.t. traditional THPs first sounds reasonable.

Um, once we expose it to userspace through the sysfs interface, the 
sysfs interface should be as explicit as possible and avoid 
confusing users; otherwise it will be difficult to change this kind of 
interface in the future. Personally, I prefer Ryan's suggestion.
David Hildenbrand April 24, 2024, 7:11 a.m. UTC | #10
On 24.04.24 08:10, Baolin Wang wrote:
> 
> 
> On 2024/4/23 19:37, David Hildenbrand wrote:
>> On 23.04.24 03:17, Barry Song wrote:
>>> On Mon, Apr 22, 2024 at 3:03 PM Baolin Wang
>>> <baolin.wang@linux.alibaba.com> wrote:
>>>>
>>>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>>>> ---
>>>>    include/linux/huge_mm.h | 2 ++
>>>>    mm/huge_memory.c        | 4 ++++
>>>>    mm/shmem.c              | 5 ++++-
>>>>    3 files changed, 10 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>>> index 26b6fa98d8ac..67b9c1acad31 100644
>>>> --- a/include/linux/huge_mm.h
>>>> +++ b/include/linux/huge_mm.h
>>>> @@ -270,6 +270,8 @@ enum mthp_stat_item {
>>>>           MTHP_STAT_ANON_SWPOUT,
>>>>           MTHP_STAT_ANON_SWPOUT_FALLBACK,
>>>>           MTHP_STAT_ANON_SWPIN_REFAULT,
>>>> +       MTHP_STAT_SHMEM_ANON_ALLOC,
>>>> +       MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK,
>>>
>>> not quite sure about this. for 2MB pmd-mapped THP shmem, we count them
>>> as FILE_THP.
>>> here we are counting as SHMEM_ANON. To me, SHMEM_ANON is more correct but
>>> it doesn't align with pmd-mapped THP. David, Ryan, what do you think?
>>
>> The term "anonymous share" in the patch subject is weird to begin with
>> ;) Easy to confuse with anonymous cow-shared memory. Let's just call it
>> "anonymous shmem", which it is under the hood.
> 
> Sure.
> 
>> ... regarding the question: if we add FILE_ALLOC and friends, at least
>> initially, we wouldn't account other large pagecache folios.
>>
>> ... likely we should add that then as well so the counter matches the
>> actual name?
>>
>> If we later realize that we need separate FILE vs. SHMEM vs. WHATEVER
>> counters, we can always add more fine-grained counters later. Doing it
>> consistently w.r.t. traditional THPs first sounds reasonable.
> 
> Um, once we expose it to userspace through the sysfs interface, the
> sysfs interface should be explicit as much as possible and avoid
> confusing users, otherwise it will be difficult to change this kind of
> interface in the future. Personally, I prefer to Ryan's suggestion.

Inconsistency is confusing. As long as you avoid that, I don't 
particularly care.
Ryan Roberts April 24, 2024, 8:15 a.m. UTC | #11
On 24/04/2024 08:11, David Hildenbrand wrote:
> On 24.04.24 08:10, Baolin Wang wrote:
>>
>>
>> On 2024/4/23 19:37, David Hildenbrand wrote:
>>> On 23.04.24 03:17, Barry Song wrote:
>>>> On Mon, Apr 22, 2024 at 3:03 PM Baolin Wang
>>>> <baolin.wang@linux.alibaba.com> wrote:
>>>>>
>>>>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>>>>> ---
>>>>>    include/linux/huge_mm.h | 2 ++
>>>>>    mm/huge_memory.c        | 4 ++++
>>>>>    mm/shmem.c              | 5 ++++-
>>>>>    3 files changed, 10 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>>>> index 26b6fa98d8ac..67b9c1acad31 100644
>>>>> --- a/include/linux/huge_mm.h
>>>>> +++ b/include/linux/huge_mm.h
>>>>> @@ -270,6 +270,8 @@ enum mthp_stat_item {
>>>>>           MTHP_STAT_ANON_SWPOUT,
>>>>>           MTHP_STAT_ANON_SWPOUT_FALLBACK,
>>>>>           MTHP_STAT_ANON_SWPIN_REFAULT,
>>>>> +       MTHP_STAT_SHMEM_ANON_ALLOC,
>>>>> +       MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK,
>>>>
>>>> not quite sure about this. for 2MB pmd-mapped THP shmem, we count them
>>>> as FILE_THP.
>>>> here we are counting as SHMEM_ANON. To me, SHMEM_ANON is more correct but
>>>> it doesn't align with pmd-mapped THP. David, Ryan, what do you think?
>>>
>>> The term "anonymous share" in the patch subject is weird to begin with
>>> ;) Easy to confuse with anonymous cow-shared memory. Let's just call it
>>> "anonymous shmem", which it is under the hood.
>>
>> Sure.
>>
>>> ... regarding the question: if we add FILE_ALLOC and friends, at least
>>> initially, we wouldn't account other large pagecache folios.
>>>
>>> ... likely we should add that then as well so the counter matches the
>>> actual name?
>>>
>>> If we later realize that we need separate FILE vs. SHMEM vs. WHATEVER
>>> counters, we can always add more fine-grained counters later. Doing it
>>> consistently w.r.t. traditional THPs first sounds reasonable.
>>
>> Um, once we expose it to userspace through the sysfs interface, the
>> sysfs interface should be explicit as much as possible and avoid
>> confusing users, otherwise it will be difficult to change this kind of
>> interface in the future. Personally, I prefer to Ryan's suggestion.
> 
> Inconsistency is confusing. As long as you avoid that, I don't particularly care.

This is a good point. We have been careful to make sure the 2M ANON mTHP stats
match the existing PMD-size stats. So we should definitely make sure that any
future 2M FILE mTHP stats match too, which I guess means counting both SHMEM and
FILE events.

So perhaps it makes more sense to add FILE counters to start with. If we need
the SHMEM-specific counters, we could add them later?

I'm happy to go with the crowd on this...
Baolin Wang April 24, 2024, 9:31 a.m. UTC | #12
On 2024/4/24 16:15, Ryan Roberts wrote:
> On 24/04/2024 08:11, David Hildenbrand wrote:
>> On 24.04.24 08:10, Baolin Wang wrote:
>>>
>>>
>>> On 2024/4/23 19:37, David Hildenbrand wrote:
>>>> On 23.04.24 03:17, Barry Song wrote:
>>>>> On Mon, Apr 22, 2024 at 3:03 PM Baolin Wang
>>>>> <baolin.wang@linux.alibaba.com> wrote:
>>>>>>
>>>>>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>>>>>> ---
>>>>>>     include/linux/huge_mm.h | 2 ++
>>>>>>     mm/huge_memory.c        | 4 ++++
>>>>>>     mm/shmem.c              | 5 ++++-
>>>>>>     3 files changed, 10 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>>>>> index 26b6fa98d8ac..67b9c1acad31 100644
>>>>>> --- a/include/linux/huge_mm.h
>>>>>> +++ b/include/linux/huge_mm.h
>>>>>> @@ -270,6 +270,8 @@ enum mthp_stat_item {
>>>>>>            MTHP_STAT_ANON_SWPOUT,
>>>>>>            MTHP_STAT_ANON_SWPOUT_FALLBACK,
>>>>>>            MTHP_STAT_ANON_SWPIN_REFAULT,
>>>>>> +       MTHP_STAT_SHMEM_ANON_ALLOC,
>>>>>> +       MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK,
>>>>>
>>>>> not quite sure about this. for 2MB pmd-mapped THP shmem, we count them
>>>>> as FILE_THP.
>>>>> here we are counting as SHMEM_ANON. To me, SHMEM_ANON is more correct but
>>>>> it doesn't align with pmd-mapped THP. David, Ryan, what do you think?
>>>>
>>>> The term "anonymous share" in the patch subject is weird to begin with
>>>> ;) Easy to confuse with anonymous cow-shared memory. Let's just call it
>>>> "anonymous shmem", which it is under the hood.
>>>
>>> Sure.
>>>
>>>> ... regarding the question: if we add FILE_ALLOC and friends, at least
>>>> initially, we wouldn't account other large pagecache folios.
>>>>
>>>> ... likely we should add that then as well so the counter matches the
>>>> actual name?
>>>>
>>>> If we later realize that we need separate FILE vs. SHMEM vs. WHATEVER
>>>> counters, we can always add more fine-grained counters later. Doing it
>>>> consistently w.r.t. traditional THPs first sounds reasonable.
>>>
>>> Um, once we expose it to userspace through the sysfs interface, the
>>> sysfs interface should be explicit as much as possible and avoid
>>> confusing users, otherwise it will be difficult to change this kind of
>>> interface in the future. Personally, I prefer to Ryan's suggestion.
>>
>> Inconsistency is confusing. As long as you avoid that, I don't particularly care.
> 
> This is a good point. We have been careful to make sure the 2M ANON mTHP stats
> match the existing PMD-size stats. So we should definitely make sure that any
> future 2M FILE mTHP stats match too, which I guess means counting both SHMEM and
> FILE events.
> 
> So perhaps it makes more sense to add FILE counters to start with. If we need
> the SHMEM-specific counters, we could add them later?
> 
> I'm happy to go with the crowd on this...

(Seems I'm the only one who prefers the term 'SHMEM_' now.) Fine, I have 
no strong preference, and let's keep consistency first. Thanks guys.
diff mbox series

Patch

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 26b6fa98d8ac..67b9c1acad31 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -270,6 +270,8 @@  enum mthp_stat_item {
 	MTHP_STAT_ANON_SWPOUT,
 	MTHP_STAT_ANON_SWPOUT_FALLBACK,
 	MTHP_STAT_ANON_SWPIN_REFAULT,
+	MTHP_STAT_SHMEM_ANON_ALLOC,
+	MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK,
 	__MTHP_STAT_COUNT
 };
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9e52c0db7580..dc15240c1ab3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -557,6 +557,8 @@  DEFINE_MTHP_STAT_ATTR(anon_alloc_fallback, MTHP_STAT_ANON_ALLOC_FALLBACK);
 DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
 DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
 DEFINE_MTHP_STAT_ATTR(anon_swpin_refault, MTHP_STAT_ANON_SWPIN_REFAULT);
+DEFINE_MTHP_STAT_ATTR(shmem_anon_alloc, MTHP_STAT_SHMEM_ANON_ALLOC);
+DEFINE_MTHP_STAT_ATTR(shmem_anon_alloc_fallback, MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK);
 
 static struct attribute *stats_attrs[] = {
 	&anon_alloc_attr.attr,
@@ -564,6 +566,8 @@  static struct attribute *stats_attrs[] = {
 	&anon_swpout_attr.attr,
 	&anon_swpout_fallback_attr.attr,
 	&anon_swpin_refault_attr.attr,
+	&shmem_anon_alloc_attr.attr,
+	&shmem_anon_alloc_fallback_attr.attr,
 	NULL,
 };
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 8b009e7040b2..4a0aa75ab29c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1706,11 +1706,14 @@  static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
 			pages = 1 << order;
 			index = round_down(index, pages);
 			folio = shmem_alloc_hugefolio(gfp, info, index, order);
-			if (folio)
+			if (folio) {
+				count_mthp_stat(order, MTHP_STAT_SHMEM_ANON_ALLOC);
 				goto allocated;
+			}
 
 			if (pages == HPAGE_PMD_NR)
 				count_vm_event(THP_FILE_FALLBACK);
+			count_mthp_stat(order, MTHP_STAT_SHMEM_ANON_ALLOC_FALLBACK);
 			order = next_order(&orders, order);
 		}
 	} else {