
arm64: calculate the various page counts to show

Message ID 1448458872-39897-1-git-send-email-zhongjiang@huawei.com (mailing list archive)
State New, archived

Commit Message

zhong jiang Nov. 25, 2015, 1:41 p.m. UTC
This patch adds an interface to show the number of 4KB or 64KB pages,
aiming to report statistics on the different types of pages.

Signed-off-by: zhongjiang <zhongjiang@huawei.com>
---
 arch/arm64/include/asm/pgtable-types.h |   24 ++++++++++++++++++++++++
 arch/arm64/mm/mmu.c                    |   28 ++++++++++++++++++++++++++++
 arch/arm64/mm/pageattr.c               |   31 +++++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+), 0 deletions(-)

Comments

Mark Rutland Nov. 25, 2015, 3:04 p.m. UTC | #1
On Wed, Nov 25, 2015 at 09:41:12PM +0800, zhongjiang wrote:
> This patch adds an interface to show the number of 4KB or 64KB pages,
> aiming to report statistics on the different types of pages.

What is this useful for? Why do we want it?

What does it account for, just the swapper?

> Signed-off-by: zhongjiang <zhongjiang@huawei.com>
> ---
>  arch/arm64/include/asm/pgtable-types.h |   24 ++++++++++++++++++++++++
>  arch/arm64/mm/mmu.c                    |   28 ++++++++++++++++++++++++++++
>  arch/arm64/mm/pageattr.c               |   31 +++++++++++++++++++++++++++++++
>  3 files changed, 83 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
> index 2b1bd7e..aa52546 100644
> --- a/arch/arm64/include/asm/pgtable-types.h
> +++ b/arch/arm64/include/asm/pgtable-types.h
> @@ -86,6 +86,30 @@ typedef pteval_t pgprot_t;
>  
>  #endif /* STRICT_MM_TYPECHECKS */
>  
> +struct seq_file;
> +extern void arch_report_meminfo(struct seq_file *m);
> +
> +enum pg_level {
> +	PG_LEVEL_NONE,
> +#ifdef CONFIG_ARM64_4K_PAGES
> +	PG_LEVEL_4K,
> +	PG_LEVEL_2M,
> +	PG_LEVEL_1G,
> +#else
> +	PG_LEVEL_64K,
> +	PG_LEVEL_512M,
> +#endif
> +	PG_LEVEL_NUM
> +};

This doesn't account for 16K pages, and it means each call site has to
handle the various page sizes directly.

It would be better to simply count PTE/PMD/PUD/PGD, then handle the size
conversion at the end when logging.
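
Something like the below would work for any page size and any number of
levels (an untested sketch; the names are only illustrative):

	enum pg_level {
		PG_LEVEL_NONE,
		PG_LEVEL_PTE,	/* PAGE_SIZE mappings */
		PG_LEVEL_PMD,	/* PMD_SIZE block mappings */
		PG_LEVEL_PUD,	/* PUD_SIZE block mappings */
		PG_LEVEL_PGD,
		PG_LEVEL_NUM
	};

Then neither the enum nor the call sites depend on CONFIG_ARM64_*_PAGES.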

> @@ -85,6 +86,11 @@ void split_pmd(pmd_t *pmd, pte_t *pte)
>  		set_pte(pte, pfn_pte(pfn, prot));
>  		pfn++;
>  	} while (pte++, i++, i < PTRS_PER_PTE);
> +#ifdef CONFIG_ARM64_4K_PAGES
> +	split_page_count(PG_LEVEL_2M);
> +#else
> +	split_page_count(PG_LEVEL_512M);
> +#endif
>  }

e.g. here you'd just count PG_LEVEL_PMD, which would work regardless of
page size.
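
i.e. the #ifdef block in the hunk above collapses to a single call at the
end of split_pmd(), something like (a sketch, using the illustrative names
above):

	/* one PMD block was split into PTRS_PER_PTE page mappings */
	split_page_count(PG_LEVEL_PMD);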

> diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
> index 7a5ff11..c1888b9 100644
> --- a/arch/arm64/mm/pageattr.c
> +++ b/arch/arm64/mm/pageattr.c
> @@ -15,12 +15,43 @@
>  #include <linux/module.h>
>  #include <linux/sched.h>
>  
> +#include <linux/seq_file.h>
>  #include <asm/pgalloc.h>
>  #include <asm/pgtable.h>
>  #include <asm/tlbflush.h>
>  
>  #include "mm.h"
>  
> +static unsigned long direct_pages_count[PG_LEVEL_NUM];

This doesn't match reality by the time we start executing the kernel,
given we created page tables in head.S.

> +
> +void update_page_count(int level, unsigned long pages)
> +{
> +	direct_pages_count[level] += pages;
> +}
> +
> +void split_page_count(int level)
> +{
> +	direct_pages_count[level]--;
> +	direct_pages_count[level-1] += PTRS_PER_PTE;
> +}
> +
> +void arch_report_meminfo(struct seq_file *m)
> +{
> +#ifdef CONFIG_ARM64_4K_PAGES
> +	seq_printf(m, "DirectMap4k:     %8lu kB\n",
> +			direct_pages_count[PG_LEVEL_4K] << 2);
> +	seq_printf(m, "DirectMap2M:     %8lu kB\n",
> +			direct_pages_count[PG_LEVEL_2M] << 11);
> +	seq_printf(m, "DirectMap1G:     %8lu kB\n",
> +			direct_pages_count[PG_LEVEL_1G] << 20);
> +#else
> +	seq_printf(m, "DirectMap64k:     %8lu kB\n",
> +			direct_pages_count[PG_LEVEL_64K] << 6);
> +	seq_printf(m, "DirectMap512M:     %8lu kB\n",
> +			direct_pages_count[PG_LEVEL_512M] << 19);
> +#endif
> +}

You could dynamically determine the sizes here for each field, and not
have to have #ifdefs.

That all said, I don't see what this is useful for, and it looks very
fragile.

Thanks,
Mark.
zhong jiang Nov. 26, 2015, 3:05 p.m. UTC | #2
On 2015/11/25 23:04, Mark Rutland wrote:
> On Wed, Nov 25, 2015 at 09:41:12PM +0800, zhongjiang wrote:
>> This patch adds an interface to show the number of 4KB or 64KB pages,
>> aiming to report statistics on the different types of pages.
> 
> What is this useful for? Why do we want it?
> 
> What does it account for, just the swapper?
> 

The patch was written when I was backporting set_memory_ro. It can be used to
detect whether large pages are being split and merged. Large pages
significantly reduce TLB misses and improve system performance.

>> Signed-off-by: zhongjiang <zhongjiang@huawei.com>
>> ---
>>  arch/arm64/include/asm/pgtable-types.h |   24 ++++++++++++++++++++++++
>>  arch/arm64/mm/mmu.c                    |   28 ++++++++++++++++++++++++++++
>>  arch/arm64/mm/pageattr.c               |   31 +++++++++++++++++++++++++++++++
>>  3 files changed, 83 insertions(+), 0 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
>> index 2b1bd7e..aa52546 100644
>> --- a/arch/arm64/include/asm/pgtable-types.h
>> +++ b/arch/arm64/include/asm/pgtable-types.h
>> @@ -86,6 +86,30 @@ typedef pteval_t pgprot_t;
>>  
>>  #endif /* STRICT_MM_TYPECHECKS */
>>  
>> +struct seq_file;
>> +extern void arch_report_meminfo(struct seq_file *m);
>> +
>> +enum pg_level {
>> +	PG_LEVEL_NONE,
>> +#ifdef CONFIG_ARM64_4K_PAGES
>> +	PG_LEVEL_4K,
>> +	PG_LEVEL_2M,
>> +	PG_LEVEL_1G,
>> +#else
>> +	PG_LEVEL_64K,
>> +	PG_LEVEL_512M,
>> +#endif
>> +	PG_LEVEL_NUM
>> +};
> 
> This doesn't account for 16K pages, and it means each call site has to
> handle the various page sizes directly.
> 
> It would be better to simply count PTE/PMD/PUD/PGD, then handle the size
> conversion at the end when logging.
> 

Yes, for now I only consider 4KB and 64KB. If the patch is approved, I will
improve it.
Each call site would need two different variables for the statistics in order
to distinguish the different page types, so I don't think it would be any
simpler.


>> @@ -85,6 +86,11 @@ void split_pmd(pmd_t *pmd, pte_t *pte)
>>  		set_pte(pte, pfn_pte(pfn, prot));
>>  		pfn++;
>>  	} while (pte++, i++, i < PTRS_PER_PTE);
>> +#ifdef CONFIG_ARM64_4K_PAGES
>> +	split_page_count(PG_LEVEL_2M);
>> +#else
>> +	split_page_count(PG_LEVEL_512M);
>> +#endif
>>  }
> 
> e.g. here you'd just count PG_LEVEL_PMD, which would work regardless of
> page size.
> 
>> diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
>> index 7a5ff11..c1888b9 100644
>> --- a/arch/arm64/mm/pageattr.c
>> +++ b/arch/arm64/mm/pageattr.c
>> @@ -15,12 +15,43 @@
>>  #include <linux/module.h>
>>  #include <linux/sched.h>
>>  
>> +#include <linux/seq_file.h>
>>  #include <asm/pgalloc.h>
>>  #include <asm/pgtable.h>
>>  #include <asm/tlbflush.h>
>>  
>>  #include "mm.h"
>>  
>> +static unsigned long direct_pages_count[PG_LEVEL_NUM];
> 
> This doesn't match reality by the time we start executing the kernel,
> given we created page tables in head.S.
> 
>> +
>> +void update_page_count(int level, unsigned long pages)
>> +{
>> +	direct_pages_count[level] += pages;
>> +}
>> +
>> +void split_page_count(int level)
>> +{
>> +	direct_pages_count[level]--;
>> +	direct_pages_count[level-1] += PTRS_PER_PTE;
>> +}
>> +
>> +void arch_report_meminfo(struct seq_file *m)
>> +{
>> +#ifdef CONFIG_ARM64_4K_PAGES
>> +	seq_printf(m, "DirectMap4k:     %8lu kB\n",
>> +			direct_pages_count[PG_LEVEL_4K] << 2);
>> +	seq_printf(m, "DirectMap2M:     %8lu kB\n",
>> +			direct_pages_count[PG_LEVEL_2M] << 11);
>> +	seq_printf(m, "DirectMap1G:     %8lu kB\n",
>> +			direct_pages_count[PG_LEVEL_1G] << 20);
>> +#else
>> +	seq_printf(m, "DirectMap64k:     %8lu kB\n",
>> +			direct_pages_count[PG_LEVEL_64K] << 6);
>> +	seq_printf(m, "DirectMap512M:     %8lu kB\n",
>> +			direct_pages_count[PG_LEVEL_512M] << 19);
>> +#endif
>> +}
> 
> You could dynamically determine the sizes here for each field, and not
> have to have #ifdefs.

I don't understand what you mean. I think it is more readable and easier to
work with as it is.

Thanks
zhongjiang

> That all said, I don't see what this is useful for, and it looks very
> fragile.
> 
> Thanks,
> Mark.
> 
> .
>
Mark Rutland Nov. 26, 2015, 3:49 p.m. UTC | #3
On Thu, Nov 26, 2015 at 11:05:32PM +0800, zhong jiang wrote:
> On 2015/11/25 23:04, Mark Rutland wrote:
> > On Wed, Nov 25, 2015 at 09:41:12PM +0800, zhongjiang wrote:
> >> This patch adds an interface to show the number of 4KB or 64KB pages,
> >> aiming to report statistics on the different types of pages.
> > 
> > What is this useful for? Why do we want it?
> > 
> > What does it account for, just the swapper?
> > 
> 
> The patch was written when I was backporting set_memory_ro. It can be used to
> detect whether large pages are being split and merged. Large pages
> significantly reduce TLB misses and improve system performance.

Ok, but typically the user isn't going to be able to do much with this
information. It feels more like something that should be in the page
table dump code (where we can calculate the values as we walk the
tables).
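
i.e. each time the walker finds a leaf entry it could bump a per-level
counter, roughly like the sketch below (the counter array and helper are
hypothetical, not something in the existing dump code):

	static unsigned long leaf_counts[4];	/* indexed by level: pgd/pud/pmd/pte */

	static void note_leaf_entry(unsigned int level)
	{
		leaf_counts[level]++;
	}

with the conversion to sizes done only when printing.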

What is it intended to account for?

The entire swapper?

Just the linear mapping?

> >> Signed-off-by: zhongjiang <zhongjiang@huawei.com>
> >> ---
> >>  arch/arm64/include/asm/pgtable-types.h |   24 ++++++++++++++++++++++++
> >>  arch/arm64/mm/mmu.c                    |   28 ++++++++++++++++++++++++++++
> >>  arch/arm64/mm/pageattr.c               |   31 +++++++++++++++++++++++++++++++
> >>  3 files changed, 83 insertions(+), 0 deletions(-)
> >>
> >> diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
> >> index 2b1bd7e..aa52546 100644
> >> --- a/arch/arm64/include/asm/pgtable-types.h
> >> +++ b/arch/arm64/include/asm/pgtable-types.h
> >> @@ -86,6 +86,30 @@ typedef pteval_t pgprot_t;
> >>  
> >>  #endif /* STRICT_MM_TYPECHECKS */
> >>  
> >> +struct seq_file;
> >> +extern void arch_report_meminfo(struct seq_file *m);
> >> +
> >> +enum pg_level {
> >> +	PG_LEVEL_NONE,
> >> +#ifdef CONFIG_ARM64_4K_PAGES
> >> +	PG_LEVEL_4K,
> >> +	PG_LEVEL_2M,
> >> +	PG_LEVEL_1G,
> >> +#else
> >> +	PG_LEVEL_64K,
> >> +	PG_LEVEL_512M,
> >> +#endif
> >> +	PG_LEVEL_NUM
> >> +};
> > 
> > This doesn't account for 16K pages, and it means each call site has to
> > handle the various page sizes directly.
> > 
> > It would be better to simply count PTE/PMD/PUD/PGD, then handle the size
> > conversion at the end when logging.
> > 
> 
> Yes, for now I only consider 4KB and 64KB. If the patch is approved, I will
> improve it.
> Each call site would need two different variables for the statistics in order
> to distinguish the different page types, so I don't think it would be any
> simpler.

I don't follow.

Rather than having:

	#if defined(CONFIG_ARM64_4K_PAGES)
		update_page_count(PG_LEVEL_4K, i);
	#elif defined(CONFIG_ARM64_16K_PAGES)
		update_page_count(PG_LEVEL_16K, i);
	#elif defined(CONFIG_ARM64_64K_PAGES)
		update_page_count(PG_LEVEL_64K, i);
	#else
	#error PAGE SIZE UNKNOWN
	#endif

You'd have:

	update_page_count(PG_LEVEL_PTE, i)

The latter is clearly simpler.

See the end of this email for what the other end would look like.

> >> diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
> >> index 7a5ff11..c1888b9 100644
> >> --- a/arch/arm64/mm/pageattr.c
> >> +++ b/arch/arm64/mm/pageattr.c
> >> @@ -15,12 +15,43 @@
> >>  #include <linux/module.h>
> >>  #include <linux/sched.h>
> >>  
> >> +#include <linux/seq_file.h>
> >>  #include <asm/pgalloc.h>
> >>  #include <asm/pgtable.h>
> >>  #include <asm/tlbflush.h>
> >>  
> >>  #include "mm.h"
> >>  
> >> +static unsigned long direct_pages_count[PG_LEVEL_NUM];
> > 
> > This doesn't match reality by the time we start executing the kernel,
> > given we created page tables in head.S.

As I mentioned here, I don't think that the account is correct, but it
depends on what you're trying to account for.

> >> +void update_page_count(int level, unsigned long pages)
> >> +{
> >> +	direct_pages_count[level] += pages;
> >> +}
> >> +
> >> +void split_page_count(int level)
> >> +{
> >> +	direct_pages_count[level]--;
> >> +	direct_pages_count[level-1] += PTRS_PER_PTE;
> >> +}
> >> +
> >> +void arch_report_meminfo(struct seq_file *m)
> >> +{
> >> +#ifdef CONFIG_ARM64_4K_PAGES
> >> +	seq_printf(m, "DirectMap4k:     %8lu kB\n",
> >> +			direct_pages_count[PG_LEVEL_4K] << 2);
> >> +	seq_printf(m, "DirectMap2M:     %8lu kB\n",
> >> +			direct_pages_count[PG_LEVEL_2M] << 11);
> >> +	seq_printf(m, "DirectMap1G:     %8lu kB\n",
> >> +			direct_pages_count[PG_LEVEL_1G] << 20);
> >> +#else
> >> +	seq_printf(m, "DirectMap64k:     %8lu kB\n",
> >> +			direct_pages_count[PG_LEVEL_64K] << 6);
> >> +	seq_printf(m, "DirectMap512M:     %8lu kB\n",
> >> +			direct_pages_count[PG_LEVEL_512M] << 19);
> >> +#endif
> >> +}
> > 
> > You could dynamically determine the sizes here for each field, and not
> > have to have #ifdefs.
> 
> I don't understand what you mean. I think it is more readable and easier to
> work with as it is.

Assuming you use PG_LEVEL_{PTE,PMD,PUD,PGD}, you can have this work for
any size of page and number of levels using something like:

void arch_report_meminfo(struct seq_file *m)
{
	seq_printf(m, "DirectMap%dk:     %8lu kB\n",
		   PAGE_SIZE / SZ_1K,
		   direct_pages_count[PG_LEVEL_PTE] * PAGE_SIZE / SZ_1K);

#if CONFIG_PGTABLE_LEVELS > 2
	seq_printf(m, "DirectMap%dM:     %8lu kB\n",
		   PMD_SIZE / SZ_1M,
		   direct_pages_count[PG_LEVEL_PMD] * PMD_SIZE / SZ_1K);

#endif

#if CONFIG_PGTABLE_LEVELS > 3
	seq_printf(m, "DirectMap%dG:     %8lu kB\n",
		   PUD_SIZE / SZ_1G,
		   direct_pages_count[PG_LEVEL_PUD] * PUD_SIZE / SZ_1K);

#endif
}

I think that's far more readable and maintainable.

The above may not cover all cases; I'm not sure if you can have a huge
PGD entry in some configuration. If we can, it should be easy to fix up
for.

Thanks,
Mark.
Xishi Qiu Nov. 27, 2015, 1:52 a.m. UTC | #4
On 2015/11/26 23:49, Mark Rutland wrote:

> On Thu, Nov 26, 2015 at 11:05:32PM +0800, zhong jiang wrote:
>> On 2015/11/25 23:04, Mark Rutland wrote:
>>> On Wed, Nov 25, 2015 at 09:41:12PM +0800, zhongjiang wrote:
>>>> This patch adds an interface to show the number of 4KB or 64KB pages,
>>>> aiming to report statistics on the different types of pages.
>>>
>>> What is this useful for? Why do we want it?
>>>
>>> What does it account for, just the swapper?
>>>
>>
>> The patch was written when I was backporting set_memory_ro. It can be used to
>> detect whether large pages are being split and merged. Large pages
>> significantly reduce TLB misses and improve system performance.
> 
> Ok, but typically the user isn't going to be able to do much with this
> information. It feels more like something that should be in the page
> table dump code (where we can calculate the values as we walk the
> tables).
> 
> What is it intended to account for?
> 
> The entire swapper?
> 
> Just the linear mapping?

Hi Mark,

x86 shows this information in /proc/meminfo, so how about showing it just
like x86 does?
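
For reference, the x86 output in question looks something like this (the
numbers below are only an example):

	DirectMap4k:      108544 kB
	DirectMap2M:     8280064 kB
	DirectMap1G:     8388608 kB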

Thanks,
Xishi Qiu
zhong jiang Nov. 27, 2015, 8:40 a.m. UTC | #5
On 2015/11/26 23:49, Mark Rutland wrote:
> On Thu, Nov 26, 2015 at 11:05:32PM +0800, zhong jiang wrote:
>> On 2015/11/25 23:04, Mark Rutland wrote:
>>> On Wed, Nov 25, 2015 at 09:41:12PM +0800, zhongjiang wrote:
>>>> This patch adds an interface to show the number of 4KB or 64KB pages,
>>>> aiming to report statistics on the different types of pages.
>>>
>>> What is this useful for? Why do we want it?
>>>
>>> What does it account for, just the swapper?
>>>
>>
>> The patch was written when I was backporting set_memory_ro. It can be used to
>> detect whether large pages are being split and merged. Large pages
>> significantly reduce TLB misses and improve system performance.
> 
> Ok, but typically the user isn't going to be able to do much with this
> information. It feels more like something that should be in the page
> table dump code (where we can calculate the values as we walk the
> tables).
> 
> What is it intended to account for?
> 
> The entire swapper?
> 
> Just the linear mapping?

Yes, it is used only for the direct mapping, counting the number of the
various page sizes just like x86.
> 
>>>> Signed-off-by: zhongjiang <zhongjiang@huawei.com>
>>>> ---
>>>>  arch/arm64/include/asm/pgtable-types.h |   24 ++++++++++++++++++++++++
>>>>  arch/arm64/mm/mmu.c                    |   28 ++++++++++++++++++++++++++++
>>>>  arch/arm64/mm/pageattr.c               |   31 +++++++++++++++++++++++++++++++
>>>>  3 files changed, 83 insertions(+), 0 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
>>>> index 2b1bd7e..aa52546 100644
>>>> --- a/arch/arm64/include/asm/pgtable-types.h
>>>> +++ b/arch/arm64/include/asm/pgtable-types.h
>>>> @@ -86,6 +86,30 @@ typedef pteval_t pgprot_t;
>>>>  
>>>>  #endif /* STRICT_MM_TYPECHECKS */
>>>>  
>>>> +struct seq_file;
>>>> +extern void arch_report_meminfo(struct seq_file *m);
>>>> +
>>>> +enum pg_level {
>>>> +	PG_LEVEL_NONE,
>>>> +#ifdef CONFIG_ARM64_4K_PAGES
>>>> +	PG_LEVEL_4K,
>>>> +	PG_LEVEL_2M,
>>>> +	PG_LEVEL_1G,
>>>> +#else
>>>> +	PG_LEVEL_64K,
>>>> +	PG_LEVEL_512M,
>>>> +#endif
>>>> +	PG_LEVEL_NUM
>>>> +};
>>>
>>> This doesn't account for 16K pages, and it means each call site has to
>>> handle the various page sizes directly.
>>>
>>> It would be better to simply count PTE/PMD/PUD/PGD, then handle the size
>>> conversion at the end when logging.
>>>
>>
>> Yes, for now I only consider 4KB and 64KB. If the patch is approved, I will
>> improve it.
>> Each call site would need two different variables for the statistics in order
>> to distinguish the different page types, so I don't think it would be any
>> simpler.
> 
> I don't follow.
> 
> Rather than having:
> 
> 	#if defined(CONFIG_ARM64_4K_PAGES)
> 		update_page_count(PG_LEVEL_4K, i);
> 	#elif defined(CONFIG_ARM64_16K_PAGES)
> 		update_page_count(PG_LEVEL_16K, i);
> 	#elif defined(CONFIG_ARM64_64K_PAGES)
> 		update_page_count(PG_LEVEL_64K, i);
> 	#else
> 	#error PAGE SIZE UNKNOWN
> 	#endif
> 
> You'd have:
> 
> 	update_page_count(PG_LEVEL_PTE, i)
> 
> The latter is clearly simpler.
> 
> See the end of this email for what the other end would look like.
> 
>>>> diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
>>>> index 7a5ff11..c1888b9 100644
>>>> --- a/arch/arm64/mm/pageattr.c
>>>> +++ b/arch/arm64/mm/pageattr.c
>>>> @@ -15,12 +15,43 @@
>>>>  #include <linux/module.h>
>>>>  #include <linux/sched.h>
>>>>  
>>>> +#include <linux/seq_file.h>
>>>>  #include <asm/pgalloc.h>
>>>>  #include <asm/pgtable.h>
>>>>  #include <asm/tlbflush.h>
>>>>  
>>>>  #include "mm.h"
>>>>  
>>>> +static unsigned long direct_pages_count[PG_LEVEL_NUM];
>>>
>>> This doesn't match reality by the time we start executing the kernel,
>>> given we created page tables in head.S.
> 
> As I mentioned here, I don't think that the account is correct, but it
> depends on what you're trying to account for.
> 
>>>> +void update_page_count(int level, unsigned long pages)
>>>> +{
>>>> +	direct_pages_count[level] += pages;
>>>> +}
>>>> +
>>>> +void split_page_count(int level)
>>>> +{
>>>> +	direct_pages_count[level]--;
>>>> +	direct_pages_count[level-1] += PTRS_PER_PTE;
>>>> +}
>>>> +
>>>> +void arch_report_meminfo(struct seq_file *m)
>>>> +{
>>>> +#ifdef CONFIG_ARM64_4K_PAGES
>>>> +	seq_printf(m, "DirectMap4k:     %8lu kB\n",
>>>> +			direct_pages_count[PG_LEVEL_4K] << 2);
>>>> +	seq_printf(m, "DirectMap2M:     %8lu kB\n",
>>>> +			direct_pages_count[PG_LEVEL_2M] << 11);
>>>> +	seq_printf(m, "DirectMap1G:     %8lu kB\n",
>>>> +			direct_pages_count[PG_LEVEL_1G] << 20);
>>>> +#else
>>>> +	seq_printf(m, "DirectMap64k:     %8lu kB\n",
>>>> +			direct_pages_count[PG_LEVEL_64K] << 6);
>>>> +	seq_printf(m, "DirectMap512M:     %8lu kB\n",
>>>> +			direct_pages_count[PG_LEVEL_512M] << 19);
>>>> +#endif
>>>> +}
>>>
>>> You could dynamically determine the sizes here for each field, and not
>>> have to have #ifdefs.
>>
>> I don't understand what you mean. I think it is more readable and easier to
>> work with as it is.
> 
> Assuming you use PG_LEVEL_{PTE,PMD,PUD,PGD}, you can have this work for
> any size of page and number of levels using something like:
> 
> void arch_report_meminfo(struct seq_file *m)
> {
> 	seq_printf(m, "DirectMap%dk:     %8lu kB\n",
> 		   PAGE_SIZE / SZ_1K,
> 		   direct_pages_count[PG_LEVEL_PTE] * PAGE_SIZE / SZ_1K);
> 
> #if CONFIG_PGTABLE_LEVELS > 2
> 	seq_printf(m, "DirectMap%dM:     %8lu kB\n",
> 		   PMD_SIZE / SZ_1M,
> 		   direct_pages_count[PG_LEVEL_PMD] * PMD_SIZE / SZ_1K);
> 
> #endif
> 
> #if CONFIG_PGTABLE_LEVELS > 3
> 	seq_printf(m, "DirectMap%dG:     %8lu kB\n",
> 		   PUD_SIZE / SZ_1G,
> 		   direct_pages_count[PG_LEVEL_PUD] * PUD_SIZE / SZ_1K);
> 
> #endif
> }
> 
> I think that's far more readable and maintainable.
> 
> The above may not cover all cases; I'm not sure if you can have a huge
> PGD entry in some configuration. If we can, it should be easy to fix up
> for.
> 
> Thanks,
> Mark.
> 
>

OK, that seems better; I will improve it then.
Thank you for your advice.

Thanks
zhongjiang
Mark Rutland Dec. 3, 2015, 6:22 p.m. UTC | #6
On Fri, Nov 27, 2015 at 09:52:16AM +0800, Xishi Qiu wrote:
> On 2015/11/26 23:49, Mark Rutland wrote:
> 
> > On Thu, Nov 26, 2015 at 11:05:32PM +0800, zhong jiang wrote:
> >> On 2015/11/25 23:04, Mark Rutland wrote:
> >>> On Wed, Nov 25, 2015 at 09:41:12PM +0800, zhongjiang wrote:
> >>>> This patch adds an interface to show the number of 4KB or 64KB pages,
> >>>> aiming to report statistics on the different types of pages.
> >>>
> >>> What is this useful for? Why do we want it?
> >>>
> >>> What does it account for, just the swapper?
> >>>
> >>
> >> The patch was written when I was backporting set_memory_ro. It can be used to
> >> detect whether large pages are being split and merged. Large pages
> >> significantly reduce TLB misses and improve system performance.
> > 
> > Ok, but typically the user isn't going to be able to do much with this
> > information. It feels more like something that should be in the page
> > table dump code (where we can calculate the values as we walk the
> > tables).
> > 
> > What is it intended to account for?
> > 
> > The entire swapper?
> > 
> > Just the linear mapping?
> 
> Hi Mark,
> 
> x86 shows this information in /proc/meminfo, so how about showing it just
> like x86 does?

The fact that another architecture has some implementation doesn't
necessarily mean it's a good idea. In this case there are concerns that
don't apply to x86, in that we support a number of page sizes, and
anything reading this needs to handle that fact.

If there's a sensible use-case, then I am not opposed to this. I don't
see the point in adding it just because we can.

A prerequisite for adding it is knowing precisely what it is intended to
describe. Otherwise it's impossible to review.

Thanks,
Mark.

Patch

diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
index 2b1bd7e..aa52546 100644
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -86,6 +86,30 @@  typedef pteval_t pgprot_t;
 
 #endif /* STRICT_MM_TYPECHECKS */
 
+struct seq_file;
+extern void arch_report_meminfo(struct seq_file *m);
+
+enum pg_level {
+	PG_LEVEL_NONE,
+#ifdef CONFIG_ARM64_4K_PAGES
+	PG_LEVEL_4K,
+	PG_LEVEL_2M,
+	PG_LEVEL_1G,
+#else
+	PG_LEVEL_64K,
+	PG_LEVEL_512M,
+#endif
+	PG_LEVEL_NUM
+};
+
+#ifdef CONFIG_PROC_FS
+extern void update_page_count(int level, unsigned long pages);
+extern void split_page_count(int level);
+#else
+static inline void update_page_count(int level, unsigned long pages) {}
+static inline void split_page_count(int level) {}
+#endif
+
 #if CONFIG_PGTABLE_LEVELS == 2
 #include <asm-generic/pgtable-nopmd.h>
 #elif CONFIG_PGTABLE_LEVELS == 3
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 0a7bee7..f9772d0 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -30,6 +30,7 @@ 
 #include <linux/stop_machine.h>
 #include <linux/bootmem.h>
 
+#include <asm/pgtable-types.h>
 #include <asm/cputype.h>
 #include <asm/fixmap.h>
 #include <asm/sections.h>
@@ -85,6 +86,11 @@  void split_pmd(pmd_t *pmd, pte_t *pte)
 		set_pte(pte, pfn_pte(pfn, prot));
 		pfn++;
 	} while (pte++, i++, i < PTRS_PER_PTE);
+#ifdef CONFIG_ARM64_4K_PAGES
+	split_page_count(PG_LEVEL_2M);
+#else
+	split_page_count(PG_LEVEL_512M);
+#endif
 }
 
 static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
@@ -93,6 +99,7 @@  static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 				  void *(*alloc)(unsigned long size))
 {
 	pte_t *pte;
+	unsigned long i = 0;
 
 	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
 		pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
@@ -107,7 +114,13 @@  static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 	do {
 		set_pte(pte, pfn_pte(pfn, prot));
 		pfn++;
+		i++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
+#ifdef CONFIG_ARM64_4K_PAGES
+	update_page_count(PG_LEVEL_4K, i);
+#else
+	update_page_count(PG_LEVEL_64K, i);
+#endif
 }
 
 void split_pud(pud_t *old_pud, pmd_t *pmd)
@@ -120,6 +133,9 @@  void split_pud(pud_t *old_pud, pmd_t *pmd)
 		set_pmd(pmd, __pmd(addr | prot));
 		addr += PMD_SIZE;
 	} while (pmd++, i++, i < PTRS_PER_PMD);
+#ifdef CONFIG_ARM64_4K_PAGES
+	split_page_count(PG_LEVEL_1G);
+#endif
 }
 
 static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
@@ -129,6 +145,7 @@  static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 {
 	pmd_t *pmd;
 	unsigned long next;
+	unsigned long i = 0;
 
 	/*
 	 * Check for initial section mappings in the pgd/pud and remove them.
@@ -159,6 +176,7 @@  static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 			 * Check for previous table entries created during
 			 * boot (__create_page_tables) and flush them.
 			 */
+			i++;
 			if (!pmd_none(old_pmd)) {
 				flush_tlb_all();
 				if (pmd_table(old_pmd)) {
@@ -173,6 +191,11 @@  static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 		}
 		phys += next - addr;
 	} while (pmd++, addr = next, addr != end);
+#ifdef CONFIG_ARM64_4K_PAGES
+	update_page_count(PG_LEVEL_2M, i);
+#else
+	update_page_count(PG_LEVEL_512M, i);
+#endif
 }
 
 static inline bool use_1G_block(unsigned long addr, unsigned long next,
@@ -194,6 +217,7 @@  static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 {
 	pud_t *pud;
 	unsigned long next;
+	unsigned long i = 0;
 
 	if (pgd_none(*pgd)) {
 		pud = alloc(PTRS_PER_PUD * sizeof(pud_t));
@@ -220,6 +244,7 @@  static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 			 *
 			 * Look up the old pmd table and free it.
 			 */
+			i++;
 			if (!pud_none(old_pud)) {
 				flush_tlb_all();
 				if (pud_table(old_pud)) {
@@ -233,6 +258,9 @@  static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 		}
 		phys += next - addr;
 	} while (pud++, addr = next, addr != end);
+#ifdef CONFIG_ARM64_4K_PAGES
+	update_page_count(PG_LEVEL_1G, i);
+#endif
 }
 
 /*
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 7a5ff11..c1888b9 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -15,12 +15,43 @@ 
 #include <linux/module.h>
 #include <linux/sched.h>
 
+#include <linux/seq_file.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
 #include "mm.h"
 
+static unsigned long direct_pages_count[PG_LEVEL_NUM];
+
+void update_page_count(int level, unsigned long pages)
+{
+	direct_pages_count[level] += pages;
+}
+
+void split_page_count(int level)
+{
+	direct_pages_count[level]--;
+	direct_pages_count[level-1] += PTRS_PER_PTE;
+}
+
+void arch_report_meminfo(struct seq_file *m)
+{
+#ifdef CONFIG_ARM64_4K_PAGES
+	seq_printf(m, "DirectMap4k:     %8lu kB\n",
+			direct_pages_count[PG_LEVEL_4K] << 2);
+	seq_printf(m, "DirectMap2M:     %8lu kB\n",
+			direct_pages_count[PG_LEVEL_2M] << 11);
+	seq_printf(m, "DirectMap1G:     %8lu kB\n",
+			direct_pages_count[PG_LEVEL_1G] << 20);
+#else
+	seq_printf(m, "DirectMap64k:     %8lu kB\n",
+			direct_pages_count[PG_LEVEL_64K] << 6);
+	seq_printf(m, "DirectMap512M:     %8lu kB\n",
+			direct_pages_count[PG_LEVEL_512M] << 19);
+#endif
+}
+
 static int update_pte_range(struct mm_struct *mm, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
 				pgprot_t clear, pgprot_t set)