diff mbox

[v2,3/5] mm/page_alloc: Optimize free_area_init_core

Message ID 20180719132740.32743-4-osalvador@techadventures.net (mailing list archive)
State New, archived
Headers show

Commit Message

Oscar Salvador July 19, 2018, 1:27 p.m. UTC
From: Oscar Salvador <osalvador@suse.de>

In free_area_init_core we calculate the amount of managed pages
we are left with, by substracting the memmap pages and the pages
reserved for dma.
With the values left, we also account the total of kernel pages and
the total of pages.

Since memmap pages are calculated from zone->spanned_pages,
let us only do these calculcations whenever zone->spanned_pages is greather
than 0.

Signed-off-by: Oscar Salvador <osalvador@suse.de>
---
 mm/page_alloc.c | 73 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 38 insertions(+), 35 deletions(-)

Comments

Michal Hocko July 19, 2018, 1:44 p.m. UTC | #1
On Thu 19-07-18 15:27:38, osalvador@techadventures.net wrote:
> From: Oscar Salvador <osalvador@suse.de>
> 
> In free_area_init_core we calculate the amount of managed pages
> we are left with, by substracting the memmap pages and the pages
> reserved for dma.
> With the values left, we also account the total of kernel pages and
> the total of pages.
> 
> Since memmap pages are calculated from zone->spanned_pages,
> let us only do these calculcations whenever zone->spanned_pages is greather
> than 0.

But why do we care? How do we test this? In other words, why is this
worth merging?

> 
> Signed-off-by: Oscar Salvador <osalvador@suse.de>
> ---
>  mm/page_alloc.c | 73 ++++++++++++++++++++++++++++++---------------------------
>  1 file changed, 38 insertions(+), 35 deletions(-)
> 
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 10b754fba5fa..f7a6f4e13f41 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -6237,6 +6237,40 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat)
>  static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
>  #endif
>  
> +static unsigned long calc_remaining_pages(enum zone_type type, unsigned long freesize,
> +								unsigned long size)
> +{
> +	unsigned long memmap_pages = calc_memmap_size(size, freesize);
> +
> +	if(!is_highmem_idx(type)) {
> +		if (freesize >= memmap_pages) {
> +			freesize -= memmap_pages;
> +			if (memmap_pages)
> +				printk(KERN_DEBUG
> +					"  %s zone: %lu pages used for memmap\n",
> +					zone_names[type], memmap_pages);
> +		} else
> +			pr_warn("  %s zone: %lu pages exceeds freesize %lu\n",
> +				zone_names[type], memmap_pages, freesize);
> +	}
> +
> +	/* Account for reserved pages */
> +	if (type == 0 && freesize > dma_reserve) {
> +		freesize -= dma_reserve;
> +		printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
> +		zone_names[0], dma_reserve);
> +	}
> +
> +	if (!is_highmem_idx(type))
> +		nr_kernel_pages += freesize;
> +	/* Charge for highmem memmap if there are enough kernel pages */
> +	else if (nr_kernel_pages > memmap_pages * 2)
> +		nr_kernel_pages -= memmap_pages;
> +	nr_all_pages += freesize;
> +
> +	return freesize;
> +}
> +
>  /*
>   * Set up the zone data structures:
>   *   - mark all pages reserved
> @@ -6267,43 +6301,12 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
>  
>  	for (j = 0; j < MAX_NR_ZONES; j++) {
>  		struct zone *zone = pgdat->node_zones + j;
> -		unsigned long size, freesize, memmap_pages;
> +		unsigned long size = zone->spanned_pages;
> +		unsigned long freesize = zone->present_pages;
>  		unsigned long zone_start_pfn = zone->zone_start_pfn;
>  
> -		size = zone->spanned_pages;
> -		freesize = zone->present_pages;
> -
> -		/*
> -		 * Adjust freesize so that it accounts for how much memory
> -		 * is used by this zone for memmap. This affects the watermark
> -		 * and per-cpu initialisations
> -		 */
> -		memmap_pages = calc_memmap_size(size, freesize);
> -		if (!is_highmem_idx(j)) {
> -			if (freesize >= memmap_pages) {
> -				freesize -= memmap_pages;
> -				if (memmap_pages)
> -					printk(KERN_DEBUG
> -					       "  %s zone: %lu pages used for memmap\n",
> -					       zone_names[j], memmap_pages);
> -			} else
> -				pr_warn("  %s zone: %lu pages exceeds freesize %lu\n",
> -					zone_names[j], memmap_pages, freesize);
> -		}
> -
> -		/* Account for reserved pages */
> -		if (j == 0 && freesize > dma_reserve) {
> -			freesize -= dma_reserve;
> -			printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
> -					zone_names[0], dma_reserve);
> -		}
> -
> -		if (!is_highmem_idx(j))
> -			nr_kernel_pages += freesize;
> -		/* Charge for highmem memmap if there are enough kernel pages */
> -		else if (nr_kernel_pages > memmap_pages * 2)
> -			nr_kernel_pages -= memmap_pages;
> -		nr_all_pages += freesize;
> +		if (size)
> +			freesize = calc_remaining_pages(j, freesize, size);
>  
>  		/*
>  		 * Set an approximate value for lowmem here, it will be adjusted
> -- 
> 2.13.6
>
Oscar Salvador July 19, 2018, 2:03 p.m. UTC | #2
On Thu, Jul 19, 2018 at 03:44:17PM +0200, Michal Hocko wrote:
> On Thu 19-07-18 15:27:38, osalvador@techadventures.net wrote:
> > From: Oscar Salvador <osalvador@suse.de>
> > 
> > In free_area_init_core we calculate the amount of managed pages
> > we are left with, by substracting the memmap pages and the pages
> > reserved for dma.
> > With the values left, we also account the total of kernel pages and
> > the total of pages.
> > 
> > Since memmap pages are calculated from zone->spanned_pages,
> > let us only do these calculcations whenever zone->spanned_pages is greather
> > than 0.
> 
> But why do we care? How do we test this? In other words, why is this
> worth merging?
 
Uhm, unless the values are going to be updated, why do we want to go through all
comparasions/checks?
I thought it was a nice thing to have the chance to skip that block unless we are going to
update the counters.

Again, if you think this only adds complexity and no good, I can drop it.

Thanks
Michal Hocko July 19, 2018, 3:15 p.m. UTC | #3
On Thu 19-07-18 16:03:27, Oscar Salvador wrote:
> On Thu, Jul 19, 2018 at 03:44:17PM +0200, Michal Hocko wrote:
> > On Thu 19-07-18 15:27:38, osalvador@techadventures.net wrote:
> > > From: Oscar Salvador <osalvador@suse.de>
> > > 
> > > In free_area_init_core we calculate the amount of managed pages
> > > we are left with, by substracting the memmap pages and the pages
> > > reserved for dma.
> > > With the values left, we also account the total of kernel pages and
> > > the total of pages.
> > > 
> > > Since memmap pages are calculated from zone->spanned_pages,
> > > let us only do these calculcations whenever zone->spanned_pages is greather
> > > than 0.
> > 
> > But why do we care? How do we test this? In other words, why is this
> > worth merging?
>  
> Uhm, unless the values are going to be updated, why do we want to go through all
> comparasions/checks?
> I thought it was a nice thing to have the chance to skip that block unless we are going to
> update the counters.
> 
> Again, if you think this only adds complexity and no good, I can drop it.

Your changelog doesn't really explain the motivation. Does the change
help performance? Is this a pure cleanup?

The function is certainly not an example of beauty. It is more an
example of changes done on top of older ones without much thinking. But
I do not see your change would make it so much better. I would consider
it a much nicer cleanup if it was split into logical units each doing
one specific thing.

Btw. are you sure this change is correct? E.g.
		/*
		 * Set an approximate value for lowmem here, it will be adjusted
		 * when the bootmem allocator frees pages into the buddy system.
		 * And all highmem pages will be managed by the buddy system.
		 */
		zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;

expects freesize to be calculated properly and just from quick reading
the code I do not see why skipping other adjustments is ok for size > 0.
Maybe this is OK, I dunno and my brain is already heading few days off
but a real cleanup wouldn't even make me think what the heck is going on
here.
Oscar Salvador July 19, 2018, 8:52 p.m. UTC | #4
On Thu, Jul 19, 2018 at 05:15:55PM +0200, Michal Hocko wrote:
> Your changelog doesn't really explain the motivation. Does the change
> help performance? Is this a pure cleanup?

Hi Michal,

Sorry to not have explained this better from the very beginning.

It should help a bit in performance terms as we would be skipping those
condition checks and assignations for zones that do not have any pages.
It is not a huge win, but I think that skipping code we do not really need to run
is worh to have.

> The function is certainly not an example of beauty. It is more an
> example of changes done on top of older ones without much thinking. But
> I do not see your change would make it so much better. I would consider
> it a much nicer cleanup if it was split into logical units each doing
> one specific thing.

About the cleanup, I thought that moving that block of code to a separate function
would make the code easier to follow.
If you think that this is still not enough, I can try to split it and see the outcome.

> Btw. are you sure this change is correct? E.g.
> 		/*
> 		 * Set an approximate value for lowmem here, it will be adjusted
> 		 * when the bootmem allocator frees pages into the buddy system.
> 		 * And all highmem pages will be managed by the buddy system.
> 		 */
> 		zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
> 
> expects freesize to be calculated properly and just from quick reading
> the code I do not see why skipping other adjustments is ok for size > 0.
> Maybe this is OK, I dunno and my brain is already heading few days off
> but a real cleanup wouldn't even make me think what the heck is going on
> here.

This changed in commit e69438596bb3e97809e76be315e54a4a444f4797.
Current code does not have "realsize" anymore.

Thanks
Michal Hocko July 23, 2018, 8:30 a.m. UTC | #5
On Thu 19-07-18 22:52:35, Oscar Salvador wrote:
> On Thu, Jul 19, 2018 at 05:15:55PM +0200, Michal Hocko wrote:
> > Your changelog doesn't really explain the motivation. Does the change
> > help performance? Is this a pure cleanup?
> 
> Hi Michal,
> 
> Sorry to not have explained this better from the very beginning.
> 
> It should help a bit in performance terms as we would be skipping those
> condition checks and assignations for zones that do not have any pages.
> It is not a huge win, but I think that skipping code we do not really need to run
> is worh to have.

It is always preferable to have numbers when you are claiming
performance benefits.

> 
> > The function is certainly not an example of beauty. It is more an
> > example of changes done on top of older ones without much thinking. But
> > I do not see your change would make it so much better. I would consider
> > it a much nicer cleanup if it was split into logical units each doing
> > one specific thing.
> 
> About the cleanup, I thought that moving that block of code to a separate function
> would make the code easier to follow.
> If you think that this is still not enough, I can try to split it and see the outcome.

Well, on the other hand, is the change really worth it? Moving code
around is not free either. Any git blame tracking to understand why the
code is done this way will be harder.

But just to make it clear, I do not want to discourage you from touching
this area. I just believe that if you really want to do that then the
result shouldn't just tweak one specific case and tweak it. It would be
much more appreciated if the new code had much better structure and less
hacks.
diff mbox

Patch

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 10b754fba5fa..f7a6f4e13f41 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6237,6 +6237,40 @@  static void pgdat_init_kcompactd(struct pglist_data *pgdat)
 static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
 #endif
 
+static unsigned long calc_remaining_pages(enum zone_type type, unsigned long freesize,
+								unsigned long size)
+{
+	unsigned long memmap_pages = calc_memmap_size(size, freesize);
+
+	if(!is_highmem_idx(type)) {
+		if (freesize >= memmap_pages) {
+			freesize -= memmap_pages;
+			if (memmap_pages)
+				printk(KERN_DEBUG
+					"  %s zone: %lu pages used for memmap\n",
+					zone_names[type], memmap_pages);
+		} else
+			pr_warn("  %s zone: %lu pages exceeds freesize %lu\n",
+				zone_names[type], memmap_pages, freesize);
+	}
+
+	/* Account for reserved pages */
+	if (type == 0 && freesize > dma_reserve) {
+		freesize -= dma_reserve;
+		printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
+		zone_names[0], dma_reserve);
+	}
+
+	if (!is_highmem_idx(type))
+		nr_kernel_pages += freesize;
+	/* Charge for highmem memmap if there are enough kernel pages */
+	else if (nr_kernel_pages > memmap_pages * 2)
+		nr_kernel_pages -= memmap_pages;
+	nr_all_pages += freesize;
+
+	return freesize;
+}
+
 /*
  * Set up the zone data structures:
  *   - mark all pages reserved
@@ -6267,43 +6301,12 @@  static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
-		unsigned long size, freesize, memmap_pages;
+		unsigned long size = zone->spanned_pages;
+		unsigned long freesize = zone->present_pages;
 		unsigned long zone_start_pfn = zone->zone_start_pfn;
 
-		size = zone->spanned_pages;
-		freesize = zone->present_pages;
-
-		/*
-		 * Adjust freesize so that it accounts for how much memory
-		 * is used by this zone for memmap. This affects the watermark
-		 * and per-cpu initialisations
-		 */
-		memmap_pages = calc_memmap_size(size, freesize);
-		if (!is_highmem_idx(j)) {
-			if (freesize >= memmap_pages) {
-				freesize -= memmap_pages;
-				if (memmap_pages)
-					printk(KERN_DEBUG
-					       "  %s zone: %lu pages used for memmap\n",
-					       zone_names[j], memmap_pages);
-			} else
-				pr_warn("  %s zone: %lu pages exceeds freesize %lu\n",
-					zone_names[j], memmap_pages, freesize);
-		}
-
-		/* Account for reserved pages */
-		if (j == 0 && freesize > dma_reserve) {
-			freesize -= dma_reserve;
-			printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
-					zone_names[0], dma_reserve);
-		}
-
-		if (!is_highmem_idx(j))
-			nr_kernel_pages += freesize;
-		/* Charge for highmem memmap if there are enough kernel pages */
-		else if (nr_kernel_pages > memmap_pages * 2)
-			nr_kernel_pages -= memmap_pages;
-		nr_all_pages += freesize;
+		if (size)
+			freesize = calc_remaining_pages(j, freesize, size);
 
 		/*
 		 * Set an approximate value for lowmem here, it will be adjusted