diff mbox series

[2/2] mm/page_alloc: avoid counting event if no successful allocation

Message ID 20210709102855.55058-2-yanfei.xu@windriver.com (mailing list archive)
State New
Headers show
Series [1/2] mm/page_alloc: correct return value when failing at preparing | expand

Commit Message

Xu, Yanfei July 9, 2021, 10:28 a.m. UTC
While nr_populated is non-zero, nr_account might still be zero if
allocation fails. In this case, skipping the event counting can save
some cycles.

This commit also extracts the check of "page_array" from the while
statement to avoid unnecessary re-checks of it.

Signed-off-by: Yanfei Xu <yanfei.xu@windriver.com>
---
 mm/page_alloc.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

Comments

Mel Gorman July 9, 2021, 12:26 p.m. UTC | #1
On Fri, Jul 09, 2021 at 06:28:55PM +0800, Yanfei Xu wrote:
> While nr_populated is non-zero, nr_account might still be zero if
> allocation fails. In this case, skipping the event counting can save
> some cycles.
> 

The much more likely path is that nr_account is positive so we avoid a
branch in the common case.

> This commit also extracts the check of "page_array" from the while
> statement to avoid unnecessary re-checks of it.
> 

I'm surprised the compiler does not catch that page_array is invariant
for the loop. Did you check if gcc generates different code if page_array
is explicitly checked once instead of putting it in the loop?
Xu, Yanfei July 10, 2021, 11:31 a.m. UTC | #2
On 7/9/21 8:26 PM, Mel Gorman wrote:
> [Please note: This e-mail is from an EXTERNAL e-mail address]
> 
> On Fri, Jul 09, 2021 at 06:28:55PM +0800, Yanfei Xu wrote:
>> While nr_populated is non-zero, nr_account might still be zero if
>> allocation fails. In this case, skipping the event counting can save
>> some cycles.
>>
> 
> The much more likely path is that nr_account is positive so we avoid a
> branch in the common case.
> 

Got it.

>> This commit also extracts the check of "page_array" from the while
>> statement to avoid unnecessary re-checks of it.
>>
> 
> I'm surprised the compiler does not catch that page_array is invariant
> for the loop. Did you check if gcc generates different code if page_array
> is explicitly checked once instead of putting it in the loop?
> 

Hm.. In fact, the page_array check never appears inside the loop due to 
the compiler's optimization. I just confirmed this in the assembly.

without my patch applied:

ffffffff81267100 <__alloc_pages_bulk>:
ffffffff81267100:       e8 0b 73 df ff          callq  ffffffff8105e410 
<__fentry__>
ffffffff81267105:       55                      push   %rbp
ffffffff81267106:       48 89 e5                mov    %rsp,%rbp
ffffffff81267109:       41 57                   push   %r15
ffffffff8126710b:       41 56                   push   %r14
ffffffff8126710d:       41 55                   push   %r13
ffffffff8126710f:       41 54                   push   %r12
ffffffff81267111:       53                      push   %rbx
ffffffff81267112:       48 83 ec 58             sub    $0x58,%rsp
ffffffff81267116:       85 c9                   test   %ecx,%ecx
ffffffff81267118:       89 7d c4                mov    %edi,-0x3c(%rbp)
ffffffff8126711b:       89 75 9c                mov    %esi,-0x64(%rbp)
ffffffff8126711e:       48 89 55 a0             mov    %rdx,-0x60(%rbp)
ffffffff81267122:       89 4d d4                mov    %ecx,-0x2c(%rbp)
ffffffff81267125:       4c 89 45 a8             mov    %r8,-0x58(%rbp)
ffffffff81267129:       4c 89 4d c8             mov    %r9,-0x38(%rbp)
ffffffff8126712d:       0f 8e 7c 05 00 00       jle    ffffffff812676af 
<__alloc_pages_bulk+0x5af>
ffffffff81267133:       4d 85 c9                test   %r9,%r9
ffffffff81267136:       0f 84 84 05 00 00       je     ffffffff812676c0 
<__alloc_pages_bulk+0x5c0>
ffffffff8126713c:       49 83 39 00             cmpq   $0x0,(%r9)
ffffffff81267140:       0f 84 7a 05 00 00       je     ffffffff812676c0 
<__alloc_pages_bulk+0x5c0>
ffffffff81267146:       49 8d 41 08             lea    0x8(%r9),%rax
ffffffff8126714a:       89 ca                   mov    %ecx,%edx
ffffffff8126714c:       45 31 e4                xor    %r12d,%r12d
ffffffff8126714f:       eb 0b                   jmp    ffffffff8126715c 
<__alloc_pages_bulk+0x5c>
ffffffff81267151:       48 83 c0 08             add    $0x8,%rax
ffffffff81267155:       48 83 78 f8 00          cmpq   $0x0,-0x8(%rax)
ffffffff8126715a:       74 1c                   je     ffffffff81267178 
<__alloc_pages_bulk+0x78>
ffffffff8126715c:       41 83 c4 01             add    $0x1,%r12d
ffffffff81267160:       44 39 e2                cmp    %r12d,%edx
ffffffff81267163:       75 ec                   jne    ffffffff81267151 
<__alloc_pages_bulk+0x51>
ffffffff81267165:       48 63 45 d4             movslq -0x2c(%rbp),%rax
ffffffff81267169:       48 83 c4 58             add    $0x58,%rsp
ffffffff8126716d:       5b                      pop    %rbx
ffffffff8126716e:       41 5c                   pop    %r12
ffffffff81267170:       41 5d                   pop    %r13
ffffffff81267172:       41 5e                   pop    %r14
ffffffff81267174:       41 5f                   pop    %r15
ffffffff81267176:       5d                      pop    %rbp
ffffffff81267177:       c3                      retq
ffffffff81267178:       8b 45 d4                mov    -0x2c(%rbp),%eax
ffffffff8126717b:       44 29 e0                sub    %r12d,%eax
ffffffff8126717e:       83 f8 01                cmp    $0x1,%eax
ffffffff81267181:       0f 84 47 04 00 00       je     ffffffff812675ce 
<__alloc_pages_bulk+0x4ce>
ffffffff81267187:       8b 0d 53 22 a4 01       mov 
0x1a42253(%rip),%ecx        # ffffffff82ca93e0 
<page_group_by_mobility_disabled>
ffffffff8126718d:       48 63 45 9c             movslq -0x64(%rbp),%rax
ffffffff81267191:       44 8b 7d c4             mov    -0x3c(%rbp),%r15d
ffffffff81267195:       44 23 3d 58 22 a4 01    and 
0x1a42258(%rip),%r15d        # ffffffff82ca93f4 <gfp_allowed_mask>


with my patch applied:

ffffffff81267100 <__alloc_pages_bulk>:
ffffffff81267100:       e8 0b 73 df ff          callq  ffffffff8105e410 
<__fentry__>
ffffffff81267105:       55                      push   %rbp
ffffffff81267106:       48 89 e5                mov    %rsp,%rbp
ffffffff81267109:       41 57                   push   %r15
ffffffff8126710b:       41 56                   push   %r14
ffffffff8126710d:       41 55                   push   %r13
ffffffff8126710f:       41 54                   push   %r12
ffffffff81267111:       53                      push   %rbx
ffffffff81267112:       48 83 ec 60             sub    $0x60,%rsp
ffffffff81267116:       85 c9                   test   %ecx,%ecx
ffffffff81267118:       89 7d c8                mov    %edi,-0x38(%rbp)
ffffffff8126711b:       89 75 9c                mov    %esi,-0x64(%rbp)
ffffffff8126711e:       48 89 55 a0             mov    %rdx,-0x60(%rbp)
ffffffff81267122:       89 4d d0                mov    %ecx,-0x30(%rbp)
ffffffff81267125:       4c 89 45 a8             mov    %r8,-0x58(%rbp)
ffffffff81267129:       0f 8e 91 05 00 00       jle    ffffffff812676c0 
<__alloc_pages_bulk+0x5c0>
ffffffff8126712f:       4d 85 c9                test   %r9,%r9
ffffffff81267132:       4d 89 cf                mov    %r9,%r15
ffffffff81267135:       0f 84 4d 05 00 00       je     ffffffff81267688 
<__alloc_pages_bulk+0x588>
ffffffff8126713b:       8d 49 ff                lea    -0x1(%rcx),%ecx
ffffffff8126713e:       31 c0                   xor    %eax,%eax
ffffffff81267140:       eb 03                   jmp    ffffffff81267145 
<__alloc_pages_bulk+0x45>
ffffffff81267142:       48 89 d0                mov    %rdx,%rax
ffffffff81267145:       49 83 3c c7 00          cmpq   $0x0,(%r15,%rax,8)
ffffffff8126714a:       41 89 c4                mov    %eax,%r12d
ffffffff8126714d:       74 0d                   je     ffffffff8126715c 
<__alloc_pages_bulk+0x5c>
ffffffff8126714f:       44 8d 60 01             lea    0x1(%rax),%r12d
ffffffff81267153:       48 39 c1                cmp    %rax,%rcx
ffffffff81267156:       48 8d 50 01             lea    0x1(%rax),%rdx
ffffffff8126715a:       75 e6                   jne    ffffffff81267142 
<__alloc_pages_bulk+0x42>
ffffffff8126715c:       8b 45 d0                mov    -0x30(%rbp),%eax
ffffffff8126715f:       41 39 c4                cmp    %eax,%r12d
ffffffff81267162:       0f 84 19 04 00 00       je     ffffffff81267581 
<__alloc_pages_bulk+0x481>
ffffffff81267168:       44 29 e0                sub    %r12d,%eax
ffffffff8126716b:       83 f8 01                cmp    $0x1,%eax
ffffffff8126716e:       0f 84 66 04 00 00       je     ffffffff812675da 
<__alloc_pages_bulk+0x4da>
ffffffff81267174:       48 63 45 9c             movslq -0x64(%rbp),%rax
ffffffff81267178:       8b 0d 62 22 a4 01       mov 
0x1a42262(%rip),%ecx        # ffffffff82ca93e0 
<page_group_by_mobility_disabled>
ffffffff8126717e:       8b 5d c8                mov    -0x38(%rbp),%ebx
ffffffff81267181:       23 1d 6d 22 a4 01       and 
0x1a4226d(%rip),%ebx        # ffffffff82ca93f4 <gfp_allowed_mask>


Thanks,
Yanfei

> --
> Mel Gorman
> SUSE Labs
>
diff mbox series

Patch

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e9fd57ca4c1c..e25d508b85e9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5235,16 +5235,18 @@  unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 	if (unlikely(nr_pages <= 0))
 		return 0;
 
-	/*
-	 * Skip populated array elements to determine if any pages need
-	 * to be allocated before disabling IRQs.
-	 */
-	while (page_array && nr_populated < nr_pages && page_array[nr_populated])
-		nr_populated++;
+	if (page_array) {
+		/*
+		 * Skip populated array elements to determine if any pages need
+		 * to be allocated before disabling IRQs.
+		 */
+		while (nr_populated < nr_pages && page_array[nr_populated])
+			nr_populated++;
 
-	/* Already populated array? */
-	if (unlikely(page_array && nr_pages - nr_populated == 0))
-		return nr_populated;
+		/* Already populated array? */
+		if (unlikely(nr_pages - nr_populated == 0))
+			return nr_populated;
+	}
 
 	/* Use the single page allocator for one page. */
 	if (nr_pages - nr_populated == 1)
@@ -5319,9 +5321,10 @@  unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 
 	local_unlock_irqrestore(&pagesets.lock, flags);
 
-	__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
-	zone_statistics(ac.preferred_zoneref->zone, zone, nr_account);
-
+	if (likely(nr_account)) {
+		__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
+		zone_statistics(ac.preferred_zoneref->zone, zone, nr_account);
+	}
 	return nr_populated;
 
 failed_irq: