diff mbox

[1/2] page-alloc/x86: don't restrict DMA heap to node 0

Message ID 57AB0EAE020000780010494F@prv-mh.provo.novell.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jan Beulich Aug. 10, 2016, 9:23 a.m. UTC
When node zero has no memory, the DMA bit width will end up getting set
to 9, which is obviously not helpful to hold back a reasonable amount
of low enough memory for Dom0 to use for DMA purposes. Find the lowest
node with memory below 4Gb instead.

Introduce arch_get_dma_bitsize() to keep this arch-specific logic out
of common code.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
page-alloc/x86: don't restrict DMA heap to node 0

When node zero has no memory, the DMA bit width will end up getting set
to 9, which is obviously not helpful to hold back a reasonable amount
of low enough memory for Dom0 to use for DMA purposes. Find the lowest
node with memory below 4Gb instead.

Introduce arch_get_dma_bitsize() to keep this arch-specific logic out
of common code.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/numa.c
+++ b/xen/arch/x86/numa.c
@@ -355,11 +355,21 @@ void __init init_cpu_to_node(void)
     }
 }
 
-EXPORT_SYMBOL(cpu_to_node);
-EXPORT_SYMBOL(node_to_cpumask);
-EXPORT_SYMBOL(memnode_shift);
-EXPORT_SYMBOL(memnodemap);
-EXPORT_SYMBOL(node_data);
+unsigned int __init arch_get_dma_bitsize(void)
+{
+    unsigned int node;
+
+    for_each_online_node(node)
+        if ( node_spanned_pages(node) &&
+             !(node_start_pfn(node) >> (32 - PAGE_SHIFT)) )
+            break;
+    if ( node >= MAX_NUMNODES )
+        panic("No node with memory below 4Gb");
+
+    return min_t(unsigned int,
+                 flsl(node_start_pfn(node) + node_spanned_pages(node) / 4 - 1)
+                 + PAGE_SHIFT, 32);
+}
 
 static void dump_numa(unsigned char key)
 {
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1368,16 +1368,7 @@ void __init end_boot_allocator(void)
     init_heap_pages(virt_to_page(bootmem_region_list), 1);
 
     if ( !dma_bitsize && (num_online_nodes() > 1) )
-    {
-#ifdef CONFIG_X86
-        dma_bitsize = min_t(unsigned int,
-                            flsl(NODE_DATA(0)->node_spanned_pages) - 1
-                            + PAGE_SHIFT - 2,
-                            32);
-#else
-        dma_bitsize = 32;
-#endif
-    }
+        dma_bitsize = arch_get_dma_bitsize();
 
     printk("Domain heap initialised");
     if ( dma_bitsize )
--- a/xen/include/asm-arm/numa.h
+++ b/xen/include/asm-arm/numa.h
@@ -17,6 +17,11 @@ static inline __attribute__((pure)) node
 #define node_start_pfn(nid) (pdx_to_pfn(frametable_base_pdx))
 #define __node_distance(a, b) (20)
 
+static inline unsigned int arch_get_dma_bitsize(void)
+{
+    return 32;
+}
+
 #endif /* __ARCH_ARM_NUMA_H */
 /*
  * Local variables:
--- a/xen/include/asm-x86/numa.h
+++ b/xen/include/asm-x86/numa.h
@@ -86,5 +86,6 @@ extern int valid_numa_range(u64 start, u
 
 void srat_parse_regions(u64 addr);
 extern u8 __node_distance(nodeid_t a, nodeid_t b);
+unsigned int arch_get_dma_bitsize(void);
 
 #endif

Comments

Andrew Cooper Aug. 10, 2016, 9:58 a.m. UTC | #1
On 10/08/16 10:23, Jan Beulich wrote:
> --- a/xen/arch/x86/numa.c
> +++ b/xen/arch/x86/numa.c
> @@ -355,11 +355,21 @@ void __init init_cpu_to_node(void)
>      }
>  }
>  
> -EXPORT_SYMBOL(cpu_to_node);
> -EXPORT_SYMBOL(node_to_cpumask);
> -EXPORT_SYMBOL(memnode_shift);
> -EXPORT_SYMBOL(memnodemap);
> -EXPORT_SYMBOL(node_data);
> +unsigned int __init arch_get_dma_bitsize(void)
> +{
> +    unsigned int node;
> +
> +    for_each_online_node(node)
> +        if ( node_spanned_pages(node) &&
> +             !(node_start_pfn(node) >> (32 - PAGE_SHIFT)) )
> +            break;
> +    if ( node >= MAX_NUMNODES )
> +        panic("No node with memory below 4Gb");
> +
> +    return min_t(unsigned int,
> +                 flsl(node_start_pfn(node) + node_spanned_pages(node) / 4 - 1)
> +                 + PAGE_SHIFT, 32);

You have moved the -1 and -2 inside the flsl() call, which alters its
behaviour quite a bit.  Is this intentional or accidental?

~Andrew

> +}
>  
>  static void dump_numa(unsigned char key)
>  {
> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -1368,16 +1368,7 @@ void __init end_boot_allocator(void)
>      init_heap_pages(virt_to_page(bootmem_region_list), 1);
>  
>      if ( !dma_bitsize && (num_online_nodes() > 1) )
> -    {
> -#ifdef CONFIG_X86
> -        dma_bitsize = min_t(unsigned int,
> -                            flsl(NODE_DATA(0)->node_spanned_pages) - 1
> -                            + PAGE_SHIFT - 2,
> -                            32);
> -#else
> -        dma_bitsize = 32;
> -#endif
> -    }
> +        dma_bitsize = arch_get_dma_bitsize();
>  
>      printk("Domain heap initialised");
>      if ( dma_bitsize )
Jan Beulich Aug. 10, 2016, 10:21 a.m. UTC | #2
>>> On 10.08.16 at 11:58, <andrew.cooper3@citrix.com> wrote:
> On 10/08/16 10:23, Jan Beulich wrote:
>> --- a/xen/arch/x86/numa.c
>> +++ b/xen/arch/x86/numa.c
>> @@ -355,11 +355,21 @@ void __init init_cpu_to_node(void)
>>      }
>>  }
>>  
>> -EXPORT_SYMBOL(cpu_to_node);
>> -EXPORT_SYMBOL(node_to_cpumask);
>> -EXPORT_SYMBOL(memnode_shift);
>> -EXPORT_SYMBOL(memnodemap);
>> -EXPORT_SYMBOL(node_data);
>> +unsigned int __init arch_get_dma_bitsize(void)
>> +{
>> +    unsigned int node;
>> +
>> +    for_each_online_node(node)
>> +        if ( node_spanned_pages(node) &&
>> +             !(node_start_pfn(node) >> (32 - PAGE_SHIFT)) )
>> +            break;
>> +    if ( node >= MAX_NUMNODES )
>> +        panic("No node with memory below 4Gb");
>> +
>> +    return min_t(unsigned int,
>> +                 flsl(node_start_pfn(node) + node_spanned_pages(node) / 4 - 1)
>> +                 + PAGE_SHIFT, 32);
> 
> You have moved the -1 and -2 inside the flsl() call, which alters its
> behaviour quite a bit.  Is this intentional or accidental?

This is intentional, and their original placement was only not too
wrong because of the effective use of zero in place of what is
now node_start_pfn(node). (Obviously the division by 4 alone
could have gone in either place, but the "- 1" should have been
inside the flsl() even before imo.)

Jan
Andrew Cooper Aug. 10, 2016, 12:18 p.m. UTC | #3
On 10/08/16 11:21, Jan Beulich wrote:
>>>> On 10.08.16 at 11:58, <andrew.cooper3@citrix.com> wrote:
>> On 10/08/16 10:23, Jan Beulich wrote:
>>> --- a/xen/arch/x86/numa.c
>>> +++ b/xen/arch/x86/numa.c
>>> @@ -355,11 +355,21 @@ void __init init_cpu_to_node(void)
>>>      }
>>>  }
>>>  
>>> -EXPORT_SYMBOL(cpu_to_node);
>>> -EXPORT_SYMBOL(node_to_cpumask);
>>> -EXPORT_SYMBOL(memnode_shift);
>>> -EXPORT_SYMBOL(memnodemap);
>>> -EXPORT_SYMBOL(node_data);
>>> +unsigned int __init arch_get_dma_bitsize(void)
>>> +{
>>> +    unsigned int node;
>>> +
>>> +    for_each_online_node(node)
>>> +        if ( node_spanned_pages(node) &&
>>> +             !(node_start_pfn(node) >> (32 - PAGE_SHIFT)) )
>>> +            break;
>>> +    if ( node >= MAX_NUMNODES )
>>> +        panic("No node with memory below 4Gb");
>>> +
>>> +    return min_t(unsigned int,
>>> +                 flsl(node_start_pfn(node) + node_spanned_pages(node) / 4 - 1)
>>> +                 + PAGE_SHIFT, 32);
>> You have moved the -1 and -2 inside the flsl() call, which alters its
>> behaviour quite a bit.  Is this intentional or accidental?
> This is intentional, and their original placement was only not too
> wrong because of the effective use of zero in place of what is
> now node_start_pfn(node). (Obviously the division by 4 alone
> could have gone in either place, but the "- 1" should have been
> inside the flsl() even before imo.)

In which case it should be at least mentioned in the commit message.

Finally, do you really mean to only divide the spanned pages by 4? 
Either way, it could do with further bracketing.

~Andrew
Jan Beulich Aug. 10, 2016, 12:50 p.m. UTC | #4
>>> On 10.08.16 at 14:18, <andrew.cooper3@citrix.com> wrote:
> On 10/08/16 11:21, Jan Beulich wrote:
>>>>> On 10.08.16 at 11:58, <andrew.cooper3@citrix.com> wrote:
>>> On 10/08/16 10:23, Jan Beulich wrote:
>>>> --- a/xen/arch/x86/numa.c
>>>> +++ b/xen/arch/x86/numa.c
>>>> @@ -355,11 +355,21 @@ void __init init_cpu_to_node(void)
>>>>      }
>>>>  }
>>>>  
>>>> -EXPORT_SYMBOL(cpu_to_node);
>>>> -EXPORT_SYMBOL(node_to_cpumask);
>>>> -EXPORT_SYMBOL(memnode_shift);
>>>> -EXPORT_SYMBOL(memnodemap);
>>>> -EXPORT_SYMBOL(node_data);
>>>> +unsigned int __init arch_get_dma_bitsize(void)
>>>> +{
>>>> +    unsigned int node;
>>>> +
>>>> +    for_each_online_node(node)
>>>> +        if ( node_spanned_pages(node) &&
>>>> +             !(node_start_pfn(node) >> (32 - PAGE_SHIFT)) )
>>>> +            break;
>>>> +    if ( node >= MAX_NUMNODES )
>>>> +        panic("No node with memory below 4Gb");
>>>> +
>>>> +    return min_t(unsigned int,
>>>> +                 flsl(node_start_pfn(node) + node_spanned_pages(node) / 4 - 
> 1)
>>>> +                 + PAGE_SHIFT, 32);
>>> You have moved the -1 and -2 inside the flsl() call, which alters its
>>> behaviour quite a bit.  Is this intentional or accidental?
>> This is intentional, and their original placement was only not too
>> wrong because of the effective use of zero in place of what is
>> now node_start_pfn(node). (Obviously the division by 4 alone
>> could have gone in either place, but the "- 1" should have been
>> inside the flsl() even before imo.)
> 
> In which case it should be at least mentioned in the commit message.

"Also adjust the original calculation: I think the subtraction of 1
 should have been part of the flsl() argument rather than getting
 applied to its result. And while previously the division by 4 was valid
 to be done on the flsl() result, this now also needs to be converted,
 as is should only be applied to the spanned pages value."

> Finally, do you really mean to only divide the spanned pages by 4? 

Hmm, this question reads ambiguous to me: Do you mean the divisor
should be larger than 4? I don't think so, or else the area could end
up rather small. Or do you mean to divide the sum of start address
and spanned pages? That would surely be wrong, as the result could
end up below start address, and hence the area could continue to
remain empty.

> Either way, it could do with further bracketing.

Bracketing of what? Even elementary school maths give precedence
to multiplication and division over addition and subtraction. I know
you like to parenthesize basically every binary expression that's an
operand of another expression, but I think you meanwhile also know
that I think this goes too far.

Jan
Julien Grall Aug. 11, 2016, 9:53 a.m. UTC | #5
Hi Jan,

On 10/08/2016 11:23, Jan Beulich wrote:
> --- a/xen/include/asm-arm/numa.h
> +++ b/xen/include/asm-arm/numa.h
> @@ -17,6 +17,11 @@ static inline __attribute__((pure)) node
>  #define node_start_pfn(nid) (pdx_to_pfn(frametable_base_pdx))
>  #define __node_distance(a, b) (20)
>
> +static inline unsigned int arch_get_dma_bitsize(void)
> +{
> +    return 32;
> +}
> +

I am not sure why we return 32 here for ARM. Anyway, as it was already 
the case before this patch:

Acked-by: Julien Grall <julien.grall@arm.com>

Regards,
Jan Beulich Aug. 11, 2016, 10:05 a.m. UTC | #6
>>> On 11.08.16 at 11:53, <julien.grall@arm.com> wrote:
> Hi Jan,
> 
> On 10/08/2016 11:23, Jan Beulich wrote:
>> --- a/xen/include/asm-arm/numa.h
>> +++ b/xen/include/asm-arm/numa.h
>> @@ -17,6 +17,11 @@ static inline __attribute__((pure)) node
>>  #define node_start_pfn(nid) (pdx_to_pfn(frametable_base_pdx))
>>  #define __node_distance(a, b) (20)
>>
>> +static inline unsigned int arch_get_dma_bitsize(void)
>> +{
>> +    return 32;
>> +}
>> +
> 
> I am not sure why we return 32 here for ARM. Anyway, as it was already 
> the case before this patch:

In fact I think we could get away without that inline function for the
time being: Since NODES_SHIFT is undefined on ARM, the call out of
page_alloc.c should get elided by the compiler anyway, so a possible
alternative would be to just have a declaration of the function on
ARM without actual implementation.

> Acked-by: Julien Grall <julien.grall@arm.com>

Thanks.

Jan
diff mbox

Patch

--- a/xen/arch/x86/numa.c
+++ b/xen/arch/x86/numa.c
@@ -355,11 +355,21 @@  void __init init_cpu_to_node(void)
     }
 }
 
-EXPORT_SYMBOL(cpu_to_node);
-EXPORT_SYMBOL(node_to_cpumask);
-EXPORT_SYMBOL(memnode_shift);
-EXPORT_SYMBOL(memnodemap);
-EXPORT_SYMBOL(node_data);
+unsigned int __init arch_get_dma_bitsize(void)
+{
+    unsigned int node;
+
+    for_each_online_node(node)
+        if ( node_spanned_pages(node) &&
+             !(node_start_pfn(node) >> (32 - PAGE_SHIFT)) )
+            break;
+    if ( node >= MAX_NUMNODES )
+        panic("No node with memory below 4Gb");
+
+    return min_t(unsigned int,
+                 flsl(node_start_pfn(node) + node_spanned_pages(node) / 4 - 1)
+                 + PAGE_SHIFT, 32);
+}
 
 static void dump_numa(unsigned char key)
 {
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1368,16 +1368,7 @@  void __init end_boot_allocator(void)
     init_heap_pages(virt_to_page(bootmem_region_list), 1);
 
     if ( !dma_bitsize && (num_online_nodes() > 1) )
-    {
-#ifdef CONFIG_X86
-        dma_bitsize = min_t(unsigned int,
-                            flsl(NODE_DATA(0)->node_spanned_pages) - 1
-                            + PAGE_SHIFT - 2,
-                            32);
-#else
-        dma_bitsize = 32;
-#endif
-    }
+        dma_bitsize = arch_get_dma_bitsize();
 
     printk("Domain heap initialised");
     if ( dma_bitsize )
--- a/xen/include/asm-arm/numa.h
+++ b/xen/include/asm-arm/numa.h
@@ -17,6 +17,11 @@  static inline __attribute__((pure)) node
 #define node_start_pfn(nid) (pdx_to_pfn(frametable_base_pdx))
 #define __node_distance(a, b) (20)
 
+static inline unsigned int arch_get_dma_bitsize(void)
+{
+    return 32;
+}
+
 #endif /* __ARCH_ARM_NUMA_H */
 /*
  * Local variables:
--- a/xen/include/asm-x86/numa.h
+++ b/xen/include/asm-x86/numa.h
@@ -86,5 +86,6 @@  extern int valid_numa_range(u64 start, u
 
 void srat_parse_regions(u64 addr);
 extern u8 __node_distance(nodeid_t a, nodeid_t b);
+unsigned int arch_get_dma_bitsize(void);
 
 #endif