
Bug report about KASLR and ZONE_MOVABLE

Message ID 20180711124008.GF2070@MiWiFi-R3L-srv (mailing list archive)
State New, archived

Commit Message

Baoquan He July 11, 2018, 12:40 p.m. UTC
Please try this v3 patch:

From 9850d3de9c02e570dc7572069a9749a8add4c4c7 Mon Sep 17 00:00:00 2001
From: Baoquan He <bhe@redhat.com>
Date: Wed, 11 Jul 2018 20:31:51 +0800
Subject: [PATCH v3] mm, page_alloc: find movable zone after kernel text

In find_zone_movable_pfns_for_nodes(), when trying to find the starting
PFN at which the movable zone begins in each node, the kernel text position is
not considered. KASLR may place the kernel above the point where the movable
zone begins.

Fix it by making the movable zone start after the kernel text on that node.

Signed-off-by: Baoquan He <bhe@redhat.com>
---
 mm/page_alloc.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)
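
At a high level, the hunk in this patch clamps the lowest PFN that ZONE_MOVABLE
may start at on the node holding the kernel image, so that the KASLR-randomized
kernel text cannot end up inside the movable zone. A minimal sketch of that
logic only (not the literal hunk below; it assumes the end-of-text symbol is
first converted to a physical PFN, for example via __pa_symbol(), and that the
comparison uses the node id nid — two points the build robot and reviewers pick
up on below):

        /*
         * Sketch: on the node that holds the kernel image, do not let
         * ZONE_MOVABLE start below the end of the kernel text.
         */
        unsigned long kernel_end_pfn = PFN_UP(__pa_symbol(_etext));

        if (pfn_to_nid(kernel_end_pfn) == nid)
                real_startpfn = max(usable_startpfn, kernel_end_pfn);
        else
                real_startpfn = usable_startpfn;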

Comments

kernel test robot July 11, 2018, 5:59 p.m. UTC | #1
Hi Baoquan,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.18-rc4 next-20180711]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Baoquan-He/mm-page_alloc-find-movable-zone-after-kernel-text/20180711-234359
config: x86_64-randconfig-x015-201827 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All error/warnings (new ones prefixed by >>):

   In file included from include/asm-generic/bug.h:18:0,
                    from arch/x86/include/asm/bug.h:83,
                    from include/linux/bug.h:5,
                    from include/linux/mmdebug.h:5,
                    from include/linux/mm.h:9,
                    from mm/page_alloc.c:18:
   mm/page_alloc.c: In function 'find_zone_movable_pfns_for_nodes':
>> include/linux/pfn.h:19:40: error: invalid operands to binary >> (have 'char *' and 'int')
    #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
                       ~~~~~~~~~~~~~~~~~~~ ^
   include/linux/kernel.h:812:40: note: in definition of macro '__typecheck'
      (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
                                           ^
   include/linux/kernel.h:836:24: note: in expansion of macro '__safe_cmp'
     __builtin_choose_expr(__safe_cmp(x, y), \
                           ^~~~~~~~~~
   include/linux/kernel.h:852:19: note: in expansion of macro '__careful_cmp'
    #define max(x, y) __careful_cmp(x, y, >)
                      ^~~~~~~~~~~~~
>> mm/page_alloc.c:6690:21: note: in expansion of macro 'max'
        real_startpfn = max(usable_startpfn,
                        ^~~
>> mm/page_alloc.c:6691:7: note: in expansion of macro 'PFN_UP'
          PFN_UP(_etext))
          ^~~~~~
>> include/linux/pfn.h:19:40: error: invalid operands to binary >> (have 'char *' and 'int')
    #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
                       ~~~~~~~~~~~~~~~~~~~ ^
   include/linux/kernel.h:820:48: note: in definition of macro '__is_constexpr'
     (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
                                                   ^
   include/linux/kernel.h:826:25: note: in expansion of macro '__no_side_effects'
      (__typecheck(x, y) && __no_side_effects(x, y))
                            ^~~~~~~~~~~~~~~~~
   include/linux/kernel.h:836:24: note: in expansion of macro '__safe_cmp'
     __builtin_choose_expr(__safe_cmp(x, y), \
                           ^~~~~~~~~~
   include/linux/kernel.h:852:19: note: in expansion of macro '__careful_cmp'
    #define max(x, y) __careful_cmp(x, y, >)
                      ^~~~~~~~~~~~~
>> mm/page_alloc.c:6690:21: note: in expansion of macro 'max'
        real_startpfn = max(usable_startpfn,
                        ^~~
>> mm/page_alloc.c:6691:7: note: in expansion of macro 'PFN_UP'
          PFN_UP(_etext))
          ^~~~~~
>> include/linux/pfn.h:19:40: error: invalid operands to binary >> (have 'char *' and 'int')
    #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
                       ~~~~~~~~~~~~~~~~~~~ ^
   include/linux/kernel.h:828:34: note: in definition of macro '__cmp'
    #define __cmp(x, y, op) ((x) op (y) ? (x) : (y))
                                     ^
   include/linux/kernel.h:852:19: note: in expansion of macro '__careful_cmp'
    #define max(x, y) __careful_cmp(x, y, >)
                      ^~~~~~~~~~~~~
>> mm/page_alloc.c:6690:21: note: in expansion of macro 'max'
        real_startpfn = max(usable_startpfn,
                        ^~~
>> mm/page_alloc.c:6691:7: note: in expansion of macro 'PFN_UP'
          PFN_UP(_etext))
          ^~~~~~
>> include/linux/pfn.h:19:40: error: invalid operands to binary >> (have 'char *' and 'int')
    #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
                       ~~~~~~~~~~~~~~~~~~~ ^
   include/linux/kernel.h:828:46: note: in definition of macro '__cmp'
    #define __cmp(x, y, op) ((x) op (y) ? (x) : (y))
                                                 ^
   include/linux/kernel.h:852:19: note: in expansion of macro '__careful_cmp'
    #define max(x, y) __careful_cmp(x, y, >)
                      ^~~~~~~~~~~~~
>> mm/page_alloc.c:6690:21: note: in expansion of macro 'max'
        real_startpfn = max(usable_startpfn,
                        ^~~
>> mm/page_alloc.c:6691:7: note: in expansion of macro 'PFN_UP'
          PFN_UP(_etext))
          ^~~~~~
>> include/linux/pfn.h:19:40: error: invalid operands to binary >> (have 'char *' and 'int')
    #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
                       ~~~~~~~~~~~~~~~~~~~ ^
   include/linux/kernel.h:832:10: note: in definition of macro '__cmp_once'
      typeof(y) unique_y = (y);  \
             ^
   include/linux/kernel.h:852:19: note: in expansion of macro '__careful_cmp'
    #define max(x, y) __careful_cmp(x, y, >)
                      ^~~~~~~~~~~~~
>> mm/page_alloc.c:6690:21: note: in expansion of macro 'max'
        real_startpfn = max(usable_startpfn,
                        ^~~
>> mm/page_alloc.c:6691:7: note: in expansion of macro 'PFN_UP'
          PFN_UP(_etext))
          ^~~~~~
>> include/linux/pfn.h:19:40: error: invalid operands to binary >> (have 'char *' and 'int')
    #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
                       ~~~~~~~~~~~~~~~~~~~ ^
   include/linux/kernel.h:832:25: note: in definition of macro '__cmp_once'
      typeof(y) unique_y = (y);  \
                            ^
   include/linux/kernel.h:852:19: note: in expansion of macro '__careful_cmp'
    #define max(x, y) __careful_cmp(x, y, >)
                      ^~~~~~~~~~~~~
>> mm/page_alloc.c:6690:21: note: in expansion of macro 'max'
        real_startpfn = max(usable_startpfn,
                        ^~~
>> mm/page_alloc.c:6691:7: note: in expansion of macro 'PFN_UP'
          PFN_UP(_etext))
          ^~~~~~
>> include/linux/kernel.h:836:2: error: first argument to '__builtin_choose_expr' not a constant
     __builtin_choose_expr(__safe_cmp(x, y), \
     ^
   include/linux/kernel.h:852:19: note: in expansion of macro '__careful_cmp'
    #define max(x, y) __careful_cmp(x, y, >)
                      ^~~~~~~~~~~~~
>> mm/page_alloc.c:6690:21: note: in expansion of macro 'max'
        real_startpfn = max(usable_startpfn,
                        ^~~
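
All of the errors above point at the same operand: _etext is declared as a
character array (extern char _etext[];) in the kernel's section headers, so
PFN_UP(_etext) applies >> PAGE_SHIFT to a char *, which is what every expansion
of max() above trips over; the posted hunk is also missing a semicolon before
the else. A minimal sketch of a conversion that would hand PFN_UP() an integer,
assuming __pa_symbol() is used to take the physical address of the symbol:

        /* Sketch: take the physical address (an unsigned long) of the symbol
         * before forming a PFN, instead of shifting the char * directly. */
        unsigned long kernel_end_pfn = PFN_UP(__pa_symbol(_etext));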

vim +6692 mm/page_alloc.c

  6540	
  6541	/*
  6542	 * Find the PFN the Movable zone begins in each node. Kernel memory
  6543	 * is spread evenly between nodes as long as the nodes have enough
  6544	 * memory. When they don't, some nodes will have more kernelcore than
  6545	 * others
  6546	 */
  6547	static void __init find_zone_movable_pfns_for_nodes(void)
  6548	{
  6549		int i, nid;
  6550		unsigned long usable_startpfn, real_startpfn;
  6551		unsigned long kernelcore_node, kernelcore_remaining;
  6552		/* save the state before borrow the nodemask */
  6553		nodemask_t saved_node_state = node_states[N_MEMORY];
  6554		unsigned long totalpages = early_calculate_totalpages();
  6555		int usable_nodes = nodes_weight(node_states[N_MEMORY]);
  6556		struct memblock_region *r;
  6557	
  6558		/* Need to find movable_zone earlier when movable_node is specified. */
  6559		find_usable_zone_for_movable();
  6560	
  6561		/*
  6562		 * If movable_node is specified, ignore kernelcore and movablecore
  6563		 * options.
  6564		 */
  6565		if (movable_node_is_enabled()) {
  6566			for_each_memblock(memory, r) {
  6567				if (!memblock_is_hotpluggable(r))
  6568					continue;
  6569	
  6570				nid = r->nid;
  6571	
  6572				usable_startpfn = PFN_DOWN(r->base);
  6573				zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
  6574					min(usable_startpfn, zone_movable_pfn[nid]) :
  6575					usable_startpfn;
  6576			}
  6577	
  6578			goto out2;
  6579		}
  6580	
  6581		/*
  6582		 * If kernelcore=mirror is specified, ignore movablecore option
  6583		 */
  6584		if (mirrored_kernelcore) {
  6585			bool mem_below_4gb_not_mirrored = false;
  6586	
  6587			for_each_memblock(memory, r) {
  6588				if (memblock_is_mirror(r))
  6589					continue;
  6590	
  6591				nid = r->nid;
  6592	
  6593				usable_startpfn = memblock_region_memory_base_pfn(r);
  6594	
  6595				if (usable_startpfn < 0x100000) {
  6596					mem_below_4gb_not_mirrored = true;
  6597					continue;
  6598				}
  6599	
  6600				zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
  6601					min(usable_startpfn, zone_movable_pfn[nid]) :
  6602					usable_startpfn;
  6603			}
  6604	
  6605			if (mem_below_4gb_not_mirrored)
  6606				pr_warn("This configuration results in unmirrored kernel memory.");
  6607	
  6608			goto out2;
  6609		}
  6610	
  6611		/*
  6612		 * If kernelcore=nn% or movablecore=nn% was specified, calculate the
  6613		 * amount of necessary memory.
  6614		 */
  6615		if (required_kernelcore_percent)
  6616			required_kernelcore = (totalpages * 100 * required_kernelcore_percent) /
  6617					       10000UL;
  6618		if (required_movablecore_percent)
  6619			required_movablecore = (totalpages * 100 * required_movablecore_percent) /
  6620						10000UL;
  6621	
  6622		/*
  6623		 * If movablecore= was specified, calculate what size of
  6624		 * kernelcore that corresponds so that memory usable for
  6625		 * any allocation type is evenly spread. If both kernelcore
  6626		 * and movablecore are specified, then the value of kernelcore
  6627		 * will be used for required_kernelcore if it's greater than
  6628		 * what movablecore would have allowed.
  6629		 */
  6630		if (required_movablecore) {
  6631			unsigned long corepages;
  6632	
  6633			/*
  6634			 * Round-up so that ZONE_MOVABLE is at least as large as what
  6635			 * was requested by the user
  6636			 */
  6637			required_movablecore =
  6638				roundup(required_movablecore, MAX_ORDER_NR_PAGES);
  6639			required_movablecore = min(totalpages, required_movablecore);
  6640			corepages = totalpages - required_movablecore;
  6641	
  6642			required_kernelcore = max(required_kernelcore, corepages);
  6643		}
  6644	
  6645		/*
  6646		 * If kernelcore was not specified or kernelcore size is larger
  6647		 * than totalpages, there is no ZONE_MOVABLE.
  6648		 */
  6649		if (!required_kernelcore || required_kernelcore >= totalpages)
  6650			goto out;
  6651	
  6652		/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
  6653		usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
  6654	
  6655	restart:
  6656		/* Spread kernelcore memory as evenly as possible throughout nodes */
  6657		kernelcore_node = required_kernelcore / usable_nodes;
  6658		for_each_node_state(nid, N_MEMORY) {
  6659			unsigned long start_pfn, end_pfn;
  6660	
  6661			/*
  6662			 * Recalculate kernelcore_node if the division per node
  6663			 * now exceeds what is necessary to satisfy the requested
  6664			 * amount of memory for the kernel
  6665			 */
  6666			if (required_kernelcore < kernelcore_node)
  6667				kernelcore_node = required_kernelcore / usable_nodes;
  6668	
  6669			/*
  6670			 * As the map is walked, we track how much memory is usable
  6671			 * by the kernel using kernelcore_remaining. When it is
  6672			 * 0, the rest of the node is usable by ZONE_MOVABLE
  6673			 */
  6674			kernelcore_remaining = kernelcore_node;
  6675	
  6676			/* Go through each range of PFNs within this node */
  6677			for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
  6678				unsigned long size_pages;
  6679	
  6680				start_pfn = max(start_pfn, zone_movable_pfn[nid]);
  6681				if (start_pfn >= end_pfn)
  6682					continue;
  6683	
  6684				/*
  6685				 * KASLR may put kernel near tail of node memory,
  6686				 * start after kernel on that node to find PFN
  6687				 * which zone begins.
  6688				 */
  6689				if (pfn_to_nid(PFN_UP(_etext)) == i)
> 6690					real_startpfn = max(usable_startpfn,
> 6691							PFN_UP(_etext))
> 6692				else
  6693					real_startpfn = usable_startpfn;
  6694				/* Account for what is only usable for kernelcore */
  6695				if (start_pfn < real_startpfn) {
  6696					unsigned long kernel_pages;
  6697					kernel_pages = min(end_pfn, real_startpfn)
  6698									- start_pfn;
  6699	
  6700					kernelcore_remaining -= min(kernel_pages,
  6701								kernelcore_remaining);
  6702					required_kernelcore -= min(kernel_pages,
  6703								required_kernelcore);
  6704	
  6705					/* Continue if range is now fully accounted */
  6706					if (end_pfn <= real_startpfn) {
  6707	
  6708						/*
  6709						 * Push zone_movable_pfn to the end so
  6710						 * that if we have to rebalance
  6711						 * kernelcore across nodes, we will
  6712						 * not double account here
  6713						 */
  6714						zone_movable_pfn[nid] = end_pfn;
  6715						continue;
  6716					}
  6717					start_pfn = real_startpfn;
  6718				}
  6719	
  6720				/*
  6721				 * The usable PFN range for ZONE_MOVABLE is from
  6722				 * start_pfn->end_pfn. Calculate size_pages as the
  6723				 * number of pages used as kernelcore
  6724				 */
  6725				size_pages = end_pfn - start_pfn;
  6726				if (size_pages > kernelcore_remaining)
  6727					size_pages = kernelcore_remaining;
  6728				zone_movable_pfn[nid] = start_pfn + size_pages;
  6729	
  6730				/*
  6731				 * Some kernelcore has been met, update counts and
  6732				 * break if the kernelcore for this node has been
  6733				 * satisfied
  6734				 */
  6735				required_kernelcore -= min(required_kernelcore,
  6736									size_pages);
  6737				kernelcore_remaining -= size_pages;
  6738				if (!kernelcore_remaining)
  6739					break;
  6740			}
  6741		}
  6742	
  6743		/*
  6744		 * If there is still required_kernelcore, we do another pass with one
  6745		 * less node in the count. This will push zone_movable_pfn[nid] further
  6746		 * along on the nodes that still have memory until kernelcore is
  6747		 * satisfied
  6748		 */
  6749		usable_nodes--;
  6750		if (usable_nodes && required_kernelcore > usable_nodes)
  6751			goto restart;
  6752	
  6753	out2:
  6754		/* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
  6755		for (nid = 0; nid < MAX_NUMNODES; nid++)
  6756			zone_movable_pfn[nid] =
  6757				roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
  6758	
  6759	out:
  6760		/* restore the node_state */
  6761		node_states[N_MEMORY] = saved_node_state;
  6762	}
  6763	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot July 11, 2018, 7:02 p.m. UTC | #2
Hi Baoquan,

I love your patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v4.18-rc4 next-20180711]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Baoquan-He/mm-page_alloc-find-movable-zone-after-kernel-text/20180711-234359
config: x86_64-randconfig-x007-201827 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   In file included from include/asm-generic/bug.h:5:0,
                    from arch/x86/include/asm/bug.h:83,
                    from include/linux/bug.h:5,
                    from include/linux/mmdebug.h:5,
                    from include/linux/mm.h:9,
                    from mm/page_alloc.c:18:
   mm/page_alloc.c: In function 'find_zone_movable_pfns_for_nodes':
   include/linux/pfn.h:19:40: error: invalid operands to binary >> (have 'char *' and 'int')
    #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
                       ~~~~~~~~~~~~~~~~~~~ ^
   include/linux/compiler.h:58:30: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                 ^~~~
   mm/page_alloc.c:6689:4: note: in expansion of macro 'if'
       if (pfn_to_nid(PFN_UP(_etext)) == i)
       ^~
>> mm/page_alloc.c:6689:8: note: in expansion of macro 'pfn_to_nid'
       if (pfn_to_nid(PFN_UP(_etext)) == i)
           ^~~~~~~~~~
   mm/page_alloc.c:6689:19: note: in expansion of macro 'PFN_UP'
       if (pfn_to_nid(PFN_UP(_etext)) == i)
                      ^~~~~~
   include/linux/pfn.h:19:40: error: invalid operands to binary >> (have 'char *' and 'int')
    #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
                       ~~~~~~~~~~~~~~~~~~~ ^
   include/linux/compiler.h:58:42: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                             ^~~~
   mm/page_alloc.c:6689:4: note: in expansion of macro 'if'
       if (pfn_to_nid(PFN_UP(_etext)) == i)
       ^~
>> mm/page_alloc.c:6689:8: note: in expansion of macro 'pfn_to_nid'
       if (pfn_to_nid(PFN_UP(_etext)) == i)
           ^~~~~~~~~~
   mm/page_alloc.c:6689:19: note: in expansion of macro 'PFN_UP'
       if (pfn_to_nid(PFN_UP(_etext)) == i)
                      ^~~~~~
   include/linux/pfn.h:19:40: error: invalid operands to binary >> (have 'char *' and 'int')
    #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
                       ~~~~~~~~~~~~~~~~~~~ ^
   include/linux/compiler.h:69:16: note: in definition of macro '__trace_if'
      ______r = !!(cond);     \
                   ^~~~
   mm/page_alloc.c:6689:4: note: in expansion of macro 'if'
       if (pfn_to_nid(PFN_UP(_etext)) == i)
       ^~
>> mm/page_alloc.c:6689:8: note: in expansion of macro 'pfn_to_nid'
       if (pfn_to_nid(PFN_UP(_etext)) == i)
           ^~~~~~~~~~
   mm/page_alloc.c:6689:19: note: in expansion of macro 'PFN_UP'
       if (pfn_to_nid(PFN_UP(_etext)) == i)
                      ^~~~~~
   In file included from include/asm-generic/bug.h:18:0,
                    from arch/x86/include/asm/bug.h:83,
                    from include/linux/bug.h:5,
                    from include/linux/mmdebug.h:5,
                    from include/linux/mm.h:9,
                    from mm/page_alloc.c:18:
   include/linux/pfn.h:19:40: error: invalid operands to binary >> (have 'char *' and 'int')
    #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
                       ~~~~~~~~~~~~~~~~~~~ ^
   include/linux/kernel.h:812:40: note: in definition of macro '__typecheck'
      (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
                                           ^
   include/linux/kernel.h:836:24: note: in expansion of macro '__safe_cmp'
     __builtin_choose_expr(__safe_cmp(x, y), \
                           ^~~~~~~~~~
   include/linux/kernel.h:852:19: note: in expansion of macro '__careful_cmp'
    #define max(x, y) __careful_cmp(x, y, >)
                      ^~~~~~~~~~~~~
   mm/page_alloc.c:6690:21: note: in expansion of macro 'max'
        real_startpfn = max(usable_startpfn,
                        ^~~
   mm/page_alloc.c:6691:7: note: in expansion of macro 'PFN_UP'
          PFN_UP(_etext))
          ^~~~~~
   include/linux/pfn.h:19:40: error: invalid operands to binary >> (have 'char *' and 'int')
    #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
                       ~~~~~~~~~~~~~~~~~~~ ^
   include/linux/kernel.h:820:48: note: in definition of macro '__is_constexpr'
     (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
                                                   ^
   include/linux/kernel.h:826:25: note: in expansion of macro '__no_side_effects'
      (__typecheck(x, y) && __no_side_effects(x, y))
                            ^~~~~~~~~~~~~~~~~
   include/linux/kernel.h:836:24: note: in expansion of macro '__safe_cmp'
     __builtin_choose_expr(__safe_cmp(x, y), \
                           ^~~~~~~~~~
   include/linux/kernel.h:852:19: note: in expansion of macro '__careful_cmp'
    #define max(x, y) __careful_cmp(x, y, >)
                      ^~~~~~~~~~~~~
   mm/page_alloc.c:6690:21: note: in expansion of macro 'max'
        real_startpfn = max(usable_startpfn,
                        ^~~
   mm/page_alloc.c:6691:7: note: in expansion of macro 'PFN_UP'
          PFN_UP(_etext))
          ^~~~~~
   include/linux/pfn.h:19:40: error: invalid operands to binary >> (have 'char *' and 'int')
    #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
                       ~~~~~~~~~~~~~~~~~~~ ^
   include/linux/kernel.h:828:34: note: in definition of macro '__cmp'
    #define __cmp(x, y, op) ((x) op (y) ? (x) : (y))
                                     ^
   include/linux/kernel.h:852:19: note: in expansion of macro '__careful_cmp'
    #define max(x, y) __careful_cmp(x, y, >)
                      ^~~~~~~~~~~~~
   mm/page_alloc.c:6690:21: note: in expansion of macro 'max'
        real_startpfn = max(usable_startpfn,
                        ^~~
   mm/page_alloc.c:6691:7: note: in expansion of macro 'PFN_UP'
          PFN_UP(_etext))
          ^~~~~~
   include/linux/pfn.h:19:40: error: invalid operands to binary >> (have 'char *' and 'int')
    #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
                       ~~~~~~~~~~~~~~~~~~~ ^
   include/linux/kernel.h:828:46: note: in definition of macro '__cmp'
    #define __cmp(x, y, op) ((x) op (y) ? (x) : (y))
                                                 ^
   include/linux/kernel.h:852:19: note: in expansion of macro '__careful_cmp'
    #define max(x, y) __careful_cmp(x, y, >)
                      ^~~~~~~~~~~~~
   mm/page_alloc.c:6690:21: note: in expansion of macro 'max'
        real_startpfn = max(usable_startpfn,
                        ^~~
   mm/page_alloc.c:6691:7: note: in expansion of macro 'PFN_UP'
          PFN_UP(_etext))
          ^~~~~~
   include/linux/pfn.h:19:40: error: invalid operands to binary >> (have 'char *' and 'int')
    #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
                       ~~~~~~~~~~~~~~~~~~~ ^
   include/linux/kernel.h:832:10: note: in definition of macro '__cmp_once'
      typeof(y) unique_y = (y);  \
             ^
   include/linux/kernel.h:852:19: note: in expansion of macro '__careful_cmp'
    #define max(x, y) __careful_cmp(x, y, >)
                      ^~~~~~~~~~~~~
   mm/page_alloc.c:6690:21: note: in expansion of macro 'max'
        real_startpfn = max(usable_startpfn,
                        ^~~
   mm/page_alloc.c:6691:7: note: in expansion of macro 'PFN_UP'
          PFN_UP(_etext))
          ^~~~~~
   include/linux/pfn.h:19:40: error: invalid operands to binary >> (have 'char *' and 'int')
    #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
                       ~~~~~~~~~~~~~~~~~~~ ^
   include/linux/kernel.h:832:25: note: in definition of macro '__cmp_once'
      typeof(y) unique_y = (y);  \

vim +/pfn_to_nid +6689 mm/page_alloc.c

  6540	
  6541	/*
  6542	 * Find the PFN the Movable zone begins in each node. Kernel memory
  6543	 * is spread evenly between nodes as long as the nodes have enough
  6544	 * memory. When they don't, some nodes will have more kernelcore than
  6545	 * others
  6546	 */
  6547	static void __init find_zone_movable_pfns_for_nodes(void)
  6548	{
  6549		int i, nid;
  6550		unsigned long usable_startpfn, real_startpfn;
  6551		unsigned long kernelcore_node, kernelcore_remaining;
  6552		/* save the state before borrow the nodemask */
  6553		nodemask_t saved_node_state = node_states[N_MEMORY];
  6554		unsigned long totalpages = early_calculate_totalpages();
  6555		int usable_nodes = nodes_weight(node_states[N_MEMORY]);
  6556		struct memblock_region *r;
  6557	
  6558		/* Need to find movable_zone earlier when movable_node is specified. */
  6559		find_usable_zone_for_movable();
  6560	
  6561		/*
  6562		 * If movable_node is specified, ignore kernelcore and movablecore
  6563		 * options.
  6564		 */
  6565		if (movable_node_is_enabled()) {
  6566			for_each_memblock(memory, r) {
  6567				if (!memblock_is_hotpluggable(r))
  6568					continue;
  6569	
  6570				nid = r->nid;
  6571	
  6572				usable_startpfn = PFN_DOWN(r->base);
  6573				zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
  6574					min(usable_startpfn, zone_movable_pfn[nid]) :
  6575					usable_startpfn;
  6576			}
  6577	
  6578			goto out2;
  6579		}
  6580	
  6581		/*
  6582		 * If kernelcore=mirror is specified, ignore movablecore option
  6583		 */
  6584		if (mirrored_kernelcore) {
  6585			bool mem_below_4gb_not_mirrored = false;
  6586	
  6587			for_each_memblock(memory, r) {
  6588				if (memblock_is_mirror(r))
  6589					continue;
  6590	
  6591				nid = r->nid;
  6592	
  6593				usable_startpfn = memblock_region_memory_base_pfn(r);
  6594	
  6595				if (usable_startpfn < 0x100000) {
  6596					mem_below_4gb_not_mirrored = true;
  6597					continue;
  6598				}
  6599	
  6600				zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
  6601					min(usable_startpfn, zone_movable_pfn[nid]) :
  6602					usable_startpfn;
  6603			}
  6604	
  6605			if (mem_below_4gb_not_mirrored)
  6606				pr_warn("This configuration results in unmirrored kernel memory.");
  6607	
  6608			goto out2;
  6609		}
  6610	
  6611		/*
  6612		 * If kernelcore=nn% or movablecore=nn% was specified, calculate the
  6613		 * amount of necessary memory.
  6614		 */
  6615		if (required_kernelcore_percent)
  6616			required_kernelcore = (totalpages * 100 * required_kernelcore_percent) /
  6617					       10000UL;
  6618		if (required_movablecore_percent)
  6619			required_movablecore = (totalpages * 100 * required_movablecore_percent) /
  6620						10000UL;
  6621	
  6622		/*
  6623		 * If movablecore= was specified, calculate what size of
  6624		 * kernelcore that corresponds so that memory usable for
  6625		 * any allocation type is evenly spread. If both kernelcore
  6626		 * and movablecore are specified, then the value of kernelcore
  6627		 * will be used for required_kernelcore if it's greater than
  6628		 * what movablecore would have allowed.
  6629		 */
  6630		if (required_movablecore) {
  6631			unsigned long corepages;
  6632	
  6633			/*
  6634			 * Round-up so that ZONE_MOVABLE is at least as large as what
  6635			 * was requested by the user
  6636			 */
  6637			required_movablecore =
  6638				roundup(required_movablecore, MAX_ORDER_NR_PAGES);
  6639			required_movablecore = min(totalpages, required_movablecore);
  6640			corepages = totalpages - required_movablecore;
  6641	
  6642			required_kernelcore = max(required_kernelcore, corepages);
  6643		}
  6644	
  6645		/*
  6646		 * If kernelcore was not specified or kernelcore size is larger
  6647		 * than totalpages, there is no ZONE_MOVABLE.
  6648		 */
  6649		if (!required_kernelcore || required_kernelcore >= totalpages)
  6650			goto out;
  6651	
  6652		/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
  6653		usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
  6654	
  6655	restart:
  6656		/* Spread kernelcore memory as evenly as possible throughout nodes */
  6657		kernelcore_node = required_kernelcore / usable_nodes;
  6658		for_each_node_state(nid, N_MEMORY) {
  6659			unsigned long start_pfn, end_pfn;
  6660	
  6661			/*
  6662			 * Recalculate kernelcore_node if the division per node
  6663			 * now exceeds what is necessary to satisfy the requested
  6664			 * amount of memory for the kernel
  6665			 */
  6666			if (required_kernelcore < kernelcore_node)
  6667				kernelcore_node = required_kernelcore / usable_nodes;
  6668	
  6669			/*
  6670			 * As the map is walked, we track how much memory is usable
  6671			 * by the kernel using kernelcore_remaining. When it is
  6672			 * 0, the rest of the node is usable by ZONE_MOVABLE
  6673			 */
  6674			kernelcore_remaining = kernelcore_node;
  6675	
  6676			/* Go through each range of PFNs within this node */
  6677			for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
  6678				unsigned long size_pages;
  6679	
  6680				start_pfn = max(start_pfn, zone_movable_pfn[nid]);
  6681				if (start_pfn >= end_pfn)
  6682					continue;
  6683	
  6684				/*
  6685				 * KASLR may put kernel near tail of node memory,
  6686				 * start after kernel on that node to find PFN
  6687				 * which zone begins.
  6688				 */
> 6689				if (pfn_to_nid(PFN_UP(_etext)) == i)
  6690					real_startpfn = max(usable_startpfn,
  6691							PFN_UP(_etext))
  6692				else
  6693					real_startpfn = usable_startpfn;
  6694				/* Account for what is only usable for kernelcore */
  6695				if (start_pfn < real_startpfn) {
  6696					unsigned long kernel_pages;
  6697					kernel_pages = min(end_pfn, real_startpfn)
  6698									- start_pfn;
  6699	
  6700					kernelcore_remaining -= min(kernel_pages,
  6701								kernelcore_remaining);
  6702					required_kernelcore -= min(kernel_pages,
  6703								required_kernelcore);
  6704	
  6705					/* Continue if range is now fully accounted */
  6706					if (end_pfn <= real_startpfn) {
  6707	
  6708						/*
  6709						 * Push zone_movable_pfn to the end so
  6710						 * that if we have to rebalance
  6711						 * kernelcore across nodes, we will
  6712						 * not double account here
  6713						 */
  6714						zone_movable_pfn[nid] = end_pfn;
  6715						continue;
  6716					}
  6717					start_pfn = real_startpfn;
  6718				}
  6719	
  6720				/*
  6721				 * The usable PFN range for ZONE_MOVABLE is from
  6722				 * start_pfn->end_pfn. Calculate size_pages as the
  6723				 * number of pages used as kernelcore
  6724				 */
  6725				size_pages = end_pfn - start_pfn;
  6726				if (size_pages > kernelcore_remaining)
  6727					size_pages = kernelcore_remaining;
  6728				zone_movable_pfn[nid] = start_pfn + size_pages;
  6729	
  6730				/*
  6731				 * Some kernelcore has been met, update counts and
  6732				 * break if the kernelcore for this node has been
  6733				 * satisfied
  6734				 */
  6735				required_kernelcore -= min(required_kernelcore,
  6736									size_pages);
  6737				kernelcore_remaining -= size_pages;
  6738				if (!kernelcore_remaining)
  6739					break;
  6740			}
  6741		}
  6742	
  6743		/*
  6744		 * If there is still required_kernelcore, we do another pass with one
  6745		 * less node in the count. This will push zone_movable_pfn[nid] further
  6746		 * along on the nodes that still have memory until kernelcore is
  6747		 * satisfied
  6748		 */
  6749		usable_nodes--;
  6750		if (usable_nodes && required_kernelcore > usable_nodes)
  6751			goto restart;
  6752	
  6753	out2:
  6754		/* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
  6755		for (nid = 0; nid < MAX_NUMNODES; nid++)
  6756			zone_movable_pfn[nid] =
  6757				roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
  6758	
  6759	out:
  6760		/* restore the node_state */
  6761		node_states[N_MEMORY] = saved_node_state;
  6762	}
  6763	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Dou Liyang July 12, 2018, 5:49 a.m. UTC | #3
Hi Baoquan,

At 07/11/2018 08:40 PM, Baoquan He wrote:
> Please try this v3 patch:
> 
> From 9850d3de9c02e570dc7572069a9749a8add4c4c7 Mon Sep 17 00:00:00 2001
> From: Baoquan He <bhe@redhat.com>
> Date: Wed, 11 Jul 2018 20:31:51 +0800
> Subject: [PATCH v3] mm, page_alloc: find movable zone after kernel text
> 
> In find_zone_movable_pfns_for_nodes(), when trying to find the starting
> PFN at which the movable zone begins in each node, the kernel text position is
> not considered. KASLR may place the kernel above the point where the movable
> zone begins.
> 
> Fix it by making the movable zone start after the kernel text on that node.
> 
> Signed-off-by: Baoquan He <bhe@redhat.com>


You fix this on the _zone init side_. This may make 'kernelcore=' or
'movablecore=' fail if KASLR puts the kernel at the tail of the
last node, or beyond.

Since we have already fixed the mirror memory case on the KASLR side, and Chao
is trying to fix 'movable_node' on the KASLR side, have you had a chance to fix
this on the KASLR side as well?


> ---
>   mm/page_alloc.c | 20 +++++++++++++++-----
>   1 file changed, 15 insertions(+), 5 deletions(-)
> 
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 1521100..390eb35 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -6547,7 +6547,7 @@ static unsigned long __init early_calculate_totalpages(void)
>   static void __init find_zone_movable_pfns_for_nodes(void)
>   {
>   	int i, nid;
> -	unsigned long usable_startpfn;
> +	unsigned long usable_startpfn, real_startpfn;
>   	unsigned long kernelcore_node, kernelcore_remaining;
>   	/* save the state before borrow the nodemask */
>   	nodemask_t saved_node_state = node_states[N_MEMORY];
> @@ -6681,10 +6681,20 @@ static void __init find_zone_movable_pfns_for_nodes(void)
>   			if (start_pfn >= end_pfn)
>   				continue;
>   
> +			/*
> +			 * KASLR may put kernel near tail of node memory,
> +			 * start after kernel on that node to find PFN
> +			 * which zone begins.
> +			 */
> +			if (pfn_to_nid(PFN_UP(_etext)) == i)

Here, did you want to check the node id? It seems this should be nid.

Also, doing the check in the outer node loop seems more suitable:

for_each_node_state(nid, N_MEMORY) {

         ... do the check here ...

	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {

	}
}
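
As an illustration only, a minimal sketch of that per-node arrangement (again
assuming _etext is first converted to a physical PFN, e.g. via __pa_symbol(),
which the posted hunk does not do):

        for_each_node_state(nid, N_MEMORY) {
                unsigned long kernel_end_pfn = PFN_UP(__pa_symbol(_etext));

                /* Clamp only on the node that actually holds the kernel image. */
                if (pfn_to_nid(kernel_end_pfn) == nid)
                        real_startpfn = max(usable_startpfn, kernel_end_pfn);
                else
                        real_startpfn = usable_startpfn;

                for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
                        /* ... use real_startpfn in place of usable_startpfn ... */
                }
        }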

Thanks,
	dou

> +				real_startpfn = max(usable_startpfn,
> +						PFN_UP(_etext))
> +			else
> +				real_startpfn = usable_startpfn;
>   			/* Account for what is only usable for kernelcore */
> -			if (start_pfn < usable_startpfn) {
> +			if (start_pfn < real_startpfn) {
>   				unsigned long kernel_pages;
> -				kernel_pages = min(end_pfn, usable_startpfn)
> +				kernel_pages = min(end_pfn, real_startpfn)
>   								- start_pfn;
>   
>   				kernelcore_remaining -= min(kernel_pages,
> @@ -6693,7 +6703,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
>   							required_kernelcore);
>   
>   				/* Continue if range is now fully accounted */
> -				if (end_pfn <= usable_startpfn) {
> +				if (end_pfn <= real_startpfn) {
>   
>   					/*
>   					 * Push zone_movable_pfn to the end so
> @@ -6704,7 +6714,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
>   					zone_movable_pfn[nid] = end_pfn;
>   					continue;
>   				}
> -				start_pfn = usable_startpfn;
> +				start_pfn = real_startpfn;
>   			}
>   
>   			/*
>
Chao Fan July 12, 2018, 6:01 a.m. UTC | #4
On Thu, Jul 12, 2018 at 01:49:49PM +0800, Dou Liyang wrote:
>Hi Baoquan,
>
>At 07/11/2018 08:40 PM, Baoquan He wrote:
>> Please try this v3 patch:
>> 
>> From 9850d3de9c02e570dc7572069a9749a8add4c4c7 Mon Sep 17 00:00:00 2001
>> From: Baoquan He <bhe@redhat.com>
>> Date: Wed, 11 Jul 2018 20:31:51 +0800
>> Subject: [PATCH v3] mm, page_alloc: find movable zone after kernel text
>> 
>> In find_zone_movable_pfns_for_nodes(), when trying to find the starting
>> PFN at which the movable zone begins in each node, the kernel text position is
>> not considered. KASLR may place the kernel above the point where the movable
>> zone begins.
>> 
>> Fix it by making the movable zone start after the kernel text on that node.
>> 
>> Signed-off-by: Baoquan He <bhe@redhat.com>
>
>
>You fix this in the _zone_init side_. This may make the 'kernelcore=' or
>'movablecore=' failed if the KASLR puts the kernel back the tail of the
>last node, or more.

I think it may not fail.
There is a 'restart' label that does another pass.

>
>Due to we have fix the mirror memory in KASLR side, and Chao is trying
>to fix the 'movable_node' in KASLR side. Have you had a chance to fix
>this in the KASLR side.
>

I think it's better to fix it here rather than on the KASLR side,
because much more code would have to change if we did it on the KASLR side.
We don't parse 'kernelcore' in the compressed code, and as you can see,
the distribution of ZONE_MOVABLE needs quite a lot of code, so there is no need
to redo all of that work on the KASLR side. Here, a few lines will do.

Thanks,
Chao Fan

>
>> ---
>>   mm/page_alloc.c | 20 +++++++++++++++-----
>>   1 file changed, 15 insertions(+), 5 deletions(-)
>> 
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 1521100..390eb35 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -6547,7 +6547,7 @@ static unsigned long __init early_calculate_totalpages(void)
>>   static void __init find_zone_movable_pfns_for_nodes(void)
>>   {
>>   	int i, nid;
>> -	unsigned long usable_startpfn;
>> +	unsigned long usable_startpfn, real_startpfn;
>>   	unsigned long kernelcore_node, kernelcore_remaining;
>>   	/* save the state before borrow the nodemask */
>>   	nodemask_t saved_node_state = node_states[N_MEMORY];
>> @@ -6681,10 +6681,20 @@ static void __init find_zone_movable_pfns_for_nodes(void)
>>   			if (start_pfn >= end_pfn)
>>   				continue;
>> +			/*
>> +			 * KASLR may put kernel near tail of node memory,
>> +			 * start after kernel on that node to find PFN
>> +			 * which zone begins.
>> +			 */
>> +			if (pfn_to_nid(PFN_UP(_etext)) == i)
>
>Here, did you want to check the Node id? seems may be nid.
>
>and
>
>for_each_node_state(nid, N_MEMORY) {
>
>        ... seems check here is more suitable.
>
>	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
>
>	}
>}
>
>Thanks,
>	dou
>
>> +				real_startpfn = max(usable_startpfn,
>> +						PFN_UP(_etext))
>> +			else
>> +				real_startpfn = usable_startpfn;
>>   			/* Account for what is only usable for kernelcore */
>> -			if (start_pfn < usable_startpfn) {
>> +			if (start_pfn < real_startpfn) {
>>   				unsigned long kernel_pages;
>> -				kernel_pages = min(end_pfn, usable_startpfn)
>> +				kernel_pages = min(end_pfn, real_startpfn)
>>   								- start_pfn;
>>   				kernelcore_remaining -= min(kernel_pages,
>> @@ -6693,7 +6703,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
>>   							required_kernelcore);
>>   				/* Continue if range is now fully accounted */
>> -				if (end_pfn <= usable_startpfn) {
>> +				if (end_pfn <= real_startpfn) {
>>   					/*
>>   					 * Push zone_movable_pfn to the end so
>> @@ -6704,7 +6714,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
>>   					zone_movable_pfn[nid] = end_pfn;
>>   					continue;
>>   				}
>> -				start_pfn = usable_startpfn;
>> +				start_pfn = real_startpfn;
>>   			}
>>   			/*
>>
Michal Hocko July 12, 2018, 12:32 p.m. UTC | #5
On Thu 12-07-18 14:01:15, Chao Fan wrote:
> On Thu, Jul 12, 2018 at 01:49:49PM +0800, Dou Liyang wrote:
> >Hi Baoquan,
> >
> >At 07/11/2018 08:40 PM, Baoquan He wrote:
> >> Please try this v3 patch:
> >> 
> >> From 9850d3de9c02e570dc7572069a9749a8add4c4c7 Mon Sep 17 00:00:00 2001
> >> From: Baoquan He <bhe@redhat.com>
> >> Date: Wed, 11 Jul 2018 20:31:51 +0800
> >> Subject: [PATCH v3] mm, page_alloc: find movable zone after kernel text
> >> 
> >> In find_zone_movable_pfns_for_nodes(), when trying to find the starting
> >> PFN at which the movable zone begins in each node, the kernel text position is
> >> not considered. KASLR may place the kernel above the point where the movable
> >> zone begins.
> >> 
> >> Fix it by making the movable zone start after the kernel text on that node.
> >> 
> >> Signed-off-by: Baoquan He <bhe@redhat.com>
> >
> >
> >You fix this in the _zone_init side_. This may make the 'kernelcore=' or
> >'movablecore=' failed if the KASLR puts the kernel back the tail of the
> >last node, or more.
> 
> I think it may not fail.
> There is a 'restart' to do another pass.
> 
> >
> >Due to we have fix the mirror memory in KASLR side, and Chao is trying
> >to fix the 'movable_node' in KASLR side. Have you had a chance to fix
> >this in the KASLR side.
> >
> 
> I think it's better to fix here, but not KASLR side.
> Cause much more code will be change if doing it in KASLR side.
> Since we didn't parse 'kernelcore' in compressed code, and you can see
> the distribution of ZONE_MOVABLE need so much code, so we do not need
> to do so much job in KASLR side. But here, several lines will be OK.

I am not able to find the beginning of the email thread right now. Could
you summarize what the actual problem is, please?

Patch

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1521100..390eb35 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6547,7 +6547,7 @@  static unsigned long __init early_calculate_totalpages(void)
 static void __init find_zone_movable_pfns_for_nodes(void)
 {
 	int i, nid;
-	unsigned long usable_startpfn;
+	unsigned long usable_startpfn, real_startpfn;
 	unsigned long kernelcore_node, kernelcore_remaining;
 	/* save the state before borrow the nodemask */
 	nodemask_t saved_node_state = node_states[N_MEMORY];
@@ -6681,10 +6681,20 @@  static void __init find_zone_movable_pfns_for_nodes(void)
 			if (start_pfn >= end_pfn)
 				continue;
 
+			/*
+			 * KASLR may put kernel near tail of node memory,
+			 * start after kernel on that node to find PFN
+			 * which zone begins.
+			 */
+			if (pfn_to_nid(PFN_UP(_etext)) == i)
+				real_startpfn = max(usable_startpfn,
+						PFN_UP(_etext))
+			else
+				real_startpfn = usable_startpfn;
 			/* Account for what is only usable for kernelcore */
-			if (start_pfn < usable_startpfn) {
+			if (start_pfn < real_startpfn) {
 				unsigned long kernel_pages;
-				kernel_pages = min(end_pfn, usable_startpfn)
+				kernel_pages = min(end_pfn, real_startpfn)
 								- start_pfn;
 
 				kernelcore_remaining -= min(kernel_pages,
@@ -6693,7 +6703,7 @@  static void __init find_zone_movable_pfns_for_nodes(void)
 							required_kernelcore);
 
 				/* Continue if range is now fully accounted */
-				if (end_pfn <= usable_startpfn) {
+				if (end_pfn <= real_startpfn) {
 
 					/*
 					 * Push zone_movable_pfn to the end so
@@ -6704,7 +6714,7 @@  static void __init find_zone_movable_pfns_for_nodes(void)
 					zone_movable_pfn[nid] = end_pfn;
 					continue;
 				}
-				start_pfn = usable_startpfn;
+				start_pfn = real_startpfn;
 			}
 
 			/*