diff mbox series

[1/4] mm/vmalloc: allow arch-specific vmalloc_node overrides

Message ID 20240220203256.31153-2-mbland@motorola.com (mailing list archive)
State Not Applicable
Delegated to: BPF
Headers show
Series arm64: mm: support dynamic vmalloc/pmd configuration | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 fail Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 fail Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 fail Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test
bpf/vmtest-bpf-next-VM_Test-15 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-16 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 fail Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
netdev/tree_selection success Not a local patch
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-7 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-10 success Logs for x86_64-gcc / build-release

Commit Message

Maxwell Bland Feb. 20, 2024, 8:32 p.m. UTC
Present non-uniform use of __vmalloc_node and __vmalloc_node_range makes
enforcing appropriate code and data separation untenable on certain
microarchitectures, as VMALLOC_START and VMALLOC_END are monolithic
while the use of the vmalloc interface is non-monolithic: in particular,
appropriate randomness in ASLR makes it such that code regions must fall
in some region between VMALLOC_START and VMALLOC_END, but this
necessitates that code pages are intermingled with data pages, meaning
code-specific protections, such as arm64's PXNTable, cannot be
performantly runtime enforced.

The solution to this problem allows architectures to override the
vmalloc wrapper functions. It does so in two ways: callers in the rest
of the kernel that reimplement __vmalloc_node by calling
__vmalloc_node_range with the same parameters as __vmalloc_node are
converted to call __vmalloc_node directly, and the remaining wrapper
functions that call __vmalloc_node_range with parameters repeating
those of __vmalloc_node are given a __weak tag.

Two benefits of this approach are (1) greater flexibility to each
architecture for handling of virtual memory while not compromising the
kernel's vmalloc logic and (2) more uniform use of the __vmalloc_node
interface, reserving the more specialized __vmalloc_node_range for more
specialized cases, such as kasan's shadow memory.

Signed-off-by: Maxwell Bland <mbland@motorola.com>
---
 arch/arm/kernel/irq.c               |  2 +-
 arch/arm64/include/asm/vmap_stack.h |  2 +-
 arch/arm64/kernel/efi.c             |  2 +-
 arch/powerpc/kernel/irq.c           |  2 +-
 arch/riscv/include/asm/irq_stack.h  |  2 +-
 arch/s390/hypfs/hypfs_diag.c        |  2 +-
 arch/s390/kernel/setup.c            |  6 ++---
 arch/s390/kernel/sthyi.c            |  2 +-
 include/linux/vmalloc.h             | 15 ++++++++++-
 kernel/bpf/syscall.c                |  4 +--
 kernel/fork.c                       |  4 +--
 kernel/scs.c                        |  3 +--
 lib/objpool.c                       |  2 +-
 lib/test_vmalloc.c                  |  6 ++---
 mm/util.c                           |  3 +--
 mm/vmalloc.c                        | 39 +++++++++++------------------
 16 files changed, 47 insertions(+), 49 deletions(-)

Comments

Christoph Hellwig Feb. 21, 2024, 5:43 a.m. UTC | #1
On Tue, Feb 20, 2024 at 02:32:53PM -0600, Maxwell Bland wrote:
> Present non-uniform use of __vmalloc_node and __vmalloc_node_range makes
> enforcing appropriate code and data seperation untenable on certain
> microarchitectures, as VMALLOC_START and VMALLOC_END are monolithic
> while the use of the vmalloc interface is non-monolithic: in particular,
> appropriate randomness in ASLR makes it such that code regions must fall
> in some region between VMALLOC_START and VMALLOC_end, but this
> necessitates that code pages are intermingled with data pages, meaning
> code-specific protections, such as arm64's PXNTable, cannot be
> performantly runtime enforced.

That's not actually true.  We have MODULE_START/END to separate them,
which is used by mips only for now.

> 
> The solution to this problem allows architectures to override the
> vmalloc wrapper functions by enforcing that the rest of the kernel does
> not reimplement __vmalloc_node by using __vmalloc_node_range with the
> same parameters as __vmalloc_node or provides a __weak tag to those
> functions using __vmalloc_node_range with parameters repeating those of
> __vmalloc_node.

I'm really not too happy about overriding the functions.  Especially
as the separation is a generally good idea and it would be good to
move everyone (or at least all modern architectures) over to a scheme
like this.
Christophe Leroy Feb. 21, 2024, 6:59 a.m. UTC | #2
Le 20/02/2024 à 21:32, Maxwell Bland a écrit :
> [Vous ne recevez pas souvent de courriers de mbland@motorola.com. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
> 
> Present non-uniform use of __vmalloc_node and __vmalloc_node_range makes
> enforcing appropriate code and data seperation untenable on certain
> microarchitectures, as VMALLOC_START and VMALLOC_END are monolithic
> while the use of the vmalloc interface is non-monolithic: in particular,
> appropriate randomness in ASLR makes it such that code regions must fall
> in some region between VMALLOC_START and VMALLOC_end, but this
> necessitates that code pages are intermingled with data pages, meaning
> code-specific protections, such as arm64's PXNTable, cannot be
> performantly runtime enforced.
> 
> The solution to this problem allows architectures to override the
> vmalloc wrapper functions by enforcing that the rest of the kernel does
> not reimplement __vmalloc_node by using __vmalloc_node_range with the
> same parameters as __vmalloc_node or provides a __weak tag to those
> functions using __vmalloc_node_range with parameters repeating those of
> __vmalloc_node.
> 
> Two benefits of this approach are (1) greater flexibility to each
> architecture for handling of virtual memory while not compromising the
> kernel's vmalloc logic and (2) more uniform use of the __vmalloc_node
> interface, reserving the more specialized __vmalloc_node_range for more
> specialized cases, such as kasan's shadow memory.

I'm not sure I understand the message. What I understand is that you 
allow architectures to override vmalloc_node().

In the code you add __weak for that. But you also add the flags to the 
parameters and I can't understand why when reading the above description.

Christophe
Christophe Leroy Feb. 21, 2024, 7:38 a.m. UTC | #3
Le 21/02/2024 à 06:43, Christoph Hellwig a écrit :
> On Tue, Feb 20, 2024 at 02:32:53PM -0600, Maxwell Bland wrote:
>> Present non-uniform use of __vmalloc_node and __vmalloc_node_range makes
>> enforcing appropriate code and data seperation untenable on certain
>> microarchitectures, as VMALLOC_START and VMALLOC_END are monolithic
>> while the use of the vmalloc interface is non-monolithic: in particular,
>> appropriate randomness in ASLR makes it such that code regions must fall
>> in some region between VMALLOC_START and VMALLOC_end, but this
>> necessitates that code pages are intermingled with data pages, meaning
>> code-specific protections, such as arm64's PXNTable, cannot be
>> performantly runtime enforced.
> 
> That's not actually true.  We have MODULE_START/END to separate them,
> which is used by mips only for now.

We have MODULES_VADDR and MODULES_END that are used by arm, arm64, 
loongarch, powerpc, riscv, s390, sparc, x86_64

is_vmalloc_or_module_addr() is using MODULES_VADDR so I guess this 
function fails on mips ?

> 
>>
>> The solution to this problem allows architectures to override the
>> vmalloc wrapper functions by enforcing that the rest of the kernel does
>> not reimplement __vmalloc_node by using __vmalloc_node_range with the
>> same parameters as __vmalloc_node or provides a __weak tag to those
>> functions using __vmalloc_node_range with parameters repeating those of
>> __vmalloc_node.
> 
> I'm really not too happy about overriding the functions.  Especially
> as the separation is a generally good idea and it would be good to
> move everyone (or at least all modern architectures) over to a scheme
> like this.
Maxwell Bland Feb. 21, 2024, 5:19 p.m. UTC | #4
> On Wednesday, February 21, 2024 12:59 AM, Christophe Leroy wrote:
> 
> In the code you add __weak for that. But you also add the flags to the
> parameters and I can't understand why when reading the above description.

This change was made to allow most kernel interfaces to use vmalloc_node and
enable the overrides to work. It also reduces the number of kernel locations
which would need to be changed if there was ever a change to the
vmalloc_node_range interface.

However, there is pushback against overriding the vmalloc interface, so this
change will likely not show up in my final patch.

Regards,
Maxwell
kernel test robot Feb. 21, 2024, 6:25 p.m. UTC | #5
Hi Maxwell,

kernel test robot noticed the following build errors:

[auto build test ERROR on b401b621758e46812da61fa58a67c3fd8d91de0d]

url:    https://github.com/intel-lab-lkp/linux/commits/Maxwell-Bland/mm-vmalloc-allow-arch-specific-vmalloc_node-overrides/20240221-043458
base:   b401b621758e46812da61fa58a67c3fd8d91de0d
patch link:    https://lore.kernel.org/r/20240220203256.31153-2-mbland%40motorola.com
patch subject: [PATCH 1/4] mm/vmalloc: allow arch-specific vmalloc_node overrides
config: m68k-allnoconfig (https://download.01.org/0day-ci/archive/20240222/202402220229.5xZWdZBK-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240222/202402220229.5xZWdZBK-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202402220229.5xZWdZBK-lkp@intel.com/

All errors (new ones prefixed by >>):

>> mm/nommu.c:160:7: error: conflicting types for '__vmalloc_node'; have 'void *(long unsigned int,  long unsigned int,  gfp_t,  int,  const void *)' {aka 'void *(long unsigned int,  long unsigned int,  unsigned int,  int,  const void *)'}
     160 | void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
         |       ^~~~~~~~~~~~~~
   In file included from include/asm-generic/io.h:994,
                    from arch/m68k/include/asm/io.h:14,
                    from arch/m68k/include/asm/pgtable_no.h:14,
                    from arch/m68k/include/asm/pgtable.h:6,
                    from include/linux/pgtable.h:6,
                    from include/linux/mm.h:29,
                    from mm/nommu.c:20:
   include/linux/vmalloc.h:152:7: note: previous declaration of '__vmalloc_node' with type 'void *(long unsigned int,  long unsigned int,  gfp_t,  long unsigned int,  int,  const void *)' {aka 'void *(long unsigned int,  long unsigned int,  unsigned int,  long unsigned int,  int,  const void *)'}
     152 | void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
         |       ^~~~~~~~~~~~~~


vim +160 mm/nommu.c

041de93ff86fc5 Christoph Hellwig 2020-06-01  159  
2b9059489c839e Christoph Hellwig 2020-06-01 @160  void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
2b9059489c839e Christoph Hellwig 2020-06-01  161  		int node, const void *caller)
a7c3e901a46ff5 Michal Hocko      2017-05-08  162  {
2b9059489c839e Christoph Hellwig 2020-06-01  163  	return __vmalloc(size, gfp_mask);
a7c3e901a46ff5 Michal Hocko      2017-05-08  164  }
a7c3e901a46ff5 Michal Hocko      2017-05-08  165
diff mbox series

Patch

diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index fe28fc1f759d..109f4f363621 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -61,7 +61,7 @@  static void __init init_irq_stacks(void)
 						       THREAD_SIZE_ORDER);
 		else
 			stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN,
-					       THREADINFO_GFP, NUMA_NO_NODE,
+					       THREADINFO_GFP, 0, NUMA_NO_NODE,
 					       __builtin_return_address(0));
 
 		if (WARN_ON(!stack))
diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h
index 20873099c035..57a7eaa720d5 100644
--- a/arch/arm64/include/asm/vmap_stack.h
+++ b/arch/arm64/include/asm/vmap_stack.h
@@ -21,7 +21,7 @@  static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
 
 	BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
 
-	p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
+	p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, 0, node,
 			__builtin_return_address(0));
 	return kasan_reset_tag(p);
 }
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 0228001347be..48efa31a9161 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -205,7 +205,7 @@  static int __init arm64_efi_rt_init(void)
 		return 0;
 
 	p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL,
-			   NUMA_NO_NODE, &&l);
+			   0, NUMA_NO_NODE, &&l);
 l:	if (!p) {
 		pr_warn("Failed to allocate EFI runtime stack\n");
 		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 6f7d4edaa0bc..ceb7ea07ca28 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -308,7 +308,7 @@  DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ)
 static void *__init alloc_vm_stack(void)
 {
 	return __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP,
-			      NUMA_NO_NODE, (void *)_RET_IP_);
+			      0, NUMA_NO_NODE, (void *)_RET_IP_);
 }
 
 static void __init vmap_irqstack_init(void)
diff --git a/arch/riscv/include/asm/irq_stack.h b/arch/riscv/include/asm/irq_stack.h
index 6441ded3b0cf..d2410735bde0 100644
--- a/arch/riscv/include/asm/irq_stack.h
+++ b/arch/riscv/include/asm/irq_stack.h
@@ -24,7 +24,7 @@  static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
 {
 	void *p;
 
-	p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
+	p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, 0, node,
 			__builtin_return_address(0));
 	return kasan_reset_tag(p);
 }
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 279b7bba4d43..16359d854288 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -70,7 +70,7 @@  void *diag204_get_buffer(enum diag204_format fmt, int *pages)
 			return ERR_PTR(-EOPNOTSUPP);
 	}
 	diag204_buf = __vmalloc_node(array_size(*pages, PAGE_SIZE),
-				     PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
+				     PAGE_SIZE, GFP_KERNEL, 0, NUMA_NO_NODE,
 				     __builtin_return_address(0));
 	if (!diag204_buf)
 		return ERR_PTR(-ENOMEM);
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index d1f3b56e7afc..2c25b4e9f20a 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -254,7 +254,7 @@  static void __init conmode_default(void)
 		cpcmd("QUERY TERM", query_buffer, 1024, NULL);
 		ptr = strstr(query_buffer, "CONMODE");
 		/*
-		 * Set the conmode to 3215 so that the device recognition 
+		 * Set the conmode to 3215 so that the device recognition
 		 * will set the cu_type of the console to 3215. If the
 		 * conmode is 3270 and we don't set it back then both
 		 * 3215 and the 3270 driver will try to access the console
@@ -314,7 +314,7 @@  static inline void setup_zfcpdump(void) {}
 
  /*
  * Reboot, halt and power_off stubs. They just call _machine_restart,
- * _machine_halt or _machine_power_off. 
+ * _machine_halt or _machine_power_off.
  */
 
 void machine_restart(char *command)
@@ -364,7 +364,7 @@  unsigned long stack_alloc(void)
 	void *ret;
 
 	ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
-			     NUMA_NO_NODE, __builtin_return_address(0));
+			     0, NUMA_NO_NODE, __builtin_return_address(0));
 	kmemleak_not_leak(ret);
 	return (unsigned long)ret;
 #else
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
index 30bb20461db4..5bf239bcdae9 100644
--- a/arch/s390/kernel/sthyi.c
+++ b/arch/s390/kernel/sthyi.c
@@ -318,7 +318,7 @@  static void fill_diag(struct sthyi_sctns *sctns)
 		return;
 
 	diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE),
-				     PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
+				     PAGE_SIZE, GFP_KERNEL, 0, NUMA_NO_NODE,
 				     __builtin_return_address(0));
 	if (!diag204_buf)
 		return;
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index c720be70c8dd..f13bd711ad7d 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -150,7 +150,8 @@  extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
 			pgprot_t prot, unsigned long vm_flags, int node,
 			const void *caller) __alloc_size(1);
 void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
-		int node, const void *caller) __alloc_size(1);
+		unsigned long vm_flags, int node, const void *caller)
+		__alloc_size(1);
 void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
 
 extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
@@ -295,4 +296,16 @@  bool vmalloc_dump_obj(void *object);
 static inline bool vmalloc_dump_obj(void *object) { return false; }
 #endif
 
+#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
+#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
+#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
+#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
+#else
+/*
+ * 64b systems should always have either DMA or DMA32 zones. For others
+ * GFP_DMA32 should do the right thing and use the normal zone.
+ */
+#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
+#endif
+
 #endif /* _LINUX_VMALLOC_H */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a1f18681721c..79c11307ff40 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -303,8 +303,8 @@  static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
 			return area;
 	}
 
-	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
-			gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL,
+	return __vmalloc_node(size, align,
+			gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL,
 			flags, numa_node, __builtin_return_address(0));
 }
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 0d944e92a43f..800bb1c76000 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -304,10 +304,8 @@  static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 	 * so memcg accounting is performed manually on assigning/releasing
 	 * stacks to tasks. Drop __GFP_ACCOUNT.
 	 */
-	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
-				     VMALLOC_START, VMALLOC_END,
+	stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN,
 				     THREADINFO_GFP & ~__GFP_ACCOUNT,
-				     PAGE_KERNEL,
 				     0, node, __builtin_return_address(0));
 	if (!stack)
 		return -ENOMEM;
diff --git a/kernel/scs.c b/kernel/scs.c
index d7809affe740..5b89fb08a392 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -43,8 +43,7 @@  static void *__scs_alloc(int node)
 		}
 	}
 
-	s = __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END,
-				    GFP_SCS, PAGE_KERNEL, 0, node,
+	s = __vmalloc_node(SCS_SIZE, 1, GFP_SCS, 0, node,
 				    __builtin_return_address(0));
 
 out:
diff --git a/lib/objpool.c b/lib/objpool.c
index cfdc02420884..f0acd421a652 100644
--- a/lib/objpool.c
+++ b/lib/objpool.c
@@ -80,7 +80,7 @@  objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs,
 			slot = kmalloc_node(size, pool->gfp, cpu_to_node(i));
 		else
 			slot = __vmalloc_node(size, sizeof(void *), pool->gfp,
-				cpu_to_node(i), __builtin_return_address(0));
+				0, cpu_to_node(i), __builtin_return_address(0));
 		if (!slot)
 			return -ENOMEM;
 		memset(slot, 0, size);
diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c
index 3718d9886407..6bde73f892f9 100644
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -97,7 +97,7 @@  static int random_size_align_alloc_test(void)
 		size = ((rnd % 10) + 1) * PAGE_SIZE;
 
 		ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0,
-				__builtin_return_address(0));
+				0, __builtin_return_address(0));
 		if (!ptr)
 			return -1;
 
@@ -120,7 +120,7 @@  static int align_shift_alloc_test(void)
 		align = ((unsigned long) 1) << i;
 
 		ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0,
-				__builtin_return_address(0));
+				0, __builtin_return_address(0));
 		if (!ptr)
 			return -1;
 
@@ -138,7 +138,7 @@  static int fix_align_alloc_test(void)
 	for (i = 0; i < test_loop_count; i++) {
 		ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1,
 				GFP_KERNEL | __GFP_ZERO, 0,
-				__builtin_return_address(0));
+				0, __builtin_return_address(0));
 		if (!ptr)
 			return -1;
 
diff --git a/mm/util.c b/mm/util.c
index 5a6a9802583b..c6b7111215e2 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -639,8 +639,7 @@  void *kvmalloc_node(size_t size, gfp_t flags, int node)
 	 * about the resulting pointer, and cannot play
 	 * protection games.
 	 */
-	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
-			flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
+	return __vmalloc_node(size, 1, flags, VM_ALLOW_HUGE_VMAP,
 			node, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(kvmalloc_node);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d12a17fc0c17..18ece28e79d3 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3119,7 +3119,7 @@  static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
 	/* Please note that the recursion is strictly bounded. */
 	if (array_size > PAGE_SIZE) {
-		area->pages = __vmalloc_node(array_size, 1, nested_gfp, node,
+		area->pages = __vmalloc_node(array_size, 1, nested_gfp, 0, node,
 					area->caller);
 	} else {
 		area->pages = kmalloc_node(array_size, nested_gfp, node);
@@ -3379,11 +3379,12 @@  void *__vmalloc_node_range(unsigned long size, unsigned long align,
  *
  * Return: pointer to the allocated memory or %NULL on error
  */
-void *__vmalloc_node(unsigned long size, unsigned long align,
-			    gfp_t gfp_mask, int node, const void *caller)
+__weak void *__vmalloc_node(unsigned long size, unsigned long align,
+			    gfp_t gfp_mask, unsigned long vm_flags, int node,
+			    const void *caller)
 {
 	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
-				gfp_mask, PAGE_KERNEL, 0, node, caller);
+				gfp_mask, PAGE_KERNEL, vm_flags, node, caller);
 }
 /*
  * This is only for performance analysis of vmalloc and stress purpose.
@@ -3396,7 +3397,7 @@  EXPORT_SYMBOL_GPL(__vmalloc_node);
 
 void *__vmalloc(unsigned long size, gfp_t gfp_mask)
 {
-	return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE,
+	return __vmalloc_node(size, 1, gfp_mask, 0, NUMA_NO_NODE,
 				__builtin_return_address(0));
 }
 EXPORT_SYMBOL(__vmalloc);
@@ -3415,7 +3416,7 @@  EXPORT_SYMBOL(__vmalloc);
  */
 void *vmalloc(unsigned long size)
 {
-	return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE,
+	return __vmalloc_node(size, 1, GFP_KERNEL, 0, NUMA_NO_NODE,
 				__builtin_return_address(0));
 }
 EXPORT_SYMBOL(vmalloc);
@@ -3432,7 +3433,7 @@  EXPORT_SYMBOL(vmalloc);
  *
  * Return: pointer to the allocated memory or %NULL on error
  */
-void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
+__weak void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
 {
 	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
 				    gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
@@ -3455,7 +3456,7 @@  EXPORT_SYMBOL_GPL(vmalloc_huge);
  */
 void *vzalloc(unsigned long size)
 {
-	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE,
+	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, 0, NUMA_NO_NODE,
 				__builtin_return_address(0));
 }
 EXPORT_SYMBOL(vzalloc);
@@ -3469,7 +3470,7 @@  EXPORT_SYMBOL(vzalloc);
  *
  * Return: pointer to the allocated memory or %NULL on error
  */
-void *vmalloc_user(unsigned long size)
+__weak void *vmalloc_user(unsigned long size)
 {
 	return __vmalloc_node_range(size, SHMLBA,  VMALLOC_START, VMALLOC_END,
 				    GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
@@ -3493,7 +3494,7 @@  EXPORT_SYMBOL(vmalloc_user);
  */
 void *vmalloc_node(unsigned long size, int node)
 {
-	return __vmalloc_node(size, 1, GFP_KERNEL, node,
+	return __vmalloc_node(size, 1, GFP_KERNEL, 0, node,
 			__builtin_return_address(0));
 }
 EXPORT_SYMBOL(vmalloc_node);
@@ -3511,23 +3512,11 @@  EXPORT_SYMBOL(vmalloc_node);
  */
 void *vzalloc_node(unsigned long size, int node)
 {
-	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node,
+	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, 0, node,
 				__builtin_return_address(0));
 }
 EXPORT_SYMBOL(vzalloc_node);
 
-#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
-#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
-#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
-#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
-#else
-/*
- * 64b systems should always have either DMA or DMA32 zones. For others
- * GFP_DMA32 should do the right thing and use the normal zone.
- */
-#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
-#endif
-
 /**
  * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
  * @size:	allocation size
@@ -3539,7 +3528,7 @@  EXPORT_SYMBOL(vzalloc_node);
  */
 void *vmalloc_32(unsigned long size)
 {
-	return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE,
+	return __vmalloc_node(size, 1, GFP_VMALLOC32, 0, NUMA_NO_NODE,
 			__builtin_return_address(0));
 }
 EXPORT_SYMBOL(vmalloc_32);
@@ -3553,7 +3542,7 @@  EXPORT_SYMBOL(vmalloc_32);
  *
  * Return: pointer to the allocated memory or %NULL on error
  */
-void *vmalloc_32_user(unsigned long size)
+__weak void *vmalloc_32_user(unsigned long size)
 {
 	return __vmalloc_node_range(size, SHMLBA,  VMALLOC_START, VMALLOC_END,
 				    GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,