diff mbox series

[v3,13/14] mm: Unpoison pcpu chunks with base address tag

Message ID 61033ef5b70277039ceeb8f6173e8b3fbc271c08.1743772053.git.maciej.wieczor-retman@intel.com (mailing list archive)
State New
Headers show
Series [v3,01/14] kasan: sw_tags: Use arithmetic shift for shadow computation | expand

Commit Message

Maciej Wieczor-Retman April 4, 2025, 1:14 p.m. UTC
The problem presented here is related to NUMA systems and tag-based
KASAN mode. Getting to it can be explained in the following points:

	1. A new chunk is created with pcpu_create_chunk() and
	   vm_structs are allocated. On systems with one NUMA node only
	   one is allocated, but with more NUMA nodes at least a second
	   one will be allocated too.

	2. chunk->base_addr is assigned the modified value of
	   vms[0]->addr and thus inherits the tag of this allocated
	   structure.

	3. In pcpu_alloc() for each possible cpu pcpu_chunk_addr() is
	   executed which calculates per cpu pointers that correspond to
	   the vms structure addresses. The calculations are based on
	   adding an offset from a table to chunk->base_addr.

Here the problem presents itself since for addresses based on vms[1] and
up, the tag will be different than the ones based on vms[0] (base_addr).
The tag mismatch happens and an error is reported.

Unpoison all the vms[]->addr with the same tag to resolve the mismatch.

Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
---
Changelog v3:
- Remove last version of this patch that just resets the tag on
  base_addr and add this patch that unpoisons all areas with the same
  tag instead.

 include/linux/kasan.h | 10 ++++++++++
 mm/kasan/shadow.c     | 11 +++++++++++
 mm/vmalloc.c          |  3 +--
 3 files changed, 22 insertions(+), 2 deletions(-)

Comments

Dave Hansen April 4, 2025, 6:08 p.m. UTC | #1
On 4/4/25 06:14, Maciej Wieczor-Retman wrote:
> The problem presented here is related to NUMA systems and tag-based
> KASAN mode. Getting to it can be explained in the following points:
> 
> 	1. A new chunk is created with pcpu_create_chunk() and
> 	   vm_structs are allocated. On systems with one NUMA node only
> 	   one is allocated, but with more NUMA nodes at least a second
> 	   one will be allocated too.
> 
> 	2. chunk->base_addr is assigned the modified value of
> 	   vms[0]->addr and thus inherits the tag of this allocated
> 	   structure.
> 
> 	3. In pcpu_alloc() for each possible cpu pcpu_chunk_addr() is
> 	   executed which calculates per cpu pointers that correspond to
> 	   the vms structure addresses. The calculations are based on
> 	   adding an offset from a table to chunk->base_addr.
> 
> Here the problem presents itself since for addresses based on vms[1] and
> up, the tag will be different than the ones based on vms[0] (base_addr).
> The tag mismatch happens and an error is reported.
> 
> Unpoison all the vms[]->addr with the same tag to resolve the mismatch.

I think there's a bit too much superfluous information in there. For
instance, it's not important to talk about how or why there can be more
than one chunk, just say there _can_ be more than one.

	1. There can be more than one chunk
	2. The chunks are virtually contiguous
	3. Since they are virtually contiguous, the chunks are all
	   addressed from a single base address
	4. The base address has a tag
	5. The base address points at the first chunk and thus inherits
	   the tag of the first chunk
	6. The subsequent chunks will be accessed with the tag from the
	   first chunk
	7. Thus, the subsequent chunks need to have their tag set to
	   match that of the first chunk.

Right?

> diff --git a/include/linux/kasan.h b/include/linux/kasan.h
> index 54481f8c30c5..bd033b2ba383 100644
> --- a/include/linux/kasan.h
> +++ b/include/linux/kasan.h
> @@ -613,6 +613,13 @@ static __always_inline void kasan_poison_vmalloc(const void *start,
>  		__kasan_poison_vmalloc(start, size);
>  }
>  
> +void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms);
> +static __always_inline void kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
> +{
> +	if (kasan_enabled())
> +		__kasan_unpoison_vmap_areas(vms, nr_vms);
> +}
> +
>  #else /* CONFIG_KASAN_VMALLOC */
>  
>  static inline void kasan_populate_early_vm_area_shadow(void *start,
> @@ -637,6 +644,9 @@ static inline void *kasan_unpoison_vmalloc(const void *start,
>  static inline void kasan_poison_vmalloc(const void *start, unsigned long size)
>  { }
>  
> +static inline void kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
> +{ }
> +
>  #endif /* CONFIG_KASAN_VMALLOC */
>  
>  #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
> diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
> index 88d1c9dcb507..9496f256bc0f 100644
> --- a/mm/kasan/shadow.c
> +++ b/mm/kasan/shadow.c
> @@ -582,6 +582,17 @@ void __kasan_poison_vmalloc(const void *start, unsigned long size)
>  	kasan_poison(start, size, KASAN_VMALLOC_INVALID, false);
>  }
>  
> +void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
> +{
> +	int area;
> +
> +	for (area = 0 ; area < nr_vms ; area++) {
> +		kasan_poison(vms[area]->addr, vms[area]->size,
> +			     arch_kasan_get_tag(vms[0]->addr), false);
> +		arch_kasan_set_tag(vms[area]->addr, arch_kasan_get_tag(vms[0]->addr));
> +	}
> +}

-ENOCOMMENTS

>  #else /* CONFIG_KASAN_VMALLOC */
>  
>  int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask)
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 61981ee1c9d2..fbd56bf8aeb2 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -4783,8 +4783,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
>  	 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
>  	 */
>  	for (area = 0; area < nr_vms; area++)
> -		vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr,
> -				vms[area]->size, KASAN_VMALLOC_PROT_NORMAL);
> +		kasan_unpoison_vmap_areas(vms, nr_vms);
>  
>  	kfree(vas);
>  	return vms;

So, the right way to do this is refactor, first, then add your changes
after. This really wants to be two patches.
diff mbox series

Patch

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 54481f8c30c5..bd033b2ba383 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -613,6 +613,13 @@  static __always_inline void kasan_poison_vmalloc(const void *start,
 		__kasan_poison_vmalloc(start, size);
 }
 
+void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms);
+static __always_inline void kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
+{
+	if (kasan_enabled())
+		__kasan_unpoison_vmap_areas(vms, nr_vms);
+}
+
 #else /* CONFIG_KASAN_VMALLOC */
 
 static inline void kasan_populate_early_vm_area_shadow(void *start,
@@ -637,6 +644,9 @@  static inline void *kasan_unpoison_vmalloc(const void *start,
 static inline void kasan_poison_vmalloc(const void *start, unsigned long size)
 { }
 
+static inline void kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
+{ }
+
 #endif /* CONFIG_KASAN_VMALLOC */
 
 #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
index 88d1c9dcb507..9496f256bc0f 100644
--- a/mm/kasan/shadow.c
+++ b/mm/kasan/shadow.c
@@ -582,6 +582,17 @@  void __kasan_poison_vmalloc(const void *start, unsigned long size)
 	kasan_poison(start, size, KASAN_VMALLOC_INVALID, false);
 }
 
+void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
+{
+	int area;
+
+	for (area = 0 ; area < nr_vms ; area++) {
+		kasan_poison(vms[area]->addr, vms[area]->size,
+			     arch_kasan_get_tag(vms[0]->addr), false);
+		arch_kasan_set_tag(vms[area]->addr, arch_kasan_get_tag(vms[0]->addr));
+	}
+}
+
 #else /* CONFIG_KASAN_VMALLOC */
 
 int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 61981ee1c9d2..fbd56bf8aeb2 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -4783,8 +4783,7 @@  struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 	 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
 	 */
 	for (area = 0; area < nr_vms; area++)
-		vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr,
-				vms[area]->size, KASAN_VMALLOC_PROT_NORMAL);
+		kasan_unpoison_vmap_areas(vms, nr_vms);
 
 	kfree(vas);
 	return vms;