
[kvm-unit-tests,v2,2/7] lib/vmalloc: vmalloc support for handling allocation metadata

Message ID 20201002154420.292134-3-imbrenda@linux.ibm.com (mailing list archive)
State New, archived
Series Rewrite the allocators

Commit Message

Claudio Imbrenda Oct. 2, 2020, 3:44 p.m. UTC
Add allocation metadata handling to vmalloc.

In upcoming patches, allocation metadata will have to be handled
directly by the lower-level allocators, rather than by the common
wrapper.

In this patch, the number of allocated pages plus a magic value are
written immediately before the returned pointer. This means that
multi-page allocations will allocate one extra page (which is no worse
than what the current allocator does).

For small allocations there is an optimization: the returned address is
intentionally not page-aligned. This signals that the allocation spans
only one page. In this case the metadata is just the magic value, also
saved immediately before the returned pointer. Since the pointer does
not point to the beginning of the page, there is always room in the
same page for the magic value.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
 lib/vmalloc.c | 105 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 95 insertions(+), 10 deletions(-)
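
To make the scheme above concrete: telling the two layouts apart at free
time boils down to an alignment check on the pointer handed back to the
caller. A rough sketch (names as defined by the patch below; the real
check lives in vm_free()):

 	if (!IS_ALIGNED((uintptr_t)mem, PAGE_SIZE)) {
 		/* single-page allocation: only the magic precedes the pointer */
 		assert(GET_MAGIC(mem) == VM_MAGIC);
 	} else {
 		/* multi-page allocation: a full struct metadata sits at the end
 		 * of the extra page right below the returned pointer */
 		struct metadata *m = GET_METADATA(mem);
 		assert(m->magic == VM_MAGIC);
 		assert(m->npages > 0);
 	}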

Comments

Andrew Jones Oct. 3, 2020, 8:46 a.m. UTC | #1
On Fri, Oct 02, 2020 at 05:44:15PM +0200, Claudio Imbrenda wrote:
> Add allocation metadata handling to vmalloc.
> 
> In upcoming patches, allocation metadata will have to be handled
> directly by the lower-level allocators, rather than by the common
> wrapper.
> 
> In this patch, the number of allocated pages plus a magic value are
> written immediately before the returned pointer. This means that
> multi-page allocations will allocate one extra page (which is no worse
> than what the current allocator does).
> 
> For small allocations there is an optimization: the returned address is
> intentionally not page-aligned. This signals that the allocation spans
> only one page. In this case the metadata is just the magic value, also
> saved immediately before the returned pointer. Since the pointer does
> not point to the beginning of the page, there is always room in the
> same page for the magic value.
> 
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> ---
>  lib/vmalloc.c | 105 +++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 95 insertions(+), 10 deletions(-)
> 
> diff --git a/lib/vmalloc.c b/lib/vmalloc.c
> index e0c7b6b..2f25734 100644
> --- a/lib/vmalloc.c
> +++ b/lib/vmalloc.c
> @@ -15,6 +15,16 @@
>  #include <bitops.h>
>  #include "vmalloc.h"
>  
> +#define VM_MAGIC 0x7E57C0DE
> +
> +#define GET_METADATA(x) (((struct metadata *)(x)) - 1)
> +#define GET_MAGIC(x) (*((unsigned long *)(x) - 1))
> +
> +struct metadata {
> +	unsigned long npages;
> +	unsigned long magic;
> +};
> +
>  static struct spinlock lock;
>  static void *vfree_top = 0;
>  static void *page_root;
> @@ -25,8 +35,14 @@ static void *page_root;
>   *
>   * nr is the number of pages to allocate
>   * alignment_pages is the alignment of the allocation *in pages*
> + * metadata indicates whether an extra (unaligned) page needs to be allocated
> + * right before the main (aligned) allocation.
> + *
> + * The return value points to the first allocated virtual page, which will
> + * be the (potentially unaligned) metadata page if the metadata flag is
> + * specified.
>   */
> -void *alloc_vpages_aligned(ulong nr, unsigned int align_order)
> +static void *do_alloc_vpages(ulong nr, unsigned int align_order, bool metadata)
>  {
>  	uintptr_t ptr;
>  
> @@ -34,6 +50,8 @@ void *alloc_vpages_aligned(ulong nr, unsigned int align_order)
>  	ptr = (uintptr_t)vfree_top;
>  	ptr -= PAGE_SIZE * nr;
>  	ptr &= GENMASK_ULL(63, PAGE_SHIFT + align_order);
> +	if (metadata)
> +		ptr -= PAGE_SIZE;
>  	vfree_top = (void *)ptr;
>  	spin_unlock(&lock);
>  
> @@ -41,6 +59,11 @@ void *alloc_vpages_aligned(ulong nr, unsigned int align_order)
>  	return (void *)ptr;
>  }
>  
> +void *alloc_vpages_aligned(ulong nr, unsigned int align_order)
> +{
> +	return do_alloc_vpages(nr, align_order, false);
> +}
> +
>  void *alloc_vpages(ulong nr)
>  {
>  	return alloc_vpages_aligned(nr, 0);
> @@ -69,35 +92,97 @@ void *vmap(phys_addr_t phys, size_t size)
>  	return mem;
>  }
>  
> +/*
> + * Allocate one page, for an object with specified alignment.
> + * The resulting pointer will be aligned to the required alignment, but
> + * intentionally not page-aligned.
> + * The metadata for single pages allocation is just the magic value,
> + * which is placed right before the pointer, like for bigger allocations.
> + */
> +static void *vm_alloc_one_page(size_t alignment)
> +{
> +	void *p;
> +
> +	/* this guarantees that there will be space for the magic value */
> +	assert(alignment >= sizeof(uintptr_t));
> +	assert(alignment < PAGE_SIZE);
> +	p = alloc_vpage();
> +	install_page(page_root, virt_to_phys(alloc_page()), p);
> +	p = (void *)((uintptr_t)p + alignment);
> +	/* write the magic value right before the returned address */
> +	GET_MAGIC(p) = VM_MAGIC;
> +	return p;
> +}
> +
>  /*
>   * Allocate virtual memory, with the specified minimum alignment.
> + * If the allocation fits in one page, only one page is allocated. Otherwise
> + * enough pages are allocated for the object, plus one to keep metadata
> + * information about the allocation.
>   */
>  static void *vm_memalign(size_t alignment, size_t size)
>  {
> +	struct metadata *m;
>  	phys_addr_t pa;
> -	void *mem, *p;
> +	uintptr_t p;
> +	void *mem;
> +	size_t i;
>  
> +	if (!size)
> +		return NULL;
>  	assert(is_power_of_2(alignment));
>  
> +	if (alignment < sizeof(uintptr_t))
> +		alignment = sizeof(uintptr_t);
> +	/* it fits in one page, allocate only one page */
> +	if (alignment + size <= PAGE_SIZE)
> +		return vm_alloc_one_page(alignment);
>  	size = PAGE_ALIGN(size) / PAGE_SIZE;
>  	alignment = get_order(PAGE_ALIGN(alignment) / PAGE_SIZE);
> -	mem = p = alloc_vpages_aligned(size, alignment);
> -	while (size--) {
> +	mem = do_alloc_vpages(size, alignment, true);
> +	p = (uintptr_t)mem;
> +	/* skip the metadata page */
> +	mem = (void *)(p + PAGE_SIZE);
> +	/*
> +	 * time to actually allocate the physical pages to back our virtual
> +	 * allocation; note that we need to allocate one extra page (for the
> +	 * metadata), hence the <=
> +	 */
> +	for (i = 0; i <= size; i++, p += PAGE_SIZE) {
>  		pa = virt_to_phys(alloc_page());
>  		assert(pa);
> -		install_page(page_root, pa, p);
> -		p += PAGE_SIZE;
> +		install_page(page_root, pa, (void *)p);
>  	}
> +	m = GET_METADATA(mem);
> +	m->npages = size;
> +	m->magic = VM_MAGIC;
>  	return mem;
>  }
>  
>  static void vm_free(void *mem, size_t size)
>  {
> -	while (size) {
> -		free_page(phys_to_virt(virt_to_pte_phys(page_root, mem)));
> -		mem += PAGE_SIZE;
> -		size -= PAGE_SIZE;
> +	struct metadata *m;
> +	uintptr_t ptr, end;
> +
> +	/* the pointer is not page-aligned, it was a single-page allocation */
> +	if (!IS_ALIGNED((uintptr_t)mem, PAGE_SIZE)) {
> +		assert(GET_MAGIC(mem) == VM_MAGIC);
> +		ptr = virt_to_pte_phys(page_root, mem) & PAGE_MASK;
> +		free_page(phys_to_virt(ptr));
> +		return;
>  	}
> +
> +	/* the pointer is page-aligned, it was a multi-page allocation */
> +	m = GET_METADATA(mem);
> +	assert(m->magic == VM_MAGIC);
> +	assert(m->npages > 0);
> +	/* free all the pages including the metadata page */
> +	ptr = (uintptr_t)mem - PAGE_SIZE;
> +	end = ptr + m->npages * PAGE_SIZE;
> +	for ( ; ptr < end; ptr += PAGE_SIZE)
> +		free_page(phys_to_virt(virt_to_pte_phys(page_root, (void *)ptr)));
> +	/* free the last one separately to avoid overflow issues */
> +	free_page(phys_to_virt(virt_to_pte_phys(page_root, (void *)ptr)));

I don't get this. How is

 for (p = start; p < end; p += step)
   process(p);
 process(p)

different from

 for (p = start; p <= end; p += step)
   process(p);

To avoid overflow issues we should simply ensure start and end are
computed correctly. Also, I'd prefer 'end' to point at the actual end,
not the last included page, e.g. start=0x1000, end=0x1fff. Then we
have

 start = get_start();
 assert(PAGE_ALIGN(start) == start);
 end = start + nr_pages * PAGE_SIZE - 1;
 assert(start < end);
 for (p = start; p < end; p += PAGE_SIZE)
   process(p);

Thanks,
drew


>  }
>  
>  static struct alloc_ops vmalloc_ops = {
> -- 
> 2.26.2
>
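
For reference, one possible shape of the multi-page freeing path after
applying the suggestion above (a sketch only, not taken from the thread;
it reuses the names from the patch: mem, m, ptr, end, page_root, with
'end' pointing at the last byte of the mapping, so the metadata page and
all object pages are freed in a single loop):

 	ptr = (uintptr_t)mem - PAGE_SIZE;	/* metadata page */
 	end = ptr + (m->npages + 1) * PAGE_SIZE - 1;
 	assert(ptr < end);
 	for ( ; ptr < end; ptr += PAGE_SIZE)
 		free_page(phys_to_virt(virt_to_pte_phys(page_root, (void *)ptr)));
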
Claudio Imbrenda Oct. 5, 2020, 7 a.m. UTC | #2
On Sat, 3 Oct 2020 10:46:39 +0200
Andrew Jones <drjones@redhat.com> wrote:

[...]

> > +	/* the pointer is page-aligned, it was a multi-page allocation */
> > +	m = GET_METADATA(mem);
> > +	assert(m->magic == VM_MAGIC);
> > +	assert(m->npages > 0);
> > +	/* free all the pages including the metadata page */
> > +	ptr = (uintptr_t)mem - PAGE_SIZE;
> > +	end = ptr + m->npages * PAGE_SIZE;
> > +	for ( ; ptr < end; ptr += PAGE_SIZE)
> > +		free_page(phys_to_virt(virt_to_pte_phys(page_root, (void *)ptr)));
> > +	/* free the last one separately to avoid overflow issues */
> > +	free_page(phys_to_virt(virt_to_pte_phys(page_root, (void *)ptr)));
> 
> I don't get this. How is
> 
>  for (p = start; p < end; p += step)
>    process(p);
>  process(p)
> 
> different from
> 
>  for (p = start; p <= end; p += step)
>    process(p);

there was a reason at some point; I think the code evolved past it and
these lines stayed there as-is

> To avoid overflow issues we should simply ensure start and end are
> computed correctly. Also, I'd prefer 'end' to point at the actual end,
> not the last included page, e.g. start=0x1000, end=0x1fff. Then we
> have
> 
>  start = get_start();
>  assert(PAGE_ALIGN(start) == start);
>  end = start + nr_pages * PAGE_SIZE - 1;
>  assert(start < end);
>  for (p = start; p < end; p += PAGE_SIZE)
>    process(p);
> 
> Thanks,
> drew

yeah I'll definitely fix it

Patch

diff --git a/lib/vmalloc.c b/lib/vmalloc.c
index e0c7b6b..2f25734 100644
--- a/lib/vmalloc.c
+++ b/lib/vmalloc.c
@@ -15,6 +15,16 @@ 
 #include <bitops.h>
 #include "vmalloc.h"
 
+#define VM_MAGIC 0x7E57C0DE
+
+#define GET_METADATA(x) (((struct metadata *)(x)) - 1)
+#define GET_MAGIC(x) (*((unsigned long *)(x) - 1))
+
+struct metadata {
+	unsigned long npages;
+	unsigned long magic;
+};
+
 static struct spinlock lock;
 static void *vfree_top = 0;
 static void *page_root;
@@ -25,8 +35,14 @@  static void *page_root;
  *
  * nr is the number of pages to allocate
  * alignment_pages is the alignment of the allocation *in pages*
+ * metadata indicates whether an extra (unaligned) page needs to be allocated
+ * right before the main (aligned) allocation.
+ *
+ * The return value points to the first allocated virtual page, which will
+ * be the (potentially unaligned) metadata page if the metadata flag is
+ * specified.
  */
-void *alloc_vpages_aligned(ulong nr, unsigned int align_order)
+static void *do_alloc_vpages(ulong nr, unsigned int align_order, bool metadata)
 {
 	uintptr_t ptr;
 
@@ -34,6 +50,8 @@  void *alloc_vpages_aligned(ulong nr, unsigned int align_order)
 	ptr = (uintptr_t)vfree_top;
 	ptr -= PAGE_SIZE * nr;
 	ptr &= GENMASK_ULL(63, PAGE_SHIFT + align_order);
+	if (metadata)
+		ptr -= PAGE_SIZE;
 	vfree_top = (void *)ptr;
 	spin_unlock(&lock);
 
@@ -41,6 +59,11 @@  void *alloc_vpages_aligned(ulong nr, unsigned int align_order)
 	return (void *)ptr;
 }
 
+void *alloc_vpages_aligned(ulong nr, unsigned int align_order)
+{
+	return do_alloc_vpages(nr, align_order, false);
+}
+
 void *alloc_vpages(ulong nr)
 {
 	return alloc_vpages_aligned(nr, 0);
@@ -69,35 +92,97 @@  void *vmap(phys_addr_t phys, size_t size)
 	return mem;
 }
 
+/*
+ * Allocate one page, for an object with specified alignment.
+ * The resulting pointer will be aligned to the required alignment, but
+ * intentionally not page-aligned.
+ * The metadata for single pages allocation is just the magic value,
+ * which is placed right before the pointer, like for bigger allocations.
+ */
+static void *vm_alloc_one_page(size_t alignment)
+{
+	void *p;
+
+	/* this guarantees that there will be space for the magic value */
+	assert(alignment >= sizeof(uintptr_t));
+	assert(alignment < PAGE_SIZE);
+	p = alloc_vpage();
+	install_page(page_root, virt_to_phys(alloc_page()), p);
+	p = (void *)((uintptr_t)p + alignment);
+	/* write the magic value right before the returned address */
+	GET_MAGIC(p) = VM_MAGIC;
+	return p;
+}
+
 /*
  * Allocate virtual memory, with the specified minimum alignment.
+ * If the allocation fits in one page, only one page is allocated. Otherwise
+ * enough pages are allocated for the object, plus one to keep metadata
+ * information about the allocation.
  */
 static void *vm_memalign(size_t alignment, size_t size)
 {
+	struct metadata *m;
 	phys_addr_t pa;
-	void *mem, *p;
+	uintptr_t p;
+	void *mem;
+	size_t i;
 
+	if (!size)
+		return NULL;
 	assert(is_power_of_2(alignment));
 
+	if (alignment < sizeof(uintptr_t))
+		alignment = sizeof(uintptr_t);
+	/* it fits in one page, allocate only one page */
+	if (alignment + size <= PAGE_SIZE)
+		return vm_alloc_one_page(alignment);
 	size = PAGE_ALIGN(size) / PAGE_SIZE;
 	alignment = get_order(PAGE_ALIGN(alignment) / PAGE_SIZE);
-	mem = p = alloc_vpages_aligned(size, alignment);
-	while (size--) {
+	mem = do_alloc_vpages(size, alignment, true);
+	p = (uintptr_t)mem;
+	/* skip the metadata page */
+	mem = (void *)(p + PAGE_SIZE);
+	/*
+	 * time to actually allocate the physical pages to back our virtual
+	 * allocation; note that we need to allocate one extra page (for the
+	 * metadata), hence the <=
+	 */
+	for (i = 0; i <= size; i++, p += PAGE_SIZE) {
 		pa = virt_to_phys(alloc_page());
 		assert(pa);
-		install_page(page_root, pa, p);
-		p += PAGE_SIZE;
+		install_page(page_root, pa, (void *)p);
 	}
+	m = GET_METADATA(mem);
+	m->npages = size;
+	m->magic = VM_MAGIC;
 	return mem;
 }
 
 static void vm_free(void *mem, size_t size)
 {
-	while (size) {
-		free_page(phys_to_virt(virt_to_pte_phys(page_root, mem)));
-		mem += PAGE_SIZE;
-		size -= PAGE_SIZE;
+	struct metadata *m;
+	uintptr_t ptr, end;
+
+	/* the pointer is not page-aligned, it was a single-page allocation */
+	if (!IS_ALIGNED((uintptr_t)mem, PAGE_SIZE)) {
+		assert(GET_MAGIC(mem) == VM_MAGIC);
+		ptr = virt_to_pte_phys(page_root, mem) & PAGE_MASK;
+		free_page(phys_to_virt(ptr));
+		return;
 	}
+
+	/* the pointer is page-aligned, it was a multi-page allocation */
+	m = GET_METADATA(mem);
+	assert(m->magic == VM_MAGIC);
+	assert(m->npages > 0);
+	/* free all the pages including the metadata page */
+	ptr = (uintptr_t)mem - PAGE_SIZE;
+	end = ptr + m->npages * PAGE_SIZE;
+	for ( ; ptr < end; ptr += PAGE_SIZE)
+		free_page(phys_to_virt(virt_to_pte_phys(page_root, (void *)ptr)));
+	/* free the last one separately to avoid overflow issues */
+	free_page(phys_to_virt(virt_to_pte_phys(page_root, (void *)ptr)));
 }
 
 static struct alloc_ops vmalloc_ops = {
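
As a standalone illustration (plain C11, independent of the
kvm-unit-tests environment and of any lib/ helpers), the toy program
below exercises only the pointer arithmetic behind GET_METADATA and
GET_MAGIC on a fake pair of pages, to show where the metadata ends up
relative to the returned pointer. PAGE_SIZE is hard-coded and the pages
come from aligned_alloc(), purely for the sake of the example:

 #include <assert.h>
 #include <stdint.h>
 #include <stdlib.h>

 #define PAGE_SIZE 4096UL
 #define VM_MAGIC 0x7E57C0DE

 struct metadata {
 	unsigned long npages;
 	unsigned long magic;
 };

 #define GET_METADATA(x) (((struct metadata *)(x)) - 1)
 #define GET_MAGIC(x) (*((unsigned long *)(x) - 1))

 int main(void)
 {
 	/* fake "metadata page" + one "object page", contiguous, page-aligned */
 	char *base = aligned_alloc(PAGE_SIZE, 2 * PAGE_SIZE);
 	void *mem;

 	assert(base);
 	mem = base + PAGE_SIZE;	/* what a multi-page allocation returns */

 	GET_METADATA(mem)->npages = 1;
 	GET_METADATA(mem)->magic = VM_MAGIC;
 	/* the struct occupies the last sizeof(struct metadata) bytes of the
 	 * metadata page, immediately below the returned pointer */
 	assert((char *)GET_METADATA(mem) == base + PAGE_SIZE - sizeof(struct metadata));

 	/* single-page case: unaligned pointer, only the magic precedes it */
 	void *p = base + 64;
 	GET_MAGIC(p) = VM_MAGIC;
 	assert(GET_MAGIC(p) == VM_MAGIC);

 	free(base);
 	return 0;
 }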