diff mbox series

[RFC,v3,08/35] mm: cma: Introduce cma_alloc_range()

Message ID 20240125164256.4147-9-alexandru.elisei@arm.com (mailing list archive)
State New, archived
Headers show
Series Add support for arm64 MTE dynamic tag storage reuse | expand

Commit Message

Alexandru Elisei Jan. 25, 2024, 4:42 p.m. UTC
Today, cma_alloc() is used to allocate a contiguous memory region. The
function allows the caller to specify the number of pages to allocate, but
not the starting address. cma_alloc() will walk over the entire CMA region
trying to allocate the first available range of the specified size.

Introduce cma_alloc_range(), which makes CMA more versatile by allowing the
caller to specify a particular range in the CMA region, defined by the
start pfn and the size.

arm64 will make use of this function once tag storage management is
implemented: cma_alloc_range() will be used to reserve the tag storage
associated with a tagged page.

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
---

Changes since rfc v2:

* New patch.

 include/linux/cma.h        |  2 +
 include/trace/events/cma.h | 59 ++++++++++++++++++++++++++
 mm/cma.c                   | 86 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 147 insertions(+)

Comments

Anshuman Khandual Jan. 30, 2024, 5:20 a.m. UTC | #1
On 1/25/24 22:12, Alexandru Elisei wrote:
> Today, cma_alloc() is used to allocate a contiguous memory region. The
> function allows the caller to specify the number of pages to allocate, but
> not the starting address. cma_alloc() will walk over the entire CMA region
> trying to allocate the first available range of the specified size.
> 
> Introduce cma_alloc_range(), which makes CMA more versatile by allowing the
> caller to specify a particular range in the CMA region, defined by the
> start pfn and the size.
> 
> arm64 will make use of this function when tag storage management will be
> implemented: cma_alloc_range() will be used to reserve the tag storage
> associated with a tagged page.

Basically, you would like to pass in a preferred start address, and the
allocation could just fail if a contiguous range is not available from
that starting address?

Then why not just change cma_alloc() to take a new argument 'start_pfn'?
Why create a new but very similar allocator?

But then I am wondering why this could not be done in the arm64 platform
code itself operating on a CMA area reserved just for tag storage. Unless
this new allocator has other usage beyond MTE, this could be implemented
in the platform itself.

> 
> Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
> ---
> 
> Changes since rfc v2:
> 
> * New patch.
> 
>  include/linux/cma.h        |  2 +
>  include/trace/events/cma.h | 59 ++++++++++++++++++++++++++
>  mm/cma.c                   | 86 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 147 insertions(+)
> 
> diff --git a/include/linux/cma.h b/include/linux/cma.h
> index 63873b93deaa..e32559da6942 100644
> --- a/include/linux/cma.h
> +++ b/include/linux/cma.h
> @@ -50,6 +50,8 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
>  					struct cma **res_cma);
>  extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align,
>  			      bool no_warn);
> +extern int cma_alloc_range(struct cma *cma, unsigned long start, unsigned long count,
> +			   unsigned tries, gfp_t gfp);
>  extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count);
>  extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
>  
> diff --git a/include/trace/events/cma.h b/include/trace/events/cma.h
> index 25103e67737c..a89af313a572 100644
> --- a/include/trace/events/cma.h
> +++ b/include/trace/events/cma.h
> @@ -36,6 +36,65 @@ TRACE_EVENT(cma_release,
>  		  __entry->count)
>  );
>  
> +TRACE_EVENT(cma_alloc_range_start,
> +
> +	TP_PROTO(const char *name, unsigned long start, unsigned long count,
> +		 unsigned tries),
> +
> +	TP_ARGS(name, start, count, tries),
> +
> +	TP_STRUCT__entry(
> +		__string(name, name)
> +		__field(unsigned long, start)
> +		__field(unsigned long, count)
> +		__field(unsigned, tries)
> +	),
> +
> +	TP_fast_assign(
> +		__assign_str(name, name);
> +		__entry->start = start;
> +		__entry->count = count;
> +		__entry->tries = tries;
> +	),
> +
> +	TP_printk("name=%s start=%lx count=%lu tries=%u",
> +		  __get_str(name),
> +		  __entry->start,
> +		  __entry->count,
> +		  __entry->tries)
> +);
> +
> +TRACE_EVENT(cma_alloc_range_finish,
> +
> +	TP_PROTO(const char *name, unsigned long start, unsigned long count,
> +		 unsigned attempts, int err),
> +
> +	TP_ARGS(name, start, count, attempts, err),
> +
> +	TP_STRUCT__entry(
> +		__string(name, name)
> +		__field(unsigned long, start)
> +		__field(unsigned long, count)
> +		__field(unsigned, attempts)
> +		__field(int, err)
> +	),
> +
> +	TP_fast_assign(
> +		__assign_str(name, name);
> +		__entry->start = start;
> +		__entry->count = count;
> +		__entry->attempts = attempts;
> +		__entry->err = err;
> +	),
> +
> +	TP_printk("name=%s start=%lx count=%lu attempts=%u err=%d",
> +		  __get_str(name),
> +		  __entry->start,
> +		  __entry->count,
> +		  __entry->attempts,
> +		  __entry->err)
> +);
> +
>  TRACE_EVENT(cma_alloc_start,
>  
>  	TP_PROTO(const char *name, unsigned long count, unsigned int align),
> diff --git a/mm/cma.c b/mm/cma.c
> index 543bb6b3be8e..4a0f68b9443b 100644
> --- a/mm/cma.c
> +++ b/mm/cma.c
> @@ -416,6 +416,92 @@ static void cma_debug_show_areas(struct cma *cma)
>  static inline void cma_debug_show_areas(struct cma *cma) { }
>  #endif
>  
> +/**
> + * cma_alloc_range() - allocate pages in a specific range
> + * @cma:   Contiguous memory region for which the allocation is performed.
> + * @start: Starting pfn of the allocation.
> + * @count: Requested number of pages
> + * @tries: Number of tries if the range is busy
> + * @no_warn: Avoid printing message about failed allocation
> + *
> + * This function allocates part of contiguous memory from a specific contiguous
> + * memory area, from the specified starting address. The 'start' pfn and the the
> + * 'count' number of pages must be aligned to the CMA bitmap order per bit.
> + */
> +int cma_alloc_range(struct cma *cma, unsigned long start, unsigned long count,
> +		    unsigned tries, gfp_t gfp)
> +{
> +	unsigned long bitmap_maxno, bitmap_no, bitmap_start, bitmap_count;
> +	unsigned long i = 0;
> +	struct page *page;
> +	int err = -EINVAL;
> +
> +	if (!cma || !cma->count || !cma->bitmap)
> +		goto out_stats;
> +
> +	trace_cma_alloc_range_start(cma->name, start, count, tries);
> +
> +	if (!count || start < cma->base_pfn ||
> +	    start + count > cma->base_pfn + cma->count)
> +		goto out_stats;
> +
> +	if (!IS_ALIGNED(start | count, 1 << cma->order_per_bit))
> +		goto out_stats;
> +
> +	bitmap_start = (start - cma->base_pfn) >> cma->order_per_bit;
> +	bitmap_maxno = cma_bitmap_maxno(cma);
> +	bitmap_count = cma_bitmap_pages_to_bits(cma, count);
> +
> +	spin_lock_irq(&cma->lock);
> +	bitmap_no = bitmap_find_next_zero_area(cma->bitmap, bitmap_maxno,
> +					       bitmap_start, bitmap_count, 0);
> +	if (bitmap_no != bitmap_start) {
> +		spin_unlock_irq(&cma->lock);
> +		err = -EEXIST;
> +		goto out_stats;
> +	}
> +	bitmap_set(cma->bitmap, bitmap_start, bitmap_count);
> +	spin_unlock_irq(&cma->lock);
> +
> +	for (i = 0; i < tries; i++) {
> +		mutex_lock(&cma_mutex);
> +		err = alloc_contig_range(start, start + count, MIGRATE_CMA, gfp);
> +		mutex_unlock(&cma_mutex);
> +
> +		if (err != -EBUSY)
> +			break;
> +	}
> +
> +	if (err) {
> +		cma_clear_bitmap(cma, start, count);
> +	} else {
> +		page = pfn_to_page(start);
> +
> +		/*
> +		 * CMA can allocate multiple page blocks, which results in
> +		 * different blocks being marked with different tags. Reset the
> +		 * tags to ignore those page blocks.
> +		 */
> +		for (i = 0; i < count; i++)
> +			page_kasan_tag_reset(nth_page(page, i));
> +	}
> +
> +out_stats:
> +	trace_cma_alloc_range_finish(cma->name, start, count, i, err);
> +
> +	if (err) {
> +		count_vm_events(CMA_ALLOC_FAIL, count);
> +		if (cma)
> +			cma_sysfs_account_fail_pages(cma, count);
> +	} else {
> +		count_vm_events(CMA_ALLOC_SUCCESS, count);
> +		cma_sysfs_account_success_pages(cma, count);
> +	}
> +
> +	return err;
> +}
> +
> +
>  /**
>   * cma_alloc() - allocate pages from contiguous area
>   * @cma:   Contiguous memory region for which the allocation is performed.
Alexandru Elisei Jan. 30, 2024, 11:35 a.m. UTC | #2
Hi,

On Tue, Jan 30, 2024 at 10:50:00AM +0530, Anshuman Khandual wrote:
> 
> 
> On 1/25/24 22:12, Alexandru Elisei wrote:
> > Today, cma_alloc() is used to allocate a contiguous memory region. The
> > function allows the caller to specify the number of pages to allocate, but
> > not the starting address. cma_alloc() will walk over the entire CMA region
> > trying to allocate the first available range of the specified size.
> > 
> > Introduce cma_alloc_range(), which makes CMA more versatile by allowing the
> > caller to specify a particular range in the CMA region, defined by the
> > start pfn and the size.
> > 
> > arm64 will make use of this function when tag storage management will be
> > implemented: cma_alloc_range() will be used to reserve the tag storage
> > associated with a tagged page.
> 
> Basically, you would like to pass on a preferred start address and the
> allocation could just fail if a contig range is not available from such
> a starting address ?
> 
> Then why not just change cma_alloc() to take a new argument 'start_pfn'.
> Why create a new but almost similar allocator ?

I tried doing that, and I gave up because:

- It made cma_alloc() even more complex and hard to follow.

- What value should 'start_pfn' be to tell cma_alloc() that it should be
  ignored? Or, to put it another way, what pfn number is invalid on **all**
  platforms that Linux supports?

I can give it another go if we can come up with an invalid value for
'start_pfn'.

> 
> But then I am wondering why this could not be done in the arm64 platform
> code itself operating on a CMA area reserved just for tag storage. Unless
> this new allocator has other usage beyond MTE, this could be implemented
> in the platform itself.

I had the same idea in the previous iteration; David Hildenbrand suggested
this approach [1].

[1] https://lore.kernel.org/linux-fsdevel/2aafd53f-af1f-45f3-a08c-d11962254315@redhat.com/

Thanks,
Alex

> 
> > 
> > Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
> > ---
> > 
> > Changes since rfc v2:
> > 
> > * New patch.
> > 
> >  include/linux/cma.h        |  2 +
> >  include/trace/events/cma.h | 59 ++++++++++++++++++++++++++
> >  mm/cma.c                   | 86 ++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 147 insertions(+)
> > 
> > diff --git a/include/linux/cma.h b/include/linux/cma.h
> > index 63873b93deaa..e32559da6942 100644
> > --- a/include/linux/cma.h
> > +++ b/include/linux/cma.h
> > @@ -50,6 +50,8 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
> >  					struct cma **res_cma);
> >  extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align,
> >  			      bool no_warn);
> > +extern int cma_alloc_range(struct cma *cma, unsigned long start, unsigned long count,
> > +			   unsigned tries, gfp_t gfp);
> >  extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count);
> >  extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
> >  
> > diff --git a/include/trace/events/cma.h b/include/trace/events/cma.h
> > index 25103e67737c..a89af313a572 100644
> > --- a/include/trace/events/cma.h
> > +++ b/include/trace/events/cma.h
> > @@ -36,6 +36,65 @@ TRACE_EVENT(cma_release,
> >  		  __entry->count)
> >  );
> >  
> > +TRACE_EVENT(cma_alloc_range_start,
> > +
> > +	TP_PROTO(const char *name, unsigned long start, unsigned long count,
> > +		 unsigned tries),
> > +
> > +	TP_ARGS(name, start, count, tries),
> > +
> > +	TP_STRUCT__entry(
> > +		__string(name, name)
> > +		__field(unsigned long, start)
> > +		__field(unsigned long, count)
> > +		__field(unsigned, tries)
> > +	),
> > +
> > +	TP_fast_assign(
> > +		__assign_str(name, name);
> > +		__entry->start = start;
> > +		__entry->count = count;
> > +		__entry->tries = tries;
> > +	),
> > +
> > +	TP_printk("name=%s start=%lx count=%lu tries=%u",
> > +		  __get_str(name),
> > +		  __entry->start,
> > +		  __entry->count,
> > +		  __entry->tries)
> > +);
> > +
> > +TRACE_EVENT(cma_alloc_range_finish,
> > +
> > +	TP_PROTO(const char *name, unsigned long start, unsigned long count,
> > +		 unsigned attempts, int err),
> > +
> > +	TP_ARGS(name, start, count, attempts, err),
> > +
> > +	TP_STRUCT__entry(
> > +		__string(name, name)
> > +		__field(unsigned long, start)
> > +		__field(unsigned long, count)
> > +		__field(unsigned, attempts)
> > +		__field(int, err)
> > +	),
> > +
> > +	TP_fast_assign(
> > +		__assign_str(name, name);
> > +		__entry->start = start;
> > +		__entry->count = count;
> > +		__entry->attempts = attempts;
> > +		__entry->err = err;
> > +	),
> > +
> > +	TP_printk("name=%s start=%lx count=%lu attempts=%u err=%d",
> > +		  __get_str(name),
> > +		  __entry->start,
> > +		  __entry->count,
> > +		  __entry->attempts,
> > +		  __entry->err)
> > +);
> > +
> >  TRACE_EVENT(cma_alloc_start,
> >  
> >  	TP_PROTO(const char *name, unsigned long count, unsigned int align),
> > diff --git a/mm/cma.c b/mm/cma.c
> > index 543bb6b3be8e..4a0f68b9443b 100644
> > --- a/mm/cma.c
> > +++ b/mm/cma.c
> > @@ -416,6 +416,92 @@ static void cma_debug_show_areas(struct cma *cma)
> >  static inline void cma_debug_show_areas(struct cma *cma) { }
> >  #endif
> >  
> > +/**
> > + * cma_alloc_range() - allocate pages in a specific range
> > + * @cma:   Contiguous memory region for which the allocation is performed.
> > + * @start: Starting pfn of the allocation.
> > + * @count: Requested number of pages
> > + * @tries: Number of tries if the range is busy
> > + * @no_warn: Avoid printing message about failed allocation
> > + *
> > + * This function allocates part of contiguous memory from a specific contiguous
> > + * memory area, from the specified starting address. The 'start' pfn and the the
> > + * 'count' number of pages must be aligned to the CMA bitmap order per bit.
> > + */
> > +int cma_alloc_range(struct cma *cma, unsigned long start, unsigned long count,
> > +		    unsigned tries, gfp_t gfp)
> > +{
> > +	unsigned long bitmap_maxno, bitmap_no, bitmap_start, bitmap_count;
> > +	unsigned long i = 0;
> > +	struct page *page;
> > +	int err = -EINVAL;
> > +
> > +	if (!cma || !cma->count || !cma->bitmap)
> > +		goto out_stats;
> > +
> > +	trace_cma_alloc_range_start(cma->name, start, count, tries);
> > +
> > +	if (!count || start < cma->base_pfn ||
> > +	    start + count > cma->base_pfn + cma->count)
> > +		goto out_stats;
> > +
> > +	if (!IS_ALIGNED(start | count, 1 << cma->order_per_bit))
> > +		goto out_stats;
> > +
> > +	bitmap_start = (start - cma->base_pfn) >> cma->order_per_bit;
> > +	bitmap_maxno = cma_bitmap_maxno(cma);
> > +	bitmap_count = cma_bitmap_pages_to_bits(cma, count);
> > +
> > +	spin_lock_irq(&cma->lock);
> > +	bitmap_no = bitmap_find_next_zero_area(cma->bitmap, bitmap_maxno,
> > +					       bitmap_start, bitmap_count, 0);
> > +	if (bitmap_no != bitmap_start) {
> > +		spin_unlock_irq(&cma->lock);
> > +		err = -EEXIST;
> > +		goto out_stats;
> > +	}
> > +	bitmap_set(cma->bitmap, bitmap_start, bitmap_count);
> > +	spin_unlock_irq(&cma->lock);
> > +
> > +	for (i = 0; i < tries; i++) {
> > +		mutex_lock(&cma_mutex);
> > +		err = alloc_contig_range(start, start + count, MIGRATE_CMA, gfp);
> > +		mutex_unlock(&cma_mutex);
> > +
> > +		if (err != -EBUSY)
> > +			break;
> > +	}
> > +
> > +	if (err) {
> > +		cma_clear_bitmap(cma, start, count);
> > +	} else {
> > +		page = pfn_to_page(start);
> > +
> > +		/*
> > +		 * CMA can allocate multiple page blocks, which results in
> > +		 * different blocks being marked with different tags. Reset the
> > +		 * tags to ignore those page blocks.
> > +		 */
> > +		for (i = 0; i < count; i++)
> > +			page_kasan_tag_reset(nth_page(page, i));
> > +	}
> > +
> > +out_stats:
> > +	trace_cma_alloc_range_finish(cma->name, start, count, i, err);
> > +
> > +	if (err) {
> > +		count_vm_events(CMA_ALLOC_FAIL, count);
> > +		if (cma)
> > +			cma_sysfs_account_fail_pages(cma, count);
> > +	} else {
> > +		count_vm_events(CMA_ALLOC_SUCCESS, count);
> > +		cma_sysfs_account_success_pages(cma, count);
> > +	}
> > +
> > +	return err;
> > +}
> > +
> > +
> >  /**
> >   * cma_alloc() - allocate pages from contiguous area
> >   * @cma:   Contiguous memory region for which the allocation is performed.
>
Anshuman Khandual Jan. 31, 2024, 6:24 a.m. UTC | #3
On 1/30/24 17:05, Alexandru Elisei wrote:
> Hi,
> 
> On Tue, Jan 30, 2024 at 10:50:00AM +0530, Anshuman Khandual wrote:
>>
>> On 1/25/24 22:12, Alexandru Elisei wrote:
>>> Today, cma_alloc() is used to allocate a contiguous memory region. The
>>> function allows the caller to specify the number of pages to allocate, but
>>> not the starting address. cma_alloc() will walk over the entire CMA region
>>> trying to allocate the first available range of the specified size.
>>>
>>> Introduce cma_alloc_range(), which makes CMA more versatile by allowing the
>>> caller to specify a particular range in the CMA region, defined by the
>>> start pfn and the size.
>>>
>>> arm64 will make use of this function when tag storage management will be
>>> implemented: cma_alloc_range() will be used to reserve the tag storage
>>> associated with a tagged page.
>> Basically, you would like to pass on a preferred start address and the
>> allocation could just fail if a contig range is not available from such
>> a starting address ?
>>
>> Then why not just change cma_alloc() to take a new argument 'start_pfn'.
>> Why create a new but almost similar allocator ?
> I tried doing that, and I gave up because:
> 
> - It made cma_alloc() even more complex and hard to follow.
> 
> - What value should 'start_pfn' be to tell cma_alloc() that it should be
>   ignored? Or, to put it another way, what pfn number is invalid on **all**
>   platforms that Linux supports?
> 
> I can give it another go if we can come up with an invalid value for
> 'start_pfn'.

Something negative might work. How about -1/-1UL ? A quick search gives
some instances such as ...

git grep "pfn == -1"

mm/mm_init.c:   if (*start_pfn == -1UL)
mm/vmscan.c:            if (pfn == -1)
mm/vmscan.c:            if (pfn == -1)
mm/vmscan.c:            if (pfn == -1)
tools/testing/selftests/mm/hugepage-vmemmap.c:  if (pfn == -1UL) {

Could -1UL not be abstracted as a common macro, MM_INVALID_PFN, to be used
in such scenarios, including here?

> 
>> But then I am wondering why this could not be done in the arm64 platform
>> code itself operating on a CMA area reserved just for tag storage. Unless
>> this new allocator has other usage beyond MTE, this could be implemented
>> in the platform itself.
> I had the same idea in the previous iteration, David Hildenbrand suggested
> this approach [1].
> 
> [1] https://lore.kernel.org/linux-fsdevel/2aafd53f-af1f-45f3-a08c-d11962254315@redhat.com/

There are two different cma_alloc() proposals here, including the next
patch, i.e. "mm: cma: Fast track allocating memory when the pages are free":

1) Augment cma_alloc() or add cma_alloc_range() with start_pfn parameter
2) Speed up cma_alloc() for small allocation requests when pages are free

The second one, if separated out from this series, could be considered on
its own, as it will help all existing cma_alloc() callers. The first one
definitely needs a use case, as provided in this series.
Alexandru Elisei Jan. 31, 2024, 2:18 p.m. UTC | #4
Hi,

On Wed, Jan 31, 2024 at 11:54:17AM +0530, Anshuman Khandual wrote:
> 
> 
> On 1/30/24 17:05, Alexandru Elisei wrote:
> > Hi,
> > 
> > On Tue, Jan 30, 2024 at 10:50:00AM +0530, Anshuman Khandual wrote:
> >>
> >> On 1/25/24 22:12, Alexandru Elisei wrote:
> >>> Today, cma_alloc() is used to allocate a contiguous memory region. The
> >>> function allows the caller to specify the number of pages to allocate, but
> >>> not the starting address. cma_alloc() will walk over the entire CMA region
> >>> trying to allocate the first available range of the specified size.
> >>>
> >>> Introduce cma_alloc_range(), which makes CMA more versatile by allowing the
> >>> caller to specify a particular range in the CMA region, defined by the
> >>> start pfn and the size.
> >>>
> >>> arm64 will make use of this function when tag storage management will be
> >>> implemented: cma_alloc_range() will be used to reserve the tag storage
> >>> associated with a tagged page.
> >> Basically, you would like to pass on a preferred start address and the
> >> allocation could just fail if a contig range is not available from such
> >> a starting address ?
> >>
> >> Then why not just change cma_alloc() to take a new argument 'start_pfn'.
> >> Why create a new but almost similar allocator ?
> > I tried doing that, and I gave up because:
> > 
> > - It made cma_alloc() even more complex and hard to follow.
> > 
> > - What value should 'start_pfn' be to tell cma_alloc() that it should be
> >   ignored? Or, to put it another way, what pfn number is invalid on **all**
> >   platforms that Linux supports?
> > 
> > I can give it another go if we can come up with an invalid value for
> > 'start_pfn'.
> 
> Something negative might work. How about -1/-1UL ? A quick search gives
> some instances such as ...
> 
> git grep "pfn == -1"
> 
> mm/mm_init.c:   if (*start_pfn == -1UL)
> mm/vmscan.c:            if (pfn == -1)
> mm/vmscan.c:            if (pfn == -1)
> mm/vmscan.c:            if (pfn == -1)
> tools/testing/selftests/mm/hugepage-vmemmap.c:  if (pfn == -1UL) {
> 
> Could not -1UL be abstracted as common macro MM_INVALID_PFN to be used in
> such scenarios including here ?

Ah yes, you are right, get_pte_pfn() already uses -1 as an invalid pfn, so
I can just use that.

Will definitely give it a go on the next iteration, thanks for the
suggestion!

> 
> > 
> >> But then I am wondering why this could not be done in the arm64 platform
> >> code itself operating on a CMA area reserved just for tag storage. Unless
> >> this new allocator has other usage beyond MTE, this could be implemented
> >> in the platform itself.
> > I had the same idea in the previous iteration, David Hildenbrand suggested
> > this approach [1].
> > 
> > [1] https://lore.kernel.org/linux-fsdevel/2aafd53f-af1f-45f3-a08c-d11962254315@redhat.com/
> 
> There are two different cma_alloc() proposals here - including the next
> patch i.e mm: cma: Fast track allocating memory when the pages are free
> 
> 1) Augment cma_alloc() or add cma_alloc_range() with start_pfn parameter
> 2) Speed up cma_alloc() for small allocation requests when pages are free
> 
> The second one if separated out from this series could be considered on
> its own as it will help all existing cma_alloc() callers. The first one
> definitely needs an use case as provided in this series.

I understand, thanks for the input!

Alex
diff mbox series

Patch

diff --git a/include/linux/cma.h b/include/linux/cma.h
index 63873b93deaa..e32559da6942 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -50,6 +50,8 @@  extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 					struct cma **res_cma);
 extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align,
 			      bool no_warn);
+extern int cma_alloc_range(struct cma *cma, unsigned long start, unsigned long count,
+			   unsigned tries, gfp_t gfp);
 extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count);
 extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
 
diff --git a/include/trace/events/cma.h b/include/trace/events/cma.h
index 25103e67737c..a89af313a572 100644
--- a/include/trace/events/cma.h
+++ b/include/trace/events/cma.h
@@ -36,6 +36,65 @@  TRACE_EVENT(cma_release,
 		  __entry->count)
 );
 
+TRACE_EVENT(cma_alloc_range_start,
+	/* Records the name, start, count and tries passed in by the caller. */
+	TP_PROTO(const char *name, unsigned long start, unsigned long count,
+		 unsigned tries),
+
+	TP_ARGS(name, start, count, tries),
+
+	TP_STRUCT__entry(
+		__string(name, name)
+		__field(unsigned long, start)
+		__field(unsigned long, count)
+		__field(unsigned, tries)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, name);
+		__entry->start = start;
+		__entry->count = count;
+		__entry->tries = tries;
+	),
+
+	TP_printk("name=%s start=%lx count=%lu tries=%u",
+		  __get_str(name),
+		  __entry->start,
+		  __entry->count,
+		  __entry->tries)
+);
+
+TRACE_EVENT(cma_alloc_range_finish,
+	/* Records name/start/count/attempts/err exactly as passed in. */
+	TP_PROTO(const char *name, unsigned long start, unsigned long count,
+		 unsigned attempts, int err),
+
+	TP_ARGS(name, start, count, attempts, err),
+
+	TP_STRUCT__entry(
+		__string(name, name)
+		__field(unsigned long, start)
+		__field(unsigned long, count)
+		__field(unsigned, attempts)
+		__field(int, err)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, name);
+		__entry->start = start;
+		__entry->count = count;
+		__entry->attempts = attempts;
+		__entry->err = err;
+	),
+
+	TP_printk("name=%s start=%lx count=%lu attempts=%u err=%d",
+		  __get_str(name),
+		  __entry->start,
+		  __entry->count,
+		  __entry->attempts,
+		  __entry->err)
+);
+
 TRACE_EVENT(cma_alloc_start,
 
 	TP_PROTO(const char *name, unsigned long count, unsigned int align),
diff --git a/mm/cma.c b/mm/cma.c
index 543bb6b3be8e..4a0f68b9443b 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -416,6 +416,92 @@  static void cma_debug_show_areas(struct cma *cma)
 static inline void cma_debug_show_areas(struct cma *cma) { }
 #endif
 
+/**
+ * cma_alloc_range() - allocate pages in a specific range
+ * @cma:   Contiguous memory region for which the allocation is performed.
+ * @start: Starting pfn of the allocation.
+ * @count: Requested number of pages
+ * @tries: Number of tries if the range is busy
+ * @gfp:   GFP flags passed to alloc_contig_range().
+ *
+ * This function allocates part of contiguous memory from a specific contiguous
+ * memory area, from the specified starting address. The 'start' pfn and the
+ * 'count' number of pages must be aligned to the CMA bitmap order per bit.
+ *
+ * Return: 0 on success; -EINVAL for a bad area or range, or if @tries is 0;
+ * -EEXIST if the range is already reserved; else alloc_contig_range()'s error.
+ */
+int cma_alloc_range(struct cma *cma, unsigned long start, unsigned long count,
+		    unsigned tries, gfp_t gfp)
+{
+	unsigned long bitmap_maxno, bitmap_no, bitmap_start, bitmap_count, i;
+	unsigned int attempts = 0;
+	int err = -EINVAL;
+
+	if (!cma || !cma->count || !cma->bitmap)
+		goto out_stats;
+
+	trace_cma_alloc_range_start(cma->name, start, count, tries);
+
+	/* Compare offsets, not sums, so huge 'start'/'count' cannot wrap. */
+	if (!count || count > cma->count || start < cma->base_pfn ||
+	    start - cma->base_pfn > cma->count - count ||
+	    !IS_ALIGNED(start | count, 1UL << cma->order_per_bit))
+		goto out_stats;
+
+	bitmap_start = (start - cma->base_pfn) >> cma->order_per_bit;
+	bitmap_maxno = cma_bitmap_maxno(cma);
+	bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+
+	spin_lock_irq(&cma->lock);
+	bitmap_no = bitmap_find_next_zero_area(cma->bitmap, bitmap_maxno,
+					       bitmap_start, bitmap_count, 0);
+	if (bitmap_no != bitmap_start) {
+		spin_unlock_irq(&cma->lock);
+		err = -EEXIST;
+		goto out_stats;
+	}
+	bitmap_set(cma->bitmap, bitmap_start, bitmap_count);
+	spin_unlock_irq(&cma->lock);
+
+	while (attempts < tries) {
+		attempts++;
+		mutex_lock(&cma_mutex);
+		err = alloc_contig_range(start, start + count, MIGRATE_CMA, gfp);
+		mutex_unlock(&cma_mutex);
+		if (err != -EBUSY)
+			break;
+	}
+
+	if (err) {
+		cma_clear_bitmap(cma, start, count);
+	} else {
+		/*
+		 * CMA can allocate multiple page blocks, which results in
+		 * different blocks being marked with different tags. Reset the
+		 * tags to ignore those page blocks.
+		 */
+		for (i = 0; i < count; i++)
+			page_kasan_tag_reset(nth_page(pfn_to_page(start), i));
+	}
+
+out_stats:
+	/* 'cma' may be NULL here (first check), so guard the name lookup. */
+	if (cma)
+		trace_cma_alloc_range_finish(cma->name, start, count, attempts, err);
+
+	if (err) {
+		count_vm_events(CMA_ALLOC_FAIL, count);
+		if (cma)
+			cma_sysfs_account_fail_pages(cma, count);
+	} else {
+		count_vm_events(CMA_ALLOC_SUCCESS, count);
+		cma_sysfs_account_success_pages(cma, count);
+	}
+
+	return err;
+}
+
 /**
  * cma_alloc() - allocate pages from contiguous area
  * @cma:   Contiguous memory region for which the allocation is performed.