diff mbox series

[v2,3/4] io_uring/rsrc: add init and account functions for coalesced imus

Message ID 20240511055229.352481-4-cliang01.li@samsung.com (mailing list archive)
State New
Headers show
Series io_uring/rsrc: coalescing multi-hugepage registered buffers | expand

Commit Message

Chenliang Li May 11, 2024, 5:52 a.m. UTC
This patch depends on patch 1 and 2.
Introduces two functions to separate the coalesced imu alloc and
accounting path from the original one. This helps to keep the original
code path clean.

Signed-off-by: Chenliang Li <cliang01.li@samsung.com>
---
 io_uring/rsrc.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

Comments

Jens Axboe May 11, 2024, 4:48 p.m. UTC | #1
On 5/10/24 11:52 PM, Chenliang Li wrote:
> This patch depends on patch 1 and 2.

What does "patch 1 and 2" mean here, once it's in the git log? It
doesn't really mean anything. It's quite natural for patches in a series
to have dependencies on each other, e.g. patch 3 requires 1 and 2.
Highlighting it doesn't really add anything, so just get rid of that.

> Introduces two functions to separate the coalesced imu alloc and
> accounting path from the original one. This helps to keep the original
> code path clean.
> 
> Signed-off-by: Chenliang Li <cliang01.li@samsung.com>
> ---
>  io_uring/rsrc.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 86 insertions(+)
> 
> diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
> index 578d382ca9bc..7f95eba72f1c 100644
> --- a/io_uring/rsrc.c
> +++ b/io_uring/rsrc.c
> @@ -871,6 +871,42 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
>  	return ret;
>  }
>  
> +static int io_coalesced_buffer_account_pin(struct io_ring_ctx *ctx,
> +					   struct page **pages,
> +					   struct io_mapped_ubuf *imu,
> +					   struct page **last_hpage,
> +					   struct io_imu_folio_data *data)
> +{
> +	int i, j, ret;
> +
> +	imu->acct_pages = 0;
> +	j = 0;
> +	for (i = 0; i < data->nr_folios; i++) {
> +		struct page *hpage = pages[j];
> +
> +		if (hpage == *last_hpage)
> +			continue;
> +		*last_hpage = hpage;
> +		/*
> +		 * Already checked the page array in try coalesce,
> +		 * so pass in nr_pages=0 here to waive that.
> +		 */
> +		if (headpage_already_acct(ctx, pages, 0, hpage))
> +			continue;
> +		imu->acct_pages += data->nr_pages_mid;
> +		j += (i == 0) ?
> +			data->nr_pages_head : data->nr_pages_mid;

Can we just initialize 'j' to data->nr_pages_head and change this to be:

	if (i)
		j += data->nr_pages_mid;

That would be a lot cleaner.

> +	if (!imu->acct_pages)
> +		return 0;
> +
> +	ret = io_account_mem(ctx, imu->acct_pages);
> +	if (ret)
> +		imu->acct_pages = 0;
> +	return ret;
> +}

	ret = io_account_mem(ctx, imu->acct_pages);
	if (!ret)
		return 0;
	imu->acct_pages = 0;
	return ret;

> +				  struct io_mapped_ubuf **pimu,
> +				  struct page **last_hpage, struct page **pages,
> +				  struct io_imu_folio_data *data)
> +{
> +	struct io_mapped_ubuf *imu = NULL;
> +	unsigned long off;
> +	size_t size, vec_len;
> +	int ret, i, j;
> +
> +	ret = -ENOMEM;
> +	imu = kvmalloc(struct_size(imu, bvec, data->nr_folios), GFP_KERNEL);
> +	if (!imu)
> +		return ret;
> +
> +	ret = io_coalesced_buffer_account_pin(ctx, pages, imu, last_hpage,
> +						data);
> +	if (ret) {
> +		j = 0;
> +		for (i = 0; i < data->nr_folios; i++) {
> +			unpin_user_page(pages[j]);
> +			j += (i == 0) ?
> +				data->nr_pages_head : data->nr_pages_mid;
> +		}
> +		return ret;

Same comment here.
Chenliang Li May 13, 2024, 2:16 a.m. UTC | #2
On Sat, 11 May 2024 10:48:18 -0600 Jens Axboe wrote:
> On 5/10/24 11:52 PM, Chenliang Li wrote:
>> This patch depends on patch 1 and 2.

> What does "patch 1 and 2" mean here, once it's in the git log? It
> doesn't really mean anything. It's quite natural for patches in a series
> to have dependencies on each other, e.g. patch 3 requires 1 and 2.
> Highlighting it doesn't really add anything, so just get rid of that.

will delete that in V3.

>> +static int io_coalesced_buffer_account_pin(struct io_ring_ctx *ctx,
>> +					   struct page **pages,
>> +					   struct io_mapped_ubuf *imu,
>> +					   struct page **last_hpage,
>> +					   struct io_imu_folio_data *data)
>> +{
>> +	int i, j, ret;
>> +
>> +	imu->acct_pages = 0;
>> +	j = 0;
>> +	for (i = 0; i < data->nr_folios; i++) {
>> +		struct page *hpage = pages[j];
>> +
>> +		if (hpage == *last_hpage)
>> +			continue;
>> +		*last_hpage = hpage;
>> +		/*
>> +		 * Already checked the page array in try coalesce,
>> +		 * so pass in nr_pages=0 here to waive that.
>> +		 */
>> +		if (headpage_already_acct(ctx, pages, 0, hpage))
>> +			continue;
>> +		imu->acct_pages += data->nr_pages_mid;
>> +		j += (i == 0) ?
>> +			data->nr_pages_head : data->nr_pages_mid;
>
> Can we just initialize 'j' to data->nr_pages_head and change this to be:
>
>	if (i)
>		j += data->nr_pages_mid;

Yes, will change it in V3.

>> +	if (!imu->acct_pages)
>> +		return 0;
>> +
>> +	ret = io_account_mem(ctx, imu->acct_pages);
>> +	if (ret)
>> +		imu->acct_pages = 0;
>> +	return ret;
>> +}
>
>	ret = io_account_mem(ctx, imu->acct_pages);
>	if (!ret)
>		return 0;
>	imu->acct_pages = 0;
>	return ret;

Will change it.

>> +	if (ret) {
>> +		j = 0;
>> +		for (i = 0; i < data->nr_folios; i++) {
>> +			unpin_user_page(pages[j]);
>> +			j += (i == 0) ?
>> +				data->nr_pages_head : data->nr_pages_mid;
>> +		}
>> +		return ret;

> Same comment here.

Will change it.
diff mbox series

Patch

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 578d382ca9bc..7f95eba72f1c 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -871,6 +871,42 @@  static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
 	return ret;
 }
 
+static int io_coalesced_buffer_account_pin(struct io_ring_ctx *ctx,
+					   struct page **pages,
+					   struct io_mapped_ubuf *imu,
+					   struct page **last_hpage,
+					   struct io_imu_folio_data *data)
+{
+	int i, j, ret;
+
+	imu->acct_pages = 0;
+	j = 0;
+	for (i = 0; i < data->nr_folios; i++) {
+		struct page *hpage = pages[j];
+
+		if (hpage == *last_hpage)
+			continue;
+		*last_hpage = hpage;
+		/*
+		 * Already checked the page array in try coalesce,
+		 * so pass in nr_pages=0 here to waive that.
+		 */
+		if (headpage_already_acct(ctx, pages, 0, hpage))
+			continue;
+		imu->acct_pages += data->nr_pages_mid;
+		j += (i == 0) ?
+			data->nr_pages_head : data->nr_pages_mid;
+	}
+
+	if (!imu->acct_pages)
+		return 0;
+
+	ret = io_account_mem(ctx, imu->acct_pages);
+	if (ret)
+		imu->acct_pages = 0;
+	return ret;
+}
+
 static bool __io_sqe_buffer_try_coalesce(struct page **pages, int nr_pages,
 					 struct io_imu_folio_data *data)
 {
@@ -949,6 +985,56 @@  static bool io_sqe_buffer_try_coalesce(struct page **pages, int nr_pages,
 	return true;
 }
 
+static int io_coalesced_imu_alloc(struct io_ring_ctx *ctx, struct iovec *iov,
+				  struct io_mapped_ubuf **pimu,
+				  struct page **last_hpage, struct page **pages,
+				  struct io_imu_folio_data *data)
+{
+	struct io_mapped_ubuf *imu = NULL;
+	unsigned long off;
+	size_t size, vec_len;
+	int ret, i, j;
+
+	ret = -ENOMEM;
+	imu = kvmalloc(struct_size(imu, bvec, data->nr_folios), GFP_KERNEL);
+	if (!imu)
+		return ret;
+
+	ret = io_coalesced_buffer_account_pin(ctx, pages, imu, last_hpage,
+						data);
+	if (ret) {
+		j = 0;
+		for (i = 0; i < data->nr_folios; i++) {
+			unpin_user_page(pages[j]);
+			j += (i == 0) ?
+				data->nr_pages_head : data->nr_pages_mid;
+		}
+		return ret;
+	}
+	off = (unsigned long) iov->iov_base & ~PAGE_MASK;
+	size = iov->iov_len;
+	/* store original address for later verification */
+	imu->ubuf = (unsigned long) iov->iov_base;
+	imu->ubuf_end = imu->ubuf + iov->iov_len;
+	imu->nr_bvecs = data->nr_folios;
+	imu->folio_shift = data->folio_shift;
+	imu->folio_mask = ~((1UL << data->folio_shift) - 1);
+	*pimu = imu;
+	ret = 0;
+
+	vec_len = min_t(size_t, size, PAGE_SIZE * data->nr_pages_head - off);
+	bvec_set_page(&imu->bvec[0], pages[0], vec_len, off);
+	size -= vec_len;
+	j = data->nr_pages_head;
+	for (i = 1; i < data->nr_folios; i++) {
+		vec_len = min_t(size_t, size, data->folio_size);
+		bvec_set_page(&imu->bvec[i], pages[j], vec_len, 0);
+		size -= vec_len;
+		j += data->nr_pages_mid;
+	}
+	return ret;
+}
+
 static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 				  struct io_mapped_ubuf **pimu,
 				  struct page **last_hpage)