
[RFC,v2,04/14] mm: create common code for request allocation based on blk-mq code

Message ID 157617507410.42350.16156693139630931510.stgit@djiang5-desk3.ch.intel.com (mailing list archive)
State Superseded
Series idxd driver for Intel Data Streaming Accelerator

Commit Message

Dave Jiang Dec. 12, 2019, 6:24 p.m. UTC
Move the allocation of requests from compound pages into a common function
so it can be used by other callers. Since the routine has more to do with
memory allocation and management than with block I/O, move it under the mm
subsystem and export it through mempool.h.
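
For illustration, a caller outside of blk-mq would pair the two helpers
roughly like this (the my_* names below are made up for the example; the
entries array is sized for 'depth' by the caller):

/* sketch; assumes <linux/list.h> and <linux/mempool.h> */
struct my_ring {
	struct list_head page_list;	/* backing pages for the entries */
	void **entries;			/* filled in by the assign callback */
};

static void my_assign(void *ctx, void *entry, int idx)
{
	struct my_ring *ring = ctx;

	ring->entries[idx] = entry;
}

static int my_ring_init(struct my_ring *ring, unsigned int depth,
			size_t entry_size, int node)
{
	int rc;

	INIT_LIST_HEAD(&ring->page_list);
	rc = request_from_pages_alloc(ring, depth, entry_size,
				      &ring->page_list, 4, node, my_assign);
	return rc < 0 ? rc : 0;
}

static void my_ring_destroy(struct my_ring *ring)
{
	/* frees every backing page in one call */
	request_from_pages_free(&ring->page_list);
}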

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 block/blk-mq.c          |   94 +++++++++++++----------------------------------
 include/linux/mempool.h |    6 +++
 mm/Makefile             |    2 -
 mm/request_alloc.c      |   95 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 128 insertions(+), 69 deletions(-)
 create mode 100644 mm/request_alloc.c

Comments

Andrew Morton Dec. 13, 2019, 12:43 a.m. UTC | #1
On Thu, 12 Dec 2019 11:24:34 -0700 Dave Jiang <dave.jiang@intel.com> wrote:

> Move the allocation of requests from compound pages into a common function
> so it can be used by other callers.

What other callers are expected?

> Since the routine has more to do with memory allocation and management
> than with block I/O, move it under the mm subsystem and export it through
> mempool.h.
> 

Hm, this move doesn't seem to fit very well.  But perhaps it's close
enough.

> --- a/mm/Makefile
> +++ b/mm/Makefile
> @@ -42,7 +42,7 @@ obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
>  			   mm_init.o mmu_context.o percpu.o slab_common.o \
>  			   compaction.o vmacache.o \
>  			   interval_tree.o list_lru.o workingset.o \
> -			   debug.o gup.o $(mmu-y)
> +			   debug.o gup.o request_alloc.o $(mmu-y)

Now there's a regression.  We're adding a bunch of unused code to a
CONFIG_BLOCK=n kernel.

>
> ...
>
> +void request_from_pages_free(struct list_head *page_list)
>
> ...
>
> +int request_from_pages_alloc(void *ctx, unsigned int depth, size_t rq_size,
> +			     struct list_head *page_list, int max_order,
> +			     int node,
> +			     void (*assign)(void *ctx, void *req, int idx))

I find these function names hard to understand.  Are they well chosen?

Some documentation would help.  These are global, exported-to-modules
API functions and they really should be fully documented.

> +{
> +	size_t left;
> +	unsigned int i, j, entries_per_page;
> +
> +	left = rq_size * depth;
> +
> +	for (i = 0; i < depth; ) {

"depth" of what?

> +		int this_order = max_order;
> +		struct page *page;
> +		int to_do;
> +		void *p;
> +
> +		while (this_order && left < order_to_size(this_order - 1))
> +			this_order--;
> +
> +		do {
> +			page = alloc_pages_node(node,
> +						GFP_NOIO | __GFP_NOWARN |
> +						__GFP_NORETRY | __GFP_ZERO,
> +						this_order);
> +			if (page)
> +				break;
> +			if (!this_order--)
> +				break;
> +			if (order_to_size(this_order) < rq_size)
> +				break;
> +		} while (1);

What the heck is all the above trying to do?  Some explanatory comments
are needed, methinks.

> +		if (!page)
> +			goto fail;
> +
> +		page->private = this_order;
> +		list_add_tail(&page->lru, page_list);
> +
> +		p = page_address(page);
> +		/*
> +		 * Allow kmemleak to scan these pages as they contain pointers
> +		 * to additional allocations like via ops->init_request().
> +		 */
> +		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
> +		entries_per_page = order_to_size(this_order) / rq_size;
> +		to_do = min(entries_per_page, depth - i);
> +		left -= to_do * rq_size;
> +		for (j = 0; j < to_do; j++) {
> +			assign((void *)ctx, p, i);
> +			p += rq_size;
> +			i++;
> +		}
> +	}
> +
> +	return i;
> +
> +fail:
> +	request_from_pages_free(page_list);
> +	return -ENOMEM;
> +}
> +EXPORT_SYMBOL_GPL(request_from_pages_alloc);
Dave Jiang Dec. 13, 2019, 10:06 p.m. UTC | #2
On 12/12/19 5:43 PM, Andrew Morton wrote:
> On Thu, 12 Dec 2019 11:24:34 -0700 Dave Jiang <dave.jiang@intel.com> wrote:
> 
>> Move the allocation of requests from compound pages into a common function
>> so it can be used by other callers.
> 
> What other callers are expected?

I'm introducing a user in the dmaengine subsystem, so it will be the
block and dmaengine subsystems so far.

> 
>> Since the routine has more to do with memory allocation and management
>> than with block I/O, move it under the mm subsystem and export it through
>> mempool.h.
>>
> 
> Hm, this move doesn't seem to fit very well.  But perhaps it's close
> enough.

I'm open to suggestions.

> 
>> --- a/mm/Makefile
>> +++ b/mm/Makefile
>> @@ -42,7 +42,7 @@ obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
>>   			   mm_init.o mmu_context.o percpu.o slab_common.o \
>>   			   compaction.o vmacache.o \
>>   			   interval_tree.o list_lru.o workingset.o \
>> -			   debug.o gup.o $(mmu-y)
>> +			   debug.o gup.o request_alloc.o $(mmu-y)
> 
> Now there's a regression.  We're adding a bunch of unused code to a
> CONFIG_BLOCK=n kernel.

I will introduce a Kconfig symbol and have block and dmaengine select it
so this code is only built when needed.
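
Sketch only, symbol name TBD:

# mm/Kconfig
config REQUEST_ALLOC
	bool

# mm/Makefile, instead of adding it unconditionally to obj-y:
obj-$(CONFIG_REQUEST_ALLOC) += request_alloc.o

and then block/Kconfig and drivers/dma/Kconfig add "select REQUEST_ALLOC"
to the options that need it.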

> 
>>
>> ...
>>
>> +void request_from_pages_free(struct list_head *page_list)
>>
>> ...
>>
>> +int request_from_pages_alloc(void *ctx, unsigned int depth, size_t rq_size,
>> +			     struct list_head *page_list, int max_order,
>> +			     int node,
>> +			     void (*assign)(void *ctx, void *req, int idx))
> 
> I find these function names hard to understand.  Are they well chosen?

The names could probably be better. Maybe
context_alloc_from_pages() and context_free_from_pages()?

The background is that I saw a block of code in blk-mq that I can reuse
for the new dmaengine request allocation. The code pre-allocates requests
from compound pages. I contacted Jens and he was OK with moving it out of
blk-mq into a common location. Maybe the name "request" is too specific
and does not make sense for a "general" allocator helper.

> 
> Some documentation would help.  These are global, exported-to-modules
> API functions and they really should be fully documented.

Yes, I will add full documentation for these functions.
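
As a first cut for the allocation side, something like:

/**
 * request_from_pages_alloc - pre-allocate fixed-size entries backed by pages
 * @ctx: opaque context passed through to @assign
 * @depth: number of entries to allocate
 * @rq_size: size in bytes of each entry
 * @page_list: list head the backing pages get chained onto
 * @max_order: highest page order to attempt for each allocation
 * @node: NUMA node to allocate the pages from
 * @assign: called once per entry with @ctx, the entry address, and its index
 *
 * Carves @depth entries of @rq_size bytes each out of freshly allocated
 * compound pages. Returns the number of entries allocated on success; on
 * allocation failure, frees everything already chained to @page_list and
 * returns -ENOMEM. The caller releases the pages with
 * request_from_pages_free().
 */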

Jens, since you are the original author of this code, do you mind
providing some commentary on the quirks of this code block? I can do my
best to try to explain it, but you probably know the code much better
than I do. Thanks!
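
In the meantime, my rough reading of the allocation loop, as comments:

/*
 * left is the number of bytes still needed for the remaining entries.
 * The outer loop runs until all 'depth' entries have been handed out.
 *
 * The first while shrinks this_order so we never ask for a bigger page
 * than the remaining entries need: while everything left would already
 * fit in the next smaller order, step down.
 *
 * The do/while then tries to allocate at this_order and, on failure,
 * falls back to smaller and smaller orders, giving up once order 0 has
 * failed or the order is too small to hold even one entry of rq_size.
 *
 * Each page that comes back is carved into rq_size-sized chunks, and
 * assign() is called with each chunk's address and its index.
 */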

> 
>> +{
>> +	size_t left;
>> +	unsigned int i, j, entries_per_page;
>> +
>> +	left = rq_size * depth;
>> +
>> +	for (i = 0; i < depth; ) {
> 
> "depth" of what?
> 
>> +		int this_order = max_order;
>> +		struct page *page;
>> +		int to_do;
>> +		void *p;
>> +
>> +		while (this_order && left < order_to_size(this_order - 1))
>> +			this_order--;
>> +
>> +		do {
>> +			page = alloc_pages_node(node,
>> +						GFP_NOIO | __GFP_NOWARN |
>> +						__GFP_NORETRY | __GFP_ZERO,
>> +						this_order);
>> +			if (page)
>> +				break;
>> +			if (!this_order--)
>> +				break;
>> +			if (order_to_size(this_order) < rq_size)
>> +				break;
>> +		} while (1);
> 
> What the heck is all the above trying to do?  Some explanatory comments
> are needed, methinks.
> 
>> +		if (!page)
>> +			goto fail;
>> +
>> +		page->private = this_order;
>> +		list_add_tail(&page->lru, page_list);
>> +
>> +		p = page_address(page);
>> +		/*
>> +		 * Allow kmemleak to scan these pages as they contain pointers
>> +		 * to additional allocations like via ops->init_request().
>> +		 */
>> +		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
>> +		entries_per_page = order_to_size(this_order) / rq_size;
>> +		to_do = min(entries_per_page, depth - i);
>> +		left -= to_do * rq_size;
>> +		for (j = 0; j < to_do; j++) {
>> +			assign((void *)ctx, p, i);
>> +			p += rq_size;
>> +			i++;
>> +		}
>> +	}
>> +
>> +	return i;
>> +
>> +fail:
>> +	request_from_pages_free(page_list);
>> +	return -ENOMEM;
>> +}
>> +EXPORT_SYMBOL_GPL(request_from_pages_alloc);

Patch

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 323c9cb28066..10f3fecb381a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -10,7 +10,6 @@ 
 #include <linux/backing-dev.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
-#include <linux/kmemleak.h>
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -26,6 +25,7 @@ 
 #include <linux/delay.h>
 #include <linux/crash_dump.h>
 #include <linux/prefetch.h>
+#include <linux/mempool.h>
 
 #include <trace/events/block.h>
 
@@ -2015,8 +2015,6 @@  static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 		     unsigned int hctx_idx)
 {
-	struct page *page;
-
 	if (tags->rqs && set->ops->exit_request) {
 		int i;
 
@@ -2030,16 +2028,7 @@  void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 		}
 	}
 
-	while (!list_empty(&tags->page_list)) {
-		page = list_first_entry(&tags->page_list, struct page, lru);
-		list_del_init(&page->lru);
-		/*
-		 * Remove kmemleak object previously allocated in
-		 * blk_mq_alloc_rqs().
-		 */
-		kmemleak_free(page_address(page));
-		__free_pages(page, page->private);
-	}
+	request_from_pages_free(&tags->page_list);
 }
 
 void blk_mq_free_rq_map(struct blk_mq_tags *tags)
@@ -2089,11 +2078,6 @@  struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 	return tags;
 }
 
-static size_t order_to_size(unsigned int order)
-{
-	return (size_t)PAGE_SIZE << order;
-}
-
 static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 			       unsigned int hctx_idx, int node)
 {
@@ -2109,12 +2093,20 @@  static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 	return 0;
 }
 
+static void blk_mq_assign_request(void *ctx, void *ptr, int idx)
+{
+	struct blk_mq_tags *tags = (struct blk_mq_tags *)ctx;
+	struct request *rq = ptr;
+
+	tags->static_rqs[idx] = rq;
+}
+
 int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 		     unsigned int hctx_idx, unsigned int depth)
 {
-	unsigned int i, j, entries_per_page, max_order = 4;
-	size_t rq_size, left;
-	int node;
+	unsigned int i;
+	size_t rq_size;
+	int node, rc;
 
 	node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
 	if (node == NUMA_NO_NODE)
@@ -2128,62 +2120,28 @@  int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 	 */
 	rq_size = round_up(sizeof(struct request) + set->cmd_size,
 				cache_line_size());
-	left = rq_size * depth;
-
-	for (i = 0; i < depth; ) {
-		int this_order = max_order;
-		struct page *page;
-		int to_do;
-		void *p;
-
-		while (this_order && left < order_to_size(this_order - 1))
-			this_order--;
-
-		do {
-			page = alloc_pages_node(node,
-				GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
-				this_order);
-			if (page)
-				break;
-			if (!this_order--)
-				break;
-			if (order_to_size(this_order) < rq_size)
-				break;
-		} while (1);
 
-		if (!page)
-			goto fail;
+	rc = request_from_pages_alloc((void *)tags, depth, rq_size,
+				      &tags->page_list, 4, node,
+				      blk_mq_assign_request);
+	if (rc < 0)
+		goto fail;
 
-		page->private = this_order;
-		list_add_tail(&page->lru, &tags->page_list);
+	for (i = 0; i < rc; i++) {
+		struct request *rq = tags->static_rqs[i];
 
-		p = page_address(page);
-		/*
-		 * Allow kmemleak to scan these pages as they contain pointers
-		 * to additional allocations like via ops->init_request().
-		 */
-		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
-		entries_per_page = order_to_size(this_order) / rq_size;
-		to_do = min(entries_per_page, depth - i);
-		left -= to_do * rq_size;
-		for (j = 0; j < to_do; j++) {
-			struct request *rq = p;
-
-			tags->static_rqs[i] = rq;
-			if (blk_mq_init_request(set, rq, hctx_idx, node)) {
-				tags->static_rqs[i] = NULL;
-				goto fail;
-			}
-
-			p += rq_size;
-			i++;
+		if (blk_mq_init_request(set, rq, hctx_idx, node)) {
+			tags->static_rqs[i] = NULL;
+			rc = -ENOMEM;
+			goto fail;
 		}
 	}
+
 	return 0;
 
 fail:
 	blk_mq_free_rqs(set, tags, hctx_idx);
-	return -ENOMEM;
+	return rc;
 }
 
 /*
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index 0c964ac107c2..5b1f6214c881 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -108,4 +108,10 @@  static inline mempool_t *mempool_create_page_pool(int min_nr, int order)
 			      (void *)(long)order);
 }
 
+int request_from_pages_alloc(void *ctx, unsigned int depth, size_t rq_size,
+			     struct list_head *page_list, int max_order,
+			     int node,
+			     void (*assign)(void *ctx, void *req, int idx));
+void request_from_pages_free(struct list_head *page_list);
+
 #endif /* _LINUX_MEMPOOL_H */
diff --git a/mm/Makefile b/mm/Makefile
index 1937cc251883..92e7d74ba7a7 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -42,7 +42,7 @@  obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
 			   mm_init.o mmu_context.o percpu.o slab_common.o \
 			   compaction.o vmacache.o \
 			   interval_tree.o list_lru.o workingset.o \
-			   debug.o gup.o $(mmu-y)
+			   debug.o gup.o request_alloc.o $(mmu-y)
 
 # Give 'page_alloc' its own module-parameter namespace
 page-alloc-y := page_alloc.o
diff --git a/mm/request_alloc.c b/mm/request_alloc.c
new file mode 100644
index 000000000000..01ebea8ccdfc
--- /dev/null
+++ b/mm/request_alloc.c
@@ -0,0 +1,95 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common function for struct allocation. Moved from blk-mq code
+ *
+ * Copyright (C) 2013-2014 Jens Axboe
+ */
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/mm_types.h>
+#include <linux/list.h>
+#include <linux/kmemleak.h>
+#include <linux/mm.h>
+
+void request_from_pages_free(struct list_head *page_list)
+{
+	struct page *page, *n;
+
+	list_for_each_entry_safe(page, n, page_list, lru) {
+		list_del_init(&page->lru);
+		/*
+		 * Remove kmemleak object previously allocated in
+		 * blk_mq_alloc_rqs().
+		 */
+		kmemleak_free(page_address(page));
+		__free_pages(page, page->private);
+	}
+}
+EXPORT_SYMBOL_GPL(request_from_pages_free);
+
+static size_t order_to_size(unsigned int order)
+{
+	return (size_t)PAGE_SIZE << order;
+}
+
+int request_from_pages_alloc(void *ctx, unsigned int depth, size_t rq_size,
+			     struct list_head *page_list, int max_order,
+			     int node,
+			     void (*assign)(void *ctx, void *req, int idx))
+{
+	size_t left;
+	unsigned int i, j, entries_per_page;
+
+	left = rq_size * depth;
+
+	for (i = 0; i < depth; ) {
+		int this_order = max_order;
+		struct page *page;
+		int to_do;
+		void *p;
+
+		while (this_order && left < order_to_size(this_order - 1))
+			this_order--;
+
+		do {
+			page = alloc_pages_node(node,
+						GFP_NOIO | __GFP_NOWARN |
+						__GFP_NORETRY | __GFP_ZERO,
+						this_order);
+			if (page)
+				break;
+			if (!this_order--)
+				break;
+			if (order_to_size(this_order) < rq_size)
+				break;
+		} while (1);
+
+		if (!page)
+			goto fail;
+
+		page->private = this_order;
+		list_add_tail(&page->lru, page_list);
+
+		p = page_address(page);
+		/*
+		 * Allow kmemleak to scan these pages as they contain pointers
+		 * to additional allocations like via ops->init_request().
+		 */
+		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
+		entries_per_page = order_to_size(this_order) / rq_size;
+		to_do = min(entries_per_page, depth - i);
+		left -= to_do * rq_size;
+		for (j = 0; j < to_do; j++) {
+			assign((void *)ctx, p, i);
+			p += rq_size;
+			i++;
+		}
+	}
+
+	return i;
+
+fail:
+	request_from_pages_free(page_list);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(request_from_pages_alloc);