diff mbox

[v3,06/11] scatterlist: support "page-less" (__pfn_t only) entries

Message ID 1431542149.31415.10.camel@intel.com (mailing list archive)
State Changes Requested
Delegated to: Dan Williams
Headers show

Commit Message

Dan Williams May 13, 2015, 6:35 p.m. UTC
[ adding willy (yes, need to fix my scripts), and dmaengine folks]

Jens, Christoph,

I've rebased this patch series block/for-next.  With commit 84be456f883c
"remove <asm/scatterlist.h>" I think we can take the next step of
removing all references to page_link and just use __pfn_t by default.
v4 patch below.

Jens, I'm wondering if you want to take this series(.) as patches or
prepare a git branch to pull?

8<--------
Subject: scatterlist: support "page-less" (__pfn_t only) entries

From: Matthew Wilcox <willy@linux.intel.com>

Given that an offset will never be more than PAGE_SIZE, steal the unused
bits of the offset to implement a flags field.  Move the existing "this
is a sg_chain() or sg_last() entry" flags to the new 'flags' field.

[djbw: rebase on block/for-4.2]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
 block/blk-merge.c            |    2 -
 drivers/crypto/omap-sham.c   |    2 -
 drivers/dma/imx-dma.c        |    7 +--
 drivers/dma/ste_dma40.c      |    5 --
 drivers/mmc/card/queue.c     |    4 +-
 include/crypto/scatterwalk.h |    9 +---
 include/linux/scatterlist.h  |   92 ++++++++++++++++++++++++++----------------
 samples/kfifo/dma-example.c  |    8 ++--
 8 files changed, 69 insertions(+), 60 deletions(-)

Comments

Vinod Koul May 19, 2015, 4:10 a.m. UTC | #1
On Wed, May 13, 2015 at 06:35:55PM +0000, Williams, Dan J wrote:
> [ adding willy (yes, need to fix my scripts), and dmaengine folks]
> 
> Jens, Christoph,
> 
> I've rebased this patch series block/for-next.  With commit 84be456f883c
> "remove <asm/scatterlist.h>" I think we can take the next step of
> removing all references to page_link and just use __pfn_t by default.
> v4 patch below.
> 
> Jens, I'm wondering if you want to take this series(.) as patches or
> prepare a git branch to pull?
> 

> diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
> index 4d63e0d4da9a..21736afd3320 100644
> --- a/drivers/crypto/omap-sham.c
> +++ b/drivers/crypto/omap-sham.c
> @@ -582,7 +582,7 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
>  		 * the dmaengine may try to DMA the incorrect amount of data.
>  		 */
>  		sg_init_table(&ctx->sgl, 1);
> -		ctx->sgl.page_link = ctx->sg->page_link;
> +		ctx->sgl.pfn = ctx->sg->pfn;
Do you want drivers to tinker with internals of sg, I think driver should be
agnostic here, perhpas a helper to get pfn, sg_get_pfn()

>  		ctx->sgl.offset = ctx->sg->offset;
>  		sg_dma_len(&ctx->sgl) = len32;
>  		sg_dma_address(&ctx->sgl) = sg_dma_address(ctx->sg);
> diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
> index eed405976ea9..a767727bcfef 100644
> --- a/drivers/dma/imx-dma.c
> +++ b/drivers/dma/imx-dma.c
> @@ -886,7 +886,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
>  	sg_init_table(imxdmac->sg_list, periods);
>  
>  	for (i = 0; i < periods; i++) {
> -		imxdmac->sg_list[i].page_link = 0;
> +		imxdmac->sg_list[i].pfn.page = NULL;
same here, sg_set_pfn_page() ?

>  		imxdmac->sg_list[i].offset = 0;
>  		imxdmac->sg_list[i].dma_address = dma_addr;
>  		sg_dma_len(&imxdmac->sg_list[i]) = period_len;
> @@ -894,10 +894,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
>  	}
>  
>  	/* close the loop */
> -	imxdmac->sg_list[periods].offset = 0;
> -	sg_dma_len(&imxdmac->sg_list[periods]) = 0;
> -	imxdmac->sg_list[periods].page_link =
> -		((unsigned long)imxdmac->sg_list | 0x01) & ~0x02;
> +	sg_chain(imxdmac->sg_list, periods + 1, imxdmac->sg_list);
This looks right...
Dan Williams May 20, 2015, 4:03 p.m. UTC | #2
On Mon, May 18, 2015 at 9:10 PM, Vinod Koul <vinod.koul@intel.com> wrote:
> On Wed, May 13, 2015 at 06:35:55PM +0000, Williams, Dan J wrote:
>> [ adding willy (yes, need to fix my scripts), and dmaengine folks]
>>
>> Jens, Christoph,
>>
>> I've rebased this patch series block/for-next.  With commit 84be456f883c
>> "remove <asm/scatterlist.h>" I think we can take the next step of
>> removing all references to page_link and just use __pfn_t by default.
>> v4 patch below.
>>
>> Jens, I'm wondering if you want to take this series(.) as patches or
>> prepare a git branch to pull?
>>
>
>> diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
>> index 4d63e0d4da9a..21736afd3320 100644
>> --- a/drivers/crypto/omap-sham.c
>> +++ b/drivers/crypto/omap-sham.c
>> @@ -582,7 +582,7 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
>>                * the dmaengine may try to DMA the incorrect amount of data.
>>                */
>>               sg_init_table(&ctx->sgl, 1);
>> -             ctx->sgl.page_link = ctx->sg->page_link;
>> +             ctx->sgl.pfn = ctx->sg->pfn;
> Do you want drivers to tinker with internals of sg, I think driver should be
> agnostic here, perhpas a helper to get pfn, sg_get_pfn()

Hmm, we have sg_page() seems like we also need an sg_pfn().  Thanks, will fix.

>
>>               ctx->sgl.offset = ctx->sg->offset;
>>               sg_dma_len(&ctx->sgl) = len32;
>>               sg_dma_address(&ctx->sgl) = sg_dma_address(ctx->sg);
>> diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
>> index eed405976ea9..a767727bcfef 100644
>> --- a/drivers/dma/imx-dma.c
>> +++ b/drivers/dma/imx-dma.c
>> @@ -886,7 +886,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
>>       sg_init_table(imxdmac->sg_list, periods);
>>
>>       for (i = 0; i < periods; i++) {
>> -             imxdmac->sg_list[i].page_link = 0;
>> +             imxdmac->sg_list[i].pfn.page = NULL;
> same here, sg_set_pfn_page() ?

sg_set_page() would work here.

>
>>               imxdmac->sg_list[i].offset = 0;
>>               imxdmac->sg_list[i].dma_address = dma_addr;
>>               sg_dma_len(&imxdmac->sg_list[i]) = period_len;
>> @@ -894,10 +894,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
>>       }
>>
>>       /* close the loop */
>> -     imxdmac->sg_list[periods].offset = 0;
>> -     sg_dma_len(&imxdmac->sg_list[periods]) = 0;
>> -     imxdmac->sg_list[periods].page_link =
>> -             ((unsigned long)imxdmac->sg_list | 0x01) & ~0x02;
>> +     sg_chain(imxdmac->sg_list, periods + 1, imxdmac->sg_list);
> This looks right...
>

Cool, thanks for taking a look.
Christoph Hellwig May 23, 2015, 2:12 p.m. UTC | #3
On Wed, May 13, 2015 at 06:35:55PM +0000, Williams, Dan J wrote:
> Jens, I'm wondering if you want to take this series(.) as patches or
> prepare a git branch to pull?

Honestly I don't think it should go anyway.  It makes a big mess of
a structure without providing a real user for it.  Given how we are
using the bio_vec for in-kernel page based I/O these days it seems
like a very dangerous idea.
Dan Williams May 23, 2015, 4:41 p.m. UTC | #4
On Sat, May 23, 2015 at 7:12 AM, hch@lst.de <hch@lst.de> wrote:
> On Wed, May 13, 2015 at 06:35:55PM +0000, Williams, Dan J wrote:
>> Jens, I'm wondering if you want to take this series(.) as patches or
>> prepare a git branch to pull?
>
> Honestly I don't think it should go anyway.  It makes a big mess of
> a structure without providing a real user for it.  Given how we are
> using the bio_vec for in-kernel page based I/O these days it seems
> like a very dangerous idea.

There's nothing dangerous about the __pfn_t conversion of the block
layer in the !CONFIG_DEV_PFN case a __pfn_t based bio_vec is
bit-for-bit identical to a struct page based bio_vec.  However, you're
right, I can't make the same claim about a scatterlist before and
after the change.

Hmm, we're missing a pfn-only block I/O user and we're missing the
second half of the implementation that provides __pfn_t_to_page() for
persistent memory.  I'm looking to have a solution __pfn_t_to_page()
shortly, maybe that will allow the scatterlist changes to be
skipped...  we'll see.
diff mbox

Patch

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 218ad1e57a49..82a688551b72 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -267,7 +267,7 @@  int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 		if (rq->cmd_flags & REQ_WRITE)
 			memset(q->dma_drain_buffer, 0, q->dma_drain_size);
 
-		sg->page_link &= ~0x02;
+		sg_unmark_end(sg);
 		sg = sg_next(sg);
 		sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
 			    q->dma_drain_size,
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 4d63e0d4da9a..21736afd3320 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -582,7 +582,7 @@  static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
 		 * the dmaengine may try to DMA the incorrect amount of data.
 		 */
 		sg_init_table(&ctx->sgl, 1);
-		ctx->sgl.page_link = ctx->sg->page_link;
+		ctx->sgl.pfn = ctx->sg->pfn;
 		ctx->sgl.offset = ctx->sg->offset;
 		sg_dma_len(&ctx->sgl) = len32;
 		sg_dma_address(&ctx->sgl) = sg_dma_address(ctx->sg);
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index eed405976ea9..a767727bcfef 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -886,7 +886,7 @@  static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
 	sg_init_table(imxdmac->sg_list, periods);
 
 	for (i = 0; i < periods; i++) {
-		imxdmac->sg_list[i].page_link = 0;
+		imxdmac->sg_list[i].pfn.page = NULL;
 		imxdmac->sg_list[i].offset = 0;
 		imxdmac->sg_list[i].dma_address = dma_addr;
 		sg_dma_len(&imxdmac->sg_list[i]) = period_len;
@@ -894,10 +894,7 @@  static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
 	}
 
 	/* close the loop */
-	imxdmac->sg_list[periods].offset = 0;
-	sg_dma_len(&imxdmac->sg_list[periods]) = 0;
-	imxdmac->sg_list[periods].page_link =
-		((unsigned long)imxdmac->sg_list | 0x01) & ~0x02;
+	sg_chain(imxdmac->sg_list, periods + 1, imxdmac->sg_list);
 
 	desc->type = IMXDMA_DESC_CYCLIC;
 	desc->sg = imxdmac->sg_list;
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 3c10f034d4b9..e8c00642cacb 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -2562,10 +2562,7 @@  dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 		dma_addr += period_len;
 	}
 
-	sg[periods].offset = 0;
-	sg_dma_len(&sg[periods]) = 0;
-	sg[periods].page_link =
-		((unsigned long)sg | 0x01) & ~0x02;
+	sg_chain(sg, periods + 1, sg);
 
 	txd = d40_prep_sg(chan, sg, sg, periods, direction,
 			  DMA_PREP_INTERRUPT);
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 236d194c2883..127f76294e71 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -469,7 +469,7 @@  static unsigned int mmc_queue_packed_map_sg(struct mmc_queue *mq,
 			sg_set_buf(__sg, buf + offset, len);
 			offset += len;
 			remain -= len;
-			(__sg++)->page_link &= ~0x02;
+			sg_unmark_end(__sg++);
 			sg_len++;
 		} while (remain);
 	}
@@ -477,7 +477,7 @@  static unsigned int mmc_queue_packed_map_sg(struct mmc_queue *mq,
 	list_for_each_entry(req, &packed->list, queuelist) {
 		sg_len += blk_rq_map_sg(mq->queue, req, __sg);
 		__sg = sg + (sg_len - 1);
-		(__sg++)->page_link &= ~0x02;
+		sg_unmark_end(__sg++);
 	}
 	sg_mark_end(sg + (sg_len - 1));
 	return sg_len;
diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h
index 20e4226a2e14..4529889b0f07 100644
--- a/include/crypto/scatterwalk.h
+++ b/include/crypto/scatterwalk.h
@@ -25,13 +25,8 @@ 
 #include <linux/scatterlist.h>
 #include <linux/sched.h>
 
-static inline void scatterwalk_sg_chain(struct scatterlist *sg1, int num,
-					struct scatterlist *sg2)
-{
-	sg_set_page(&sg1[num - 1], (void *)sg2, 0, 0);
-	sg1[num - 1].page_link &= ~0x02;
-	sg1[num - 1].page_link |= 0x01;
-}
+#define scatterwalk_sg_chain(prv, num, sgl)	sg_chain(prv, num, sgl)
+#define scatterwalk_sg_next(sgl)		sg_next(sgl)
 
 static inline void scatterwalk_crypto_chain(struct scatterlist *head,
 					    struct scatterlist *sg,
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index eca1ec93775c..98c21b460292 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -9,14 +9,18 @@ 
 
 struct scatterlist {
 #ifdef CONFIG_DEBUG_SG
-	unsigned long	sg_magic;
+	unsigned long   sg_magic;
 #endif
-	unsigned long	page_link;
-	unsigned int	offset;
-	unsigned int	length;
-	dma_addr_t	dma_address;
+        union {
+                __pfn_t pfn;
+                struct scatterlist *next;
+        };
+        unsigned short  offset;
+        unsigned short  sg_flags;
+        unsigned int    length;
+        dma_addr_t      dma_address;
 #ifdef CONFIG_NEED_SG_DMA_LENGTH
-	unsigned int	dma_length;
+        unsigned int    dma_length;
 #endif
 };
 
@@ -44,30 +48,29 @@  struct sg_table {
 /*
  * Notes on SG table design.
  *
- * We use the unsigned long page_link field in the scatterlist struct to place
- * the page pointer AND encode information about the sg table as well. The two
- * lower bits are reserved for this information.
+ * We use the fact that a given sg entry will never be larger than a
+ * page to pack 'offset' and 'sg_flags' into 32-bits.  This supports a
+ * PAGE_SIZE up to 64K and flags to encode information about the sg
+ * entry.
  *
- * If bit 0 is set, then the page_link contains a pointer to the next sg
- * table list. Otherwise the next entry is at sg + 1.
+ * If SG_FLAGS_CHAIN is set, then the 'next' member of the entry is a
+ * pointer to the next sg table list. Otherwise the next entry is at sg
+ * + 1.
  *
- * If bit 1 is set, then this sg entry is the last element in a list.
+ * If SG_FLAGS_LAST is set, then this sg entry is the last element in a
+ * list.
  *
  * See sg_next().
  *
  */
 
 #define SG_MAGIC	0x87654321
+#define SG_FLAGS_CHAIN	0x0001
+#define SG_FLAGS_LAST	0x0002
 
-/*
- * We overload the LSB of the page pointer to indicate whether it's
- * a valid sg entry, or whether it points to the start of a new scatterlist.
- * Those low bits are there for everyone! (thanks mason :-)
- */
-#define sg_is_chain(sg)		((sg)->page_link & 0x01)
-#define sg_is_last(sg)		((sg)->page_link & 0x02)
-#define sg_chain_ptr(sg)	\
-	((struct scatterlist *) ((sg)->page_link & ~0x03))
+#define sg_is_chain(sg)		((sg)->sg_flags & SG_FLAGS_CHAIN)
+#define sg_is_last(sg)		((sg)->sg_flags & SG_FLAGS_LAST)
+#define sg_chain_ptr(sg)	((sg)->next)
 
 /**
  * sg_assign_page - Assign a given page to an SG entry
@@ -81,18 +84,15 @@  struct sg_table {
  **/
 static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
 {
-	unsigned long page_link = sg->page_link & 0x3;
-
-	/*
-	 * In order for the low bit stealing approach to work, pages
-	 * must be aligned at a 32-bit boundary as a minimum.
-	 */
-	BUG_ON((unsigned long) page & 0x03);
 #ifdef CONFIG_DEBUG_SG
 	BUG_ON(sg->sg_magic != SG_MAGIC);
 	BUG_ON(sg_is_chain(sg));
 #endif
-	sg->page_link = page_link | (unsigned long) page;
+	sg->pfn = page_to_pfn_t(page);
+#ifdef CONFIG_DEBUG_SG
+	BUG_ON(sg->sg_magic != SG_MAGIC);
+	BUG_ON(sg_is_chain(sg));
+#endif
 }
 
 /**
@@ -113,17 +113,32 @@  static inline void sg_set_page(struct scatterlist *sg, struct page *page,
 			       unsigned int len, unsigned int offset)
 {
 	sg_assign_page(sg, page);
+	BUG_ON(offset > 65535);
 	sg->offset = offset;
 	sg->length = len;
 }
 
+static inline void sg_set_pfn(struct scatterlist *sg, __pfn_t pfn,
+			      unsigned int len, unsigned int offset)
+{
+#ifdef CONFIG_DEBUG_SG
+	BUG_ON(sg->sg_magic != SG_MAGIC);
+	BUG_ON(sg_is_chain(sg));
+#endif
+	sg->pfn = pfn;
+	BUG_ON(offset > 65535);
+	sg->offset = offset;
+	sg->sg_flags = 0;
+	sg->length = len;
+}
+
 static inline struct page *sg_page(struct scatterlist *sg)
 {
 #ifdef CONFIG_DEBUG_SG
 	BUG_ON(sg->sg_magic != SG_MAGIC);
 	BUG_ON(sg_is_chain(sg));
 #endif
-	return (struct page *)((sg)->page_link & ~0x3);
+	return __pfn_t_to_page(sg->pfn);
 }
 
 /**
@@ -175,7 +190,8 @@  static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
 	 * Set lowest bit to indicate a link pointer, and make sure to clear
 	 * the termination bit if it happens to be set.
 	 */
-	prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02;
+	prv[prv_nents - 1].next = sgl;
+	prv[prv_nents - 1].sg_flags = SG_FLAGS_CHAIN;
 }
 
 /**
@@ -195,8 +211,8 @@  static inline void sg_mark_end(struct scatterlist *sg)
 	/*
 	 * Set termination bit, clear potential chain bit
 	 */
-	sg->page_link |= 0x02;
-	sg->page_link &= ~0x01;
+	sg->sg_flags |= SG_FLAGS_LAST;
+	sg->sg_flags &= ~SG_FLAGS_CHAIN;
 }
 
 /**
@@ -212,7 +228,7 @@  static inline void sg_unmark_end(struct scatterlist *sg)
 #ifdef CONFIG_DEBUG_SG
 	BUG_ON(sg->sg_magic != SG_MAGIC);
 #endif
-	sg->page_link &= ~0x02;
+	sg->sg_flags &= ~SG_FLAGS_LAST;
 }
 
 /**
@@ -227,7 +243,7 @@  static inline void sg_unmark_end(struct scatterlist *sg)
  **/
 static inline dma_addr_t sg_phys(struct scatterlist *sg)
 {
-	return page_to_phys(sg_page(sg)) + sg->offset;
+	return __pfn_t_to_phys(sg->pfn) + sg->offset;
 }
 
 /**
@@ -242,7 +258,11 @@  static inline dma_addr_t sg_phys(struct scatterlist *sg)
  **/
 static inline void *sg_virt(struct scatterlist *sg)
 {
-	return page_address(sg_page(sg)) + sg->offset;
+	struct page *page;
+
+	page = __pfn_t_to_page(sg->pfn) + sg->offset;
+	BUG_ON(!page); /* don't use sg_virt() on unmapped memory */
+	return page_address(page) + sg->offset;
 }
 
 int sg_nents(struct scatterlist *sg);
diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c
index aa243db93f01..3eeff9a56e0e 100644
--- a/samples/kfifo/dma-example.c
+++ b/samples/kfifo/dma-example.c
@@ -75,8 +75,8 @@  static int __init example_init(void)
 	for (i = 0; i < nents; i++) {
 		printk(KERN_INFO
 		"sg[%d] -> "
-		"page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
-			i, sg[i].page_link, sg[i].offset, sg[i].length);
+		"pfn_data 0x%.8lx offset 0x%.8x length 0x%.8x\n",
+			i, sg[i].pfn.data, sg[i].offset, sg[i].length);
 
 		if (sg_is_last(&sg[i]))
 			break;
@@ -104,8 +104,8 @@  static int __init example_init(void)
 	for (i = 0; i < nents; i++) {
 		printk(KERN_INFO
 		"sg[%d] -> "
-		"page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
-			i, sg[i].page_link, sg[i].offset, sg[i].length);
+		"pfn_data 0x%.8lx offset 0x%.8x length 0x%.8x\n",
+			i, sg[i].pfn.data, sg[i].offset, sg[i].length);
 
 		if (sg_is_last(&sg[i]))
 			break;