diff mbox series

[V2] RDMA/siw: Convert siw_tx_hdt() to kmap_local_page()

Message ID 20210622203432.2715659-1-ira.weiny@intel.com (mailing list archive)
State Superseded
Headers show
Series [V2] RDMA/siw: Convert siw_tx_hdt() to kmap_local_page() | expand

Commit Message

Ira Weiny June 22, 2021, 8:34 p.m. UTC
From: Ira Weiny <ira.weiny@intel.com>

kmap() is being deprecated and will break uses of device dax after PKS
protection is introduced.[1]

The use of kmap() in siw_tx_hdt() is all thread local therefore
kmap_local_page() is a sufficient replacement and will work with pgmap
protected pages when those are implemented.

siw_tx_hdt() tracks pages used in a page_array.  It uses that array,
on function exit, to unmap the pages which were mapped.  Not all entries
in the array are mapped and this is tracked in kmap_mask.

kunmap_local() takes a mapped address rather than a page.  Alter
siw_unmap_pages() to take the iov array to reuse the iov_base address of
each mapping.  Use PAGE_MASK to get the proper address for
kunmap_local().

kmap_local_page() mappings are tracked in a stack and must be unmapped
in the opposite order they were mapped in.  Because segments are mapped
into the page array in increasing index order, modify siw_unmap_pages()
to unmap pages in decreasing order.

Use kmap_local_page() instead of kmap() to map pages in the page_array.

[1] https://lore.kernel.org/lkml/20201009195033.3208459-59-ira.weiny@intel.com/

Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
Changes for V2:
	From Bernard
		Reuse iov[].iov_base rather than declaring another array of
		pointers and preserve the use of kmap_mask to know which iov's
		were kmapped.

---
 drivers/infiniband/sw/siw/siw_qp_tx.c | 32 +++++++++++++++++----------
 1 file changed, 20 insertions(+), 12 deletions(-)

Comments

Bernard Metzler June 23, 2021, 2:36 p.m. UTC | #1
-----ira.weiny@intel.com wrote: -----

>To: "Jason Gunthorpe" <jgg@ziepe.ca>
>From: ira.weiny@intel.com
>Date: 06/22/2021 10:35PM
>Cc: "Ira Weiny" <ira.weiny@intel.com>, "Mike Marciniszyn"
><mike.marciniszyn@cornelisnetworks.com>, "Dennis Dalessandro"
><dennis.dalessandro@cornelisnetworks.com>, "Doug Ledford"
><dledford@redhat.com>, "Faisal Latif" <faisal.latif@intel.com>,
>"Shiraz Saleem" <shiraz.saleem@intel.com>, "Bernard Metzler"
><bmt@zurich.ibm.com>, "Kamal Heib" <kheib@redhat.com>,
>linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
>Subject: [EXTERNAL] [PATCH V2] RDMA/siw: Convert siw_tx_hdt() to
>kmap_local_page()
>
>From: Ira Weiny <ira.weiny@intel.com>
>
>kmap() is being deprecated and will break uses of device dax after
>PKS
>protection is introduced.[1]
>
>The use of kmap() in siw_tx_hdt() is all thread local therefore
>kmap_local_page() is a sufficient replacement and will work with
>pgmap
>protected pages when those are implemented.
>
>siw_tx_hdt() tracks pages used in a page_array.  It uses that array
>to
>unmap pages which were mapped on function exit.  Not all entries in
>the
>array are mapped and this is tracked in kmap_mask.
>
>kunmap_local() takes a mapped address rather than a page.  Alter
>siw_unmap_pages() to take the iov array to reuse the iov_base address
>of
>each mapping.  Use PAGE_MASK to get the proper address for
>kunmap_local().
>
>kmap_local_page() mappings are tracked in a stack and must be
>unmapped
>in the opposite order they were mapped in.  Because segments are
>mapped
>into the page array in increasing index order, modify
>siw_unmap_pages()
>to unmap pages in decreasing order.
>
>Use kmap_local_page() instead of kmap() to map pages in the
>page_array.
>
>[1]
>INVALID URI REMOVED
>lkml_20201009195033.3208459-2D59-2Dira.weiny-40intel.com_&d=DwIDAg&c=
>jf_iaSHvJObTbx-siA1ZOg&r=2TaYXQ0T-r8ZO1PP1alNwU_QJcRRLfmYTAgd3QCvqSc&
>m=ujJBVqPLdVdVxXvOu_PlFL3NVC0Znds3FgxyrtWJtwM&s=WZIBAdwlCqPIRjsNOGlly
>gQ6Hsug6ObgrWgO_nvBGyc&e= 
>
>Signed-off-by: Ira Weiny <ira.weiny@intel.com>
>
>---
>Changes for V2:
>	From Bernard
>		Reuse iov[].iov_base rather than declaring another array of
>		pointers and preserve the use of kmap_mask to know which iov's
>		were kmapped.
>
>---
> drivers/infiniband/sw/siw/siw_qp_tx.c | 32
>+++++++++++++++++----------
> 1 file changed, 20 insertions(+), 12 deletions(-)
>
>diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c
>b/drivers/infiniband/sw/siw/siw_qp_tx.c
>index db68a10d12cd..fd3b9e6a67d7 100644
>--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
>+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
>@@ -396,13 +396,20 @@ static int siw_0copy_tx(struct socket *s,
>struct page **page,
> 
> #define MAX_TRAILER (MPA_CRC_SIZE + 4)
> 
>-static void siw_unmap_pages(struct page **pp, unsigned long
>kmap_mask)
>+static void siw_unmap_pages(struct kvec *iov, unsigned long
>kmap_mask, int len)
> {
>-	while (kmap_mask) {
>-		if (kmap_mask & BIT(0))
>-			kunmap(*pp);
>-		pp++;
>-		kmap_mask >>= 1;
>+	int i;
>+
>+	/*
>+	 * Work backwards through the array to honor the kmap_local_page()
>+	 * ordering requirements.
>+	 */
>+	for (i = (len-1); i >= 0; i--) {
>+		if (kmap_mask & BIT(i)) {
>+			unsigned long addr = (unsigned long)iov[i].iov_base;
>+
>+			kunmap_local((void *)(addr & PAGE_MASK));
>+		}
> 	}
> }
> 
>@@ -498,7 +505,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx,
>struct socket *s)
> 					p = siw_get_upage(mem->umem,
> 							  sge->laddr + sge_off);
> 				if (unlikely(!p)) {
>-					siw_unmap_pages(page_array, kmap_mask);
>+					siw_unmap_pages(iov, kmap_mask, MAX_ARRAY);
> 					wqe->processed -= c_tx->bytes_unsent;
> 					rv = -EFAULT;
> 					goto done_crc;
>@@ -506,11 +513,12 @@ static int siw_tx_hdt(struct siw_iwarp_tx
>*c_tx, struct socket *s)
> 				page_array[seg] = p;
> 
> 				if (!c_tx->use_sendpage) {
>-					iov[seg].iov_base = kmap(p) + fp_off;
>-					iov[seg].iov_len = plen;
>+					void *kaddr = kmap_local_page(page_array[seg]);

we can use 'kmap_local_page(p)' here
> 
> 					/* Remember for later kunmap() */
> 					kmap_mask |= BIT(seg);
>+					iov[seg].iov_base = kaddr + fp_off;
>+					iov[seg].iov_len = plen;
> 
> 					if (do_crc)
> 						crypto_shash_update(
>@@ -518,7 +526,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx,
>struct socket *s)
> 							iov[seg].iov_base,
> 							plen);

This patch does not apply for me. Would I have to install first
your [Patch 3/4] -- since the current patch references kmap_local_page()
already? Maybe it is better to apply if it would be just one siw
related patch in that series?



> 				} else if (do_crc) {
>-					kaddr = kmap_local_page(p);
>+					kaddr = kmap_local_page(page_array[seg]);

using 'kmap_local_page(p)' as you had it is straightforward
and I would prefer it.

> 					crypto_shash_update(c_tx->mpa_crc_hd,
> 							    kaddr + fp_off,
> 							    plen);
>@@ -542,7 +550,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx,
>struct socket *s)
> 
> 			if (++seg > (int)MAX_ARRAY) {
> 				siw_dbg_qp(tx_qp(c_tx), "to many fragments\n");
>-				siw_unmap_pages(page_array, kmap_mask);
>+				siw_unmap_pages(iov, kmap_mask, MAX_ARRAY);

to minimize the iterations over the byte array in 'siw_unmap_pages()',
we may pass seg-1 instead of MAX_ARRAY


> 				wqe->processed -= c_tx->bytes_unsent;
> 				rv = -EMSGSIZE;
> 				goto done_crc;
>@@ -593,7 +601,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx,
>struct socket *s)
> 	} else {
> 		rv = kernel_sendmsg(s, &msg, iov, seg + 1,
> 				    hdr_len + data_len + trl_len);
>-		siw_unmap_pages(page_array, kmap_mask);
>+		siw_unmap_pages(iov, kmap_mask, MAX_ARRAY);

to minimize the iterations over the byte array in 'siw_unmap_pages()',
we may pass seg instead of MAX_ARRAY

> 	}
> 	if (rv < (int)hdr_len) {
> 		/* Not even complete hdr pushed or negative rv */
>-- 
>2.28.0.rc0.12.gb6a658bd00c9
>
>
Ira Weiny June 23, 2021, 3:34 p.m. UTC | #2
On Wed, Jun 23, 2021 at 02:36:45PM +0000, Bernard Metzler wrote:
> -----ira.weiny@intel.com wrote: -----
> 
> >@@ -506,11 +513,12 @@ static int siw_tx_hdt(struct siw_iwarp_tx
> >*c_tx, struct socket *s)
> > 				page_array[seg] = p;
> > 
> > 				if (!c_tx->use_sendpage) {
> >-					iov[seg].iov_base = kmap(p) + fp_off;
> >-					iov[seg].iov_len = plen;
> >+					void *kaddr = kmap_local_page(page_array[seg]);
> 
> we can use 'kmap_local_page(p)' here

Yes but I actually did this on purpose as it makes the code read clearly that
the mapping is 'seg' element of the array.  Do you prefer 'p' because this is a
performant path?

> > 
> > 					/* Remember for later kunmap() */
> > 					kmap_mask |= BIT(seg);
> >+					iov[seg].iov_base = kaddr + fp_off;
> >+					iov[seg].iov_len = plen;
> > 
> > 					if (do_crc)
> > 						crypto_shash_update(
> >@@ -518,7 +526,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx,
> >struct socket *s)
> > 							iov[seg].iov_base,
> > 							plen);
> 
> This patch does not apply for me. Would I have to install first
> your [Patch 3/4] -- since the current patch references kmap_local_page()
> already? Maybe it is better to apply if it would be just one siw
> related patch in that series?

Yes, the other patch goes first.  I split it out to make this more difficult
change more reviewable.  I could squash them as it is probably straightforward
enough, but I've been careful with this in other subsystems.

Jason, do you have any issue with squashing the 2 patches?

> 
> 
> 
> > 				} else if (do_crc) {
> >-					kaddr = kmap_local_page(p);
> >+					kaddr = kmap_local_page(page_array[seg]);
> 
> using 'kmap_local_page(p)' as you had it is straightforward
> and I would prefer it.

OK.  I think this reads cleaner but I can see 'p' being more performant.

> 
> > 					crypto_shash_update(c_tx->mpa_crc_hd,
> > 							    kaddr + fp_off,
> > 							    plen);
> >@@ -542,7 +550,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx,
> >struct socket *s)
> > 
> > 			if (++seg > (int)MAX_ARRAY) {
> > 				siw_dbg_qp(tx_qp(c_tx), "to many fragments\n");
> >-				siw_unmap_pages(page_array, kmap_mask);
> >+				siw_unmap_pages(iov, kmap_mask, MAX_ARRAY);
> 
> to minimize the iterations over the byte array in 'siw_unmap_pages()',
> we may pass seg-1 instead of MAX_ARRAY

Sounds good.

> 
> 
> > 				wqe->processed -= c_tx->bytes_unsent;
> > 				rv = -EMSGSIZE;
> > 				goto done_crc;
> >@@ -593,7 +601,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx,
> >struct socket *s)
> > 	} else {
> > 		rv = kernel_sendmsg(s, &msg, iov, seg + 1,
> > 				    hdr_len + data_len + trl_len);
> >-		siw_unmap_pages(page_array, kmap_mask);
> >+		siw_unmap_pages(iov, kmap_mask, MAX_ARRAY);
> 
> to minimize the iterations over the byte array in 'siw_unmap_pages()',
> we may pass seg instead of MAX_ARRAY

Will do.

Thanks for the review!  :-D
Ira
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index db68a10d12cd..fd3b9e6a67d7 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -396,13 +396,20 @@  static int siw_0copy_tx(struct socket *s, struct page **page,
 
 #define MAX_TRAILER (MPA_CRC_SIZE + 4)
 
-static void siw_unmap_pages(struct page **pp, unsigned long kmap_mask)
+static void siw_unmap_pages(struct kvec *iov, unsigned long kmap_mask, int len)
 {
-	while (kmap_mask) {
-		if (kmap_mask & BIT(0))
-			kunmap(*pp);
-		pp++;
-		kmap_mask >>= 1;
+	int i;
+
+	/*
+	 * Work backwards through the array to honor the kmap_local_page()
+	 * ordering requirements.
+	 */
+	for (i = (len-1); i >= 0; i--) {
+		if (kmap_mask & BIT(i)) {
+			unsigned long addr = (unsigned long)iov[i].iov_base;
+
+			kunmap_local((void *)(addr & PAGE_MASK));
+		}
 	}
 }
 
@@ -498,7 +505,7 @@  static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
 					p = siw_get_upage(mem->umem,
 							  sge->laddr + sge_off);
 				if (unlikely(!p)) {
-					siw_unmap_pages(page_array, kmap_mask);
+					siw_unmap_pages(iov, kmap_mask, MAX_ARRAY);
 					wqe->processed -= c_tx->bytes_unsent;
 					rv = -EFAULT;
 					goto done_crc;
@@ -506,11 +513,12 @@  static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
 				page_array[seg] = p;
 
 				if (!c_tx->use_sendpage) {
-					iov[seg].iov_base = kmap(p) + fp_off;
-					iov[seg].iov_len = plen;
+					void *kaddr = kmap_local_page(page_array[seg]);
 
 					/* Remember for later kunmap() */
 					kmap_mask |= BIT(seg);
+					iov[seg].iov_base = kaddr + fp_off;
+					iov[seg].iov_len = plen;
 
 					if (do_crc)
 						crypto_shash_update(
@@ -518,7 +526,7 @@  static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
 							iov[seg].iov_base,
 							plen);
 				} else if (do_crc) {
-					kaddr = kmap_local_page(p);
+					kaddr = kmap_local_page(page_array[seg]);
 					crypto_shash_update(c_tx->mpa_crc_hd,
 							    kaddr + fp_off,
 							    plen);
@@ -542,7 +550,7 @@  static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
 
 			if (++seg > (int)MAX_ARRAY) {
 				siw_dbg_qp(tx_qp(c_tx), "to many fragments\n");
-				siw_unmap_pages(page_array, kmap_mask);
+				siw_unmap_pages(iov, kmap_mask, MAX_ARRAY);
 				wqe->processed -= c_tx->bytes_unsent;
 				rv = -EMSGSIZE;
 				goto done_crc;
@@ -593,7 +601,7 @@  static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
 	} else {
 		rv = kernel_sendmsg(s, &msg, iov, seg + 1,
 				    hdr_len + data_len + trl_len);
-		siw_unmap_pages(page_array, kmap_mask);
+		siw_unmap_pages(iov, kmap_mask, MAX_ARRAY);
 	}
 	if (rv < (int)hdr_len) {
 		/* Not even complete hdr pushed or negative rv */