
[v3,3/3] NFSv4.2: Rework scratch handling for READ_PLUS (again)

Message ID 20230609200013.849882-3-anna@kernel.org (mailing list archive)
State New, archived
Series [v3,1/3] NFSv4.2: Fix READ_PLUS smatch warnings

Commit Message

Anna Schumaker June 9, 2023, 8 p.m. UTC
From: Anna Schumaker <Anna.Schumaker@Netapp.com>

I found that the read code might send multiple requests using the same
nfs_pgio_header, but nfs4_proc_read_setup() is only called once. This is
how we ended up occasionally double-freeing the scratch buffer, but it
also means we could hand the xdr stream a NULL scratch pointer with a
non-zero length. This results in an oops the first time decoding needs to copy
something to scratch, which frequently happens when decoding READ_PLUS
hole segments.
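
To make the failure concrete, here is a minimal sketch of the decode side
(hypothetical helper name, heavily simplified from the real copy-to-scratch
path in net/sunrpc/xdr.c) showing why a stale scratch pointer only bites
once an item actually straddles a page boundary:

	/*
	 * Sketch only, not the actual sunrpc implementation.  When an XDR
	 * item crosses a page boundary, xdr_inline_decode() copies it into
	 * the stream's scratch iovec.  With res.scratch freed (or never
	 * allocated) but the length still non-zero, the size check passes
	 * and the copy dereferences NULL.
	 */
	static void *copy_to_scratch_sketch(struct xdr_stream *xdr, size_t nbytes)
	{
		if (nbytes > xdr->scratch.iov_len)	/* non-zero length still passes */
			return NULL;
		memcpy(xdr->scratch.iov_base, xdr->p, nbytes);	/* iov_base == NULL -> oops */
		return xdr->scratch.iov_base;
	}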

I fix this by moving scratch handling into the pageio read code. I
provide a function to allocate scratch space for decoding read replies,
and free the scratch buffer when the nfs_pgio_header is freed.
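
Condensed from the diff below, the scratch buffer now lives exactly as long
as the nfs_pgio_header that owns it:

	/* allocated once, when READ_PLUS is selected for this header */
	nfs_read_alloc_scratch(hdr, READ_PLUS_SCRATCH_SIZE);

	/* every reply decoded for this header points the xdr stream at it */
	xdr_set_scratch_buffer(xdr, res->scratch, READ_PLUS_SCRATCH_SIZE);

	/* freed exactly once, in nfs_readhdr_free(), together with the header */
	kfree(rhdr->res.scratch);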

Krzysztof Kozlowski hit a bug a while ago with similar symptoms,
and I'm hopeful that this patch fixes his issue.

Reported-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Fixes: fbd2a05f29a9 ("NFSv4.2: Rework scratch handling for READ_PLUS")
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

---
v3: Remove accidentally copy-and-pasted line from the commit message
---
 fs/nfs/internal.h |  1 +
 fs/nfs/nfs42.h    |  1 +
 fs/nfs/nfs42xdr.c |  2 +-
 fs/nfs/nfs4proc.c | 13 +------------
 fs/nfs/read.c     | 10 ++++++++++
 5 files changed, 14 insertions(+), 13 deletions(-)

Comments

Krzysztof Kozlowski June 10, 2023, 10:13 a.m. UTC | #1
On 09/06/2023 22:00, Anna Schumaker wrote:
> From: Anna Schumaker <Anna.Schumaker@Netapp.com>
> 
> I found that the read code might send multiple requests using the same
> nfs_pgio_header, but nfs4_proc_read_setup() is only called once. This is
> how we ended up occasionally double-freeing the scratch buffer, but also
> means we set a NULL pointer but non-zero length to the xdr scratch
> buffer. This results in an oops the first time decoding needs to copy
> something to scratch, which frequently happens when decoding READ_PLUS
> hole segments.
> 
> I fix this by moving scratch handling into the pageio read code. I
> provide a function to allocate scratch space for decoding read replies,
> and free the scratch buffer when the nfs_pgio_header is freed.
> 
> Krzysztof Kozlowski hit a bug a while ago with similar symptoms,
> and I'm hopeful that this patch fixes his issue.

Unfortunately it does not help. Same NULL ptr, next-20230609 with this
patchset:


[   26.780433] Unable to handle kernel NULL pointer dereference at virtual address 00000004 when read

[   27.124547] mmiocpy from xdr_inline_decode (net/sunrpc/xdr.c:1424 net/sunrpc/xdr.c:1459) 
[   27.129643] xdr_inline_decode from nfs4_xdr_dec_read_plus (fs/nfs/nfs42xdr.c:1069 fs/nfs/nfs42xdr.c:1152 fs/nfs/nfs42xdr.c:1365 fs/nfs/nfs42xdr.c:1346) 
[   27.136147] nfs4_xdr_dec_read_plus from call_decode (net/sunrpc/clnt.c:2592) 
[   27.142124] call_decode from __rpc_execute (include/asm-generic/bitops/generic-non-atomic.h:128 net/sunrpc/sched.c:952) 
[   27.147232] __rpc_execute from rpc_async_schedule (include/linux/sched/mm.h:368 net/sunrpc/sched.c:1033) 
[   27.152864] rpc_async_schedule from process_one_work (include/linux/atomic/atomic-arch-fallback.h:444 include/linux/jump_label.h:260 include/linux/jump_label.h:270 include/trace/events/workqueue.h:108 kernel/workqueue.c:2599) 
[   27.158935] process_one_work from worker_thread (include/linux/list.h:292 kernel/workqueue.c:2746) 
[   27.164476] worker_thread from kthread (kernel/kthread.c:381) 
[   27.169329] kthread from ret_from_fork (arch/arm/kernel/entry-common.S:134)

Best regards,
Krzysztof
Anna Schumaker June 12, 2023, 7:57 p.m. UTC | #2
On Sat, Jun 10, 2023 at 6:13 AM Krzysztof Kozlowski
<krzysztof.kozlowski@linaro.org> wrote:
>
>
> On 09/06/2023 22:00, Anna Schumaker wrote:
> > From: Anna Schumaker <Anna.Schumaker@Netapp.com>
> >
> > I found that the read code might send multiple requests using the same
> > nfs_pgio_header, but nfs4_proc_read_setup() is only called once. This is
> > how we ended up occasionally double-freeing the scratch buffer, but also
> > means we set a NULL pointer but non-zero length to the xdr scratch
> > buffer. This results in an oops the first time decoding needs to copy
> > something to scratch, which frequently happens when decoding READ_PLUS
> > hole segments.
> >
> > I fix this by moving scratch handling into the pageio read code. I
> > provide a function to allocate scratch space for decoding read replies,
> > and free the scratch buffer when the nfs_pgio_header is freed.
> >
> > Krzysztof Kozlowski hit a bug a while ago with similar symptoms,
> > and I'm hopeful that this patch fixes his issue.
>
> Unfortunately it does not help. Same NULL ptr, next-20230609 with this
> patchset:

That's unfortunate. I was really hoping that between patch #2 and #3
it would finally address the issue. I think you said your client is
ARMv7, so that's 32-bit, right? I'll try to do some 32-bit testing to
see if that uncovers anything on my end. In the meantime, I'll update
the debugging printk() patch based on what I learned while working on
patch #3 last week, and try to get that to you in the next day or two.

Anna

>
>
> [   26.780433] Unable to handle kernel NULL pointer dereference at virtual address 00000004 when read
>
> [   27.124547] mmiocpy from xdr_inline_decode (net/sunrpc/xdr.c:1424 net/sunrpc/xdr.c:1459)
> [   27.129643] xdr_inline_decode from nfs4_xdr_dec_read_plus (fs/nfs/nfs42xdr.c:1069 fs/nfs/nfs42xdr.c:1152 fs/nfs/nfs42xdr.c:1365 fs/nfs/nfs42xdr.c:1346)
> [   27.136147] nfs4_xdr_dec_read_plus from call_decode (net/sunrpc/clnt.c:2592)
> [   27.142124] call_decode from __rpc_execute (include/asm-generic/bitops/generic-non-atomic.h:128 net/sunrpc/sched.c:952)
> [   27.147232] __rpc_execute from rpc_async_schedule (include/linux/sched/mm.h:368 net/sunrpc/sched.c:1033)
> [   27.152864] rpc_async_schedule from process_one_work (include/linux/atomic/atomic-arch-fallback.h:444 include/linux/jump_label.h:260 include/linux/jump_label.h:270 include/trace/events/workqueue.h:108 kernel/workqueue.c:2599)
> [   27.158935] process_one_work from worker_thread (include/linux/list.h:292 kernel/workqueue.c:2746)
> [   27.164476] worker_thread from kthread (kernel/kthread.c:381)
> [   27.169329] kthread from ret_from_fork (arch/arm/kernel/entry-common.S:134)
>
> Best regards,
> Krzysztof
>
Krzysztof Kozlowski June 12, 2023, 8:04 p.m. UTC | #3
On 12/06/2023 21:57, Anna Schumaker wrote:

>>> I fix this by moving scratch handling into the pageio read code. I
>>> provide a function to allocate scratch space for decoding read replies,
>>> and free the scratch buffer when the nfs_pgio_header is freed.
>>>
>>> Krzysztof Kozlowski hit a bug a while ago with similar symptoms,
>>> and I'm hopeful that this patch fixes his issue.
>>
>> Unfortunately it does not help. Same NULL ptr, next-20230609 with this
>> patchset:
> 
> That's unfortunate. I was really hoping between patch #2 and #3 that
> it would finally address the issue. I think you said your client is
> ARM v7, that's 32-bit right? I'll try to do some 32-bit testing to see

Yes, it's 32-bit ARM.

> if that uncovers anything on my end. In the meantime, I'll try to
> update the debugging printk() patch based on what I learned while
> working patch #3 last week. I'll try to get that to you in the next
> day or two.
> 



Best regards,
Krzysztof

Patch

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 3cc027d3bd58..1607c23f68d4 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -489,6 +489,7 @@  extern const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
 extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 			struct inode *inode, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
+extern bool nfs_read_alloc_scratch(struct nfs_pgio_header *hdr, size_t size);
 extern int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
 			       struct nfs_open_context *ctx,
 			       struct folio *folio);
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index 0fe5aacbcfdf..b59876b01a1e 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -13,6 +13,7 @@ 
  * more? Need to consider not to pre-alloc too much for a compound.
  */
 #define PNFS_LAYOUTSTATS_MAXDEV (4)
+#define READ_PLUS_SCRATCH_SIZE (16)
 
 /* nfs4.2proc.c */
 #ifdef CONFIG_NFS_V4_2
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 75765382cc0e..20aa5e746497 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -1351,7 +1351,7 @@  static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp,
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_set_scratch_buffer(xdr, res->scratch, sizeof(res->scratch));
+	xdr_set_scratch_buffer(xdr, res->scratch, READ_PLUS_SCRATCH_SIZE);
 
 	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d3665390c4cb..73dc8a793ae9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5437,18 +5437,8 @@  static bool nfs4_read_plus_not_supported(struct rpc_task *task,
 	return false;
 }
 
-static inline void nfs4_read_plus_scratch_free(struct nfs_pgio_header *hdr)
-{
-	if (hdr->res.scratch) {
-		kfree(hdr->res.scratch);
-		hdr->res.scratch = NULL;
-	}
-}
-
 static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 {
-	nfs4_read_plus_scratch_free(hdr);
-
 	if (!nfs4_sequence_done(task, &hdr->res.seq_res))
 		return -EAGAIN;
 	if (nfs4_read_stateid_changed(task, &hdr->args))
@@ -5468,8 +5458,7 @@  static bool nfs42_read_plus_support(struct nfs_pgio_header *hdr,
 	/* Note: We don't use READ_PLUS with pNFS yet */
 	if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp) {
 		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS];
-		hdr->res.scratch = kmalloc(32, GFP_KERNEL);
-		return hdr->res.scratch != NULL;
+		return nfs_read_alloc_scratch(hdr, READ_PLUS_SCRATCH_SIZE);
 	}
 	return false;
 }
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index f71eeee67e20..7dc21a48e3e7 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -47,6 +47,8 @@  static struct nfs_pgio_header *nfs_readhdr_alloc(void)
 
 static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
 {
+	if (rhdr->res.scratch != NULL)
+		kfree(rhdr->res.scratch);
 	kmem_cache_free(nfs_rdata_cachep, rhdr);
 }
 
@@ -108,6 +110,14 @@  void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
 
+bool nfs_read_alloc_scratch(struct nfs_pgio_header *hdr, size_t size)
+{
+	WARN_ON(hdr->res.scratch != NULL);
+	hdr->res.scratch = kmalloc(size, GFP_KERNEL);
+	return hdr->res.scratch != NULL;
+}
+EXPORT_SYMBOL_GPL(nfs_read_alloc_scratch);
+
 static void nfs_readpage_release(struct nfs_page *req, int error)
 {
 	struct folio *folio = nfs_page_to_folio(req);