diff mbox series

nfs: don't negate the op_status in nfs4_flexfiles_io_event tracepoints

Message ID 20250407-nfs-testing-v1-1-2e637737ce76@kernel.org (mailing list archive)
State New
Headers show
Series nfs: don't negate the op_status in nfs4_flexfiles_io_event tracepoints | expand

Commit Message

Jeff Layton April 7, 2025, 2:24 p.m. UTC
In particular, doing this makes NFS4ERR_NXIO look like -ENXIO when
the tracepoints fire.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfs/nfs4trace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


---
base-commit: e63b29244e588206f4d417ae27dc04bf1d58b982
change-id: 20250407-nfs-testing-61217f89ee25

Best regards,

Comments

Trond Myklebust April 7, 2025, 2:35 p.m. UTC | #1
On Mon, 2025-04-07 at 10:24 -0400, Jeff Layton wrote:
> In particular, doing this makes NFS4ERR_NXIO look like -ENXIO when
> the tracepoints fire.
> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/nfs/nfs4trace.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
> index
> bc67fe6801b138204641319ecaf1115aac76af62..eb7d625d45e83f025ef96952660
> dded85aa0ca89 100644
> --- a/fs/nfs/nfs4trace.h
> +++ b/fs/nfs/nfs4trace.h
> @@ -2089,7 +2089,7 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
>  		TP_printk(
>  			"error=%ld (%s) fileid=%02x:%02x:%llu
> fhandle=0x%08x "
>  			"offset=%llu count=%u stateid=%d:0x%08x
> dstaddr=%s",
> -			-__entry->error,
> +			__entry->error,
>  			show_nfs4_status(__entry->error),
>  			MAJOR(__entry->dev), MINOR(__entry->dev),
>  			(unsigned long long)__entry->fileid,
> 

Hmm... I've been getting tired of not having the RPC level errors in
the above, so I was actually thinking of changing those tracepoints to
something more like this.

8<--------------------------------------------------------------------
From ee1c801410b2649b2a1c71e0fb5fe1b16fd20e86 Mon Sep 17 00:00:00 2001
Message-ID: <ee1c801410b2649b2a1c71e0fb5fe1b16fd20e86.1744036227.git.trond.myklebust@hammerspace.com>
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 7 Apr 2025 14:36:41 +0200
Subject: [PATCH] pNFS/flexfiles: Record the RPC errors in the I/O tracepoints

When debugging I/O issues, we want to see not just the NFS level errors,
but also the RPC level problems, so record both in the tracepoints.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/flexfilelayout/flexfilelayout.c |  6 ++---
 fs/nfs/nfs4trace.h                     | 34 +++++++++++++++++---------
 2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 61ad269c825f..e6909cafab68 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1329,7 +1329,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
 					    hdr->args.offset, hdr->args.count,
 					    &hdr->res.op_status, OP_READ,
 					    task->tk_status);
-		trace_ff_layout_read_error(hdr);
+		trace_ff_layout_read_error(hdr, task->tk_status);
 	}
 
 	err = ff_layout_async_handle_error(task, hdr->args.context->state,
@@ -1502,7 +1502,7 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
 					    hdr->args.offset, hdr->args.count,
 					    &hdr->res.op_status, OP_WRITE,
 					    task->tk_status);
-		trace_ff_layout_write_error(hdr);
+		trace_ff_layout_write_error(hdr, task->tk_status);
 	}
 
 	err = ff_layout_async_handle_error(task, hdr->args.context->state,
@@ -1551,7 +1551,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
 					    data->args.offset, data->args.count,
 					    &data->res.op_status, OP_COMMIT,
 					    task->tk_status);
-		trace_ff_layout_commit_error(data);
+		trace_ff_layout_commit_error(data, task->tk_status);
 	}
 
 	err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index bc67fe6801b1..deab4c0e21a0 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -2051,13 +2051,15 @@ TRACE_EVENT(fl_getdevinfo,
 
 DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
 		TP_PROTO(
-			const struct nfs_pgio_header *hdr
+			const struct nfs_pgio_header *hdr,
+			int error
 		),
 
-		TP_ARGS(hdr),
+		TP_ARGS(hdr, error),
 
 		TP_STRUCT__entry(
 			__field(unsigned long, error)
+			__field(unsigned long, nfs_error)
 			__field(dev_t, dev)
 			__field(u32, fhandle)
 			__field(u64, fileid)
@@ -2073,7 +2075,8 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
 		TP_fast_assign(
 			const struct inode *inode = hdr->inode;
 
-			__entry->error = hdr->res.op_status;
+			__entry->error = -error;
+			__entry->nfs_error = hdr->res.op_status;
 			__entry->fhandle = nfs_fhandle_hash(hdr->args.fh);
 			__entry->fileid = NFS_FILEID(inode);
 			__entry->dev = inode->i_sb->s_dev;
@@ -2088,7 +2091,8 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
 
 		TP_printk(
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
-			"offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s",
+			"offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s "
+			"nfs_error=%lu (%s)",
 			-__entry->error,
 			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -2096,28 +2100,32 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
 			__entry->fhandle,
 			__entry->offset, __entry->count,
 			__entry->stateid_seq, __entry->stateid_hash,
-			__get_str(dstaddr)
+			__get_str(dstaddr), __entry->nfs_error,
+			show_nfs4_status(__entry->nfs_error)
 		)
 );
 
 #define DEFINE_NFS4_FLEXFILES_IO_EVENT(name) \
 	DEFINE_EVENT(nfs4_flexfiles_io_event, name, \
 			TP_PROTO( \
-				const struct nfs_pgio_header *hdr \
+				const struct nfs_pgio_header *hdr, \
+				int error \
 			), \
-			TP_ARGS(hdr))
+			TP_ARGS(hdr, error))
 DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_read_error);
 DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_write_error);
 
 TRACE_EVENT(ff_layout_commit_error,
 		TP_PROTO(
-			const struct nfs_commit_data *data
+			const struct nfs_commit_data *data,
+			int error
 		),
 
-		TP_ARGS(data),
+		TP_ARGS(data, error),
 
 		TP_STRUCT__entry(
 			__field(unsigned long, error)
+			__field(unsigned long, nfs_error)
 			__field(dev_t, dev)
 			__field(u32, fhandle)
 			__field(u64, fileid)
@@ -2131,7 +2139,8 @@ TRACE_EVENT(ff_layout_commit_error,
 		TP_fast_assign(
 			const struct inode *inode = data->inode;
 
-			__entry->error = data->res.op_status;
+			__entry->error = -error;
+			__entry->nfs_error = data->res.op_status;
 			__entry->fhandle = nfs_fhandle_hash(data->args.fh);
 			__entry->fileid = NFS_FILEID(inode);
 			__entry->dev = inode->i_sb->s_dev;
@@ -2142,14 +2151,15 @@ TRACE_EVENT(ff_layout_commit_error,
 
 		TP_printk(
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
-			"offset=%llu count=%u dstaddr=%s",
+			"offset=%llu count=%u dstaddr=%s nfs_error=%lu (%s)",
 			-__entry->error,
 			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
 			__entry->offset, __entry->count,
-			__get_str(dstaddr)
+			__get_str(dstaddr), __entry->nfs_error,
+			show_nfs4_status(__entry->nfs_error)
 		)
 );
Jeff Layton April 7, 2025, 2:40 p.m. UTC | #2
On Mon, 2025-04-07 at 10:24 -0400, Jeff Layton wrote:
> In particular, doing this makes NFS4ERR_NXIO look like -ENXIO when
> the tracepoints fire.
> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/nfs/nfs4trace.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
> index bc67fe6801b138204641319ecaf1115aac76af62..eb7d625d45e83f025ef96952660dded85aa0ca89 100644
> --- a/fs/nfs/nfs4trace.h
> +++ b/fs/nfs/nfs4trace.h
> @@ -2089,7 +2089,7 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
>  		TP_printk(
>  			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
>  			"offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s",
> -			-__entry->error,
> +			__entry->error,

It looks like there is a related problem here too. In particular,
show_nfs4_status() calls __print_symbolic, but some of the entries in
the array are duplicates.

For instance, ENXIO == 6, but NFS4ERR_NXIO is also 6. The second entry
in that array will never be matched, because ENXIO will always be found
first.

If we're going to mix errnos and nfs error codes, maybe we should
change it to treat negative values as errnos and positive ones as NFS
errors?

>  			show_nfs4_status(__entry->error),
>  			MAJOR(__entry->dev), MINOR(__entry->dev),
>  			(unsigned long long)__entry->fileid,
> 
> ---
> base-commit: e63b29244e588206f4d417ae27dc04bf1d58b982
> change-id: 20250407-nfs-testing-61217f89ee25
> 
> Best regards,
diff mbox series

Patch

diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index bc67fe6801b138204641319ecaf1115aac76af62..eb7d625d45e83f025ef96952660dded85aa0ca89 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -2089,7 +2089,7 @@  DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
 		TP_printk(
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
 			"offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s",
-			-__entry->error,
+			__entry->error,
 			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,