Message ID | 20190523201351.12232-3-dwysocha@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/3] SUNRPC: Move call to rpc_count_iostats before rpc_call_done | expand |
On Thu, May 23, 2019 at 04:13:50PM -0400, Dave Wysochanski wrote: > We often see various error conditions with NFS4.x that show up with > a very high operation count all completing with tk_status < 0 in a > short period of time. Add a count to rpc_iostats to record on a > per-op basis the ops that complete in this manner, which will > enable lower overhead diagnostics. Looks like a good idea to me. It's too bad we can't distinguish the errors. (E.g. ESTALE on a lookup call is a lot more interesting than ENOENT.) But understood that maintaining (number of possible errors) * (number of possible ops) counters is probably overkill, so just counting the number of errors seems like a good start. --b. > > Signed-off-by: Dave Wysochanski <dwysocha@redhat.com> > --- > include/linux/sunrpc/metrics.h | 7 ++++++- > net/sunrpc/stats.c | 8 ++++++-- > 2 files changed, 12 insertions(+), 3 deletions(-) > > diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h > index 1b3751327575..0ee3f7052846 100644 > --- a/include/linux/sunrpc/metrics.h > +++ b/include/linux/sunrpc/metrics.h > @@ -30,7 +30,7 @@ > #include <linux/ktime.h> > #include <linux/spinlock.h> > > -#define RPC_IOSTATS_VERS "1.0" > +#define RPC_IOSTATS_VERS "1.1" > > struct rpc_iostats { > spinlock_t om_lock; > @@ -66,6 +66,11 @@ struct rpc_iostats { > ktime_t om_queue, /* queued for xmit */ > om_rtt, /* RPC RTT */ > om_execute; /* RPC execution */ > + /* > + * The count of operations that complete with tk_status < 0. > + * These statuses usually indicate error conditions. 
> + */ > + unsigned long om_error_status; > } ____cacheline_aligned; > > struct rpc_task; > diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c > index 8b2d3c58ffae..737414247ca7 100644 > --- a/net/sunrpc/stats.c > +++ b/net/sunrpc/stats.c > @@ -176,6 +176,8 @@ void rpc_count_iostats_metrics(const struct rpc_task *task, > > execute = ktime_sub(now, task->tk_start); > op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute); > + if (task->tk_status < 0) > + op_metrics->om_error_status++; > > spin_unlock(&op_metrics->om_lock); > > @@ -218,13 +220,14 @@ static void _add_rpc_iostats(struct rpc_iostats *a, struct rpc_iostats *b) > a->om_queue = ktime_add(a->om_queue, b->om_queue); > a->om_rtt = ktime_add(a->om_rtt, b->om_rtt); > a->om_execute = ktime_add(a->om_execute, b->om_execute); > + a->om_error_status += b->om_error_status; > } > > static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, > int op, const struct rpc_procinfo *procs) > { > _print_name(seq, op, procs); > - seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu\n", > + seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n", > stats->om_ops, > stats->om_ntrans, > stats->om_timeouts, > @@ -232,7 +235,8 @@ static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, > stats->om_bytes_recv, > ktime_to_ms(stats->om_queue), > ktime_to_ms(stats->om_rtt), > - ktime_to_ms(stats->om_execute)); > + ktime_to_ms(stats->om_execute), > + stats->om_error_status); > } > > void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt) > -- > 2.20.1
> On May 30, 2019, at 5:38 PM, bfields@fieldses.org wrote: > > On Thu, May 23, 2019 at 04:13:50PM -0400, Dave Wysochanski wrote: >> We often see various error conditions with NFS4.x that show up with >> a very high operation count all completing with tk_status < 0 in a >> short period of time. Add a count to rpc_iostats to record on a >> per-op basis the ops that complete in this manner, which will >> enable lower overhead diagnostics. > > Looks like a good idea to me. > > It's too bad we can't distinguish the errors. (E.g. ESTALE on a lookup > call is a lot more interesting than ENOENT.) But understood that > maintaining (number of possible errors) * (number of possible ops) > counters is probably overkill, so just counting the number of errors > seems like a good start. We now have trace points that can do that too. > --b. > >> >> Signed-off-by: Dave Wysochanski <dwysocha@redhat.com> >> --- >> include/linux/sunrpc/metrics.h | 7 ++++++- >> net/sunrpc/stats.c | 8 ++++++-- >> 2 files changed, 12 insertions(+), 3 deletions(-) >> >> diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h >> index 1b3751327575..0ee3f7052846 100644 >> --- a/include/linux/sunrpc/metrics.h >> +++ b/include/linux/sunrpc/metrics.h >> @@ -30,7 +30,7 @@ >> #include <linux/ktime.h> >> #include <linux/spinlock.h> >> >> -#define RPC_IOSTATS_VERS "1.0" >> +#define RPC_IOSTATS_VERS "1.1" >> >> struct rpc_iostats { >> spinlock_t om_lock; >> @@ -66,6 +66,11 @@ struct rpc_iostats { >> ktime_t om_queue, /* queued for xmit */ >> om_rtt, /* RPC RTT */ >> om_execute; /* RPC execution */ >> + /* >> + * The count of operations that complete with tk_status < 0. >> + * These statuses usually indicate error conditions. 
>> + */ >> + unsigned long om_error_status; >> } ____cacheline_aligned; >> >> struct rpc_task; >> diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c >> index 8b2d3c58ffae..737414247ca7 100644 >> --- a/net/sunrpc/stats.c >> +++ b/net/sunrpc/stats.c >> @@ -176,6 +176,8 @@ void rpc_count_iostats_metrics(const struct rpc_task *task, >> >> execute = ktime_sub(now, task->tk_start); >> op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute); >> + if (task->tk_status < 0) >> + op_metrics->om_error_status++; >> >> spin_unlock(&op_metrics->om_lock); >> >> @@ -218,13 +220,14 @@ static void _add_rpc_iostats(struct rpc_iostats *a, struct rpc_iostats *b) >> a->om_queue = ktime_add(a->om_queue, b->om_queue); >> a->om_rtt = ktime_add(a->om_rtt, b->om_rtt); >> a->om_execute = ktime_add(a->om_execute, b->om_execute); >> + a->om_error_status += b->om_error_status; >> } >> >> static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, >> int op, const struct rpc_procinfo *procs) >> { >> _print_name(seq, op, procs); >> - seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu\n", >> + seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n", >> stats->om_ops, >> stats->om_ntrans, >> stats->om_timeouts, >> @@ -232,7 +235,8 @@ static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, >> stats->om_bytes_recv, >> ktime_to_ms(stats->om_queue), >> ktime_to_ms(stats->om_rtt), >> - ktime_to_ms(stats->om_execute)); >> + ktime_to_ms(stats->om_execute), >> + stats->om_error_status); >> } >> >> void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt) >> -- >> 2.20.1 -- Chuck Lever
On Thu, May 30, 2019 at 06:19:54PM -0400, Chuck Lever wrote: > > > > On May 30, 2019, at 5:38 PM, bfields@fieldses.org wrote: > > > > On Thu, May 23, 2019 at 04:13:50PM -0400, Dave Wysochanski wrote: > >> We often see various error conditions with NFS4.x that show up with > >> a very high operation count all completing with tk_status < 0 in a > >> short period of time. Add a count to rpc_iostats to record on a > >> per-op basis the ops that complete in this manner, which will > >> enable lower overhead diagnostics. > > > > Looks like a good idea to me. > > > > It's too bad we can't distinguish the errors. (E.g. ESTALE on a lookup > > call is a lot more interesting than ENOENT.) But understood that > > maintaining (number of possible errors) * (number of possible ops) > > counters is probably overkill, so just counting the number of errors > > seems like a good start. > > We now have trace points that can do that too. You mean, that can report every error (and its value)? I assume having these statistics in mountstats is still useful, though. --b. > > > > --b. > > > >> > >> Signed-off-by: Dave Wysochanski <dwysocha@redhat.com> > >> --- > >> include/linux/sunrpc/metrics.h | 7 ++++++- > >> net/sunrpc/stats.c | 8 ++++++-- > >> 2 files changed, 12 insertions(+), 3 deletions(-) > >> > >> diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h > >> index 1b3751327575..0ee3f7052846 100644 > >> --- a/include/linux/sunrpc/metrics.h > >> +++ b/include/linux/sunrpc/metrics.h > >> @@ -30,7 +30,7 @@ > >> #include <linux/ktime.h> > >> #include <linux/spinlock.h> > >> > >> -#define RPC_IOSTATS_VERS "1.0" > >> +#define RPC_IOSTATS_VERS "1.1" > >> > >> struct rpc_iostats { > >> spinlock_t om_lock; > >> @@ -66,6 +66,11 @@ struct rpc_iostats { > >> ktime_t om_queue, /* queued for xmit */ > >> om_rtt, /* RPC RTT */ > >> om_execute; /* RPC execution */ > >> + /* > >> + * The count of operations that complete with tk_status < 0. 
> >> + * These statuses usually indicate error conditions. > >> + */ > >> + unsigned long om_error_status; > >> } ____cacheline_aligned; > >> > >> struct rpc_task; > >> diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c > >> index 8b2d3c58ffae..737414247ca7 100644 > >> --- a/net/sunrpc/stats.c > >> +++ b/net/sunrpc/stats.c > >> @@ -176,6 +176,8 @@ void rpc_count_iostats_metrics(const struct rpc_task *task, > >> > >> execute = ktime_sub(now, task->tk_start); > >> op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute); > >> + if (task->tk_status < 0) > >> + op_metrics->om_error_status++; > >> > >> spin_unlock(&op_metrics->om_lock); > >> > >> @@ -218,13 +220,14 @@ static void _add_rpc_iostats(struct rpc_iostats *a, struct rpc_iostats *b) > >> a->om_queue = ktime_add(a->om_queue, b->om_queue); > >> a->om_rtt = ktime_add(a->om_rtt, b->om_rtt); > >> a->om_execute = ktime_add(a->om_execute, b->om_execute); > >> + a->om_error_status += b->om_error_status; > >> } > >> > >> static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, > >> int op, const struct rpc_procinfo *procs) > >> { > >> _print_name(seq, op, procs); > >> - seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu\n", > >> + seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n", > >> stats->om_ops, > >> stats->om_ntrans, > >> stats->om_timeouts, > >> @@ -232,7 +235,8 @@ static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, > >> stats->om_bytes_recv, > >> ktime_to_ms(stats->om_queue), > >> ktime_to_ms(stats->om_rtt), > >> - ktime_to_ms(stats->om_execute)); > >> + ktime_to_ms(stats->om_execute), > >> + stats->om_error_status); > >> } > >> > >> void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt) > >> -- > >> 2.20.1 > > -- > Chuck Lever > >
On Thu, May 30, 2019 at 5:39 PM J. Bruce Fields <bfields@fieldses.org> wrote: > > On Thu, May 23, 2019 at 04:13:50PM -0400, Dave Wysochanski wrote: > > We often see various error conditions with NFS4.x that show up with > > a very high operation count all completing with tk_status < 0 in a > > short period of time. Add a count to rpc_iostats to record on a > > per-op basis the ops that complete in this manner, which will > > enable lower overhead diagnostics. > > Looks like a good idea to me. > > It's too bad we can't distinguish the errors. (E.g. ESTALE on a lookup > call is a lot more interesting than ENOENT.) But understood that > maintaining (number of possible errors) * (number of possible ops) > counters is probably overkill, so just counting the number of errors > seems like a good start. > > --b. > I did consider a more elaborate approach, where each code would be counted. Most likely that would need to go into debugfs. I didn't pursue that but may look into it in the future along with other work. I do think the < 0 status counts have value even if we do not know the specific code, they are very low overhead, and always there if added to mountstats. I was envisioning using this along with something like a periodic capture of mountstats possibly using PCP and then using some statistics as a "NFS health" measure for a specific mount point. There are already some "NFS health" metrics that may be calculated with existing mountstats (for example if you're clever you can spot certain NFS4 protocol loops), but the error counts would make it much easier and reliable. 
> > > > Signed-off-by: Dave Wysochanski <dwysocha@redhat.com> > > --- > > include/linux/sunrpc/metrics.h | 7 ++++++- > > net/sunrpc/stats.c | 8 ++++++-- > > 2 files changed, 12 insertions(+), 3 deletions(-) > > > > diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h > > index 1b3751327575..0ee3f7052846 100644 > > --- a/include/linux/sunrpc/metrics.h > > +++ b/include/linux/sunrpc/metrics.h > > @@ -30,7 +30,7 @@ > > #include <linux/ktime.h> > > #include <linux/spinlock.h> > > > > -#define RPC_IOSTATS_VERS "1.0" > > +#define RPC_IOSTATS_VERS "1.1" > > > > struct rpc_iostats { > > spinlock_t om_lock; > > @@ -66,6 +66,11 @@ struct rpc_iostats { > > ktime_t om_queue, /* queued for xmit */ > > om_rtt, /* RPC RTT */ > > om_execute; /* RPC execution */ > > + /* > > + * The count of operations that complete with tk_status < 0. > > + * These statuses usually indicate error conditions. > > + */ > > + unsigned long om_error_status; > > } ____cacheline_aligned; > > > > struct rpc_task; > > diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c > > index 8b2d3c58ffae..737414247ca7 100644 > > --- a/net/sunrpc/stats.c > > +++ b/net/sunrpc/stats.c > > @@ -176,6 +176,8 @@ void rpc_count_iostats_metrics(const struct rpc_task *task, > > > > execute = ktime_sub(now, task->tk_start); > > op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute); > > + if (task->tk_status < 0) > > + op_metrics->om_error_status++; > > > > spin_unlock(&op_metrics->om_lock); > > > > @@ -218,13 +220,14 @@ static void _add_rpc_iostats(struct rpc_iostats *a, struct rpc_iostats *b) > > a->om_queue = ktime_add(a->om_queue, b->om_queue); > > a->om_rtt = ktime_add(a->om_rtt, b->om_rtt); > > a->om_execute = ktime_add(a->om_execute, b->om_execute); > > + a->om_error_status += b->om_error_status; > > } > > > > static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, > > int op, const struct rpc_procinfo *procs) > > { > > _print_name(seq, op, procs); > > - 
seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu\n", > > + seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n", > > stats->om_ops, > > stats->om_ntrans, > > stats->om_timeouts, > > @@ -232,7 +235,8 @@ static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, > > stats->om_bytes_recv, > > ktime_to_ms(stats->om_queue), > > ktime_to_ms(stats->om_rtt), > > - ktime_to_ms(stats->om_execute)); > > + ktime_to_ms(stats->om_execute), > > + stats->om_error_status); > > } > > > > void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt) > > -- > > 2.20.1
> On May 30, 2019, at 6:33 PM, Bruce Fields <bfields@fieldses.org> wrote: > > On Thu, May 30, 2019 at 06:19:54PM -0400, Chuck Lever wrote: >> >> >>> On May 30, 2019, at 5:38 PM, bfields@fieldses.org wrote: >>> >>> On Thu, May 23, 2019 at 04:13:50PM -0400, Dave Wysochanski wrote: >>>> We often see various error conditions with NFS4.x that show up with >>>> a very high operation count all completing with tk_status < 0 in a >>>> short period of time. Add a count to rpc_iostats to record on a >>>> per-op basis the ops that complete in this manner, which will >>>> enable lower overhead diagnostics. >>> >>> Looks like a good idea to me. >>> >>> It's too bad we can't distinguish the errors. (E.g. ESTALE on a lookup >>> call is a lot more interesting than ENOENT.) But understood that >>> maintaining (number of possible errors) * (number of possible ops) >>> counters is probably overkill, so just counting the number of errors >>> seems like a good start. >> >> We now have trace points that can do that too. > > You mean, that can report every error (and its value)? Yes, the nfs_xdr_status trace point reports the error by value and symbolic name. > I assume having these statistics in mountstats is still useful, though. > > --b. > >> >> >>> --b. 
>>> >>>> >>>> Signed-off-by: Dave Wysochanski <dwysocha@redhat.com> >>>> --- >>>> include/linux/sunrpc/metrics.h | 7 ++++++- >>>> net/sunrpc/stats.c | 8 ++++++-- >>>> 2 files changed, 12 insertions(+), 3 deletions(-) >>>> >>>> diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h >>>> index 1b3751327575..0ee3f7052846 100644 >>>> --- a/include/linux/sunrpc/metrics.h >>>> +++ b/include/linux/sunrpc/metrics.h >>>> @@ -30,7 +30,7 @@ >>>> #include <linux/ktime.h> >>>> #include <linux/spinlock.h> >>>> >>>> -#define RPC_IOSTATS_VERS "1.0" >>>> +#define RPC_IOSTATS_VERS "1.1" >>>> >>>> struct rpc_iostats { >>>> spinlock_t om_lock; >>>> @@ -66,6 +66,11 @@ struct rpc_iostats { >>>> ktime_t om_queue, /* queued for xmit */ >>>> om_rtt, /* RPC RTT */ >>>> om_execute; /* RPC execution */ >>>> + /* >>>> + * The count of operations that complete with tk_status < 0. >>>> + * These statuses usually indicate error conditions. >>>> + */ >>>> + unsigned long om_error_status; >>>> } ____cacheline_aligned; >>>> >>>> struct rpc_task; >>>> diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c >>>> index 8b2d3c58ffae..737414247ca7 100644 >>>> --- a/net/sunrpc/stats.c >>>> +++ b/net/sunrpc/stats.c >>>> @@ -176,6 +176,8 @@ void rpc_count_iostats_metrics(const struct rpc_task *task, >>>> >>>> execute = ktime_sub(now, task->tk_start); >>>> op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute); >>>> + if (task->tk_status < 0) >>>> + op_metrics->om_error_status++; >>>> >>>> spin_unlock(&op_metrics->om_lock); >>>> >>>> @@ -218,13 +220,14 @@ static void _add_rpc_iostats(struct rpc_iostats *a, struct rpc_iostats *b) >>>> a->om_queue = ktime_add(a->om_queue, b->om_queue); >>>> a->om_rtt = ktime_add(a->om_rtt, b->om_rtt); >>>> a->om_execute = ktime_add(a->om_execute, b->om_execute); >>>> + a->om_error_status += b->om_error_status; >>>> } >>>> >>>> static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, >>>> int op, const struct rpc_procinfo 
*procs) >>>> { >>>> _print_name(seq, op, procs); >>>> - seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu\n", >>>> + seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n", >>>> stats->om_ops, >>>> stats->om_ntrans, >>>> stats->om_timeouts, >>>> @@ -232,7 +235,8 @@ static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, >>>> stats->om_bytes_recv, >>>> ktime_to_ms(stats->om_queue), >>>> ktime_to_ms(stats->om_rtt), >>>> - ktime_to_ms(stats->om_execute)); >>>> + ktime_to_ms(stats->om_execute), >>>> + stats->om_error_status); >>>> } >>>> >>>> void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt) >>>> -- >>>> 2.20.1 >> >> -- >> Chuck Lever -- Chuck Lever
On Fri, 2019-05-31 at 09:25 -0400, Chuck Lever wrote: > > On May 30, 2019, at 6:33 PM, Bruce Fields <bfields@fieldses.org> > > wrote: > > > > On Thu, May 30, 2019 at 06:19:54PM -0400, Chuck Lever wrote: > > > > > > > > > > On May 30, 2019, at 5:38 PM, bfields@fieldses.org wrote: > > > > > > > > On Thu, May 23, 2019 at 04:13:50PM -0400, Dave Wysochanski > > > > wrote: > > > > > We often see various error conditions with NFS4.x that show > > > > > up with > > > > > a very high operation count all completing with tk_status < 0 > > > > > in a > > > > > short period of time. Add a count to rpc_iostats to record > > > > > on a > > > > > per-op basis the ops that complete in this manner, which will > > > > > enable lower overhead diagnostics. > > > > > > > > Looks like a good idea to me. > > > > > > > > It's too bad we can't distinguish the errors. (E.g. ESTALE on > > > > a lookup > > > > call is a lot more interesting than ENOENT.) But understood > > > > that > > > > maintaining (number of possible errors) * (number of possible > > > > ops) > > > > counters is probably overkill, so just counting the number of > > > > errors > > > > seems like a good start. > > > > > > We now have trace points that can do that too. > > > > You mean, that can report every error (and its value)? > > Yes, the nfs_xdr_status trace point reports the error by value and > symbolic name. > The tracepoint is very useful I agree. I don't think it will show: a) the mount b) the opcode Or am I mistaken and there's a way to get those with a filter or another tracepoint? > > > I assume having these statistics in mountstats is still useful, > > though. > > > > --b. > > > > > > > > > > > > --b. 
> > > > > > > > > > > > > > Signed-off-by: Dave Wysochanski <dwysocha@redhat.com> > > > > > --- > > > > > include/linux/sunrpc/metrics.h | 7 ++++++- > > > > > net/sunrpc/stats.c | 8 ++++++-- > > > > > 2 files changed, 12 insertions(+), 3 deletions(-) > > > > > > > > > > diff --git a/include/linux/sunrpc/metrics.h > > > > > b/include/linux/sunrpc/metrics.h > > > > > index 1b3751327575..0ee3f7052846 100644 > > > > > --- a/include/linux/sunrpc/metrics.h > > > > > +++ b/include/linux/sunrpc/metrics.h > > > > > @@ -30,7 +30,7 @@ > > > > > #include <linux/ktime.h> > > > > > #include <linux/spinlock.h> > > > > > > > > > > -#define RPC_IOSTATS_VERS "1.0" > > > > > +#define RPC_IOSTATS_VERS "1.1" > > > > > > > > > > struct rpc_iostats { > > > > > spinlock_t om_lock; > > > > > @@ -66,6 +66,11 @@ struct rpc_iostats { > > > > > ktime_t om_queue, /* queued for > > > > > xmit */ > > > > > om_rtt, /* RPC RTT */ > > > > > om_execute; /* RPC > > > > > execution */ > > > > > + /* > > > > > + * The count of operations that complete with tk_status > > > > > < 0. > > > > > + * These statuses usually indicate error conditions. 
> > > > > + */ > > > > > + unsigned long om_error_status; > > > > > } ____cacheline_aligned; > > > > > > > > > > struct rpc_task; > > > > > diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c > > > > > index 8b2d3c58ffae..737414247ca7 100644 > > > > > --- a/net/sunrpc/stats.c > > > > > +++ b/net/sunrpc/stats.c > > > > > @@ -176,6 +176,8 @@ void rpc_count_iostats_metrics(const > > > > > struct rpc_task *task, > > > > > > > > > > execute = ktime_sub(now, task->tk_start); > > > > > op_metrics->om_execute = ktime_add(op_metrics- > > > > > >om_execute, execute); > > > > > + if (task->tk_status < 0) > > > > > + op_metrics->om_error_status++; > > > > > > > > > > spin_unlock(&op_metrics->om_lock); > > > > > > > > > > @@ -218,13 +220,14 @@ static void _add_rpc_iostats(struct > > > > > rpc_iostats *a, struct rpc_iostats *b) > > > > > a->om_queue = ktime_add(a->om_queue, b->om_queue); > > > > > a->om_rtt = ktime_add(a->om_rtt, b->om_rtt); > > > > > a->om_execute = ktime_add(a->om_execute, b- > > > > > >om_execute); > > > > > + a->om_error_status += b->om_error_status; > > > > > } > > > > > > > > > > static void _print_rpc_iostats(struct seq_file *seq, struct > > > > > rpc_iostats *stats, > > > > > int op, const struct > > > > > rpc_procinfo *procs) > > > > > { > > > > > _print_name(seq, op, procs); > > > > > - seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu > > > > > %llu\n", > > > > > + seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu > > > > > %lu\n", > > > > > stats->om_ops, > > > > > stats->om_ntrans, > > > > > stats->om_timeouts, > > > > > @@ -232,7 +235,8 @@ static void _print_rpc_iostats(struct > > > > > seq_file *seq, struct rpc_iostats *stats, > > > > > stats->om_bytes_recv, > > > > > ktime_to_ms(stats->om_queue), > > > > > ktime_to_ms(stats->om_rtt), > > > > > - ktime_to_ms(stats->om_execute)); > > > > > + ktime_to_ms(stats->om_execute), > > > > > + stats->om_error_status); > > > > > } > > > > > > > > > > void rpc_clnt_show_stats(struct seq_file *seq, struct 
> > > > > rpc_clnt *clnt) > > > > > -- > > > > > 2.20.1 > > > > > > -- > > > Chuck Lever > > -- > Chuck Lever > > >
> On Jun 3, 2019, at 2:53 PM, Dave Wysochanski <dwysocha@redhat.com> wrote: > > On Fri, 2019-05-31 at 09:25 -0400, Chuck Lever wrote: >>> On May 30, 2019, at 6:33 PM, Bruce Fields <bfields@fieldses.org> >>> wrote: >>> >>> On Thu, May 30, 2019 at 06:19:54PM -0400, Chuck Lever wrote: >>>> >>>> >>>>> On May 30, 2019, at 5:38 PM, bfields@fieldses.org wrote: >>>>> >>>>> On Thu, May 23, 2019 at 04:13:50PM -0400, Dave Wysochanski >>>>> wrote: >>>>>> We often see various error conditions with NFS4.x that show >>>>>> up with >>>>>> a very high operation count all completing with tk_status < 0 >>>>>> in a >>>>>> short period of time. Add a count to rpc_iostats to record >>>>>> on a >>>>>> per-op basis the ops that complete in this manner, which will >>>>>> enable lower overhead diagnostics. >>>>> >>>>> Looks like a good idea to me. >>>>> >>>>> It's too bad we can't distinguish the errors. (E.g. ESTALE on >>>>> a lookup >>>>> call is a lot more interesting than ENOENT.) But understood >>>>> that >>>>> maintaining (number of possible errors) * (number of possible >>>>> ops) >>>>> counters is probably overkill, so just counting the number of >>>>> errors >>>>> seems like a good start. >>>> >>>> We now have trace points that can do that too. >>> >>> You mean, that can report every error (and its value)? >> >> Yes, the nfs_xdr_status trace point reports the error by value and >> symbolic name. >> > > The tracepoint is very useful I agree. I don't think it will show: > a) the mount > b) the opcode > > Or am I mistaken and there's a way to get those with a filter or > another tracepoint? The opcode can be exposed by another trace point, but the link between the two trace points is tenuous and could be improved. I don't believe any of the NFS trace points expose the mount. My testing is largely on a single mount so my imagination stopped there. >>> I assume having these statistics in mountstats is still useful, >>> though. >>> >>> --b. >>> >>>> >>>> >>>>> --b. 
>>>>> >>>>>> >>>>>> Signed-off-by: Dave Wysochanski <dwysocha@redhat.com> >>>>>> --- >>>>>> include/linux/sunrpc/metrics.h | 7 ++++++- >>>>>> net/sunrpc/stats.c | 8 ++++++-- >>>>>> 2 files changed, 12 insertions(+), 3 deletions(-) >>>>>> >>>>>> diff --git a/include/linux/sunrpc/metrics.h >>>>>> b/include/linux/sunrpc/metrics.h >>>>>> index 1b3751327575..0ee3f7052846 100644 >>>>>> --- a/include/linux/sunrpc/metrics.h >>>>>> +++ b/include/linux/sunrpc/metrics.h >>>>>> @@ -30,7 +30,7 @@ >>>>>> #include <linux/ktime.h> >>>>>> #include <linux/spinlock.h> >>>>>> >>>>>> -#define RPC_IOSTATS_VERS "1.0" >>>>>> +#define RPC_IOSTATS_VERS "1.1" >>>>>> >>>>>> struct rpc_iostats { >>>>>> spinlock_t om_lock; >>>>>> @@ -66,6 +66,11 @@ struct rpc_iostats { >>>>>> ktime_t om_queue, /* queued for >>>>>> xmit */ >>>>>> om_rtt, /* RPC RTT */ >>>>>> om_execute; /* RPC >>>>>> execution */ >>>>>> + /* >>>>>> + * The count of operations that complete with tk_status >>>>>> < 0. >>>>>> + * These statuses usually indicate error conditions. 
>>>>>> + */ >>>>>> + unsigned long om_error_status; >>>>>> } ____cacheline_aligned; >>>>>> >>>>>> struct rpc_task; >>>>>> diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c >>>>>> index 8b2d3c58ffae..737414247ca7 100644 >>>>>> --- a/net/sunrpc/stats.c >>>>>> +++ b/net/sunrpc/stats.c >>>>>> @@ -176,6 +176,8 @@ void rpc_count_iostats_metrics(const >>>>>> struct rpc_task *task, >>>>>> >>>>>> execute = ktime_sub(now, task->tk_start); >>>>>> op_metrics->om_execute = ktime_add(op_metrics- >>>>>>> om_execute, execute); >>>>>> + if (task->tk_status < 0) >>>>>> + op_metrics->om_error_status++; >>>>>> >>>>>> spin_unlock(&op_metrics->om_lock); >>>>>> >>>>>> @@ -218,13 +220,14 @@ static void _add_rpc_iostats(struct >>>>>> rpc_iostats *a, struct rpc_iostats *b) >>>>>> a->om_queue = ktime_add(a->om_queue, b->om_queue); >>>>>> a->om_rtt = ktime_add(a->om_rtt, b->om_rtt); >>>>>> a->om_execute = ktime_add(a->om_execute, b- >>>>>>> om_execute); >>>>>> + a->om_error_status += b->om_error_status; >>>>>> } >>>>>> >>>>>> static void _print_rpc_iostats(struct seq_file *seq, struct >>>>>> rpc_iostats *stats, >>>>>> int op, const struct >>>>>> rpc_procinfo *procs) >>>>>> { >>>>>> _print_name(seq, op, procs); >>>>>> - seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu >>>>>> %llu\n", >>>>>> + seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu >>>>>> %lu\n", >>>>>> stats->om_ops, >>>>>> stats->om_ntrans, >>>>>> stats->om_timeouts, >>>>>> @@ -232,7 +235,8 @@ static void _print_rpc_iostats(struct >>>>>> seq_file *seq, struct rpc_iostats *stats, >>>>>> stats->om_bytes_recv, >>>>>> ktime_to_ms(stats->om_queue), >>>>>> ktime_to_ms(stats->om_rtt), >>>>>> - ktime_to_ms(stats->om_execute)); >>>>>> + ktime_to_ms(stats->om_execute), >>>>>> + stats->om_error_status); >>>>>> } >>>>>> >>>>>> void rpc_clnt_show_stats(struct seq_file *seq, struct >>>>>> rpc_clnt *clnt) >>>>>> -- >>>>>> 2.20.1 >>>> >>>> -- >>>> Chuck Lever >> >> -- >> Chuck Lever -- Chuck Lever
On Mon, 2019-06-03 at 14:56 -0400, Chuck Lever wrote: > > On Jun 3, 2019, at 2:53 PM, Dave Wysochanski <dwysocha@redhat.com> > > wrote: > > > > On Fri, 2019-05-31 at 09:25 -0400, Chuck Lever wrote: > > > > On May 30, 2019, at 6:33 PM, Bruce Fields <bfields@fieldses.org > > > > > > > > > wrote: > > > > > > > > On Thu, May 30, 2019 at 06:19:54PM -0400, Chuck Lever wrote: > > > > > > > > > > > > > > > > On May 30, 2019, at 5:38 PM, bfields@fieldses.org wrote: > > > > > > > > > > > > On Thu, May 23, 2019 at 04:13:50PM -0400, Dave Wysochanski > > > > > > wrote: > > > > > > > We often see various error conditions with NFS4.x that > > > > > > > show > > > > > > > up with > > > > > > > a very high operation count all completing with tk_status > > > > > > > < 0 > > > > > > > in a > > > > > > > short period of time. Add a count to rpc_iostats to > > > > > > > record > > > > > > > on a > > > > > > > per-op basis the ops that complete in this manner, which > > > > > > > will > > > > > > > enable lower overhead diagnostics. > > > > > > > > > > > > Looks like a good idea to me. > > > > > > > > > > > > It's too bad we can't distinguish the errors. (E.g. ESTALE > > > > > > on > > > > > > a lookup > > > > > > call is a lot more interesting than ENOENT.) But > > > > > > understood > > > > > > that > > > > > > maintaining (number of possible errors) * (number of > > > > > > possible > > > > > > ops) > > > > > > counters is probably overkill, so just counting the number > > > > > > of > > > > > > errors > > > > > > seems like a good start. > > > > > > > > > > We now have trace points that can do that too. > > > > > > > > You mean, that can report every error (and its value)? > > > > > > Yes, the nfs_xdr_status trace point reports the error by value > > > and > > > symbolic name. > > > > > > > The tracepoint is very useful I agree. 
I don't think it will show: > > a) the mount > > b) the opcode > > > > Or am I mistaken and there's a way to get those with a filter or > > another tracepoint? > > The opcode can be exposed by another trace point, but the link > between > the two trace points is tenuous and could be improved. > I think the number is there but it's not decoded! This was for a WRITE completing with BAD_STATEID - very nice: kworker/u16:0-31130 [006] .... 949028.602298: nfs4_xdr_status: operation 38: nfs status -10025 (BAD_STATEID) > I don't believe any of the NFS trace points expose the mount. My > testing > is largely on a single mount so my imagination stopped there. > Ok thanks for the confirmation. > > > > > I assume having these statistics in mountstats is still useful, > > > > though. > > > > > > > > --b. > > > > > > > > > > > > > > > > > > > > --b. > > > > > > > > > > > > > > > > > > > > Signed-off-by: Dave Wysochanski <dwysocha@redhat.com> > > > > > > > --- > > > > > > > include/linux/sunrpc/metrics.h | 7 ++++++- > > > > > > > net/sunrpc/stats.c | 8 ++++++-- > > > > > > > 2 files changed, 12 insertions(+), 3 deletions(-) > > > > > > > > > > > > > > diff --git a/include/linux/sunrpc/metrics.h > > > > > > > b/include/linux/sunrpc/metrics.h > > > > > > > index 1b3751327575..0ee3f7052846 100644 > > > > > > > --- a/include/linux/sunrpc/metrics.h > > > > > > > +++ b/include/linux/sunrpc/metrics.h > > > > > > > @@ -30,7 +30,7 @@ > > > > > > > #include <linux/ktime.h> > > > > > > > #include <linux/spinlock.h> > > > > > > > > > > > > > > -#define RPC_IOSTATS_VERS "1.0" > > > > > > > +#define RPC_IOSTATS_VERS "1.1" > > > > > > > > > > > > > > struct rpc_iostats { > > > > > > > spinlock_t om_lock; > > > > > > > @@ -66,6 +66,11 @@ struct rpc_iostats { > > > > > > > ktime_t om_queue, /* queued > > > > > > > for > > > > > > > xmit */ > > > > > > > om_rtt, /* RPC > > > > > > > RTT */ > > > > > > > om_execute; /* RPC > > > > > > > execution */ > > > > > > > + /* > > > > > > > + * The count of 
operations that complete with tk_status > > > > > > > < 0. > > > > > > > + * These statuses usually indicate error conditions. > > > > > > > + */ > > > > > > > + unsigned long om_error_status; > > > > > > > } ____cacheline_aligned; > > > > > > > > > > > > > > struct rpc_task; > > > > > > > diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c > > > > > > > index 8b2d3c58ffae..737414247ca7 100644 > > > > > > > --- a/net/sunrpc/stats.c > > > > > > > +++ b/net/sunrpc/stats.c > > > > > > > @@ -176,6 +176,8 @@ void rpc_count_iostats_metrics(const > > > > > > > struct rpc_task *task, > > > > > > > > > > > > > > execute = ktime_sub(now, task->tk_start); > > > > > > > op_metrics->om_execute = ktime_add(op_metrics- > > > > > > > > om_execute, execute); > > > > > > > > > > > > > > + if (task->tk_status < 0) > > > > > > > + op_metrics->om_error_status++; > > > > > > > > > > > > > > spin_unlock(&op_metrics->om_lock); > > > > > > > > > > > > > > @@ -218,13 +220,14 @@ static void _add_rpc_iostats(struct > > > > > > > rpc_iostats *a, struct rpc_iostats *b) > > > > > > > a->om_queue = ktime_add(a->om_queue, b->om_queue); > > > > > > > a->om_rtt = ktime_add(a->om_rtt, b->om_rtt); > > > > > > > a->om_execute = ktime_add(a->om_execute, b- > > > > > > > > om_execute); > > > > > > > > > > > > > > + a->om_error_status += b->om_error_status; > > > > > > > } > > > > > > > > > > > > > > static void _print_rpc_iostats(struct seq_file *seq, > > > > > > > struct > > > > > > > rpc_iostats *stats, > > > > > > > int op, const struct > > > > > > > rpc_procinfo *procs) > > > > > > > { > > > > > > > _print_name(seq, op, procs); > > > > > > > - seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu > > > > > > > %llu\n", > > > > > > > + seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu > > > > > > > %lu\n", > > > > > > > stats->om_ops, > > > > > > > stats->om_ntrans, > > > > > > > stats->om_timeouts, > > > > > > > @@ -232,7 +235,8 @@ static void _print_rpc_iostats(struct > > > > > > > seq_file *seq, 
struct rpc_iostats *stats, > > > > > > > stats->om_bytes_recv, > > > > > > > ktime_to_ms(stats->om_queue), > > > > > > > ktime_to_ms(stats->om_rtt), > > > > > > > - ktime_to_ms(stats->om_execute)); > > > > > > > + ktime_to_ms(stats->om_execute), > > > > > > > + stats->om_error_status); > > > > > > > } > > > > > > > > > > > > > > void rpc_clnt_show_stats(struct seq_file *seq, struct > > > > > > > rpc_clnt *clnt) > > > > > > > -- > > > > > > > 2.20.1 > > > > > > > > > > -- > > > > > Chuck Lever > > > > > > -- > > > Chuck Lever > > -- > Chuck Lever > > >
> On Jun 3, 2019, at 3:05 PM, Dave Wysochanski <dwysocha@redhat.com> wrote: > > On Mon, 2019-06-03 at 14:56 -0400, Chuck Lever wrote: >>> On Jun 3, 2019, at 2:53 PM, Dave Wysochanski <dwysocha@redhat.com> >>> wrote: >>> >>> On Fri, 2019-05-31 at 09:25 -0400, Chuck Lever wrote: >>>>> On May 30, 2019, at 6:33 PM, Bruce Fields <bfields@fieldses.org >>>>>> >>>>> wrote: >>>>> >>>>> On Thu, May 30, 2019 at 06:19:54PM -0400, Chuck Lever wrote: >>>>>> >>>>>> >>>>>>> On May 30, 2019, at 5:38 PM, bfields@fieldses.org wrote: >>>>>>> >>>>>>> On Thu, May 23, 2019 at 04:13:50PM -0400, Dave Wysochanski >>>>>>> wrote: >>>>>>>> We often see various error conditions with NFS4.x that >>>>>>>> show >>>>>>>> up with >>>>>>>> a very high operation count all completing with tk_status >>>>>>>> < 0 >>>>>>>> in a >>>>>>>> short period of time. Add a count to rpc_iostats to >>>>>>>> record >>>>>>>> on a >>>>>>>> per-op basis the ops that complete in this manner, which >>>>>>>> will >>>>>>>> enable lower overhead diagnostics. >>>>>>> >>>>>>> Looks like a good idea to me. >>>>>>> >>>>>>> It's too bad we can't distinguish the errors. (E.g. ESTALE >>>>>>> on >>>>>>> a lookup >>>>>>> call is a lot more interesting than ENOENT.) But >>>>>>> understood >>>>>>> that >>>>>>> maintaining (number of possible errors) * (number of >>>>>>> possible >>>>>>> ops) >>>>>>> counters is probably overkill, so just counting the number >>>>>>> of >>>>>>> errors >>>>>>> seems like a good start. >>>>>> >>>>>> We now have trace points that can do that too. >>>>> >>>>> You mean, that can report every error (and its value)? >>>> >>>> Yes, the nfs_xdr_status trace point reports the error by value >>>> and >>>> symbolic name. >>>> >>> >>> The tracepoint is very useful I agree. I don't think it will show: >>> a) the mount >>> b) the opcode >>> >>> Or am I mistaken and there's a way to get those with a filter or >>> another tracepoint? 
>> >> The opcode can be exposed by another trace point, but the link >> between >> the two trace points is tenuous and could be improved. >> > > I think the number is there but it's not decoded! This was for a WRITE > completing with BAD_STATEID - very nice: > > kworker/u16:0-31130 [006] .... 949028.602298: nfs4_xdr_status: operation 38: nfs status -10025 (BAD_STATEID) OK, I'm smarter than I look. This shows the NFS operation, not the RPC procedure (which would be NFS4 COMPOUND), which is the right thing to do. >> I don't believe any of the NFS trace points expose the mount. My >> testing >> is largely on a single mount so my imagination stopped there. >> > > Ok thanks for the confirmation. Well, it's something worth thinking carefully about: if you see a slick way to make that information available, have at it. >>>>> I assume having these statistics in mountstats is still useful, >>>>> though. >>>>> >>>>> --b. >>>>> >>>>>> >>>>>> >>>>>>> --b. >>>>>>> >>>>>>>> >>>>>>>> Signed-off-by: Dave Wysochanski <dwysocha@redhat.com> >>>>>>>> --- >>>>>>>> include/linux/sunrpc/metrics.h | 7 ++++++- >>>>>>>> net/sunrpc/stats.c | 8 ++++++-- >>>>>>>> 2 files changed, 12 insertions(+), 3 deletions(-) >>>>>>>> >>>>>>>> diff --git a/include/linux/sunrpc/metrics.h >>>>>>>> b/include/linux/sunrpc/metrics.h >>>>>>>> index 1b3751327575..0ee3f7052846 100644 >>>>>>>> --- a/include/linux/sunrpc/metrics.h >>>>>>>> +++ b/include/linux/sunrpc/metrics.h >>>>>>>> @@ -30,7 +30,7 @@ >>>>>>>> #include <linux/ktime.h> >>>>>>>> #include <linux/spinlock.h> >>>>>>>> >>>>>>>> -#define RPC_IOSTATS_VERS "1.0" >>>>>>>> +#define RPC_IOSTATS_VERS "1.1" >>>>>>>> >>>>>>>> struct rpc_iostats { >>>>>>>> spinlock_t om_lock; >>>>>>>> @@ -66,6 +66,11 @@ struct rpc_iostats { >>>>>>>> ktime_t om_queue, /* queued >>>>>>>> for >>>>>>>> xmit */ >>>>>>>> om_rtt, /* RPC >>>>>>>> RTT */ >>>>>>>> om_execute; /* RPC >>>>>>>> execution */ >>>>>>>> + /* >>>>>>>> + * The count of operations that complete with tk_status >>>>>>>> 
< 0. >>>>>>>> + * These statuses usually indicate error conditions. >>>>>>>> + */ >>>>>>>> + unsigned long om_error_status; >>>>>>>> } ____cacheline_aligned; >>>>>>>> >>>>>>>> struct rpc_task; >>>>>>>> diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c >>>>>>>> index 8b2d3c58ffae..737414247ca7 100644 >>>>>>>> --- a/net/sunrpc/stats.c >>>>>>>> +++ b/net/sunrpc/stats.c >>>>>>>> @@ -176,6 +176,8 @@ void rpc_count_iostats_metrics(const >>>>>>>> struct rpc_task *task, >>>>>>>> >>>>>>>> execute = ktime_sub(now, task->tk_start); >>>>>>>> op_metrics->om_execute = ktime_add(op_metrics- >>>>>>>>> om_execute, execute); >>>>>>>> >>>>>>>> + if (task->tk_status < 0) >>>>>>>> + op_metrics->om_error_status++; >>>>>>>> >>>>>>>> spin_unlock(&op_metrics->om_lock); >>>>>>>> >>>>>>>> @@ -218,13 +220,14 @@ static void _add_rpc_iostats(struct >>>>>>>> rpc_iostats *a, struct rpc_iostats *b) >>>>>>>> a->om_queue = ktime_add(a->om_queue, b->om_queue); >>>>>>>> a->om_rtt = ktime_add(a->om_rtt, b->om_rtt); >>>>>>>> a->om_execute = ktime_add(a->om_execute, b- >>>>>>>>> om_execute); >>>>>>>> >>>>>>>> + a->om_error_status += b->om_error_status; >>>>>>>> } >>>>>>>> >>>>>>>> static void _print_rpc_iostats(struct seq_file *seq, >>>>>>>> struct >>>>>>>> rpc_iostats *stats, >>>>>>>> int op, const struct >>>>>>>> rpc_procinfo *procs) >>>>>>>> { >>>>>>>> _print_name(seq, op, procs); >>>>>>>> - seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu >>>>>>>> %llu\n", >>>>>>>> + seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu >>>>>>>> %lu\n", >>>>>>>> stats->om_ops, >>>>>>>> stats->om_ntrans, >>>>>>>> stats->om_timeouts, >>>>>>>> @@ -232,7 +235,8 @@ static void _print_rpc_iostats(struct >>>>>>>> seq_file *seq, struct rpc_iostats *stats, >>>>>>>> stats->om_bytes_recv, >>>>>>>> ktime_to_ms(stats->om_queue), >>>>>>>> ktime_to_ms(stats->om_rtt), >>>>>>>> - ktime_to_ms(stats->om_execute)); >>>>>>>> + ktime_to_ms(stats->om_execute), >>>>>>>> + stats->om_error_status); >>>>>>>> } >>>>>>>> >>>>>>>> void 
rpc_clnt_show_stats(struct seq_file *seq, struct >>>>>>>> rpc_clnt *clnt) >>>>>>>> -- >>>>>>>> 2.20.1 >>>>>> >>>>>> -- >>>>>> Chuck Lever >>>> >>>> -- >>>> Chuck Lever >> >> -- >> Chuck Lever -- Chuck Lever
On Mon, Jun 03, 2019 at 02:56:29PM -0400, Chuck Lever wrote: > > On Jun 3, 2019, at 2:53 PM, Dave Wysochanski <dwysocha@redhat.com> wrote: > > On Fri, 2019-05-31 at 09:25 -0400, Chuck Lever wrote: > >>> On May 30, 2019, at 6:33 PM, Bruce Fields <bfields@fieldses.org> > >>> wrote: > >>> On Thu, May 30, 2019 at 06:19:54PM -0400, Chuck Lever wrote: > >>>> We now have trace points that can do that too. > >>> > >>> You mean, that can report every error (and its value)? > >> > >> Yes, the nfs_xdr_status trace point reports the error by value and > >> symbolic name. > > > > The tracepoint is very useful I agree. I don't think it will show: > > a) the mount > > b) the opcode > > > > Or am I mistaken and there's a way to get those with a filter or > > another tracepoint? > > The opcode can be exposed by another trace point, but the link between > the two trace points is tenuous and could be improved. > > I don't believe any of the NFS trace points expose the mount. My testing > is largely on a single mount so my imagination stopped there. Dumb question: is it possible to add more fields to tracepoints without breaking some kind of backwards compatibility? I wonder if adding, say, an xid and an xprt pointer to tracepoints when available would help with this kind of thing. In any case, I think Dave's stats will still be handy if only because they're on all the time. --b.
On Tue, 2019-06-04 at 10:45 -0400, Bruce Fields wrote: > On Mon, Jun 03, 2019 at 02:56:29PM -0400, Chuck Lever wrote: > > > On Jun 3, 2019, at 2:53 PM, Dave Wysochanski <dwysocha@redhat.com > > > > wrote: > > > On Fri, 2019-05-31 at 09:25 -0400, Chuck Lever wrote: > > > > > On May 30, 2019, at 6:33 PM, Bruce Fields < > > > > > bfields@fieldses.org> > > > > > wrote: > > > > > On Thu, May 30, 2019 at 06:19:54PM -0400, Chuck Lever wrote: > > > > > > We now have trace points that can do that too. > > > > > > > > > > You mean, that can report every error (and its value)? > > > > > > > > Yes, the nfs_xdr_status trace point reports the error by value > > > > and > > > > symbolic name. > > > > > > The tracepoint is very useful I agree. I don't think it will > > > show: > > > a) the mount > > > b) the opcode > > > > > > Or am I mistaken and there's a way to get those with a filter or > > > another tracepoint? > > > > The opcode can be exposed by another trace point, but the link > > between > > the two trace points is tenuous and could be improved. > > > > I don't believe any of the NFS trace points expose the mount. My > > testing > > is largely on a single mount so my imagination stopped there. > > Dumb question: is it possible to add more fields to tracepoints > without > breaking some kind of backwards compatibility? Tracepoints are not to be considered an API. > I wonder if adding, say, an xid and an xprt pointer to tracepoints > when > available would help with this kind of thing. > > In any case, I think Dave's stats will still be handy if only because > they're on all the time. Given that someone who shall remain anonymous added the struct rpc_rqst to the struct xdr_stream for "debugging purposes", it should be quite possible to add both the XID and the xprt in this case.
On Tue, 2019-06-04 at 10:45 -0400, Bruce Fields wrote: > On Mon, Jun 03, 2019 at 02:56:29PM -0400, Chuck Lever wrote: > > > On Jun 3, 2019, at 2:53 PM, Dave Wysochanski <dwysocha@redhat.com > > > > wrote: > > > On Fri, 2019-05-31 at 09:25 -0400, Chuck Lever wrote: > > > > > On May 30, 2019, at 6:33 PM, Bruce Fields < > > > > > bfields@fieldses.org> > > > > > wrote: > > > > > On Thu, May 30, 2019 at 06:19:54PM -0400, Chuck Lever wrote: > > > > > > We now have trace points that can do that too. > > > > > > > > > > You mean, that can report every error (and its value)? > > > > > > > > Yes, the nfs_xdr_status trace point reports the error by value > > > > and > > > > symbolic name. > > > > > > The tracepoint is very useful I agree. I don't think it will > > > show: > > > a) the mount > > > b) the opcode > > > > > > Or am I mistaken and there's a way to get those with a filter or > > > another tracepoint? > > > > The opcode can be exposed by another trace point, but the link > > between > > the two trace points is tenuous and could be improved. > > > > I don't believe any of the NFS trace points expose the mount. My > > testing > > is largely on a single mount so my imagination stopped there. > > Dumb question: is it possible to add more fields to tracepoints > without > breaking some kind of backwards compatibility? > > I wonder if adding, say, an xid and an xprt pointer to tracepoints > when > available would help with this kind of thing. > > In any case, I think Dave's stats will still be handy if only because > they're on all the time. > > --b. Trond or Anna, will you take this series for mountstats or are you opposed to it? I think it is useful in conjunction with the tracepoints because it is always on and easy to know which mount is involved (we often start with a customer saying mount XYZ has some issue or is hanging). If you see problems or want other testing please let me know. Thanks!
diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h index 1b3751327575..0ee3f7052846 100644 --- a/include/linux/sunrpc/metrics.h +++ b/include/linux/sunrpc/metrics.h @@ -30,7 +30,7 @@ #include <linux/ktime.h> #include <linux/spinlock.h> -#define RPC_IOSTATS_VERS "1.0" +#define RPC_IOSTATS_VERS "1.1" struct rpc_iostats { spinlock_t om_lock; @@ -66,6 +66,11 @@ struct rpc_iostats { ktime_t om_queue, /* queued for xmit */ om_rtt, /* RPC RTT */ om_execute; /* RPC execution */ + /* + * The count of operations that complete with tk_status < 0. + * These statuses usually indicate error conditions. + */ + unsigned long om_error_status; } ____cacheline_aligned; struct rpc_task; diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 8b2d3c58ffae..737414247ca7 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -176,6 +176,8 @@ void rpc_count_iostats_metrics(const struct rpc_task *task, execute = ktime_sub(now, task->tk_start); op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute); + if (task->tk_status < 0) + op_metrics->om_error_status++; spin_unlock(&op_metrics->om_lock); @@ -218,13 +220,14 @@ static void _add_rpc_iostats(struct rpc_iostats *a, struct rpc_iostats *b) a->om_queue = ktime_add(a->om_queue, b->om_queue); a->om_rtt = ktime_add(a->om_rtt, b->om_rtt); a->om_execute = ktime_add(a->om_execute, b->om_execute); + a->om_error_status += b->om_error_status; } static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, int op, const struct rpc_procinfo *procs) { _print_name(seq, op, procs); - seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu\n", + seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n", stats->om_ops, stats->om_ntrans, stats->om_timeouts, @@ -232,7 +235,8 @@ static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, stats->om_bytes_recv, ktime_to_ms(stats->om_queue), ktime_to_ms(stats->om_rtt), - ktime_to_ms(stats->om_execute)); + 
ktime_to_ms(stats->om_execute), + stats->om_error_status); } void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt)
We often see various error conditions with NFS4.x that show up with a very high operation count all completing with tk_status < 0 in a short period of time. Add a count to rpc_iostats to record on a per-op basis the ops that complete in this manner, which will enable lower overhead diagnostics. Signed-off-by: Dave Wysochanski <dwysocha@redhat.com> --- include/linux/sunrpc/metrics.h | 7 ++++++- net/sunrpc/stats.c | 8 ++++++-- 2 files changed, 12 insertions(+), 3 deletions(-)