[v8,5/5] ceph: add global metadata perf metric support
diff mbox series

Message ID 20200221070556.18922-6-xiubli@redhat.com
State New
Headers show
Series
  • ceph: add perf metrics support
Related show

Commit Message

Xiubo Li Feb. 21, 2020, 7:05 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

It will calculate the latency for the metedata requests, which only
include the time cousumed by network and the ceph.

item          total       sum_lat(us)     avg_lat(us)
-----------------------------------------------------
metadata      113         220000          1946

URL: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/debugfs.c    |  6 ++++++
 fs/ceph/mds_client.c | 20 ++++++++++++++++++++
 fs/ceph/metric.h     | 13 +++++++++++++
 3 files changed, 39 insertions(+)

Comments

Jeff Layton Feb. 21, 2020, 12:03 p.m. UTC | #1
On Fri, 2020-02-21 at 02:05 -0500, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> It will calculate the latency for the metedata requests, which only
> include the time cousumed by network and the ceph.
> 

"and the ceph MDS" ?

> item          total       sum_lat(us)     avg_lat(us)
> -----------------------------------------------------
> metadata      113         220000          1946
> 
> URL: https://tracker.ceph.com/issues/43215
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/debugfs.c    |  6 ++++++
>  fs/ceph/mds_client.c | 20 ++++++++++++++++++++
>  fs/ceph/metric.h     | 13 +++++++++++++
>  3 files changed, 39 insertions(+)
> 
> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> index 464bfbdb970d..60f3e307fca1 100644
> --- a/fs/ceph/debugfs.c
> +++ b/fs/ceph/debugfs.c
> @@ -146,6 +146,12 @@ static int metric_show(struct seq_file *s, void *p)
>  	avg = total ? sum / total : 0;
>  	seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, sum, avg);
>  
> +	total = percpu_counter_sum(&mdsc->metric.total_metadatas);
> +	sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum);
> +	sum = jiffies_to_usecs(sum);
> +	avg = total ? sum / total : 0;
> +	seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, sum, avg);
> +
>  	seq_printf(s, "\n");
>  	seq_printf(s, "item          total           miss            hit\n");
>  	seq_printf(s, "-------------------------------------------------\n");
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 0a3447966b26..3e792eca6af7 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -3017,6 +3017,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
>  
>  	/* kick calling process */
>  	complete_request(mdsc, req);
> +
> +	if (!result || result == -ENOENT) {
> +		s64 latency = jiffies - req->r_started;
> +
> +		ceph_update_metadata_latency(&mdsc->metric, latency);
> +	}

Should we add an r_end_stamp field to the mds request struct and use
that to calculate this? Many jiffies may have passed between the reply
coming in and this point. If you really want to measure the latency that
would be more accurate, I think.

>  out:
>  	ceph_mdsc_put_request(req);
>  	return;
> @@ -4196,8 +4202,20 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
>  	if (ret)
>  		goto err_write_latency_sum;
>  
> +	ret = percpu_counter_init(&metric->total_metadatas, 0, GFP_KERNEL);
> +	if (ret)
> +		goto err_total_metadatas;
> +
> +	ret = percpu_counter_init(&metric->metadata_latency_sum, 0, GFP_KERNEL);
> +	if (ret)
> +		goto err_metadata_latency_sum;
> +
>  	return 0;
>  
> +err_metadata_latency_sum:
> +	percpu_counter_destroy(&metric->total_metadatas);
> +err_total_metadatas:
> +	percpu_counter_destroy(&metric->write_latency_sum);
>  err_write_latency_sum:
>  	percpu_counter_destroy(&metric->total_writes);
>  err_total_writes:
> @@ -4553,6 +4571,8 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
>  
>  	ceph_mdsc_stop(mdsc);
>  
> +	percpu_counter_destroy(&mdsc->metric.metadata_latency_sum);
> +	percpu_counter_destroy(&mdsc->metric.total_metadatas);
>  	percpu_counter_destroy(&mdsc->metric.write_latency_sum);
>  	percpu_counter_destroy(&mdsc->metric.total_writes);
>  	percpu_counter_destroy(&mdsc->metric.read_latency_sum);
> diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
> index a3d342f258e6..4c14b4ac089d 100644
> --- a/fs/ceph/metric.h
> +++ b/fs/ceph/metric.h
> @@ -18,6 +18,9 @@ struct ceph_client_metric {
>  
>  	struct percpu_counter total_writes;
>  	struct percpu_counter write_latency_sum;
> +
> +	struct percpu_counter total_metadatas;
> +	struct percpu_counter metadata_latency_sum;
>  };
>  
>  static inline void ceph_update_cap_hit(struct ceph_client_metric *m)
> @@ -65,4 +68,14 @@ static inline void ceph_update_write_latency(struct ceph_client_metric *m,
>  		percpu_counter_add(&m->write_latency_sum, latency);
>  	}
>  }
> +
> +static inline void ceph_update_metadata_latency(struct ceph_client_metric *m,
> +						s64 latency)
> +{
> +	if (!m)
> +		return;
> +
> +	percpu_counter_inc(&m->total_metadatas);
> +	percpu_counter_add(&m->metadata_latency_sum, latency);
> +}
>  #endif /* _FS_CEPH_MDS_METRIC_H */
Ilya Dryomov Feb. 21, 2020, 2:56 p.m. UTC | #2
On Fri, Feb 21, 2020 at 1:03 PM Jeff Layton <jlayton@kernel.org> wrote:
>
> On Fri, 2020-02-21 at 02:05 -0500, xiubli@redhat.com wrote:
> > From: Xiubo Li <xiubli@redhat.com>
> >
> > It will calculate the latency for the metedata requests, which only
> > include the time cousumed by network and the ceph.
> >
>
> "and the ceph MDS" ?
>
> > item          total       sum_lat(us)     avg_lat(us)
> > -----------------------------------------------------
> > metadata      113         220000          1946
> >
> > URL: https://tracker.ceph.com/issues/43215
> > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > ---
> >  fs/ceph/debugfs.c    |  6 ++++++
> >  fs/ceph/mds_client.c | 20 ++++++++++++++++++++
> >  fs/ceph/metric.h     | 13 +++++++++++++
> >  3 files changed, 39 insertions(+)
> >
> > diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> > index 464bfbdb970d..60f3e307fca1 100644
> > --- a/fs/ceph/debugfs.c
> > +++ b/fs/ceph/debugfs.c
> > @@ -146,6 +146,12 @@ static int metric_show(struct seq_file *s, void *p)
> >       avg = total ? sum / total : 0;
> >       seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, sum, avg);
> >
> > +     total = percpu_counter_sum(&mdsc->metric.total_metadatas);
> > +     sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum);
> > +     sum = jiffies_to_usecs(sum);
> > +     avg = total ? sum / total : 0;
> > +     seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, sum, avg);
> > +
> >       seq_printf(s, "\n");
> >       seq_printf(s, "item          total           miss            hit\n");
> >       seq_printf(s, "-------------------------------------------------\n");
> > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > index 0a3447966b26..3e792eca6af7 100644
> > --- a/fs/ceph/mds_client.c
> > +++ b/fs/ceph/mds_client.c
> > @@ -3017,6 +3017,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
> >
> >       /* kick calling process */
> >       complete_request(mdsc, req);
> > +
> > +     if (!result || result == -ENOENT) {
> > +             s64 latency = jiffies - req->r_started;
> > +
> > +             ceph_update_metadata_latency(&mdsc->metric, latency);
> > +     }
>
> Should we add an r_end_stamp field to the mds request struct and use
> that to calculate this? Many jiffies may have passed between the reply
> coming in and this point. If you really want to measure the latency that
> would be more accurate, I think.

Yes, capturing it after invoking the callback is inconsistent
with what is done for OSD requests (the new r_end_stamp is set in
finish_request()).

It looks like this is the only place where MDS r_end_stamp would be
needed, so perhaps just move this before complete_request() call?

Thanks,

                Ilya
Xiubo Li Feb. 22, 2020, 1:09 a.m. UTC | #3
On 2020/2/21 20:03, Jeff Layton wrote:
> On Fri, 2020-02-21 at 02:05 -0500, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> It will calculate the latency for the metedata requests, which only
>> include the time cousumed by network and the ceph.
>>
> "and the ceph MDS" ?

Yeah, will fix the commit comment.


>
>> item          total       sum_lat(us)     avg_lat(us)
>> -----------------------------------------------------
>> metadata      113         220000          1946
>>
>> URL: https://tracker.ceph.com/issues/43215
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/debugfs.c    |  6 ++++++
>>   fs/ceph/mds_client.c | 20 ++++++++++++++++++++
>>   fs/ceph/metric.h     | 13 +++++++++++++
>>   3 files changed, 39 insertions(+)
>>
>> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
>> index 464bfbdb970d..60f3e307fca1 100644
>> --- a/fs/ceph/debugfs.c
>> +++ b/fs/ceph/debugfs.c
>> @@ -146,6 +146,12 @@ static int metric_show(struct seq_file *s, void *p)
>>   	avg = total ? sum / total : 0;
>>   	seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, sum, avg);
>>   
>> +	total = percpu_counter_sum(&mdsc->metric.total_metadatas);
>> +	sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum);
>> +	sum = jiffies_to_usecs(sum);
>> +	avg = total ? sum / total : 0;
>> +	seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, sum, avg);
>> +
>>   	seq_printf(s, "\n");
>>   	seq_printf(s, "item          total           miss            hit\n");
>>   	seq_printf(s, "-------------------------------------------------\n");
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 0a3447966b26..3e792eca6af7 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -3017,6 +3017,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
>>   
>>   	/* kick calling process */
>>   	complete_request(mdsc, req);
>> +
>> +	if (!result || result == -ENOENT) {
>> +		s64 latency = jiffies - req->r_started;
>> +
>> +		ceph_update_metadata_latency(&mdsc->metric, latency);
>> +	}
> Should we add an r_end_stamp field to the mds request struct and use
> that to calculate this? Many jiffies may have passed between the reply
> coming in and this point. If you really want to measure the latency that
> would be more accurate, I think.

Okay, will add it.

Thanks.

BRs

>>   out:
>>   	ceph_mdsc_put_request(req);
>>   	return;
>> @@ -4196,8 +4202,20 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
>>   	if (ret)
>>   		goto err_write_latency_sum;
>>   
>> +	ret = percpu_counter_init(&metric->total_metadatas, 0, GFP_KERNEL);
>> +	if (ret)
>> +		goto err_total_metadatas;
>> +
>> +	ret = percpu_counter_init(&metric->metadata_latency_sum, 0, GFP_KERNEL);
>> +	if (ret)
>> +		goto err_metadata_latency_sum;
>> +
>>   	return 0;
>>   
>> +err_metadata_latency_sum:
>> +	percpu_counter_destroy(&metric->total_metadatas);
>> +err_total_metadatas:
>> +	percpu_counter_destroy(&metric->write_latency_sum);
>>   err_write_latency_sum:
>>   	percpu_counter_destroy(&metric->total_writes);
>>   err_total_writes:
>> @@ -4553,6 +4571,8 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
>>   
>>   	ceph_mdsc_stop(mdsc);
>>   
>> +	percpu_counter_destroy(&mdsc->metric.metadata_latency_sum);
>> +	percpu_counter_destroy(&mdsc->metric.total_metadatas);
>>   	percpu_counter_destroy(&mdsc->metric.write_latency_sum);
>>   	percpu_counter_destroy(&mdsc->metric.total_writes);
>>   	percpu_counter_destroy(&mdsc->metric.read_latency_sum);
>> diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
>> index a3d342f258e6..4c14b4ac089d 100644
>> --- a/fs/ceph/metric.h
>> +++ b/fs/ceph/metric.h
>> @@ -18,6 +18,9 @@ struct ceph_client_metric {
>>   
>>   	struct percpu_counter total_writes;
>>   	struct percpu_counter write_latency_sum;
>> +
>> +	struct percpu_counter total_metadatas;
>> +	struct percpu_counter metadata_latency_sum;
>>   };
>>   
>>   static inline void ceph_update_cap_hit(struct ceph_client_metric *m)
>> @@ -65,4 +68,14 @@ static inline void ceph_update_write_latency(struct ceph_client_metric *m,
>>   		percpu_counter_add(&m->write_latency_sum, latency);
>>   	}
>>   }
>> +
>> +static inline void ceph_update_metadata_latency(struct ceph_client_metric *m,
>> +						s64 latency)
>> +{
>> +	if (!m)
>> +		return;
>> +
>> +	percpu_counter_inc(&m->total_metadatas);
>> +	percpu_counter_add(&m->metadata_latency_sum, latency);
>> +}
>>   #endif /* _FS_CEPH_MDS_METRIC_H */
Xiubo Li Feb. 22, 2020, 1:20 a.m. UTC | #4
On 2020/2/21 22:56, Ilya Dryomov wrote:
> On Fri, Feb 21, 2020 at 1:03 PM Jeff Layton <jlayton@kernel.org> wrote:
>> On Fri, 2020-02-21 at 02:05 -0500, xiubli@redhat.com wrote:
>>> From: Xiubo Li <xiubli@redhat.com>
>>>
>>> It will calculate the latency for the metedata requests, which only
>>> include the time cousumed by network and the ceph.
>>>
>> "and the ceph MDS" ?
>>
>>> item          total       sum_lat(us)     avg_lat(us)
>>> -----------------------------------------------------
>>> metadata      113         220000          1946
>>>
>>> URL: https://tracker.ceph.com/issues/43215
>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>> ---
>>>   fs/ceph/debugfs.c    |  6 ++++++
>>>   fs/ceph/mds_client.c | 20 ++++++++++++++++++++
>>>   fs/ceph/metric.h     | 13 +++++++++++++
>>>   3 files changed, 39 insertions(+)
>>>
>>> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
>>> index 464bfbdb970d..60f3e307fca1 100644
>>> --- a/fs/ceph/debugfs.c
>>> +++ b/fs/ceph/debugfs.c
>>> @@ -146,6 +146,12 @@ static int metric_show(struct seq_file *s, void *p)
>>>        avg = total ? sum / total : 0;
>>>        seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, sum, avg);
>>>
>>> +     total = percpu_counter_sum(&mdsc->metric.total_metadatas);
>>> +     sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum);
>>> +     sum = jiffies_to_usecs(sum);
>>> +     avg = total ? sum / total : 0;
>>> +     seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, sum, avg);
>>> +
>>>        seq_printf(s, "\n");
>>>        seq_printf(s, "item          total           miss            hit\n");
>>>        seq_printf(s, "-------------------------------------------------\n");
>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>> index 0a3447966b26..3e792eca6af7 100644
>>> --- a/fs/ceph/mds_client.c
>>> +++ b/fs/ceph/mds_client.c
>>> @@ -3017,6 +3017,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
>>>
>>>        /* kick calling process */
>>>        complete_request(mdsc, req);
>>> +
>>> +     if (!result || result == -ENOENT) {
>>> +             s64 latency = jiffies - req->r_started;
>>> +
>>> +             ceph_update_metadata_latency(&mdsc->metric, latency);
>>> +     }
>> Should we add an r_end_stamp field to the mds request struct and use
>> that to calculate this? Many jiffies may have passed between the reply
>> coming in and this point. If you really want to measure the latency that
>> would be more accurate, I think.
> Yes, capturing it after invoking the callback is inconsistent
> with what is done for OSD requests (the new r_end_stamp is set in
> finish_request()).
>
> It looks like this is the only place where MDS r_end_stamp would be
> needed, so perhaps just move this before complete_request() call?

Currently for the OSD requests, they are almost in the same place where 
at the end of the handle_reply.

If we don't want to calculate the consumption by the most of 
handle_reply code, we may set the r_end_stamp in the begin of it for 
both OSD/MDS requests ?

I'm thinking since in the handle_reply, it may also wait for the mutex 
locks and then sleeps, so move the r_end_stamp to the beginning should 
make sense...

Thanks
BRs


> Thanks,
>
>                  Ilya
>
Xiubo Li Feb. 22, 2020, 1:36 a.m. UTC | #5
On 2020/2/22 9:20, Xiubo Li wrote:
> On 2020/2/21 22:56, Ilya Dryomov wrote:
>> On Fri, Feb 21, 2020 at 1:03 PM Jeff Layton <jlayton@kernel.org> wrote:
>>> On Fri, 2020-02-21 at 02:05 -0500, xiubli@redhat.com wrote:
>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>
>>>> It will calculate the latency for the metedata requests, which only
>>>> include the time cousumed by network and the ceph.
>>>>
>>> "and the ceph MDS" ?
>>>
>>>> item          total sum_lat(us)     avg_lat(us)
>>>> -----------------------------------------------------
>>>> metadata      113         220000          1946
>>>>
>>>> URL: https://tracker.ceph.com/issues/43215
>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>> ---
>>>>   fs/ceph/debugfs.c    |  6 ++++++
>>>>   fs/ceph/mds_client.c | 20 ++++++++++++++++++++
>>>>   fs/ceph/metric.h     | 13 +++++++++++++
>>>>   3 files changed, 39 insertions(+)
>>>>
>>>> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
>>>> index 464bfbdb970d..60f3e307fca1 100644
>>>> --- a/fs/ceph/debugfs.c
>>>> +++ b/fs/ceph/debugfs.c
>>>> @@ -146,6 +146,12 @@ static int metric_show(struct seq_file *s, 
>>>> void *p)
>>>>        avg = total ? sum / total : 0;
>>>>        seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, 
>>>> sum, avg);
>>>>
>>>> +     total = percpu_counter_sum(&mdsc->metric.total_metadatas);
>>>> +     sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum);
>>>> +     sum = jiffies_to_usecs(sum);
>>>> +     avg = total ? sum / total : 0;
>>>> +     seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, 
>>>> sum, avg);
>>>> +
>>>>        seq_printf(s, "\n");
>>>>        seq_printf(s, "item          total miss            hit\n");
>>>>        seq_printf(s, 
>>>> "-------------------------------------------------\n");
>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>> index 0a3447966b26..3e792eca6af7 100644
>>>> --- a/fs/ceph/mds_client.c
>>>> +++ b/fs/ceph/mds_client.c
>>>> @@ -3017,6 +3017,12 @@ static void handle_reply(struct 
>>>> ceph_mds_session *session, struct ceph_msg *msg)
>>>>
>>>>        /* kick calling process */
>>>>        complete_request(mdsc, req);
>>>> +
>>>> +     if (!result || result == -ENOENT) {
>>>> +             s64 latency = jiffies - req->r_started;
>>>> +
>>>> + ceph_update_metadata_latency(&mdsc->metric, latency);
>>>> +     }
>>> Should we add an r_end_stamp field to the mds request struct and use
>>> that to calculate this? Many jiffies may have passed between the reply
>>> coming in and this point. If you really want to measure the latency 
>>> that
>>> would be more accurate, I think.
>> Yes, capturing it after invoking the callback is inconsistent
>> with what is done for OSD requests (the new r_end_stamp is set in
>> finish_request()).
>>
>> It looks like this is the only place where MDS r_end_stamp would be
>> needed, so perhaps just move this before complete_request() call?
>
> Currently for the OSD requests, they are almost in the same place 
> where at the end of the handle_reply.
>
> If we don't want to calculate the consumption by the most of 
> handle_reply code, we may set the r_end_stamp in the begin of it for 
> both OSD/MDS requests ?
>
> I'm thinking since in the handle_reply, it may also wait for the mutex 
> locks and then sleeps, so move the r_end_stamp to the beginning should 
> make sense...
>
Currently case: we are calculating the time took from OSD/MDS requests 
creating until the handle_reply finishes.

Another case mentioned above: we should move the r_end_stamp to the 
handle_reply beginning, move r_start_stamp just before the requesting 
submitted.

Which one should be better ?

Thanks.

BRs


> Thanks
> BRs
>
>
>> Thanks,
>>
>>                  Ilya
>>
>
Ilya Dryomov Feb. 24, 2020, 9:56 a.m. UTC | #6
On Sat, Feb 22, 2020 at 2:21 AM Xiubo Li <xiubli@redhat.com> wrote:
>
> On 2020/2/21 22:56, Ilya Dryomov wrote:
> > On Fri, Feb 21, 2020 at 1:03 PM Jeff Layton <jlayton@kernel.org> wrote:
> >> On Fri, 2020-02-21 at 02:05 -0500, xiubli@redhat.com wrote:
> >>> From: Xiubo Li <xiubli@redhat.com>
> >>>
> >>> It will calculate the latency for the metedata requests, which only
> >>> include the time cousumed by network and the ceph.
> >>>
> >> "and the ceph MDS" ?
> >>
> >>> item          total       sum_lat(us)     avg_lat(us)
> >>> -----------------------------------------------------
> >>> metadata      113         220000          1946
> >>>
> >>> URL: https://tracker.ceph.com/issues/43215
> >>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> >>> ---
> >>>   fs/ceph/debugfs.c    |  6 ++++++
> >>>   fs/ceph/mds_client.c | 20 ++++++++++++++++++++
> >>>   fs/ceph/metric.h     | 13 +++++++++++++
> >>>   3 files changed, 39 insertions(+)
> >>>
> >>> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> >>> index 464bfbdb970d..60f3e307fca1 100644
> >>> --- a/fs/ceph/debugfs.c
> >>> +++ b/fs/ceph/debugfs.c
> >>> @@ -146,6 +146,12 @@ static int metric_show(struct seq_file *s, void *p)
> >>>        avg = total ? sum / total : 0;
> >>>        seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, sum, avg);
> >>>
> >>> +     total = percpu_counter_sum(&mdsc->metric.total_metadatas);
> >>> +     sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum);
> >>> +     sum = jiffies_to_usecs(sum);
> >>> +     avg = total ? sum / total : 0;
> >>> +     seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, sum, avg);
> >>> +
> >>>        seq_printf(s, "\n");
> >>>        seq_printf(s, "item          total           miss            hit\n");
> >>>        seq_printf(s, "-------------------------------------------------\n");
> >>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> >>> index 0a3447966b26..3e792eca6af7 100644
> >>> --- a/fs/ceph/mds_client.c
> >>> +++ b/fs/ceph/mds_client.c
> >>> @@ -3017,6 +3017,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
> >>>
> >>>        /* kick calling process */
> >>>        complete_request(mdsc, req);
> >>> +
> >>> +     if (!result || result == -ENOENT) {
> >>> +             s64 latency = jiffies - req->r_started;
> >>> +
> >>> +             ceph_update_metadata_latency(&mdsc->metric, latency);
> >>> +     }
> >> Should we add an r_end_stamp field to the mds request struct and use
> >> that to calculate this? Many jiffies may have passed between the reply
> >> coming in and this point. If you really want to measure the latency that
> >> would be more accurate, I think.
> > Yes, capturing it after invoking the callback is inconsistent
> > with what is done for OSD requests (the new r_end_stamp is set in
> > finish_request()).
> >
> > It looks like this is the only place where MDS r_end_stamp would be
> > needed, so perhaps just move this before complete_request() call?
>
> Currently for the OSD requests, they are almost in the same place where
> at the end of the handle_reply.

For OSD requests, r_end_request is captured _before_ the supplied
callback is invoked.  I suggest to do the same for MDS requests.

>
> If we don't want to calculate the consumption by the most of
> handle_reply code, we may set the r_end_stamp in the begin of it for
> both OSD/MDS requests ?
>
> I'm thinking since in the handle_reply, it may also wait for the mutex
> locks and then sleeps, so move the r_end_stamp to the beginning should
> make sense...

No, the time spent in handle_reply() must be included, just like the
time spent in submit_request() is included.  If you look at the code,
you will see that there is a case where handle_reply() drops the reply
on the floor and resubmits the OSD request.  Further, on many errors,
handle_reply() isn't even called, so finish_request() is the only place
where r_end_stamp can be captured in a consistent manner.

Thanks,

                Ilya
Xiubo Li Feb. 24, 2020, 10:20 a.m. UTC | #7
On 2020/2/24 17:56, Ilya Dryomov wrote:
> On Sat, Feb 22, 2020 at 2:21 AM Xiubo Li <xiubli@redhat.com> wrote:
>> On 2020/2/21 22:56, Ilya Dryomov wrote:
>>> On Fri, Feb 21, 2020 at 1:03 PM Jeff Layton <jlayton@kernel.org> wrote:
>>>> On Fri, 2020-02-21 at 02:05 -0500, xiubli@redhat.com wrote:
>>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>>
>>>>> It will calculate the latency for the metedata requests, which only
>>>>> include the time cousumed by network and the ceph.
>>>>>
>>>> "and the ceph MDS" ?
>>>>
>>>>> item          total       sum_lat(us)     avg_lat(us)
>>>>> -----------------------------------------------------
>>>>> metadata      113         220000          1946
>>>>>
>>>>> URL: https://tracker.ceph.com/issues/43215
>>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>>> ---
>>>>>    fs/ceph/debugfs.c    |  6 ++++++
>>>>>    fs/ceph/mds_client.c | 20 ++++++++++++++++++++
>>>>>    fs/ceph/metric.h     | 13 +++++++++++++
>>>>>    3 files changed, 39 insertions(+)
>>>>>
>>>>> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
>>>>> index 464bfbdb970d..60f3e307fca1 100644
>>>>> --- a/fs/ceph/debugfs.c
>>>>> +++ b/fs/ceph/debugfs.c
>>>>> @@ -146,6 +146,12 @@ static int metric_show(struct seq_file *s, void *p)
>>>>>         avg = total ? sum / total : 0;
>>>>>         seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, sum, avg);
>>>>>
>>>>> +     total = percpu_counter_sum(&mdsc->metric.total_metadatas);
>>>>> +     sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum);
>>>>> +     sum = jiffies_to_usecs(sum);
>>>>> +     avg = total ? sum / total : 0;
>>>>> +     seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, sum, avg);
>>>>> +
>>>>>         seq_printf(s, "\n");
>>>>>         seq_printf(s, "item          total           miss            hit\n");
>>>>>         seq_printf(s, "-------------------------------------------------\n");
>>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>>> index 0a3447966b26..3e792eca6af7 100644
>>>>> --- a/fs/ceph/mds_client.c
>>>>> +++ b/fs/ceph/mds_client.c
>>>>> @@ -3017,6 +3017,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
>>>>>
>>>>>         /* kick calling process */
>>>>>         complete_request(mdsc, req);
>>>>> +
>>>>> +     if (!result || result == -ENOENT) {
>>>>> +             s64 latency = jiffies - req->r_started;
>>>>> +
>>>>> +             ceph_update_metadata_latency(&mdsc->metric, latency);
>>>>> +     }
>>>> Should we add an r_end_stamp field to the mds request struct and use
>>>> that to calculate this? Many jiffies may have passed between the reply
>>>> coming in and this point. If you really want to measure the latency that
>>>> would be more accurate, I think.
>>> Yes, capturing it after invoking the callback is inconsistent
>>> with what is done for OSD requests (the new r_end_stamp is set in
>>> finish_request()).
>>>
>>> It looks like this is the only place where MDS r_end_stamp would be
>>> needed, so perhaps just move this before complete_request() call?
>> Currently for the OSD requests, they are almost in the same place where
>> at the end of the handle_reply.
> For OSD requests, r_end_request is captured _before_ the supplied
> callback is invoked.  I suggest to do the same for MDS requests.

Will do it in complete_request() for MDS case, there is no finish_request().


>> If we don't want to calculate the consumption by the most of
>> handle_reply code, we may set the r_end_stamp in the begin of it for
>> both OSD/MDS requests ?
>>
>> I'm thinking since in the handle_reply, it may also wait for the mutex
>> locks and then sleeps, so move the r_end_stamp to the beginning should
>> make sense...
> No, the time spent in handle_reply() must be included, just like the
> time spent in submit_request() is included.  If you look at the code,
> you will see that there is a case where handle_reply() drops the reply
> on the floor and resubmits the OSD request.  Further, on many errors,
> handle_reply() isn't even called, so finish_request() is the only place
> where r_end_stamp can be captured in a consistent manner.

Okay, get it.

Thanks Ilya.

BRs


> Thanks,
>
>                  Ilya
>

Patch
diff mbox series

diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 464bfbdb970d..60f3e307fca1 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -146,6 +146,12 @@  static int metric_show(struct seq_file *s, void *p)
 	avg = total ? sum / total : 0;
 	seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, sum, avg);
 
+	total = percpu_counter_sum(&mdsc->metric.total_metadatas);
+	sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum);
+	sum = jiffies_to_usecs(sum);
+	avg = total ? sum / total : 0;
+	seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, sum, avg);
+
 	seq_printf(s, "\n");
 	seq_printf(s, "item          total           miss            hit\n");
 	seq_printf(s, "-------------------------------------------------\n");
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0a3447966b26..3e792eca6af7 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3017,6 +3017,12 @@  static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 
 	/* kick calling process */
 	complete_request(mdsc, req);
+
+	if (!result || result == -ENOENT) {
+		s64 latency = jiffies - req->r_started;
+
+		ceph_update_metadata_latency(&mdsc->metric, latency);
+	}
 out:
 	ceph_mdsc_put_request(req);
 	return;
@@ -4196,8 +4202,20 @@  static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
 	if (ret)
 		goto err_write_latency_sum;
 
+	ret = percpu_counter_init(&metric->total_metadatas, 0, GFP_KERNEL);
+	if (ret)
+		goto err_total_metadatas;
+
+	ret = percpu_counter_init(&metric->metadata_latency_sum, 0, GFP_KERNEL);
+	if (ret)
+		goto err_metadata_latency_sum;
+
 	return 0;
 
+err_metadata_latency_sum:
+	percpu_counter_destroy(&metric->total_metadatas);
+err_total_metadatas:
+	percpu_counter_destroy(&metric->write_latency_sum);
 err_write_latency_sum:
 	percpu_counter_destroy(&metric->total_writes);
 err_total_writes:
@@ -4553,6 +4571,8 @@  void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
 
 	ceph_mdsc_stop(mdsc);
 
+	percpu_counter_destroy(&mdsc->metric.metadata_latency_sum);
+	percpu_counter_destroy(&mdsc->metric.total_metadatas);
 	percpu_counter_destroy(&mdsc->metric.write_latency_sum);
 	percpu_counter_destroy(&mdsc->metric.total_writes);
 	percpu_counter_destroy(&mdsc->metric.read_latency_sum);
diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
index a3d342f258e6..4c14b4ac089d 100644
--- a/fs/ceph/metric.h
+++ b/fs/ceph/metric.h
@@ -18,6 +18,9 @@  struct ceph_client_metric {
 
 	struct percpu_counter total_writes;
 	struct percpu_counter write_latency_sum;
+
+	struct percpu_counter total_metadatas;
+	struct percpu_counter metadata_latency_sum;
 };
 
 static inline void ceph_update_cap_hit(struct ceph_client_metric *m)
@@ -65,4 +68,14 @@  static inline void ceph_update_write_latency(struct ceph_client_metric *m,
 		percpu_counter_add(&m->write_latency_sum, latency);
 	}
 }
+
+static inline void ceph_update_metadata_latency(struct ceph_client_metric *m,
+						s64 latency)
+{
+	if (!m)
+		return;
+
+	percpu_counter_inc(&m->total_metadatas);
+	percpu_counter_add(&m->metadata_latency_sum, latency);
+}
 #endif /* _FS_CEPH_MDS_METRIC_H */