diff mbox series

[2/3] Track basic SCMI statistics

Message ID 20240701142851.1448515-3-luke.parkin@arm.com (mailing list archive)
State New
Headers show
Series Add Per-transport SCMI debug statistics | expand

Commit Message

Luke Parkin July 1, 2024, 2:28 p.m. UTC
Add scmi_debug_stats struct with atomic_t types to prevent racing.

Add tracking of 5 initial statistics
- sent_ok & sent_fail
- response_ok & dlyd_response_ok
- xfers_response_timeout

Signed-off-by: Luke Parkin <luke.parkin@arm.com>
---
 drivers/firmware/arm_scmi/driver.c | 46 +++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

Comments

Cristian Marussi July 1, 2024, 4:15 p.m. UTC | #1
On Mon, Jul 01, 2024 at 03:28:50PM +0100, Luke Parkin wrote:
> Add scmi_debug_stats struct with atomic_t types to prevent racing.
> 

Hi

"Add SCMI debug statisticts based on atomic types to prevent races."

if you want to specify why you did it this way in the commit message,
but this seems really more something for a comment in the doxygen that
on the commit log.

> Add tracking of 5 initial statistics
> - sent_ok & sent_fail
> - response_ok & dlyd_response_ok
> - xfers_response_timeout

Morover I would not specify here in the log what you added, you can see
it from the code. "Add some initial stats counter" if you want.

> 
> Signed-off-by: Luke Parkin <luke.parkin@arm.com>
> ---
>  drivers/firmware/arm_scmi/driver.c | 46 +++++++++++++++++++++++++++++-
>  1 file changed, 45 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
> index 6b6957f4743f..f69dff699d48 100644
> --- a/drivers/firmware/arm_scmi/driver.c
> +++ b/drivers/firmware/arm_scmi/driver.c
> @@ -125,6 +125,22 @@ struct scmi_debug_info {
>  	bool is_atomic;
>  };
>  
> +/**
> + * struct scmi_debug_stats - Debug statistics
> + * @sent_ok: Count of successful sends
> + * @sent_fail: Count of failed sends
> + * @response_ok: Count of successful responses
> + * @dlyd_response_ok: Count of successful delayed responses
> + * @xfers_response_timeout: Count of xfer response timeouts
> + */
> +struct scmi_debug_stats {
> +	atomic_t sent_ok;
> +	atomic_t sent_fail;
> +	atomic_t response_ok;
> +	atomic_t dlyd_response_ok;
> +	atomic_t xfers_response_timeout;
> +};
> +
>  /**
>   * struct scmi_info - Structure representing a SCMI instance
>   *
> @@ -141,6 +157,7 @@ struct scmi_debug_info {
>   * @protocols: IDR for protocols' instance descriptors initialized for
>   *	       this SCMI instance: populated on protocol's first attempted
>   *	       usage.
> + * @stats: Contains several atomic_t's for tracking various statistics
>   * @protocols_mtx: A mutex to protect protocols instances initialization.
>   * @protocols_imp: List of protocols implemented, currently maximum of
>   *		   scmi_revision_info.num_protocols elements allocated by the
> @@ -174,6 +191,7 @@ struct scmi_info {
>  	struct idr tx_idr;
>  	struct idr rx_idr;
>  	struct idr protocols;
> +	struct scmi_debug_stats stats;

Pleaase move this field right after scmi_debug_info down below, so that
we keep all debug stuff together.

Also this is an embedded structure, not a bare pointer to dynamically
allocated stuff (and this is fine...) so you should probably ifdef
ARM_SCMI_DEBUG_STATISTICS at compile time the presence of this field itself
so as not to waste memory for something you never use...but this is
opinable because it will also, on the other side, polllute a bit the code
with unpleasant ifdeffery... so maybe Sudeep wont like it...the other option
would be to make this a pointer and conditionaly devm_kzalloc a struct to this
pointer (like scmi_debug_info)

>  	/* Ensure mutual exclusive access to protocols instance array */
>  	struct mutex protocols_mtx;
>  	u8 *protocols_imp;
> @@ -1143,7 +1161,12 @@ static void scmi_handle_response(struct scmi_chan_info *cinfo,
>  						SCMI_RAW_REPLY_QUEUE,
>  						cinfo->id);
>  	}
> -
> +	if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_STATISTICS)) {
> +		if (xfer->hdr.type == MSG_TYPE_DELAYED_RESP)
> +			atomic_inc(&info->stats.dlyd_response_ok);
> +		else
> +			atomic_inc(&info->stats.response_ok);
> +	}

Ok, so IMO, this is the main core thing to rework in this series: the
"counting" operation/block should be defined as a macro so that it can
be fully compiled out when STATS=n, because these are counters
incremented on the hot path for each message, not just once in a while,
so the above if(IS_ENABELD()) now will be there and evaluated even when
STATS=n.

Something like:

	#ifdef CONFIG_ARM_SCMI_DEBUG_STATISTICS
	#define SCMI_LOG_STATS(counter)			\
		<your magic here> 			\
	#else
	#define SCMI_LOG_STATS(counter)
	#endif

.... I have not thought it through eh...so it could be radically
different...the point is ... the counting machinery should just
disappear at compile time when STATS=n

Moreover please define this macros magic here in this patch BUT split
out in a distinct patch all the places where you make use of it, so that
this patch contains only definitions of mechanisms and struct and
another patch will contain all the places where stats are monitored.

>  	scmi_xfer_command_release(info, xfer);
>  }
>  
> @@ -1279,6 +1302,12 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc,
>  		}
>  	}
>  
> +	if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_STATISTICS) && ret == -ETIMEDOUT) {
> +		struct scmi_info *info =
> +					handle_to_scmi_info(cinfo->handle);
> +		atomic_inc(&info->stats.xfers_response_timeout);
> +	}
> +

Ditto.

>  	return ret;
>  }
>  
> @@ -1414,6 +1443,13 @@ static int do_xfer(const struct scmi_protocol_handle *ph,
>  	trace_scmi_xfer_end(xfer->transfer_id, xfer->hdr.id,
>  			    xfer->hdr.protocol_id, xfer->hdr.seq, ret);
>  
> +	if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_STATISTICS)) {
> +		if (ret == 0)
> +			atomic_inc(&info->stats.sent_ok);
> +		else
> +			atomic_inc(&info->stats.sent_fail);
> +	}
> +

Ditto.

>  	return ret;
>  }
>  
> @@ -2994,6 +3030,14 @@ static int scmi_probe(struct platform_device *pdev)
>  	handle->devm_protocol_get = scmi_devm_protocol_get;
>  	handle->devm_protocol_put = scmi_devm_protocol_put;
>  
> +	if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_STATISTICS)) {
> +		atomic_set(&info->stats.response_ok, 0);
> +		atomic_set(&info->stats.sent_fail, 0);
> +		atomic_set(&info->stats.sent_ok, 0);
> +		atomic_set(&info->stats.dlyd_response_ok, 0);
> +		atomic_set(&info->stats.xfers_response_timeout, 0);
> +	}
> +

I think that you can just drop this zero initializations because the
stats are part of the info structure which is created devm_kzalloc'ed...
so zerod at creation time AND indeed atomic_t is just a structure containing
an int counter which will be already zeroed too...I dont think that any
other special init is done, it is already zero, and nothing more is done
by your atomic_set(v, 0)...but feel free to verify I am not missing
something, please.

Thanks,
Cristian
Luke Parkin July 2, 2024, 9:57 a.m. UTC | #2
> Ok, so IMO, this is the main core thing to rework in this series: the
> "counting" operation/block should be defined as a macro so that it can
> be fully compiled out when STATS=n, because these are counters
> incremented on the hot path for each message, not just once in a while,
> so the above if(IS_ENABELD()) now will be there and evaluated even when
> STATS=n.
>
> Something like:
>
> 	#ifdef CONFIG_ARM_SCMI_DEBUG_STATISTICS
> 	#define SCMI_LOG_STATS(counter)			\
> 		<your magic here> 			\
> 	#else
> 	#define SCMI_LOG_STATS(counter)
> 	#endif
>
> .... I have not thought it through eh...so it could be radically
> different...the point is ... the counting machinery should just
> disappear at compile time when STATS=n

Hey Cristian, Unless I've missed something, It looks like IS_ENABLED() does do
what you ask for.
In Documentation/process/coding-style.rst:1168 it reccomends using IS_ENABLED
for conditional compilation over #if and #ifdef, saying that the compiler will
constant-fold the conditional away.

Thanks,
Luke
Cristian Marussi July 2, 2024, 11:10 a.m. UTC | #3
On Tue, Jul 02, 2024 at 10:57:23AM +0100, Luke Parkin wrote:
> > Ok, so IMO, this is the main core thing to rework in this series: the
> > "counting" operation/block should be defined as a macro so that it can
> > be fully compiled out when STATS=n, because these are counters
> > incremented on the hot path for each message, not just once in a while,
> > so the above if(IS_ENABELD()) now will be there and evaluated even when
> > STATS=n.
> >
> > Something like:
> >
> > 	#ifdef CONFIG_ARM_SCMI_DEBUG_STATISTICS
> > 	#define SCMI_LOG_STATS(counter)			\
> > 		<your magic here> 			\
> > 	#else
> > 	#define SCMI_LOG_STATS(counter)
> > 	#endif
> >
> > .... I have not thought it through eh...so it could be radically
> > different...the point is ... the counting machinery should just
> > disappear at compile time when STATS=n
> 
> Hey Cristian, Unless I've missed something, It looks like IS_ENABLED() does do
> what you ask for.
> In Documentation/process/coding-style.rst:1168 it reccomends using IS_ENABLED
> for conditional compilation over #if and #ifdef, saying that the compiler will
> constant-fold the conditional away.

Yes indeed, it will be compiled out anyway, forgot that despite having
it used myself a few lines below :D .... but from the readability standpoint,
given that we will sprinkle this all over the code, wont be much clearer to
conditionally define once for all an inline function (like mentioned at the
start of that coding-style.rst paragraph) or a macro in a header (like common.h)
to wrap the atomic_inc

#ifdef CONFIG_ARM_SCMI_DEBUG_STATISTICS
static inline void scmi_log_stats(atomic_t *cnt)
{
	atomic_inc(cnt);
}
#else
static inline void scmi_log_stats(atomic_t *cnt) { }
#endif

and then just call it plainly wherever it needs, knowing that the compiler
will equally compile it out all-over when STATS=n.

ifdeffery is discouraged in the code flow but it is acceptable to define
alternative nop fucntions in a header.

Also because in some of the callsite you handle 2 stats with some ifcond
(conditional on the IS_ENABLED that is good) and that could be a problem,
but those calls can be split and placed alone where that some condition is
already check normally as in as an example in scmi_handle_response():

	if (xfer->hdr.type == MSG_TYPE_DELAYED_RESP) {                           
                scmi_clear_channel(info, cinfo);
                complete(xfer->async_done);
+		scmi_log_stats(&info->stats.dlyd_response_ok);
	} else {                                                                 
                complete(&xfer->done);                                           
+		scmi_log_stats(&info->stats.response_ok);
        }                                    

...what do you think, am I missing something else ?

Thanks,
Cristian
Luke Parkin July 2, 2024, 12:50 p.m. UTC | #4
> #ifdef CONFIG_ARM_SCMI_DEBUG_STATISTICS
> static inline void scmi_log_stats(atomic_t *cnt)
> {
> 	atomic_inc(cnt);
> }
> #else
> static inline void scmi_log_stats(atomic_t *cnt) { }
> #endif
> but those calls can be split and placed alone where that some condition is
> already check normally as in as an example in scmi_handle_response():
>
> 	if (xfer->hdr.type == MSG_TYPE_DELAYED_RESP) {
>                 scmi_clear_channel(info, cinfo);
>                 complete(xfer->async_done);
> +		scmi_log_stats(&info->stats.dlyd_response_ok);
> 	} else {
>                 complete(&xfer->done);
> +		scmi_log_stats(&info->stats.response_ok);
>         }
>
> ...what do you think, am I missing something else ?

Ah yeah, that looks better to me. I'll use that.

Thanks!
Luke


On 02/07/2024 12:10, Cristian Marussi wrote:
> On Tue, Jul 02, 2024 at 10:57:23AM +0100, Luke Parkin wrote:
>>> Ok, so IMO, this is the main core thing to rework in this series: the
>>> "counting" operation/block should be defined as a macro so that it can
>>> be fully compiled out when STATS=n, because these are counters
>>> incremented on the hot path for each message, not just once in a while,
>>> so the above if(IS_ENABELD()) now will be there and evaluated even when
>>> STATS=n.
>>>
>>> Something like:
>>>
>>> 	#ifdef CONFIG_ARM_SCMI_DEBUG_STATISTICS
>>> 	#define SCMI_LOG_STATS(counter)			\
>>> 		<your magic here> 			\
>>> 	#else
>>> 	#define SCMI_LOG_STATS(counter)
>>> 	#endif
>>>
>>> .... I have not thought it through eh...so it could be radically
>>> different...the point is ... the counting machinery should just
>>> disappear at compile time when STATS=n
>>
>> Hey Cristian, Unless I've missed something, It looks like IS_ENABLED() does do
>> what you ask for.
>> In Documentation/process/coding-style.rst:1168 it reccomends using IS_ENABLED
>> for conditional compilation over #if and #ifdef, saying that the compiler will
>> constant-fold the conditional away.
> 
> Yes indeed, it will be compiled out anyway, forgot that despite having
> it used myself a few lines below :D .... but from the readability standpoint,
> given that we will sprinkle this all over the code, wont be much clearer to
> conditionally define once for all an inline function (like mentioned at the
> start of that coding-style.rst paragraph) or a macro in a header (like common.h)
> to wrap the atomic_inc
> 
> #ifdef CONFIG_ARM_SCMI_DEBUG_STATISTICS
> static inline void scmi_log_stats(atomic_t *cnt)
> {
> 	atomic_inc(cnt);
> }
> #else
> static inline void scmi_log_stats(atomic_t *cnt) { }
> #endif
> 
> and then just call it plainly wherever it needs, knowing that the compiler
> will equally compile it out all-over when STATS=n.
> 
> ifdeffery is discouraged in the code flow but it is acceptable to define
> alternative nop fucntions in a header.
> 
> Also because in some of the callsite you handle 2 stats with some ifcond
> (conditional on the IS_ENABLED that is good) and that could be a problem,
> but those calls can be split and placed alone where that some condition is
> already check normally as in as an example in scmi_handle_response():
> 
> 	if (xfer->hdr.type == MSG_TYPE_DELAYED_RESP) {                           
>                 scmi_clear_channel(info, cinfo);
>                 complete(xfer->async_done);
> +		scmi_log_stats(&info->stats.dlyd_response_ok);
> 	} else {                                                                 
>                 complete(&xfer->done);                                           
> +		scmi_log_stats(&info->stats.response_ok);
>         }                                    
> 
> ...what do you think, am I missing something else ?
> 
> Thanks,
> Cristian
diff mbox series

Patch

diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
index 6b6957f4743f..f69dff699d48 100644
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -125,6 +125,22 @@  struct scmi_debug_info {
 	bool is_atomic;
 };
 
+/**
+ * struct scmi_debug_stats - Debug statistics
+ * @sent_ok: Count of successful sends
+ * @sent_fail: Count of failed sends
+ * @response_ok: Count of successful responses
+ * @dlyd_response_ok: Count of successful delayed responses
+ * @xfers_response_timeout: Count of xfer response timeouts
+ */
+struct scmi_debug_stats {
+	atomic_t sent_ok;
+	atomic_t sent_fail;
+	atomic_t response_ok;
+	atomic_t dlyd_response_ok;
+	atomic_t xfers_response_timeout;
+};
+
 /**
  * struct scmi_info - Structure representing a SCMI instance
  *
@@ -141,6 +157,7 @@  struct scmi_debug_info {
  * @protocols: IDR for protocols' instance descriptors initialized for
  *	       this SCMI instance: populated on protocol's first attempted
  *	       usage.
+ * @stats: Contains several atomic_t's for tracking various statistics
  * @protocols_mtx: A mutex to protect protocols instances initialization.
  * @protocols_imp: List of protocols implemented, currently maximum of
  *		   scmi_revision_info.num_protocols elements allocated by the
@@ -174,6 +191,7 @@  struct scmi_info {
 	struct idr tx_idr;
 	struct idr rx_idr;
 	struct idr protocols;
+	struct scmi_debug_stats stats;
 	/* Ensure mutual exclusive access to protocols instance array */
 	struct mutex protocols_mtx;
 	u8 *protocols_imp;
@@ -1143,7 +1161,12 @@  static void scmi_handle_response(struct scmi_chan_info *cinfo,
 						SCMI_RAW_REPLY_QUEUE,
 						cinfo->id);
 	}
-
+	if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_STATISTICS)) {
+		if (xfer->hdr.type == MSG_TYPE_DELAYED_RESP)
+			atomic_inc(&info->stats.dlyd_response_ok);
+		else
+			atomic_inc(&info->stats.response_ok);
+	}
 	scmi_xfer_command_release(info, xfer);
 }
 
@@ -1279,6 +1302,12 @@  static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc,
 		}
 	}
 
+	if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_STATISTICS) && ret == -ETIMEDOUT) {
+		struct scmi_info *info =
+					handle_to_scmi_info(cinfo->handle);
+		atomic_inc(&info->stats.xfers_response_timeout);
+	}
+
 	return ret;
 }
 
@@ -1414,6 +1443,13 @@  static int do_xfer(const struct scmi_protocol_handle *ph,
 	trace_scmi_xfer_end(xfer->transfer_id, xfer->hdr.id,
 			    xfer->hdr.protocol_id, xfer->hdr.seq, ret);
 
+	if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_STATISTICS)) {
+		if (ret == 0)
+			atomic_inc(&info->stats.sent_ok);
+		else
+			atomic_inc(&info->stats.sent_fail);
+	}
+
 	return ret;
 }
 
@@ -2994,6 +3030,14 @@  static int scmi_probe(struct platform_device *pdev)
 	handle->devm_protocol_get = scmi_devm_protocol_get;
 	handle->devm_protocol_put = scmi_devm_protocol_put;
 
+	if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_STATISTICS)) {
+		atomic_set(&info->stats.response_ok, 0);
+		atomic_set(&info->stats.sent_fail, 0);
+		atomic_set(&info->stats.sent_ok, 0);
+		atomic_set(&info->stats.dlyd_response_ok, 0);
+		atomic_set(&info->stats.xfers_response_timeout, 0);
+	}
+
 	/* System wide atomic threshold for atomic ops .. if any */
 	if (!of_property_read_u32(np, "atomic-threshold-us",
 				  &info->atomic_threshold))