diff mbox series

[4/4] drm/i915/perf: add new open param to configure polling of OA buffer

Message ID 20200312230502.36898-5-umesh.nerlige.ramappa@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/perf: add OA interrupt support | expand

Commit Message

Umesh Nerlige Ramappa March 12, 2020, 11:05 p.m. UTC
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

This new parameter let's the application choose how often the OA
buffer should be checked on the CPU side for data availability. Longer
polling period tend to reduce CPU overhead if the application does not
care about somewhat real time data collection.

v2: Allow disabling polling completely with 0 value (Lionel)
v3: Version the new parameter (Joonas)
v4: Rebase (Umesh)
v5: Make poll delay value of 0 invalid (Umesh)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c       | 37 ++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_perf_types.h |  6 +++++
 include/uapi/drm/i915_drm.h            | 13 +++++++++
 3 files changed, 48 insertions(+), 8 deletions(-)

Comments

Lionel Landwerlin March 13, 2020, 9:59 a.m. UTC | #1
On 13/03/2020 01:05, Umesh Nerlige Ramappa wrote:
> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>
> This new parameter let's the application choose how often the OA
> buffer should be checked on the CPU side for data availability. Longer
> polling period tend to reduce CPU overhead if the application does not
> care about somewhat real time data collection.
>
> v2: Allow disabling polling completely with 0 value (Lionel)
> v3: Version the new parameter (Joonas)
> v4: Rebase (Umesh)
> v5: Make poll delay value of 0 invalid (Umesh)
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_perf.c       | 37 ++++++++++++++++++++------
>   drivers/gpu/drm/i915/i915_perf_types.h |  6 +++++
>   include/uapi/drm/i915_drm.h            | 13 +++++++++
>   3 files changed, 48 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 21a63644846f..ca139ac31b11 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -262,11 +262,11 @@
>    */
>   #define OA_TAIL_MARGIN_NSEC	100000ULL
>   
> -/* frequency for checking whether the OA unit has written new reports to the
> - * circular OA buffer...
> +/* The default frequency for checking whether the OA unit has written new
> + * reports to the circular OA buffer...
>    */
> -#define POLL_FREQUENCY 200
> -#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
> +#define DEFAULT_POLL_FREQUENCY 200
> +#define DEFAULT_POLL_PERIOD (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY)
>   
>   /* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
>   static u32 i915_perf_stream_paranoid = true;
> @@ -349,6 +349,8 @@ static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
>    * @oa_periodic: Whether to enable periodic OA unit sampling
>    * @oa_period_exponent: The OA unit sampling period is derived from this
>    * @engine: The engine (typically rcs0) being monitored by the OA unit
> + * @poll_oa_period: The period at which the CPU will check for OA data
> + * availability
>    *
>    * As read_properties_unlocked() enumerates and validates the properties given
>    * to open a stream of metrics the configuration is built up in the structure
> @@ -368,6 +370,7 @@ struct perf_open_properties {
>   	int oa_period_exponent;
>   
>   	struct intel_engine_cs *engine;
> +	u64 poll_oa_period;
>   };
>   
>   struct i915_oa_config_bo {
> @@ -2642,9 +2645,9 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream)
>   
>   	stream->perf->ops.oa_enable(stream);
>   
> -	if (stream->periodic)
> +	if (stream->periodic && stream->poll_oa_period)
>   		hrtimer_start(&stream->poll_check_timer,
> -			      ns_to_ktime(POLL_PERIOD),
> +			      ns_to_ktime(stream->poll_oa_period),
>   			      HRTIMER_MODE_REL_PINNED);
>   }
>   
> @@ -3044,7 +3047,8 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
>   		wake_up(&stream->poll_wq);
>   	}
>   
> -	hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));
> +	hrtimer_forward_now(hrtimer,
> +			    ns_to_ktime(stream->poll_oa_period));
>   
>   	return HRTIMER_RESTART;
>   }
> @@ -3425,6 +3429,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
>   
>   	stream->perf = perf;
>   	stream->ctx = specific_ctx;
> +	stream->poll_oa_period = props->poll_oa_period;
>   
>   	ret = i915_oa_stream_init(stream, param, props);
>   	if (ret)
> @@ -3481,6 +3486,7 @@ static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
>   /**
>    * read_properties_unlocked - validate + copy userspace stream open properties
>    * @perf: i915 perf instance
> + * @open_flags: Flags set by userspace for the opening of the stream


Looks you can drop the open_flags, it's not used anymore in this version.


>    * @uprops: The array of u64 key value pairs given by userspace
>    * @n_props: The number of key value pairs expected in @uprops
>    * @props: The stream configuration built up while validating properties
> @@ -3494,6 +3500,7 @@ static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
>    * rule out defining new properties with ordering requirements in the future.
>    */
>   static int read_properties_unlocked(struct i915_perf *perf,
> +				    u32 open_flags,
>   				    u64 __user *uprops,
>   				    u32 n_props,
>   				    struct perf_open_properties *props)
> @@ -3502,6 +3509,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
>   	u32 i;
>   
>   	memset(props, 0, sizeof(struct perf_open_properties));
> +	props->poll_oa_period = DEFAULT_POLL_PERIOD;
>   
>   	if (!n_props) {
>   		DRM_DEBUG("No i915 perf properties given\n");
> @@ -3617,6 +3625,14 @@ static int read_properties_unlocked(struct i915_perf *perf,
>   		case DRM_I915_PERF_PROP_HOLD_PREEMPTION:
>   			props->hold_preemption = !!value;
>   			break;
> +		case DRM_I915_PERF_PROP_POLL_OA_DELAY:
> +			if (value < 100000 /* 100us */) {
> +				DRM_DEBUG("OA availability timer too small (%lluns < 100us)\n",
> +					  value);
> +				return -EINVAL;
> +			}
> +			props->poll_oa_period = value;
> +			break;
>   		case DRM_I915_PERF_PROP_MAX:
>   			MISSING_CASE(id);
>   			return -EINVAL;
> @@ -3675,6 +3691,7 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
>   	}
>   
>   	ret = read_properties_unlocked(perf,
> +				       param->flags,
>   				       u64_to_user_ptr(param->properties_ptr),
>   				       param->num_properties,
>   				       &props);
> @@ -4457,8 +4474,12 @@ int i915_perf_ioctl_version(void)
>   	 *    preemption on a particular context so that performance data is
>   	 *    accessible from a delta of MI_RPC reports without looking at the
>   	 *    OA buffer.
> +	 *
> +	 * 4: Add DRM_I915_PERF_PROP_POLL_OA_DELAY parameter that controls
> +	 *    enable/disable as well as the interval for the hrtimer used to
> +	 *    check for OA data.


I think you should drop the "enable/disable" part. We just make the 
delay configurable now with a lower bound limit of 100us.


>   	 */
> -	return 3;
> +	return 4;
>   }
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
> index 9ee7c58e70d5..01559ead22e2 100644
> --- a/drivers/gpu/drm/i915/i915_perf_types.h
> +++ b/drivers/gpu/drm/i915/i915_perf_types.h
> @@ -304,6 +304,12 @@ struct i915_perf_stream {
>   	 * reprogrammed.
>   	 */
>   	struct i915_vma *noa_wait;
> +
> +	/**
> +	 * @poll_oa_period: The period in nanoseconds at which the OA
> +	 * buffer should be checked for available data.
> +	 */
> +	u64 poll_oa_period;
>   };
>   
>   /**
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 2813e579b480..dd511e7f795d 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1969,6 +1969,19 @@ enum drm_i915_perf_property_id {
>   	 */
>   	DRM_I915_PERF_PROP_HOLD_PREEMPTION,
>   
> +	/**
> +	 * This optional parameter specifies the timer interval in nanoseconds
> +	 * at which the i915 driver will check the OA buffer for available data.
> +	 * Minimum allowed value is 100 microseconds. A default value is used by
> +	 * the driver if this parameter is not specified. Note that a large
> +	 * value may reduce cpu consumption during OA perf captures, but it
> +	 * would also potentially result in OA buffer overwrite as the captures
> +	 * reach end of the OA buffer.
> +	 *
> +	 * This property is available in perf revision 4.
> +	 */
> +	DRM_I915_PERF_PROP_POLL_OA_DELAY,
> +
>   	DRM_I915_PERF_PROP_MAX /* non-ABI */
>   };
>
Dixit, Ashutosh March 17, 2020, 12:02 a.m. UTC | #2
On Thu, 12 Mar 2020 16:05:02 -0700, Umesh Nerlige Ramappa wrote:
>
> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>
> This new parameter let's the application choose how often the OA
> buffer should be checked on the CPU side for data availability. Longer
> polling period tend to reduce CPU overhead if the application does not
> care about somewhat real time data collection.

Lionely already has comments on this so skipping those.

> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 21a63644846f..ca139ac31b11 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -262,11 +262,11 @@
>   */
>  #define OA_TAIL_MARGIN_NSEC	100000ULL
>
> -/* frequency for checking whether the OA unit has written new reports to the
> - * circular OA buffer...
> +/* The default frequency for checking whether the OA unit has written new
> + * reports to the circular OA buffer...
>   */
> -#define POLL_FREQUENCY 200
> -#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
> +#define DEFAULT_POLL_FREQUENCY 200
> +#define DEFAULT_POLL_PERIOD (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY)

nit, but I mostly like to have "units" as part of the name, so I'd recommed
calling this DEFAULT_POLL_PERIOD_NS.

> @@ -349,6 +349,8 @@ static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
>   * @oa_periodic: Whether to enable periodic OA unit sampling
>   * @oa_period_exponent: The OA unit sampling period is derived from this
>   * @engine: The engine (typically rcs0) being monitored by the OA unit
> + * @poll_oa_period: The period at which the CPU will check for OA data
> + * availability
>   *
>   * As read_properties_unlocked() enumerates and validates the properties given
>   * to open a stream of metrics the configuration is built up in the structure
> @@ -368,6 +370,7 @@ struct perf_open_properties {
>	int oa_period_exponent;
>
>	struct intel_engine_cs *engine;
> +	u64 poll_oa_period;

poll_oa_period_ns?

> @@ -2642,9 +2645,9 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream)
>
>	stream->perf->ops.oa_enable(stream);
>
> -	if (stream->periodic)
> +	if (stream->periodic && stream->poll_oa_period)

poll_oa_period check is not required, it's always present.

> @@ -3617,6 +3625,14 @@ static int read_properties_unlocked(struct i915_perf *perf,
>		case DRM_I915_PERF_PROP_HOLD_PREEMPTION:
>			props->hold_preemption = !!value;
>			break;
> +		case DRM_I915_PERF_PROP_POLL_OA_DELAY:
> +			if (value < 100000 /* 100us */) {

So no maximum? That's probably ok, reap what you sow. Have we tested 100
us, seems low, anyway.

> diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
> index 9ee7c58e70d5..01559ead22e2 100644
> --- a/drivers/gpu/drm/i915/i915_perf_types.h
> +++ b/drivers/gpu/drm/i915/i915_perf_types.h
> @@ -304,6 +304,12 @@ struct i915_perf_stream {
>	 * reprogrammed.
>	 */
>	struct i915_vma *noa_wait;
> +
> +	/**
> +	 * @poll_oa_period: The period in nanoseconds at which the OA
> +	 * buffer should be checked for available data.
> +	 */
> +	u64 poll_oa_period;

poll_oa_period_ns?

>  };
>
>  /**
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 2813e579b480..dd511e7f795d 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1969,6 +1969,19 @@ enum drm_i915_perf_property_id {
>	 */
>	DRM_I915_PERF_PROP_HOLD_PREEMPTION,
>
> +	/**
> +	 * This optional parameter specifies the timer interval in nanoseconds
> +	 * at which the i915 driver will check the OA buffer for available data.
> +	 * Minimum allowed value is 100 microseconds. A default value is used by
> +	 * the driver if this parameter is not specified. Note that a large
> +	 * value may reduce cpu consumption during OA perf captures, but it
> +	 * would also potentially result in OA buffer overwrite as the captures
> +	 * reach end of the OA buffer.
> +	 *
> +	 * This property is available in perf revision 4.
> +	 */
> +	DRM_I915_PERF_PROP_POLL_OA_DELAY,

Is DRM_I915_PERF_PROP_POLL_OA_PERIOD_NS a better name? At least PERIOD
instead of DELAY.

Thanks!
--
Ashutosh
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 21a63644846f..ca139ac31b11 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -262,11 +262,11 @@ 
  */
 #define OA_TAIL_MARGIN_NSEC	100000ULL
 
-/* frequency for checking whether the OA unit has written new reports to the
- * circular OA buffer...
+/* The default frequency for checking whether the OA unit has written new
+ * reports to the circular OA buffer...
  */
-#define POLL_FREQUENCY 200
-#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
+#define DEFAULT_POLL_FREQUENCY 200
+#define DEFAULT_POLL_PERIOD (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY)
 
 /* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
 static u32 i915_perf_stream_paranoid = true;
@@ -349,6 +349,8 @@  static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
  * @oa_periodic: Whether to enable periodic OA unit sampling
  * @oa_period_exponent: The OA unit sampling period is derived from this
  * @engine: The engine (typically rcs0) being monitored by the OA unit
+ * @poll_oa_period: The period at which the CPU will check for OA data
+ * availability
  *
  * As read_properties_unlocked() enumerates and validates the properties given
  * to open a stream of metrics the configuration is built up in the structure
@@ -368,6 +370,7 @@  struct perf_open_properties {
 	int oa_period_exponent;
 
 	struct intel_engine_cs *engine;
+	u64 poll_oa_period;
 };
 
 struct i915_oa_config_bo {
@@ -2642,9 +2645,9 @@  static void i915_oa_stream_enable(struct i915_perf_stream *stream)
 
 	stream->perf->ops.oa_enable(stream);
 
-	if (stream->periodic)
+	if (stream->periodic && stream->poll_oa_period)
 		hrtimer_start(&stream->poll_check_timer,
-			      ns_to_ktime(POLL_PERIOD),
+			      ns_to_ktime(stream->poll_oa_period),
 			      HRTIMER_MODE_REL_PINNED);
 }
 
@@ -3044,7 +3047,8 @@  static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
 		wake_up(&stream->poll_wq);
 	}
 
-	hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));
+	hrtimer_forward_now(hrtimer,
+			    ns_to_ktime(stream->poll_oa_period));
 
 	return HRTIMER_RESTART;
 }
@@ -3425,6 +3429,7 @@  i915_perf_open_ioctl_locked(struct i915_perf *perf,
 
 	stream->perf = perf;
 	stream->ctx = specific_ctx;
+	stream->poll_oa_period = props->poll_oa_period;
 
 	ret = i915_oa_stream_init(stream, param, props);
 	if (ret)
@@ -3481,6 +3486,7 @@  static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
 /**
  * read_properties_unlocked - validate + copy userspace stream open properties
  * @perf: i915 perf instance
+ * @open_flags: Flags set by userspace for the opening of the stream
  * @uprops: The array of u64 key value pairs given by userspace
  * @n_props: The number of key value pairs expected in @uprops
  * @props: The stream configuration built up while validating properties
@@ -3494,6 +3500,7 @@  static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
  * rule out defining new properties with ordering requirements in the future.
  */
 static int read_properties_unlocked(struct i915_perf *perf,
+				    u32 open_flags,
 				    u64 __user *uprops,
 				    u32 n_props,
 				    struct perf_open_properties *props)
@@ -3502,6 +3509,7 @@  static int read_properties_unlocked(struct i915_perf *perf,
 	u32 i;
 
 	memset(props, 0, sizeof(struct perf_open_properties));
+	props->poll_oa_period = DEFAULT_POLL_PERIOD;
 
 	if (!n_props) {
 		DRM_DEBUG("No i915 perf properties given\n");
@@ -3617,6 +3625,14 @@  static int read_properties_unlocked(struct i915_perf *perf,
 		case DRM_I915_PERF_PROP_HOLD_PREEMPTION:
 			props->hold_preemption = !!value;
 			break;
+		case DRM_I915_PERF_PROP_POLL_OA_DELAY:
+			if (value < 100000 /* 100us */) {
+				DRM_DEBUG("OA availability timer too small (%lluns < 100us)\n",
+					  value);
+				return -EINVAL;
+			}
+			props->poll_oa_period = value;
+			break;
 		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
 			return -EINVAL;
@@ -3675,6 +3691,7 @@  int i915_perf_open_ioctl(struct drm_device *dev, void *data,
 	}
 
 	ret = read_properties_unlocked(perf,
+				       param->flags,
 				       u64_to_user_ptr(param->properties_ptr),
 				       param->num_properties,
 				       &props);
@@ -4457,8 +4474,12 @@  int i915_perf_ioctl_version(void)
 	 *    preemption on a particular context so that performance data is
 	 *    accessible from a delta of MI_RPC reports without looking at the
 	 *    OA buffer.
+	 *
+	 * 4: Add DRM_I915_PERF_PROP_POLL_OA_DELAY parameter that controls
+	 *    enable/disable as well as the interval for the hrtimer used to
+	 *    check for OA data.
 	 */
-	return 3;
+	return 4;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index 9ee7c58e70d5..01559ead22e2 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -304,6 +304,12 @@  struct i915_perf_stream {
 	 * reprogrammed.
 	 */
 	struct i915_vma *noa_wait;
+
+	/**
+	 * @poll_oa_period: The period in nanoseconds at which the OA
+	 * buffer should be checked for available data.
+	 */
+	u64 poll_oa_period;
 };
 
 /**
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2813e579b480..dd511e7f795d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1969,6 +1969,19 @@  enum drm_i915_perf_property_id {
 	 */
 	DRM_I915_PERF_PROP_HOLD_PREEMPTION,
 
+	/**
+	 * This optional parameter specifies the timer interval in nanoseconds
+	 * at which the i915 driver will check the OA buffer for available data.
+	 * Minimum allowed value is 100 microseconds. A default value is used by
+	 * the driver if this parameter is not specified. Note that a large
+	 * value may reduce cpu consumption during OA perf captures, but it
+	 * would also potentially result in OA buffer overwrite as the captures
+	 * reach end of the OA buffer.
+	 *
+	 * This property is available in perf revision 4.
+	 */
+	DRM_I915_PERF_PROP_POLL_OA_DELAY,
+
 	DRM_I915_PERF_PROP_MAX /* non-ABI */
 };