diff mbox

[19/20] drm/i915: Use SSE4.1 movntdqa based memcpy for sampling GuC log buffer

Message ID 1470983123-22127-20-git-send-email-akash.goel@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

akash.goel@intel.com Aug. 12, 2016, 6:25 a.m. UTC
From: Akash Goel <akash.goel@intel.com>

In order to have fast reads from the GuC log buffer, use the SSE4.1 movntdqa
based memcpy function i915_memcpy_from_wc.
The GuC log buffer has a WC type vmalloc mapping, and copying from WC type
memory using movntdqa is almost as fast as reading from WB memory.
This further reduces the log buffer sampling time, which is badly needed
to deal with the flush interrupt storm when GuC is generating logs at a very
high rate.
Ideally SSE4.1 should be present on all chipsets supporting GuC based
submissions, but if it is not then logging will not be enabled.

Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

Comments

Tvrtko Ursulin Aug. 12, 2016, 4:06 p.m. UTC | #1
On 12/08/16 07:25, akash.goel@intel.com wrote:
> From: Akash Goel <akash.goel@intel.com>
>
> In order to have fast reads from the GuC log buffer, used SSE4.1 movntdqa
> based memcpy function i915_memcpy_from_wc.
> GuC log buffer has a WC type vmalloc mapping and copying using movntqda from
> WC type memory is almost as fast as reading from WB memory.
> This will further reduce the log buffer sampling time, so is needed dearly
> to deal with the flush interrupt storm when GuC is generating logs at a very
> high rate.
> Ideally SSE 4.1 should be present on all chipsets supporting GuC based
> submisssions, but if not then logging will not be enabled.
>
> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Akash Goel <akash.goel@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_guc_submission.c | 17 ++++++++++++++---
>   1 file changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 1818343..af48f62 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -987,15 +987,16 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
>   			/* Just copy the newly written data */
>   			if (read_offset <= write_offset) {
>   				bytes_to_copy = write_offset - read_offset;
> -				memcpy(dst_data_ptr + read_offset,
> +				i915_memcpy_from_wc(dst_data_ptr + read_offset,
>   				     src_data_ptr + read_offset, bytes_to_copy);
>   			} else {
>   				bytes_to_copy = buffer_size - read_offset;
> -				memcpy(dst_data_ptr + read_offset,
> +				i915_memcpy_from_wc(dst_data_ptr + read_offset,
>   				     src_data_ptr + read_offset, bytes_to_copy);
>
>   				bytes_to_copy = write_offset;
> -				memcpy(dst_data_ptr, src_data_ptr, bytes_to_copy);
> +				i915_memcpy_from_wc(dst_data_ptr, src_data_ptr,
> +				     bytes_to_copy);
>   			}
>
>   			src_data_ptr += buffer_size;
> @@ -1210,6 +1211,16 @@ static void guc_create_log(struct intel_guc *guc)
>
>   	obj = guc->log.obj;
>   	if (!obj) {
> +		/* We require SSE 4.1 for fast reads from the GuC log buffer and
> +		 * it should be present on the chipsets supporting GuC based
> +		 * submisssions.
> +		 */
> +		if (WARN_ON(!i915_memcpy_from_wc(NULL, NULL, 0))) {
> +			/* logging will not be enabled */
> +			i915.guc_log_level = -1;
> +			return;
> +		}
> +
>   		obj = gem_allocate_guc_obj(dev_priv, size);
>   		if (!obj) {
>   			/* logging will be off */
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 1818343..af48f62 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -987,15 +987,16 @@  static void guc_read_update_log_buffer(struct intel_guc *guc)
 			/* Just copy the newly written data */
 			if (read_offset <= write_offset) {
 				bytes_to_copy = write_offset - read_offset;
-				memcpy(dst_data_ptr + read_offset,
+				i915_memcpy_from_wc(dst_data_ptr + read_offset,
 				     src_data_ptr + read_offset, bytes_to_copy);
 			} else {
 				bytes_to_copy = buffer_size - read_offset;
-				memcpy(dst_data_ptr + read_offset,
+				i915_memcpy_from_wc(dst_data_ptr + read_offset,
 				     src_data_ptr + read_offset, bytes_to_copy);
 
 				bytes_to_copy = write_offset;
-				memcpy(dst_data_ptr, src_data_ptr, bytes_to_copy);
+				i915_memcpy_from_wc(dst_data_ptr, src_data_ptr,
+				     bytes_to_copy);
 			}
 
 			src_data_ptr += buffer_size;
@@ -1210,6 +1211,16 @@  static void guc_create_log(struct intel_guc *guc)
 
 	obj = guc->log.obj;
 	if (!obj) {
+		/* We require SSE 4.1 for fast reads from the GuC log buffer and
+		 * it should be present on the chipsets supporting GuC based
+		 * submissions.
+		 */
+		if (WARN_ON(!i915_memcpy_from_wc(NULL, NULL, 0))) {
+			/* logging will not be enabled */
+			i915.guc_log_level = -1;
+			return;
+		}
+
 		obj = gem_allocate_guc_obj(dev_priv, size);
 		if (!obj) {
 			/* logging will be off */