diff mbox series

[10/19] drm/i915/perf: Use gt-specific ggtt for OA and noa-wait buffers

Message ID 20220823204155.8178-11-umesh.nerlige.ramappa@intel.com (mailing list archive)
State New, archived
Headers show
Series Add DG2 OA support | expand

Commit Message

Umesh Nerlige Ramappa Aug. 23, 2022, 8:41 p.m. UTC
The user passes the uabi engine class and instance to the perf OA interface.
Use the gt corresponding to that engine to pin the buffers to the right ggtt.

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

Comments

Lionel Landwerlin Sept. 6, 2022, 7:56 p.m. UTC | #1
On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
> User passes uabi engine class and instance to the perf OA interface. Use
> gt corresponding to the engine to pin the buffers to the right ggtt.
>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>

I didn't know there was a GGTT per engine.

Do I understand this correctly?


Thanks,

-Lionel


> ---
>   drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++++++--
>   1 file changed, 19 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 87b92d2946f4..f7621b45966c 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1765,6 +1765,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
>   static int alloc_oa_buffer(struct i915_perf_stream *stream)
>   {
>   	struct drm_i915_private *i915 = stream->perf->i915;
> +	struct intel_gt *gt = stream->engine->gt;
>   	struct drm_i915_gem_object *bo;
>   	struct i915_vma *vma;
>   	int ret;
> @@ -1784,11 +1785,22 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
>   	i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
>   
>   	/* PreHSW required 512K alignment, HSW requires 16M */
> -	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
> +	vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
>   	if (IS_ERR(vma)) {
>   		ret = PTR_ERR(vma);
>   		goto err_unref;
>   	}
> +
> +	/*
> +	 * PreHSW required 512K alignment.
> +	 * HSW and onwards, align to requested size of OA buffer.
> +	 */
> +	ret = i915_vma_pin(vma, 0, SZ_16M, PIN_GLOBAL | PIN_HIGH);
> +	if (ret) {
> +		drm_err(&gt->i915->drm, "Failed to pin OA buffer %d\n", ret);
> +		goto err_unref;
> +	}
> +
>   	stream->oa_buffer.vma = vma;
>   
>   	stream->oa_buffer.vaddr =
> @@ -1838,6 +1850,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
>   static int alloc_noa_wait(struct i915_perf_stream *stream)
>   {
>   	struct drm_i915_private *i915 = stream->perf->i915;
> +	struct intel_gt *gt = stream->engine->gt;
>   	struct drm_i915_gem_object *bo;
>   	struct i915_vma *vma;
>   	const u64 delay_ticks = 0xffffffffffffffff -
> @@ -1878,12 +1891,16 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
>   	 * multiple OA config BOs will have a jump to this address and it
>   	 * needs to be fixed during the lifetime of the i915/perf stream.
>   	 */
> -	vma = i915_gem_object_ggtt_pin_ww(bo, &ww, NULL, 0, 0, PIN_HIGH);
> +	vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
>   	if (IS_ERR(vma)) {
>   		ret = PTR_ERR(vma);
>   		goto out_ww;
>   	}
>   
> +	ret = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
> +	if (ret)
> +		goto out_ww;
> +
>   	batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
>   	if (IS_ERR(batch)) {
>   		ret = PTR_ERR(batch);
Umesh Nerlige Ramappa Sept. 6, 2022, 8:28 p.m. UTC | #2
On Tue, Sep 06, 2022 at 10:56:13PM +0300, Lionel Landwerlin wrote:
>On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
>>User passes uabi engine class and instance to the perf OA interface. Use
>>gt corresponding to the engine to pin the buffers to the right ggtt.
>>
>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>
>I didn't know there was a GGTT per engine.
>
>Do I understand this correct?

No, the GGTT is still per-gt. We just derive the gt from the engine class
and instance passed in (as in engine->gt).

>
>
>Thanks,
>
>-Lionel
>
>
>>---
>>  drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++++++--
>>  1 file changed, 19 insertions(+), 2 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>>index 87b92d2946f4..f7621b45966c 100644
>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>@@ -1765,6 +1765,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
>>  static int alloc_oa_buffer(struct i915_perf_stream *stream)
>>  {
>>  	struct drm_i915_private *i915 = stream->perf->i915;
>>+	struct intel_gt *gt = stream->engine->gt;
>>  	struct drm_i915_gem_object *bo;
>>  	struct i915_vma *vma;
>>  	int ret;
>>@@ -1784,11 +1785,22 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
>>  	i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
>>  	/* PreHSW required 512K alignment, HSW requires 16M */
>>-	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
>>+	vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
>>  	if (IS_ERR(vma)) {
>>  		ret = PTR_ERR(vma);
>>  		goto err_unref;
>>  	}
>>+
>>+	/*
>>+	 * PreHSW required 512K alignment.
>>+	 * HSW and onwards, align to requested size of OA buffer.
>>+	 */
>>+	ret = i915_vma_pin(vma, 0, SZ_16M, PIN_GLOBAL | PIN_HIGH);
>>+	if (ret) {
>>+		drm_err(&gt->i915->drm, "Failed to pin OA buffer %d\n", ret);
>>+		goto err_unref;
>>+	}
>>+
>>  	stream->oa_buffer.vma = vma;
>>  	stream->oa_buffer.vaddr =
>>@@ -1838,6 +1850,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
>>  static int alloc_noa_wait(struct i915_perf_stream *stream)
>>  {
>>  	struct drm_i915_private *i915 = stream->perf->i915;
>>+	struct intel_gt *gt = stream->engine->gt;
>>  	struct drm_i915_gem_object *bo;
>>  	struct i915_vma *vma;
>>  	const u64 delay_ticks = 0xffffffffffffffff -
>>@@ -1878,12 +1891,16 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
>>  	 * multiple OA config BOs will have a jump to this address and it
>>  	 * needs to be fixed during the lifetime of the i915/perf stream.
>>  	 */
>>-	vma = i915_gem_object_ggtt_pin_ww(bo, &ww, NULL, 0, 0, PIN_HIGH);
>>+	vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
>>  	if (IS_ERR(vma)) {
>>  		ret = PTR_ERR(vma);
>>  		goto out_ww;
>>  	}
>>+	ret = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
>>+	if (ret)
>>+		goto out_ww;
>>+
>>  	batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
>>  	if (IS_ERR(batch)) {
>>  		ret = PTR_ERR(batch);
>
>
Lionel Landwerlin Sept. 6, 2022, 8:31 p.m. UTC | #3
On 06/09/2022 23:28, Umesh Nerlige Ramappa wrote:
> On Tue, Sep 06, 2022 at 10:56:13PM +0300, Lionel Landwerlin wrote:
>> On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
>>> User passes uabi engine class and instance to the perf OA interface. 
>>> Use
>>> gt corresponding to the engine to pin the buffers to the right ggtt.
>>>
>>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>
>> I didn't know there was a GGTT per engine.
>>
>> Do I understand this correct?
>
> No, GGTT is still per-gt. We just derive the gt from engine class 
> instance passed (as in engine->gt).


Oh thanks, I understand now.


Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>


>
>>
>>
>> Thanks,
>>
>> -Lionel
>>
>>
>>> ---
>>>  drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++++++--
>>>  1 file changed, 19 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
>>> b/drivers/gpu/drm/i915/i915_perf.c
>>> index 87b92d2946f4..f7621b45966c 100644
>>> --- a/drivers/gpu/drm/i915/i915_perf.c
>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>>> @@ -1765,6 +1765,7 @@ static void gen12_init_oa_buffer(struct 
>>> i915_perf_stream *stream)
>>>  static int alloc_oa_buffer(struct i915_perf_stream *stream)
>>>  {
>>>      struct drm_i915_private *i915 = stream->perf->i915;
>>> +    struct intel_gt *gt = stream->engine->gt;
>>>      struct drm_i915_gem_object *bo;
>>>      struct i915_vma *vma;
>>>      int ret;
>>> @@ -1784,11 +1785,22 @@ static int alloc_oa_buffer(struct 
>>> i915_perf_stream *stream)
>>>      i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
>>>      /* PreHSW required 512K alignment, HSW requires 16M */
>>> -    vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
>>> +    vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
>>>      if (IS_ERR(vma)) {
>>>          ret = PTR_ERR(vma);
>>>          goto err_unref;
>>>      }
>>> +
>>> +    /*
>>> +     * PreHSW required 512K alignment.
>>> +     * HSW and onwards, align to requested size of OA buffer.
>>> +     */
>>> +    ret = i915_vma_pin(vma, 0, SZ_16M, PIN_GLOBAL | PIN_HIGH);
>>> +    if (ret) {
>>> +        drm_err(&gt->i915->drm, "Failed to pin OA buffer %d\n", ret);
>>> +        goto err_unref;
>>> +    }
>>> +
>>>      stream->oa_buffer.vma = vma;
>>>      stream->oa_buffer.vaddr =
>>> @@ -1838,6 +1850,7 @@ static u32 *save_restore_register(struct 
>>> i915_perf_stream *stream, u32 *cs,
>>>  static int alloc_noa_wait(struct i915_perf_stream *stream)
>>>  {
>>>      struct drm_i915_private *i915 = stream->perf->i915;
>>> +    struct intel_gt *gt = stream->engine->gt;
>>>      struct drm_i915_gem_object *bo;
>>>      struct i915_vma *vma;
>>>      const u64 delay_ticks = 0xffffffffffffffff -
>>> @@ -1878,12 +1891,16 @@ static int alloc_noa_wait(struct 
>>> i915_perf_stream *stream)
>>>       * multiple OA config BOs will have a jump to this address and it
>>>       * needs to be fixed during the lifetime of the i915/perf stream.
>>>       */
>>> -    vma = i915_gem_object_ggtt_pin_ww(bo, &ww, NULL, 0, 0, PIN_HIGH);
>>> +    vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
>>>      if (IS_ERR(vma)) {
>>>          ret = PTR_ERR(vma);
>>>          goto out_ww;
>>>      }
>>> +    ret = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
>>> +    if (ret)
>>> +        goto out_ww;
>>> +
>>>      batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
>>>      if (IS_ERR(batch)) {
>>>          ret = PTR_ERR(batch);
>>
>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 87b92d2946f4..f7621b45966c 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1765,6 +1765,7 @@  static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
 static int alloc_oa_buffer(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *i915 = stream->perf->i915;
+	struct intel_gt *gt = stream->engine->gt;
 	struct drm_i915_gem_object *bo;
 	struct i915_vma *vma;
 	int ret;
@@ -1784,11 +1785,22 @@  static int alloc_oa_buffer(struct i915_perf_stream *stream)
 	i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
 
 	/* PreHSW required 512K alignment, HSW requires 16M */
-	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
+	vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto err_unref;
 	}
+
+	/*
+	 * PreHSW required 512K alignment.
+	 * HSW and onwards, align to requested size of OA buffer.
+	 */
+	ret = i915_vma_pin(vma, 0, SZ_16M, PIN_GLOBAL | PIN_HIGH);
+	if (ret) {
+		drm_err(&gt->i915->drm, "Failed to pin OA buffer %d\n", ret);
+		goto err_unref;
+	}
+
 	stream->oa_buffer.vma = vma;
 
 	stream->oa_buffer.vaddr =
@@ -1838,6 +1850,7 @@  static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
 static int alloc_noa_wait(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *i915 = stream->perf->i915;
+	struct intel_gt *gt = stream->engine->gt;
 	struct drm_i915_gem_object *bo;
 	struct i915_vma *vma;
 	const u64 delay_ticks = 0xffffffffffffffff -
@@ -1878,12 +1891,16 @@  static int alloc_noa_wait(struct i915_perf_stream *stream)
 	 * multiple OA config BOs will have a jump to this address and it
 	 * needs to be fixed during the lifetime of the i915/perf stream.
 	 */
-	vma = i915_gem_object_ggtt_pin_ww(bo, &ww, NULL, 0, 0, PIN_HIGH);
+	vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto out_ww;
 	}
 
+	ret = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	if (ret)
+		goto out_ww;
+
 	batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
 	if (IS_ERR(batch)) {
 		ret = PTR_ERR(batch);