[v2,02/15] drm/i915/dsb: DSB context creation.
diff mbox series

Message ID 20190821063236.19705-3-animesh.manna@intel.com
State New
Headers show
Series
  • DSB enablement.
Related show

Commit Message

Animesh Manna Aug. 21, 2019, 6:32 a.m. UTC
The function will internally get the gem buffer from global GTT
which is mapped in cpu domain to feed the data + opcode for DSB engine.

Cc: Imre Deak <imre.deak@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |  1 +
 .../drm/i915/display/intel_display_types.h    |  4 ++
 drivers/gpu/drm/i915/display/intel_dsb.c      | 66 +++++++++++++++++++
 drivers/gpu/drm/i915/display/intel_dsb.h      | 31 +++++++++
 drivers/gpu/drm/i915/i915_drv.h               |  1 +
 5 files changed, 103 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/display/intel_dsb.c
 create mode 100644 drivers/gpu/drm/i915/display/intel_dsb.h

Comments

Chris Wilson Aug. 21, 2019, 6:11 p.m. UTC | #1
Quoting Animesh Manna (2019-08-21 07:32:22)
> The function will internally get the gem buffer from global GTT
> which is mapped in cpu domain to feed the data + opcode for DSB engine.
> 
> Cc: Imre Deak <imre.deak@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Jani Nikula <jani.nikula@intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Signed-off-by: Animesh Manna <animesh.manna@intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile                 |  1 +
>  .../drm/i915/display/intel_display_types.h    |  4 ++
>  drivers/gpu/drm/i915/display/intel_dsb.c      | 66 +++++++++++++++++++
>  drivers/gpu/drm/i915/display/intel_dsb.h      | 31 +++++++++
>  drivers/gpu/drm/i915/i915_drv.h               |  1 +
>  5 files changed, 103 insertions(+)
>  create mode 100644 drivers/gpu/drm/i915/display/intel_dsb.c
>  create mode 100644 drivers/gpu/drm/i915/display/intel_dsb.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 45add812048b..5232b2607822 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -171,6 +171,7 @@ i915-y += \
>         display/intel_display_power.o \
>         display/intel_dpio_phy.o \
>         display/intel_dpll_mgr.o \
> +       display/intel_dsb.o \
>         display/intel_fbc.o \
>         display/intel_fifo_underrun.o \
>         display/intel_frontbuffer.o \
> diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
> index 449abaea619f..e62450a72dc2 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_types.h
> +++ b/drivers/gpu/drm/i915/display/intel_display_types.h
> @@ -1026,6 +1026,10 @@ struct intel_crtc {
>  
>         /* scalers available on this crtc */
>         int num_scalers;
> +
> +       /* per pipe DSB related info */
> +       struct intel_dsb dsb[MAX_DSB_PER_PIPE];
> +       int dsb_in_use;
>  };
>  
>  struct intel_plane {
> diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
> new file mode 100644
> index 000000000000..6cde3af30643
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/display/intel_dsb.c
> @@ -0,0 +1,66 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2019 Intel Corporation
> + *
> + */
> +
> +#include "../i915_drv.h"
> +#include "intel_display_types.h"
> +
> +#define DSB_BUF_SIZE    (2 * PAGE_SIZE)
> +
> +struct intel_dsb *
> +intel_dsb_get(struct intel_crtc *crtc)
> +{
> +       struct drm_device *dev = crtc->base.dev;
> +       struct drm_i915_private *i915 = to_i915(dev);
> +       struct drm_i915_gem_object *obj;
> +       struct i915_vma *vma;
> +       struct intel_dsb *dsb;
> +       intel_wakeref_t wakeref;
> +       int i;
> +
> +       WARN_ON(crtc->dsb_in_use >= MAX_DSB_PER_PIPE);
> +
> +       for (i = 0; i < MAX_DSB_PER_PIPE; i++) {
> +               if (!crtc->dsb[i].cmd_buf) {
> +                       dsb = &crtc->dsb[i];
> +                       dsb->id = i;
> +               }
> +       }

That seems out of place.

> +
> +       dsb = &crtc->dsb[crtc->dsb_in_use];

What lock guards dsb_in_use?

> +       dsb->crtc = crtc;
> +       if (!HAS_DSB(i915))
> +               return dsb;
> +
> +       wakeref = intel_runtime_pm_get(&i915->runtime_pm);
> +       mutex_lock(&i915->drm.struct_mutex);
> +
> +       obj = i915_gem_object_create_shmem(i915, DSB_BUF_SIZE);

_internal not shmem, you never need to swap.

> +       if (IS_ERR(obj))
> +               goto err;
> +
> +       vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);

Only this (currently) still requires struct_mutex

Does it have to mappable? Is that the HW constraint?

> +       if (IS_ERR(vma)) {
> +               DRM_DEBUG_KMS("Vma creation failed.\n");
> +               i915_gem_object_put(obj);
> +               goto err;
> +       }
> +
> +       dsb->cmd_buf = i915_gem_object_pin_map(vma->obj, I915_MAP_WC);

Are you sure you WC? You spend more time reading it back than writing.

> +       if (IS_ERR(dsb->cmd_buf)) {
> +               DRM_DEBUG_KMS("Command buffer creation failed.\n");
> +               dsb->cmd_buf = NULL;
> +               goto err;
> +       }
> +       crtc->dsb_in_use++;
> +       dsb->cmd_buf_head = (uintptr_t)i915_ggtt_offset(vma);

Pardon? Do not even pretend an offset into the global virtual address of
the GPU is a CPU pointer. Just don't bother, you store the vma, so you
already have the offset stored.

> +       dsb->vma = vma;
> +
> +       memset(dsb->cmd_buf, 0, DSB_BUF_SIZE);

That's overkill.

> +err:
> +       mutex_unlock(&i915->drm.struct_mutex);
> +       intel_runtime_pm_put(&i915->runtime_pm, wakeref);

So what did you do here that required a wakeref?

> +       return dsb;
> +}

Include the complimentary teardown routine. That should be part of the
API you are presenting.
-Chris
Animesh Manna Aug. 22, 2019, 12:05 p.m. UTC | #2
Hi,


On 8/21/2019 11:41 PM, Chris Wilson wrote:
> Quoting Animesh Manna (2019-08-21 07:32:22)
>> The function will internally get the gem buffer from global GTT
>> which is mapped in cpu domain to feed the data + opcode for DSB engine.
>>
>> Cc: Imre Deak <imre.deak@intel.com>
>> Cc: Michel Thierry <michel.thierry@intel.com>
>> Cc: Jani Nikula <jani.nikula@intel.com>
>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> Signed-off-by: Animesh Manna <animesh.manna@intel.com>
>> ---
>>   drivers/gpu/drm/i915/Makefile                 |  1 +
>>   .../drm/i915/display/intel_display_types.h    |  4 ++
>>   drivers/gpu/drm/i915/display/intel_dsb.c      | 66 +++++++++++++++++++
>>   drivers/gpu/drm/i915/display/intel_dsb.h      | 31 +++++++++
>>   drivers/gpu/drm/i915/i915_drv.h               |  1 +
>>   5 files changed, 103 insertions(+)
>>   create mode 100644 drivers/gpu/drm/i915/display/intel_dsb.c
>>   create mode 100644 drivers/gpu/drm/i915/display/intel_dsb.h
>>
>> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
>> index 45add812048b..5232b2607822 100644
>> --- a/drivers/gpu/drm/i915/Makefile
>> +++ b/drivers/gpu/drm/i915/Makefile
>> @@ -171,6 +171,7 @@ i915-y += \
>>          display/intel_display_power.o \
>>          display/intel_dpio_phy.o \
>>          display/intel_dpll_mgr.o \
>> +       display/intel_dsb.o \
>>          display/intel_fbc.o \
>>          display/intel_fifo_underrun.o \
>>          display/intel_frontbuffer.o \
>> diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
>> index 449abaea619f..e62450a72dc2 100644
>> --- a/drivers/gpu/drm/i915/display/intel_display_types.h
>> +++ b/drivers/gpu/drm/i915/display/intel_display_types.h
>> @@ -1026,6 +1026,10 @@ struct intel_crtc {
>>   
>>          /* scalers available on this crtc */
>>          int num_scalers;
>> +
>> +       /* per pipe DSB related info */
>> +       struct intel_dsb dsb[MAX_DSB_PER_PIPE];
>> +       int dsb_in_use;
>>   };
>>   
>>   struct intel_plane {
>> diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
>> new file mode 100644
>> index 000000000000..6cde3af30643
>> --- /dev/null
>> +++ b/drivers/gpu/drm/i915/display/intel_dsb.c
>> @@ -0,0 +1,66 @@
>> +// SPDX-License-Identifier: MIT
>> +/*
>> + * Copyright © 2019 Intel Corporation
>> + *
>> + */
>> +
>> +#include "../i915_drv.h"
>> +#include "intel_display_types.h"
>> +
>> +#define DSB_BUF_SIZE    (2 * PAGE_SIZE)
>> +
>> +struct intel_dsb *
>> +intel_dsb_get(struct intel_crtc *crtc)
>> +{
>> +       struct drm_device *dev = crtc->base.dev;
>> +       struct drm_i915_private *i915 = to_i915(dev);
>> +       struct drm_i915_gem_object *obj;
>> +       struct i915_vma *vma;
>> +       struct intel_dsb *dsb;
>> +       intel_wakeref_t wakeref;
>> +       int i;
>> +
>> +       WARN_ON(crtc->dsb_in_use >= MAX_DSB_PER_PIPE);
>> +
>> +       for (i = 0; i < MAX_DSB_PER_PIPE; i++) {
>> +               if (!crtc->dsb[i].cmd_buf) {
>> +                       dsb = &crtc->dsb[i];
>> +                       dsb->id = i;
>> +               }
>> +       }
> That seems out of place.

Yes, will fix it.
>
>> +
>> +       dsb = &crtc->dsb[crtc->dsb_in_use];
> What lock guards dsb_in_use?

Thanks for catching, need to add mutex_lock.
>
>> +       dsb->crtc = crtc;
>> +       if (!HAS_DSB(i915))
>> +               return dsb;
>> +
>> +       wakeref = intel_runtime_pm_get(&i915->runtime_pm);
>> +       mutex_lock(&i915->drm.struct_mutex);
>> +
>> +       obj = i915_gem_object_create_shmem(i915, DSB_BUF_SIZE);
> _internal not shmem, you never need to swap.

Will do.
>
>> +       if (IS_ERR(obj))
>> +               goto err;
>> +
>> +       vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
> Only this (currently) still requires struct_mutex

Sure will add.
>
> Does it have to mappable? Is that the HW constraint?

Yes, as per HW design need a cpu mapped buffer to write opcode+data from 
driver.
>
>> +       if (IS_ERR(vma)) {
>> +               DRM_DEBUG_KMS("Vma creation failed.\n");
>> +               i915_gem_object_put(obj);
>> +               goto err;
>> +       }
>> +
>> +       dsb->cmd_buf = i915_gem_object_pin_map(vma->obj, I915_MAP_WC);
> Are you sure you WC? You spend more time reading it back than writing.

DSB functionality is focused about buffer creation with opcode + data 
and execute it.
Currently not reading the buffer back.  Please suggest if need any 
improvement here.
>
>> +       if (IS_ERR(dsb->cmd_buf)) {
>> +               DRM_DEBUG_KMS("Command buffer creation failed.\n");
>> +               dsb->cmd_buf = NULL;
>> +               goto err;
>> +       }
>> +       crtc->dsb_in_use++;
>> +       dsb->cmd_buf_head = (uintptr_t)i915_ggtt_offset(vma);
> Pardon? Do not even pretend an offset into the global virtual address of
> the GPU is a CPU pointer. Just don't bother, you store the vma, so you
> already have the offset stored.

Got it, may not need cmd_buf_head separately. Will fix it.
>
>> +       dsb->vma = vma;
>> +
>> +       memset(dsb->cmd_buf, 0, DSB_BUF_SIZE);
> That's overkill.

ok, will remove, as DSB_TAIL_PTR is CACHELINE aligned so will fill the 
unused space with zero in intel_dsb_commit().
>
>> +err:
>> +       mutex_unlock(&i915->drm.struct_mutex);
>> +       intel_runtime_pm_put(&i915->runtime_pm, wakeref);
> So what did you do here that required a wakeref?

while referring some existing code of getting page from global gtt, I 
assumed that wakeref maybe needed.
If I am wrong, please let me know, will correct.
>
>> +       return dsb;
>> +}
> Include the complimentary teardown routine. That should be part of the
> API you are presenting.

Sure I can add intel_dsb_put function in this patch.
Thanks Chris for code review.

Regards,
Animesh
> -Chris
Chris Wilson Aug. 22, 2019, 12:09 p.m. UTC | #3
Quoting Animesh Manna (2019-08-22 13:05:06)
> Hi,
> 
> 
> On 8/21/2019 11:41 PM, Chris Wilson wrote:
> > Quoting Animesh Manna (2019-08-21 07:32:22)
> >> +       vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
> > Only this (currently) still requires struct_mutex
> 
> Sure will add.
> >
> > Does it have to mappable? Is that the HW constraint?
> 
> Yes, as per HW design need a cpu mapped buffer to write opcode+data from 
> driver.

PIN_MAPPABLE refers to the iomem aperture portion of the Global GTT (i.e.
the low 64-512MiB). You never use a GGTT mmap for your CPU access, so the
placement should be entirely dictated by the DSB requirements. If you
don't need to be in the low region, don't force it to be, so we have
less congestion for the objects that have to be placed in that region.
-Chris
Tvrtko Ursulin Oct. 17, 2019, 8:35 a.m. UTC | #4
On 22/08/2019 13:09, Chris Wilson wrote:
> Quoting Animesh Manna (2019-08-22 13:05:06)
>> Hi,
>>
>>
>> On 8/21/2019 11:41 PM, Chris Wilson wrote:
>>> Quoting Animesh Manna (2019-08-21 07:32:22)
>>>> +       vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
>>> Only this (currently) still requires struct_mutex
>>
>> Sure will add.
>>>
>>> Does it have to mappable? Is that the HW constraint?
>>
>> Yes, as per HW design need a cpu mapped buffer to write opcode+data from
>> driver.
> 
> PIN_MAPPABLE refers to the iomem aperture portion of the Global GTT (i.e.
> the low 64-512MiB). You never use a GGTT mmap for your CPU access, so the
> placement should be entirely dictated by the DSB requirements. If you
> don't need to be in the low region, don't force it to be, so we have
> less congestion for the objects that have to be placed in that region.

I was doing a mini audit of what uses the aperture these days and 
noticed this code has been merged in the meantime, but AFAICS this 
question from Chris hasn't been answered? At least not on the mailing 
list. So does it need to be in the aperture region or not?

Regards,

Tvrtko
Animesh Manna Oct. 17, 2019, 12:52 p.m. UTC | #5
On 10/17/2019 2:05 PM, Tvrtko Ursulin wrote:
>
> On 22/08/2019 13:09, Chris Wilson wrote:
>> Quoting Animesh Manna (2019-08-22 13:05:06)
>>> Hi,
>>>
>>>
>>> On 8/21/2019 11:41 PM, Chris Wilson wrote:
>>>> Quoting Animesh Manna (2019-08-21 07:32:22)
>>>>> +       vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 
>>>>> PIN_MAPPABLE);
>>>> Only this (currently) still requires struct_mutex
>>>
>>> Sure will add.
>>>>
>>>> Does it have to mappable? Is that the HW constraint?
>>>
>>> Yes, as per HW design need a cpu mapped buffer to write opcode+data 
>>> from
>>> driver.
>>
>> PIN_MAPPABLE refers to the iomem aperture portion of the Global GTT 
>> (i.e.
>> the low 64-512MiB). You never use a GGTT mmap for your CPU access, so 
>> the
>> placement should be entirely dictated by the DSB requirements. If you
>> don't need to be in the low region, don't force it to be, so we have
>> less congestion for the objects that have to be placed in that region.
>
> I was doing a mini audit of what uses the aperture these days and 
> noticed this code has been merged in the meantime, but AFAICS this 
> question from Chris hasn't been answered? At least not on the mailing 
> list. So does it need to be in the aperture region or not?

Hi,

Based on recommendation from H/w team used PIN_MAPPABLE, not very sure 
about internal details.

Regards,
Animesh
>
> Regards,
>
> Tvrtko
Tvrtko Ursulin Oct. 17, 2019, 1:09 p.m. UTC | #6
On 17/10/2019 13:52, Animesh Manna wrote:
> On 10/17/2019 2:05 PM, Tvrtko Ursulin wrote:
>> On 22/08/2019 13:09, Chris Wilson wrote:
>>> Quoting Animesh Manna (2019-08-22 13:05:06)
>>>> Hi,
>>>>
>>>>
>>>> On 8/21/2019 11:41 PM, Chris Wilson wrote:
>>>>> Quoting Animesh Manna (2019-08-21 07:32:22)
>>>>>> +       vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 
>>>>>> PIN_MAPPABLE);
>>>>> Only this (currently) still requires struct_mutex
>>>>
>>>> Sure will add.
>>>>>
>>>>> Does it have to mappable? Is that the HW constraint?
>>>>
>>>> Yes, as per HW design need a cpu mapped buffer to write opcode+data 
>>>> from
>>>> driver.
>>>
>>> PIN_MAPPABLE refers to the iomem aperture portion of the Global GTT 
>>> (i.e.
>>> the low 64-512MiB). You never use a GGTT mmap for your CPU access, so 
>>> the
>>> placement should be entirely dictated by the DSB requirements. If you
>>> don't need to be in the low region, don't force it to be, so we have
>>> less congestion for the objects that have to be placed in that region.
>>
>> I was doing a mini audit of what uses the aperture these days and 
>> noticed this code has been merged in the meantime, but AFAICS this 
>> question from Chris hasn't been answered? At least not on the mailing 
>> list. So does it need to be in the aperture region or not?
> 
> Hi,
> 
> Based on recommendation from H/w team used PIN_MAPPABLE, not very sure 
> about internal details.

What did the recommendation exactly say? That it has to be in GGTT or 
aperture?

Regards,

Tvrtko
Animesh Manna Oct. 17, 2019, 1:53 p.m. UTC | #7
On 10/17/2019 6:39 PM, Tvrtko Ursulin wrote:
>
> On 17/10/2019 13:52, Animesh Manna wrote:
>> On 10/17/2019 2:05 PM, Tvrtko Ursulin wrote:
>>> On 22/08/2019 13:09, Chris Wilson wrote:
>>>> Quoting Animesh Manna (2019-08-22 13:05:06)
>>>>> Hi,
>>>>>
>>>>>
>>>>> On 8/21/2019 11:41 PM, Chris Wilson wrote:
>>>>>> Quoting Animesh Manna (2019-08-21 07:32:22)
>>>>>>> +       vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 
>>>>>>> PIN_MAPPABLE);
>>>>>> Only this (currently) still requires struct_mutex
>>>>>
>>>>> Sure will add.
>>>>>>
>>>>>> Does it have to mappable? Is that the HW constraint?
>>>>>
>>>>> Yes, as per HW design need a cpu mapped buffer to write 
>>>>> opcode+data from
>>>>> driver.
>>>>
>>>> PIN_MAPPABLE refers to the iomem aperture portion of the Global GTT 
>>>> (i.e.
>>>> the low 64-512MiB). You never use a GGTT mmap for your CPU access, 
>>>> so the
>>>> placement should be entirely dictated by the DSB requirements. If you
>>>> don't need to be in the low region, don't force it to be, so we have
>>>> less congestion for the objects that have to be placed in that region.
>>>
>>> I was doing a mini audit of what uses the aperture these days and 
>>> noticed this code has been merged in the meantime, but AFAICS this 
>>> question from Chris hasn't been answered? At least not on the 
>>> mailing list. So does it need to be in the aperture region or not?
>>
>> Hi,
>>
>> Based on recommendation from H/w team used PIN_MAPPABLE, not very 
>> sure about internal details.
>
> What did the recommendation exactly say? That it has to be in GGTT or 
> aperture?

It said:
"GMM to allocate buffer from global GTT, get CPU mapped address as well 
(not stolen memory) ... ".

Regards,
Animesh

>
> Regards,
>
> Tvrtko
Tvrtko Ursulin Oct. 17, 2019, 2:38 p.m. UTC | #8
On 17/10/2019 14:53, Animesh Manna wrote:
> On 10/17/2019 6:39 PM, Tvrtko Ursulin wrote:
>> On 17/10/2019 13:52, Animesh Manna wrote:
>>> On 10/17/2019 2:05 PM, Tvrtko Ursulin wrote:
>>>> On 22/08/2019 13:09, Chris Wilson wrote:
>>>>> Quoting Animesh Manna (2019-08-22 13:05:06)
>>>>>> Hi,
>>>>>>
>>>>>>
>>>>>> On 8/21/2019 11:41 PM, Chris Wilson wrote:
>>>>>>> Quoting Animesh Manna (2019-08-21 07:32:22)
>>>>>>>> +       vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 
>>>>>>>> PIN_MAPPABLE);
>>>>>>> Only this (currently) still requires struct_mutex
>>>>>>
>>>>>> Sure will add.
>>>>>>>
>>>>>>> Does it have to mappable? Is that the HW constraint?
>>>>>>
>>>>>> Yes, as per HW design need a cpu mapped buffer to write 
>>>>>> opcode+data from
>>>>>> driver.
>>>>>
>>>>> PIN_MAPPABLE refers to the iomem aperture portion of the Global GTT 
>>>>> (i.e.
>>>>> the low 64-512MiB). You never use a GGTT mmap for your CPU access, 
>>>>> so the
>>>>> placement should be entirely dictated by the DSB requirements. If you
>>>>> don't need to be in the low region, don't force it to be, so we have
>>>>> less congestion for the objects that have to be placed in that region.
>>>>
>>>> I was doing a mini audit of what uses the aperture these days and 
>>>> noticed this code has been merged in the meantime, but AFAICS this 
>>>> question from Chris hasn't been answered? At least not on the 
>>>> mailing list. So does it need to be in the aperture region or not?
>>>
>>> Hi,
>>>
>>> Based on recommendation from H/w team used PIN_MAPPABLE, not very 
>>> sure about internal details.
>>
>> What did the recommendation exactly say? That it has to be in GGTT or 
>> aperture?
> 
> It said:
> "GMM to allocate buffer from global GTT, get CPU mapped address as well 
> (not stolen memory) ... ".

So it's possible you don't need PIN_MAPPABLE.

Do we have some test coverage for this? In other words if I send a patch 
which removes it, will we know if the feature is healthy?

Regards,

Tvrtko
Animesh Manna Oct. 21, 2019, 10:11 a.m. UTC | #9
On 10/17/2019 8:08 PM, Tvrtko Ursulin wrote:
>
> On 17/10/2019 14:53, Animesh Manna wrote:
>> On 10/17/2019 6:39 PM, Tvrtko Ursulin wrote:
>>> On 17/10/2019 13:52, Animesh Manna wrote:
>>>> On 10/17/2019 2:05 PM, Tvrtko Ursulin wrote:
>>>>> On 22/08/2019 13:09, Chris Wilson wrote:
>>>>>> Quoting Animesh Manna (2019-08-22 13:05:06)
>>>>>>> Hi,
>>>>>>>
>>>>>>>
>>>>>>> On 8/21/2019 11:41 PM, Chris Wilson wrote:
>>>>>>>> Quoting Animesh Manna (2019-08-21 07:32:22)
>>>>>>>>> +       vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 
>>>>>>>>> PIN_MAPPABLE);
>>>>>>>> Only this (currently) still requires struct_mutex
>>>>>>>
>>>>>>> Sure will add.
>>>>>>>>
>>>>>>>> Does it have to mappable? Is that the HW constraint?
>>>>>>>
>>>>>>> Yes, as per HW design need a cpu mapped buffer to write 
>>>>>>> opcode+data from
>>>>>>> driver.
>>>>>>
>>>>>> PIN_MAPPABLE refers to the iomem aperture portion of the Global 
>>>>>> GTT (i.e.
>>>>>> the low 64-512MiB). You never use a GGTT mmap for your CPU 
>>>>>> access, so the
>>>>>> placement should be entirely dictated by the DSB requirements. If 
>>>>>> you
>>>>>> don't need to be in the low region, don't force it to be, so we have
>>>>>> less congestion for the objects that have to be placed in that 
>>>>>> region.
>>>>>
>>>>> I was doing a mini audit of what uses the aperture these days and 
>>>>> noticed this code has been merged in the meantime, but AFAICS this 
>>>>> question from Chris hasn't been answered? At least not on the 
>>>>> mailing list. So does it need to be in the aperture region or not?
>>>>
>>>> Hi,
>>>>
>>>> Based on recommendation from H/w team used PIN_MAPPABLE, not very 
>>>> sure about internal details.
>>>
>>> What did the recommendation exactly say? That it has to be in GGTT 
>>> or aperture?
>>
>> It said:
>> "GMM to allocate buffer from global GTT, get CPU mapped address as 
>> well (not stolen memory) ... ".
>
> So it's possible you don't need PIN_MAPPABLE.

As per I remember from initial discussion from h/w team DSB is not cache 
coherent. Due to that we have used I915_MAP_WC during mapping the buffer 
and want to keep the buffer in aperture region.
Could not find the discussion thread as it was quiet old like around 
initial enablement of DSB feature.

>
>
> Do we have some test coverage for this? In other words if I send a 
> patch which removes it, will we know if the feature is healthy?

Gamma lut programming is enabled through DSB. kms_color igt is having 
the coverage for this.

Not sure cache coherence issue is easily reproducible or not, will go 
with expert's opinion.

Regards,
Animesh

>
> Regards,
>
> Tvrtko
Chris Wilson Oct. 21, 2019, 10:18 a.m. UTC | #10
Quoting Animesh Manna (2019-10-21 11:11:04)
> 
> 
> On 10/17/2019 8:08 PM, Tvrtko Ursulin wrote:
> >
> > On 17/10/2019 14:53, Animesh Manna wrote:
> >> On 10/17/2019 6:39 PM, Tvrtko Ursulin wrote:
> >>> On 17/10/2019 13:52, Animesh Manna wrote:
> >>>> On 10/17/2019 2:05 PM, Tvrtko Ursulin wrote:
> >>>>> On 22/08/2019 13:09, Chris Wilson wrote:
> >>>>>> Quoting Animesh Manna (2019-08-22 13:05:06)
> >>>>>>> Hi,
> >>>>>>>
> >>>>>>>
> >>>>>>> On 8/21/2019 11:41 PM, Chris Wilson wrote:
> >>>>>>>> Quoting Animesh Manna (2019-08-21 07:32:22)
> >>>>>>>>> +       vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 
> >>>>>>>>> PIN_MAPPABLE);
> >>>>>>>> Only this (currently) still requires struct_mutex
> >>>>>>>
> >>>>>>> Sure will add.
> >>>>>>>>
> >>>>>>>> Does it have to mappable? Is that the HW constraint?
> >>>>>>>
> >>>>>>> Yes, as per HW design need a cpu mapped buffer to write 
> >>>>>>> opcode+data from
> >>>>>>> driver.
> >>>>>>
> >>>>>> PIN_MAPPABLE refers to the iomem aperture portion of the Global 
> >>>>>> GTT (i.e.
> >>>>>> the low 64-512MiB). You never use a GGTT mmap for your CPU 
> >>>>>> access, so the
> >>>>>> placement should be entirely dictated by the DSB requirements. If 
> >>>>>> you
> >>>>>> don't need to be in the low region, don't force it to be, so we have
> >>>>>> less congestion for the objects that have to be placed in that 
> >>>>>> region.
> >>>>>
> >>>>> I was doing a mini audit of what uses the aperture these days and 
> >>>>> noticed this code has been merged in the meantime, but AFAICS this 
> >>>>> question from Chris hasn't been answered? At least not on the 
> >>>>> mailing list. So does it need to be in the aperture region or not?
> >>>>
> >>>> Hi,
> >>>>
> >>>> Based on recommendation from H/w team used PIN_MAPPABLE, not very 
> >>>> sure about internal details.
> >>>
> >>> What did the recommendation exactly say? That it has to be in GGTT 
> >>> or aperture?
> >>
> >> It said:
> >> "GMM to allocate buffer from global GTT, get CPU mapped address as 
> >> well (not stolen memory) ... ".
> >
> > So it's possible you don't need PIN_MAPPABLE.
> 
> As per I remember from initial discussion from h/w team DSB is not cache 
> coherent. Due to that we have used I915_MAP_WC during mapping the buffer 
> and want to keep the buffer in aperture region.

The mappable aperture has nothing to do with I915_MAP_WC which is a
processor attribute on its page tables. If only I mentioned we had a way
to flush caches!
-Chris

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 45add812048b..5232b2607822 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -171,6 +171,7 @@  i915-y += \
 	display/intel_display_power.o \
 	display/intel_dpio_phy.o \
 	display/intel_dpll_mgr.o \
+	display/intel_dsb.o \
 	display/intel_fbc.o \
 	display/intel_fifo_underrun.o \
 	display/intel_frontbuffer.o \
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 449abaea619f..e62450a72dc2 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -1026,6 +1026,10 @@  struct intel_crtc {
 
 	/* scalers available on this crtc */
 	int num_scalers;
+
+	/* per pipe DSB related info */
+	struct intel_dsb dsb[MAX_DSB_PER_PIPE];
+	int dsb_in_use;
 };
 
 struct intel_plane {
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
new file mode 100644
index 000000000000..6cde3af30643
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -0,0 +1,66 @@ 
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ */
+
+#include "../i915_drv.h"
+#include "intel_display_types.h"
+
+#define DSB_BUF_SIZE    (2 * PAGE_SIZE)
+
+struct intel_dsb *
+intel_dsb_get(struct intel_crtc *crtc)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	struct intel_dsb *dsb;
+	intel_wakeref_t wakeref;
+	int i;
+
+	WARN_ON(crtc->dsb_in_use >= MAX_DSB_PER_PIPE);
+
+	for (i = 0; i < MAX_DSB_PER_PIPE; i++) {
+		if (!crtc->dsb[i].cmd_buf) {
+			dsb = &crtc->dsb[i];
+			dsb->id = i;
+		}
+	}
+
+	dsb = &crtc->dsb[crtc->dsb_in_use];
+	dsb->crtc = crtc;
+	if (!HAS_DSB(i915))
+		return dsb;
+
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+	mutex_lock(&i915->drm.struct_mutex);
+
+	obj = i915_gem_object_create_shmem(i915, DSB_BUF_SIZE);
+	if (IS_ERR(obj))
+		goto err;
+
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+	if (IS_ERR(vma)) {
+		DRM_DEBUG_KMS("Vma creation failed.\n");
+		i915_gem_object_put(obj);
+		goto err;
+	}
+
+	dsb->cmd_buf = i915_gem_object_pin_map(vma->obj, I915_MAP_WC);
+	if (IS_ERR(dsb->cmd_buf)) {
+		DRM_DEBUG_KMS("Command buffer creation failed.\n");
+		dsb->cmd_buf = NULL;
+		goto err;
+	}
+	crtc->dsb_in_use++;
+	dsb->cmd_buf_head = (uintptr_t)i915_ggtt_offset(vma);
+	dsb->vma = vma;
+
+	memset(dsb->cmd_buf, 0, DSB_BUF_SIZE);
+err:
+	mutex_unlock(&i915->drm.struct_mutex);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+	return dsb;
+}
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h
new file mode 100644
index 000000000000..50a2a6590a71
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_dsb.h
@@ -0,0 +1,31 @@ 
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _INTEL_DSB_H
+#define _INTEL_DSB_H
+
+struct intel_crtc;
+struct i915_vma;
+
+enum dsb_id {
+	INVALID_DSB = -1,
+	DSB1,
+	DSB2,
+	DSB3,
+	MAX_DSB_PER_PIPE
+};
+
+struct intel_dsb {
+	struct intel_crtc *crtc;
+	enum dsb_id id;
+	u32 *cmd_buf;
+	u32 cmd_buf_head;
+	struct i915_vma *vma;
+};
+
+struct intel_dsb *
+intel_dsb_get(struct intel_crtc *crtc);
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9e03c162a5f8..643fd6d6fd73 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -67,6 +67,7 @@ 
 #include "display/intel_display.h"
 #include "display/intel_display_power.h"
 #include "display/intel_dpll_mgr.h"
+#include "display/intel_dsb.h"
 #include "display/intel_frontbuffer.h"
 #include "display/intel_gmbus.h"
 #include "display/intel_opregion.h"