diff mbox

[01/15] drm/i915: Add i915_gem_object_write() to i915_gem.c

Message ID 1434393394-21002-2-git-send-email-david.s.gordon@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Gordon June 15, 2015, 6:36 p.m. UTC
From: Alex Dai <yu.dai@intel.com>

i915_gem_object_write() is a generic function to copy data from a plain
linear buffer to a paged gem object.

We will need this for the microcontroller firmware loading support code.

Issue: VIZ-4884
Signed-off-by: Alex Dai <yu.dai@intel.com>
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |    2 ++
 drivers/gpu/drm/i915/i915_gem.c |   28 ++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

Comments

Chris Wilson June 15, 2015, 8:09 p.m. UTC | #1
On Mon, Jun 15, 2015 at 07:36:19PM +0100, Dave Gordon wrote:
> From: Alex Dai <yu.dai@intel.com>
> 
> i915_gem_object_write() is a generic function to copy data from a plain
> linear buffer to a paged gem object.
> 
> We will need this for the microcontroller firmware loading support code.
> 
> Issue: VIZ-4884
> Signed-off-by: Alex Dai <yu.dai@intel.com>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h |    2 ++
>  drivers/gpu/drm/i915/i915_gem.c |   28 ++++++++++++++++++++++++++++
>  2 files changed, 30 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 611fbd8..9094c06 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2713,6 +2713,8 @@ void *i915_gem_object_alloc(struct drm_device *dev);
>  void i915_gem_object_free(struct drm_i915_gem_object *obj);
>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  			 const struct drm_i915_gem_object_ops *ops);
> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
> +			  const void *data, size_t size);
>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  						  size_t size);
>  void i915_init_vm(struct drm_i915_private *dev_priv,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index be35f04..75d63c2 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5392,3 +5392,31 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
>  	return false;
>  }
>  
> +/* Fill the @obj with the @size amount of @data */
> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
> +			const void *data, size_t size)
> +{
> +	struct sg_table *sg;
> +	size_t bytes;
> +	int ret;
> +
> +	ret = i915_gem_object_get_pages(obj);
> +	if (ret)
> +		return ret;
> +
> +	i915_gem_object_pin_pages(obj);

You don't set the object into the CPU domain, or instead manually handle
the domain flushing. You don't handle objects that cannot be written
directly by the CPU, nor do you handle objects whose representation in
memory is not linear.
-Chris
Dave Gordon June 17, 2015, 7:23 a.m. UTC | #2
On 15/06/15 21:09, Chris Wilson wrote:
> On Mon, Jun 15, 2015 at 07:36:19PM +0100, Dave Gordon wrote:
>> From: Alex Dai <yu.dai@intel.com>
>>
>> i915_gem_object_write() is a generic function to copy data from a plain
>> linear buffer to a paged gem object.
>>
>> We will need this for the microcontroller firmware loading support code.
>>
>> Issue: VIZ-4884
>> Signed-off-by: Alex Dai <yu.dai@intel.com>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_drv.h |    2 ++
>>  drivers/gpu/drm/i915/i915_gem.c |   28 ++++++++++++++++++++++++++++
>>  2 files changed, 30 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 611fbd8..9094c06 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2713,6 +2713,8 @@ void *i915_gem_object_alloc(struct drm_device *dev);
>>  void i915_gem_object_free(struct drm_i915_gem_object *obj);
>>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>>  			 const struct drm_i915_gem_object_ops *ops);
>> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
>> +			  const void *data, size_t size);
>>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>>  						  size_t size);
>>  void i915_init_vm(struct drm_i915_private *dev_priv,
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index be35f04..75d63c2 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -5392,3 +5392,31 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
>>  	return false;
>>  }
>>  
>> +/* Fill the @obj with the @size amount of @data */
>> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
>> +			const void *data, size_t size)
>> +{
>> +	struct sg_table *sg;
>> +	size_t bytes;
>> +	int ret;
>> +
>> +	ret = i915_gem_object_get_pages(obj);
>> +	if (ret)
>> +		return ret;
>> +
>> +	i915_gem_object_pin_pages(obj);
> 
> You don't set the object into the CPU domain, or instead manually handle
> the domain flushing. You don't handle objects that cannot be written
> directly by the CPU, nor do you handle objects whose representation in
> memory is not linear.
> -Chris

No we don't handle just any random gem object, but we do return an error
code for any types not supported. However, as we don't really need the
full generality of writing into a gem object of any type, I will replace
this function with one that combines the allocation of a new object
(which will therefore definitely be of the correct type, in the correct
domain, etc) and filling it with the data to be preserved.

Bikeshedding over the name of such function welcome :)

.Dave.
Daniel Vetter June 17, 2015, 12:02 p.m. UTC | #3
On Wed, Jun 17, 2015 at 08:23:40AM +0100, Dave Gordon wrote:
> On 15/06/15 21:09, Chris Wilson wrote:
> > On Mon, Jun 15, 2015 at 07:36:19PM +0100, Dave Gordon wrote:
> >> From: Alex Dai <yu.dai@intel.com>
> >>
> >> i915_gem_object_write() is a generic function to copy data from a plain
> >> linear buffer to a paged gem object.
> >>
> >> We will need this for the microcontroller firmware loading support code.
> >>
> >> Issue: VIZ-4884
> >> Signed-off-by: Alex Dai <yu.dai@intel.com>
> >> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> >> ---
> >>  drivers/gpu/drm/i915/i915_drv.h |    2 ++
> >>  drivers/gpu/drm/i915/i915_gem.c |   28 ++++++++++++++++++++++++++++
> >>  2 files changed, 30 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >> index 611fbd8..9094c06 100644
> >> --- a/drivers/gpu/drm/i915/i915_drv.h
> >> +++ b/drivers/gpu/drm/i915/i915_drv.h
> >> @@ -2713,6 +2713,8 @@ void *i915_gem_object_alloc(struct drm_device *dev);
> >>  void i915_gem_object_free(struct drm_i915_gem_object *obj);
> >>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
> >>  			 const struct drm_i915_gem_object_ops *ops);
> >> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
> >> +			  const void *data, size_t size);
> >>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
> >>  						  size_t size);
> >>  void i915_init_vm(struct drm_i915_private *dev_priv,
> >> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >> index be35f04..75d63c2 100644
> >> --- a/drivers/gpu/drm/i915/i915_gem.c
> >> +++ b/drivers/gpu/drm/i915/i915_gem.c
> >> @@ -5392,3 +5392,31 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
> >>  	return false;
> >>  }
> >>  
> >> +/* Fill the @obj with the @size amount of @data */
> >> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
> >> +			const void *data, size_t size)
> >> +{
> >> +	struct sg_table *sg;
> >> +	size_t bytes;
> >> +	int ret;
> >> +
> >> +	ret = i915_gem_object_get_pages(obj);
> >> +	if (ret)
> >> +		return ret;
> >> +
> >> +	i915_gem_object_pin_pages(obj);
> > 
> > You don't set the object into the CPU domain, or instead manually handle
> > the domain flushing. You don't handle objects that cannot be written
> > directly by the CPU, nor do you handle objects whose representation in
> > memory is not linear.
> > -Chris
> 
> No we don't handle just any random gem object, but we do return an error
> code for any types not supported. However, as we don't really need the
> full generality of writing into a gem object of any type, I will replace
> this function with one that combines the allocation of a new object
> (which will therefore definitely be of the correct type, in the correct
> domain, etc) and filling it with the data to be preserved.

Domain handling is required for all gem objects, and the resulting bugs if
you don't for one-off objects are absolutely no fun to track down.
-Daniel
Dave Gordon June 18, 2015, 11:49 a.m. UTC | #4
On 17/06/15 13:02, Daniel Vetter wrote:
> On Wed, Jun 17, 2015 at 08:23:40AM +0100, Dave Gordon wrote:
>> On 15/06/15 21:09, Chris Wilson wrote:
>>> On Mon, Jun 15, 2015 at 07:36:19PM +0100, Dave Gordon wrote:
>>>> From: Alex Dai <yu.dai@intel.com>
>>>>
>>>> i915_gem_object_write() is a generic function to copy data from a plain
>>>> linear buffer to a paged gem object.
>>>>
>>>> We will need this for the microcontroller firmware loading support code.
>>>>
>>>> Issue: VIZ-4884
>>>> Signed-off-by: Alex Dai <yu.dai@intel.com>
>>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>>> ---
>>>>  drivers/gpu/drm/i915/i915_drv.h |    2 ++
>>>>  drivers/gpu/drm/i915/i915_gem.c |   28 ++++++++++++++++++++++++++++
>>>>  2 files changed, 30 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>>> index 611fbd8..9094c06 100644
>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>> @@ -2713,6 +2713,8 @@ void *i915_gem_object_alloc(struct drm_device *dev);
>>>>  void i915_gem_object_free(struct drm_i915_gem_object *obj);
>>>>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>>>>  			 const struct drm_i915_gem_object_ops *ops);
>>>> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
>>>> +			  const void *data, size_t size);
>>>>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>>>>  						  size_t size);
>>>>  void i915_init_vm(struct drm_i915_private *dev_priv,
>>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>>> index be35f04..75d63c2 100644
>>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>>> @@ -5392,3 +5392,31 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
>>>>  	return false;
>>>>  }
>>>>  
>>>> +/* Fill the @obj with the @size amount of @data */
>>>> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
>>>> +			const void *data, size_t size)
>>>> +{
>>>> +	struct sg_table *sg;
>>>> +	size_t bytes;
>>>> +	int ret;
>>>> +
>>>> +	ret = i915_gem_object_get_pages(obj);
>>>> +	if (ret)
>>>> +		return ret;
>>>> +
>>>> +	i915_gem_object_pin_pages(obj);
>>>
>>> You don't set the object into the CPU domain, or instead manually handle
>>> the domain flushing. You don't handle objects that cannot be written
>>> directly by the CPU, nor do you handle objects whose representation in
>>> memory is not linear.
>>> -Chris
>>
>> No we don't handle just any random gem object, but we do return an error
>> code for any types not supported. However, as we don't really need the
>> full generality of writing into a gem object of any type, I will replace
>> this function with one that combines the allocation of a new object
>> (which will therefore definitely be of the correct type, in the correct
>> domain, etc) and filling it with the data to be preserved.

The usage pattern for the particular case is going to be:
	Once-only:
		Allocate
		Fill
	Then each time GuC is (re-)initialised:
		Map to GTT
		DMA-read from buffer into GuC private memory
		Unmap
	Only on unload:
		Dispose

So our object is write-once by the CPU (and that's always the first
operation), thereafter read-occasionally by the GuC's DMA engine.

> Domain handling is required for all gem objects, and the resulting bugs if
> you don't for one-off objects are absolutely no fun to track down.
> -Daniel

Is it not the case that the new object returned by
i915_gem_alloc_object() is
(a) of a type that can be mapped into the GTT, and
(b) initially in the CPU domain for both reading and writing?

So AFAICS the allocate-and-fill function I'm describing (to appear in
next patch series respin) doesn't need any further domain handling.

.Dave.
Chris Wilson June 18, 2015, 12:10 p.m. UTC | #5
On Thu, Jun 18, 2015 at 12:49:55PM +0100, Dave Gordon wrote:
> On 17/06/15 13:02, Daniel Vetter wrote:
> > Domain handling is required for all gem objects, and the resulting bugs if
> > you don't for one-off objects are absolutely no fun to track down.
> 
> Is it not the case that the new object returned by
> i915_gem_alloc_object() is
> (a) of a type that can be mapped into the GTT, and
> (b) initially in the CPU domain for both reading and writing?
> 
> So AFAICS the allocate-and-fill function I'm describing (to appear in
> next patch series respin) doesn't need any further domain handling.

A i915_gem_object_create_from_data() is a reasonable addition, and I
suspect it will make the code a bit more succinct.

Whilst your statement is true today, calling set_domain is then a no-op,
and helps document how you use the object and so reduces the likelihood
of us introducing bugs in the future.
-Chris
Daniel Vetter June 18, 2015, 2:31 p.m. UTC | #6
On Thu, Jun 18, 2015 at 12:49:55PM +0100, Dave Gordon wrote:
> On 17/06/15 13:02, Daniel Vetter wrote:
> > On Wed, Jun 17, 2015 at 08:23:40AM +0100, Dave Gordon wrote:
> >> On 15/06/15 21:09, Chris Wilson wrote:
> >>> On Mon, Jun 15, 2015 at 07:36:19PM +0100, Dave Gordon wrote:
> >>>> From: Alex Dai <yu.dai@intel.com>
> >>>>
> >>>> i915_gem_object_write() is a generic function to copy data from a plain
> >>>> linear buffer to a paged gem object.
> >>>>
> >>>> We will need this for the microcontroller firmware loading support code.
> >>>>
> >>>> Issue: VIZ-4884
> >>>> Signed-off-by: Alex Dai <yu.dai@intel.com>
> >>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> >>>> ---
> >>>>  drivers/gpu/drm/i915/i915_drv.h |    2 ++
> >>>>  drivers/gpu/drm/i915/i915_gem.c |   28 ++++++++++++++++++++++++++++
> >>>>  2 files changed, 30 insertions(+)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >>>> index 611fbd8..9094c06 100644
> >>>> --- a/drivers/gpu/drm/i915/i915_drv.h
> >>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
> >>>> @@ -2713,6 +2713,8 @@ void *i915_gem_object_alloc(struct drm_device *dev);
> >>>>  void i915_gem_object_free(struct drm_i915_gem_object *obj);
> >>>>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
> >>>>  			 const struct drm_i915_gem_object_ops *ops);
> >>>> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
> >>>> +			  const void *data, size_t size);
> >>>>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
> >>>>  						  size_t size);
> >>>>  void i915_init_vm(struct drm_i915_private *dev_priv,
> >>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >>>> index be35f04..75d63c2 100644
> >>>> --- a/drivers/gpu/drm/i915/i915_gem.c
> >>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
> >>>> @@ -5392,3 +5392,31 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
> >>>>  	return false;
> >>>>  }
> >>>>  
> >>>> +/* Fill the @obj with the @size amount of @data */
> >>>> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
> >>>> +			const void *data, size_t size)
> >>>> +{
> >>>> +	struct sg_table *sg;
> >>>> +	size_t bytes;
> >>>> +	int ret;
> >>>> +
> >>>> +	ret = i915_gem_object_get_pages(obj);
> >>>> +	if (ret)
> >>>> +		return ret;
> >>>> +
> >>>> +	i915_gem_object_pin_pages(obj);
> >>>
> >>> You don't set the object into the CPU domain, or instead manually handle
> >>> the domain flushing. You don't handle objects that cannot be written
> >>> directly by the CPU, nor do you handle objects whose representation in
> >>> memory is not linear.
> >>> -Chris
> >>
> >> No we don't handle just any random gem object, but we do return an error
> >> code for any types not supported. However, as we don't really need the
> >> full generality of writing into a gem object of any type, I will replace
> >> this function with one that combines the allocation of a new object
> >> (which will therefore definitely be of the correct type, in the correct
> >> domain, etc) and filling it with the data to be preserved.
> 
> The usage pattern for the particular case is going to be:
> 	Once-only:
> 		Allocate
> 		Fill
> 	Then each time GuC is (re-)initialised:
> 		Map to GTT
> 		DMA-read from buffer into GuC private memory
> 		Unmap
> 	Only on unload:
> 		Dispose
> 
> So our object is write-once by the CPU (and that's always the first
> operation), thereafter read-occasionally by the GuC's DMA engine.

Yup. The problem is more that on atom platforms the objects aren't
coherent by default and generally you need to do something. Hence we
either have
- an explicit set_caching call to document that this is a gpu object which
  is always coherent (so also on chv/bxt), even when that's a no-op on big
  core
- or wrap everything in set_domain calls, even when those are no-ops too.

If either of those lack, reviews tend to freak out preemptively and the
reptil brain takes over ;-)

Cheers, Daniel
Dave Gordon June 18, 2015, 6:07 p.m. UTC | #7
On 18/06/15 13:10, Chris Wilson wrote:
> On Thu, Jun 18, 2015 at 12:49:55PM +0100, Dave Gordon wrote:
>> On 17/06/15 13:02, Daniel Vetter wrote:
>>> Domain handling is required for all gem objects, and the resulting bugs if
>>> you don't for one-off objects are absolutely no fun to track down.
>>
>> Is it not the case that the new object returned by
>> i915_gem_alloc_object() is
>> (a) of a type that can be mapped into the GTT, and
>> (b) initially in the CPU domain for both reading and writing?
>>
>> So AFAICS the allocate-and-fill function I'm describing (to appear in
>> next patch series respin) doesn't need any further domain handling.
> 
> A i915_gem_object_create_from_data() is a reasonable addition, and I
> suspect it will make the code a bit more succinct.

I shall adopt this name for it :)

> Whilst your statement is true today, calling set_domain is then a no-op,
> and helps document how you use the object and so reduces the likelihood
> of us introducing bugs in the future.
> -Chris

So here's the new function ... where should the set-to-cpu-domain go?
After the pin_pages and before the sg_copy_from_buffer?

/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_device *dev,
                                 const void *data, size_t size)
{
        struct drm_i915_gem_object *obj;
        struct sg_table *sg;
        size_t bytes;
        int ret;

        obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE));
        if (!obj)
                return NULL;

        ret = i915_gem_object_get_pages(obj);
        if (ret)
                goto fail;

        i915_gem_object_pin_pages(obj);
        sg = obj->pages;
        bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
        i915_gem_object_unpin_pages(obj);

        if (WARN_ON(bytes != size)) {
                DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
                goto fail;
        }

        return obj;

fail:
        drm_gem_object_unreference(&obj->base);
        return NULL;
}

.Dave.
Dave Gordon June 18, 2015, 6:28 p.m. UTC | #8
On 18/06/15 15:31, Daniel Vetter wrote:
> On Thu, Jun 18, 2015 at 12:49:55PM +0100, Dave Gordon wrote:
>> On 17/06/15 13:02, Daniel Vetter wrote:
>>> On Wed, Jun 17, 2015 at 08:23:40AM +0100, Dave Gordon wrote:
>>>> On 15/06/15 21:09, Chris Wilson wrote:
>>>>> On Mon, Jun 15, 2015 at 07:36:19PM +0100, Dave Gordon wrote:
>>>>>> From: Alex Dai <yu.dai@intel.com>
>>>>>>
>>>>>> i915_gem_object_write() is a generic function to copy data from a plain
>>>>>> linear buffer to a paged gem object.
>>>>>>
>>>>>> We will need this for the microcontroller firmware loading support code.
>>>>>>
>>>>>> Issue: VIZ-4884
>>>>>> Signed-off-by: Alex Dai <yu.dai@intel.com>
>>>>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>>>>> ---
>>>>>>  drivers/gpu/drm/i915/i915_drv.h |    2 ++
>>>>>>  drivers/gpu/drm/i915/i915_gem.c |   28 ++++++++++++++++++++++++++++
>>>>>>  2 files changed, 30 insertions(+)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>>>>> index 611fbd8..9094c06 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>>>> @@ -2713,6 +2713,8 @@ void *i915_gem_object_alloc(struct drm_device *dev);
>>>>>>  void i915_gem_object_free(struct drm_i915_gem_object *obj);
>>>>>>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>>>>>>  			 const struct drm_i915_gem_object_ops *ops);
>>>>>> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
>>>>>> +			  const void *data, size_t size);
>>>>>>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>>>>>>  						  size_t size);
>>>>>>  void i915_init_vm(struct drm_i915_private *dev_priv,
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>>>>> index be35f04..75d63c2 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>>>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>>>>> @@ -5392,3 +5392,31 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
>>>>>>  	return false;
>>>>>>  }
>>>>>>  
>>>>>> +/* Fill the @obj with the @size amount of @data */
>>>>>> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
>>>>>> +			const void *data, size_t size)
>>>>>> +{
>>>>>> +	struct sg_table *sg;
>>>>>> +	size_t bytes;
>>>>>> +	int ret;
>>>>>> +
>>>>>> +	ret = i915_gem_object_get_pages(obj);
>>>>>> +	if (ret)
>>>>>> +		return ret;
>>>>>> +
>>>>>> +	i915_gem_object_pin_pages(obj);
>>>>>
>>>>> You don't set the object into the CPU domain, or instead manually handle
>>>>> the domain flushing. You don't handle objects that cannot be written
>>>>> directly by the CPU, nor do you handle objects whose representation in
>>>>> memory is not linear.
>>>>> -Chris
>>>>
>>>> No we don't handle just any random gem object, but we do return an error
>>>> code for any types not supported. However, as we don't really need the
>>>> full generality of writing into a gem object of any type, I will replace
>>>> this function with one that combines the allocation of a new object
>>>> (which will therefore definitely be of the correct type, in the correct
>>>> domain, etc) and filling it with the data to be preserved.
>>
>> The usage pattern for the particular case is going to be:
>> 	Once-only:
>> 		Allocate
>> 		Fill
>> 	Then each time GuC is (re-)initialised:
>> 		Map to GTT
>> 		DMA-read from buffer into GuC private memory
>> 		Unmap
>> 	Only on unload:
>> 		Dispose
>>
>> So our object is write-once by the CPU (and that's always the first
>> operation), thereafter read-occasionally by the GuC's DMA engine.
> 
> Yup. The problem is more that on atom platforms the objects aren't
> coherent by default and generally you need to do something. Hence we
> either have
> - an explicit set_caching call to document that this is a gpu object which
>   is always coherent (so also on chv/bxt), even when that's a no-op on big
>   core
> - or wrap everything in set_domain calls, even when those are no-ops too.
> 
> If either of those lack, reviews tend to freak out preemptively and the
> reptil brain takes over ;-)
> 
> Cheers, Daniel

We don't need "coherency" as such. The buffer is filled (once only) by
the CPU (so I should put a set-to-cpu-domain between the allocate and
fill stages?) Once it's filled, the CPU need not read or write it ever
again.

Then before the DMA engine accesses it, we call i915_gem_obj_ggtt_pin,
which I'm assuming will take care of any coherency issues (making sure
the data written by the CPU is now visible to the DMA engine) when it
puts the buffer into the GTT-readable domain. Is that not sufficient?

.Dave.
Chris Wilson June 19, 2015, 8:44 a.m. UTC | #9
On Thu, Jun 18, 2015 at 07:07:46PM +0100, Dave Gordon wrote:
> On 18/06/15 13:10, Chris Wilson wrote:
> > On Thu, Jun 18, 2015 at 12:49:55PM +0100, Dave Gordon wrote:
> >> On 17/06/15 13:02, Daniel Vetter wrote:
> >>> Domain handling is required for all gem objects, and the resulting bugs if
> >>> you don't for one-off objects are absolutely no fun to track down.
> >>
> >> Is it not the case that the new object returned by
> >> i915_gem_alloc_object() is
> >> (a) of a type that can be mapped into the GTT, and
> >> (b) initially in the CPU domain for both reading and writing?
> >>
> >> So AFAICS the allocate-and-fill function I'm describing (to appear in
> >> next patch series respin) doesn't need any further domain handling.
> > 
> > A i915_gem_object_create_from_data() is a reasonable addition, and I
> > suspect it will make the code a bit more succinct.
> 
> I shall adopt this name for it :)
> 
> > Whilst your statement is true today, calling set_domain is then a no-op,
> > and helps document how you use the object and so reduces the likelihood
> > of us introducing bugs in the future.
> > -Chris
> 
> So here's the new function ... where should the set-to-cpu-domain go?
> After the pin_pages and before the sg_copy_from_buffer?

Either, since the domain will not change whilst you have the lock,
but if you do it before get_pages() you will have a slightly easier
error path.

Part of the reason why I want a function like this is so that I can
replace it with a stolen object and so need to write the data through a
temporary GGTT mapping. Speak now if you need more flags to the function
to prevent certain classes of objects being created.
-Chris
Dave Gordon June 22, 2015, 11:59 a.m. UTC | #10
On 19/06/15 09:44, Chris Wilson wrote:
> On Thu, Jun 18, 2015 at 07:07:46PM +0100, Dave Gordon wrote:
>> On 18/06/15 13:10, Chris Wilson wrote:
>>> On Thu, Jun 18, 2015 at 12:49:55PM +0100, Dave Gordon wrote:
>>>> On 17/06/15 13:02, Daniel Vetter wrote:
>>>>> Domain handling is required for all gem objects, and the resulting bugs if
>>>>> you don't for one-off objects are absolutely no fun to track down.
>>>>
>>>> Is it not the case that the new object returned by
>>>> i915_gem_alloc_object() is
>>>> (a) of a type that can be mapped into the GTT, and
>>>> (b) initially in the CPU domain for both reading and writing?
>>>>
>>>> So AFAICS the allocate-and-fill function I'm describing (to appear in
>>>> next patch series respin) doesn't need any further domain handling.
>>>
>>> A i915_gem_object_create_from_data() is a reasonable addition, and I
>>> suspect it will make the code a bit more succinct.
>>
>> I shall adopt this name for it :)
>>
>>> Whilst your statement is true today, calling set_domain is then a no-op,
>>> and helps document how you use the object and so reduces the likelihood
>>> of us introducing bugs in the future.
>>> -Chris
>>
>> So here's the new function ... where should the set-to-cpu-domain go?
>> After the pin_pages and before the sg_copy_from_buffer?
> 
> Either, since the domain will not change whilst you have the lock,
> but if you do it before get_pages() you will have a slightly easier
> error path.

OK, call to i915_gem_object_set_to_cpu_domain(obj, true) added right
after the i915_gem_alloc_object(); also, since we now have multiple
failure paths where the ability to distinguish them might be useful (and
since this function is a public addition to the gem_object repertoire),
I've made it return object-or-error-code, with the incomplete-copy case
returning ERR_PTR(-EIO).

> Part of the reason why I want a function like this is so that I can
> replace it with a stolen object and so need to write the data through a
> temporary GGTT mapping. Speak now if you need more flags to the function
> to prevent certain classes of objects being created.
> -Chris

For the GuC, the firmware image is written once by the CPU and
thereafter read only by the DMA engine via a GGTT address; other uC
devices might have different requirements e.g. the CSR/DMC doesn't have
a DMA engine AFAIK and the f/w is transferred to the h/w via MMIO writes
by the CPU. The primary reason for storing the image in a GEM object
(rather than kmalloc'd space, as the DMC loader does) is to make it
pageable; it's needed multiple times, as we have to reload the f/w after
reset or on exit from low-power states, but not used the rest of the
time. So the existing objects seem a good match.

The GuC's pool and log objects, OTOH, must be permanently resident in
RAM and permanently mapped via the GGTT above GUC_WOPCM_OFFSET. So for
these it would be useful to have an allocator that *didn't* make the
object shmfs-backed.

.Dave.
Chris Wilson June 22, 2015, 12:37 p.m. UTC | #11
On Mon, Jun 22, 2015 at 12:59:00PM +0100, Dave Gordon wrote:
> On 19/06/15 09:44, Chris Wilson wrote:
> > On Thu, Jun 18, 2015 at 07:07:46PM +0100, Dave Gordon wrote:
> >> On 18/06/15 13:10, Chris Wilson wrote:
> >>> On Thu, Jun 18, 2015 at 12:49:55PM +0100, Dave Gordon wrote:
> >>>> On 17/06/15 13:02, Daniel Vetter wrote:
> >>>>> Domain handling is required for all gem objects, and the resulting bugs if
> >>>>> you don't for one-off objects are absolutely no fun to track down.
> >>>>
> >>>> Is it not the case that the new object returned by
> >>>> i915_gem_alloc_object() is
> >>>> (a) of a type that can be mapped into the GTT, and
> >>>> (b) initially in the CPU domain for both reading and writing?
> >>>>
> >>>> So AFAICS the allocate-and-fill function I'm describing (to appear in
> >>>> next patch series respin) doesn't need any further domain handling.
> >>>
> >>> A i915_gem_object_create_from_data() is a reasonable addition, and I
> >>> suspect it will make the code a bit more succinct.
> >>
> >> I shall adopt this name for it :)
> >>
> >>> Whilst your statement is true today, calling set_domain is then a no-op,
> >>> and helps document how you use the object and so reduces the likelihood
> >>> of us introducing bugs in the future.
> >>> -Chris
> >>
> >> So here's the new function ... where should the set-to-cpu-domain go?
> >> After the pin_pages and before the sg_copy_from_buffer?
> > 
> > Either, since the domain will not change whilst you have the lock,
> > but if you do it before get_pages() you will have a slightly easier
> > error path.
> 
> OK, call to i915_gem_object_set_to_cpu_domain(obj, true) added right
> after the i915_gem_alloc_object(); also, since we now have multiple
> failure paths where the ability to distinguish them might be useful (and
> since this function is a public addition to the gem_object repertoire),
> I've made it return object-or-error-code, with the incomplete-copy case
> returning ERR_PTR(-EIO).

I'd stick to only using EIO when we have a GPU failure. Incomplete copy
is EFAULT.

> > Part of the reason why I want a function like this is so that I can
> > replace it with a stolen object and so need to write the data through a
> > temporary GGTT mapping. Speak now if you need more flags to the function
> > to prevent certain classes of objects being created.
> > -Chris
> 
> For the GuC, the firmware image is written once by the CPU and
> thereafter read only by the DMA engine via a GGTT address; other uC
> devices might have different requirements e.g. the CSR/DMC doesn't have
> a DMA engine AFAIK and the f/w is transferred to the h/w via MMIO writes
> by the CPU. The primary reason for storing the image in a GEM object
> (rather than kmalloc'd space, as the DMC loader does) is to make it
> pageable; it's needed multiple times, as we have to reload the f/w after
> reset or on exit from low-power states, but not used the rest of the
> time. So the existing objects seem a good match.
> 
> The GuC's pool and log objects, OTOH, must be permanently resident in
> RAM and permanently mapped via the GGTT above GUC_WOPCM_OFFSET. So for
> these it would be useful to have an allocator that *didn't* make the
> object shmfs-backed.

http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=3f74a251aa9a3e5c1215226f7857ed53693c563f

Though I want to use stolen as much as is practically possible, however
without direct CPU access, stolen is very much an idle fancy.
-Chris
Dave Gordon June 23, 2015, 4:54 p.m. UTC | #12
On 22/06/15 13:37, Chris Wilson wrote:
> On Mon, Jun 22, 2015 at 12:59:00PM +0100, Dave Gordon wrote:
>> On 19/06/15 09:44, Chris Wilson wrote:
>>> On Thu, Jun 18, 2015 at 07:07:46PM +0100, Dave Gordon wrote:
>>>> On 18/06/15 13:10, Chris Wilson wrote:
>>>>> On Thu, Jun 18, 2015 at 12:49:55PM +0100, Dave Gordon wrote:
>>>>>> On 17/06/15 13:02, Daniel Vetter wrote:
>>>>>>> Domain handling is required for all gem objects, and the resulting bugs if
>>>>>>> you don't for one-off objects are absolutely no fun to track down.
>>>>>>
>>>>>> Is it not the case that the new object returned by
>>>>>> i915_gem_alloc_object() is
>>>>>> (a) of a type that can be mapped into the GTT, and
>>>>>> (b) initially in the CPU domain for both reading and writing?
>>>>>>
>>>>>> So AFAICS the allocate-and-fill function I'm describing (to appear in
>>>>>> next patch series respin) doesn't need any further domain handling.
>>>>>
>>>>> A i915_gem_object_create_from_data() is a reasonable addition, and I
>>>>> suspect it will make the code a bit more succinct.
>>>>
>>>> I shall adopt this name for it :)
>>>>
>>>>> Whilst your statement is true today, calling set_domain is then a no-op,
>>>>> and helps document how you use the object and so reduces the likelihood
>>>>> of us introducing bugs in the future.
>>>>> -Chris
>>>>
>>>> So here's the new function ... where should the set-to-cpu-domain go?
>>>> After the pin_pages and before the sg_copy_from_buffer?
>>>
>>> Either, since the domain will not change whilst you have the lock,
>>> but if you do it before get_pages() you will have a slightly easier
>>> error path.
>>
>> OK, call to i915_gem_object_set_to_cpu_domain(obj, true) added right
>> after the i915_gem_alloc_object(); also, since we now have multiple
>> failure paths where the ability to distinguish them might be useful (and
>> since this function is a public addition to the gem_object repertoire),
>> I've made it return object-or-error-code, with the incomplete-copy case
>> returning ERR_PTR(-EIO).
> 
> I'd stick to only using EIO when we have a GPU failure. Incomplete copy
> is EFAULT.

Done, it now returns -EFAULT on incomplete copy.

>>> Part of the reason why I want a function like this is so that I can
>>> replace it with a stolen object and so need to write the data through a
>>> temporary GGTT mapping. Speak now if you need more flags to the function
>>> to prevent certain classes of objects being created.
>>> -Chris
>>
>> For the GuC, the firmware image is written once by the CPU and
>> thereafter read only by the DMA engine via a GGTT address; other uC
>> devices might have different requirements e.g. the CSR/DMC doesn't have
>> a DMA engine AFAIK and the f/w is transferred to the h/w via MMIO writes
>> by the CPU. The primary reason for storing the image in a GEM object
>> (rather than kmalloc'd space, as the DMC loader does) is to make it
>> pageable; it's needed multiple times, as we have to reload the f/w after
>> reset or on exit from low-power states, but not used the rest of the
>> time. So the existing objects seem a good match.
>>
>> The GuC's pool and log objects, OTOH, must be permanently resident in
>> RAM and permanently mapped via the GGTT above GUC_WOPCM_OFFSET. So for
>> these it would be useful to have an allocator that *didn't* make the
>> object shmfs-backed.
> 
> http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=3f74a251aa9a3e5c1215226f7857ed53693c563f
> 
> Though I want to use stolen as much as is practically possible, however
> without direct CPU access, stolen is very much an idle fancy.
> -Chris

Yes, that looks useful ...

.Dave.
Daniel Vetter June 24, 2015, 9:32 a.m. UTC | #13
On Thu, Jun 18, 2015 at 07:28:26PM +0100, Dave Gordon wrote:
> On 18/06/15 15:31, Daniel Vetter wrote:
> > On Thu, Jun 18, 2015 at 12:49:55PM +0100, Dave Gordon wrote:
> >> On 17/06/15 13:02, Daniel Vetter wrote:
> >>> On Wed, Jun 17, 2015 at 08:23:40AM +0100, Dave Gordon wrote:
> >>>> On 15/06/15 21:09, Chris Wilson wrote:
> >>>>> On Mon, Jun 15, 2015 at 07:36:19PM +0100, Dave Gordon wrote:
> >>>>>> From: Alex Dai <yu.dai@intel.com>
> >>>>>>
> >>>>>> i915_gem_object_write() is a generic function to copy data from a plain
> >>>>>> linear buffer to a paged gem object.
> >>>>>>
> >>>>>> We will need this for the microcontroller firmware loading support code.
> >>>>>>
> >>>>>> Issue: VIZ-4884
> >>>>>> Signed-off-by: Alex Dai <yu.dai@intel.com>
> >>>>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> >>>>>> ---
> >>>>>>  drivers/gpu/drm/i915/i915_drv.h |    2 ++
> >>>>>>  drivers/gpu/drm/i915/i915_gem.c |   28 ++++++++++++++++++++++++++++
> >>>>>>  2 files changed, 30 insertions(+)
> >>>>>>
> >>>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >>>>>> index 611fbd8..9094c06 100644
> >>>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
> >>>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
> >>>>>> @@ -2713,6 +2713,8 @@ void *i915_gem_object_alloc(struct drm_device *dev);
> >>>>>>  void i915_gem_object_free(struct drm_i915_gem_object *obj);
> >>>>>>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
> >>>>>>  			 const struct drm_i915_gem_object_ops *ops);
> >>>>>> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
> >>>>>> +			  const void *data, size_t size);
> >>>>>>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
> >>>>>>  						  size_t size);
> >>>>>>  void i915_init_vm(struct drm_i915_private *dev_priv,
> >>>>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >>>>>> index be35f04..75d63c2 100644
> >>>>>> --- a/drivers/gpu/drm/i915/i915_gem.c
> >>>>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
> >>>>>> @@ -5392,3 +5392,31 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
> >>>>>>  	return false;
> >>>>>>  }
> >>>>>>  
> >>>>>> +/* Fill the @obj with the @size amount of @data */
> >>>>>> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
> >>>>>> +			const void *data, size_t size)
> >>>>>> +{
> >>>>>> +	struct sg_table *sg;
> >>>>>> +	size_t bytes;
> >>>>>> +	int ret;
> >>>>>> +
> >>>>>> +	ret = i915_gem_object_get_pages(obj);
> >>>>>> +	if (ret)
> >>>>>> +		return ret;
> >>>>>> +
> >>>>>> +	i915_gem_object_pin_pages(obj);
> >>>>>
> >>>>> You don't set the object into the CPU domain, or instead manually handle
> >>>>> the domain flushing. You don't handle objects that cannot be written
> >>>>> directly by the CPU, nor do you handle objects whose representation in
> >>>>> memory is not linear.
> >>>>> -Chris
> >>>>
> >>>> No we don't handle just any random gem object, but we do return an error
> >>>> code for any types not supported. However, as we don't really need the
> >>>> full generality of writing into a gem object of any type, I will replace
> >>>> this function with one that combines the allocation of a new object
> >>>> (which will therefore definitely be of the correct type, in the correct
> >>>> domain, etc) and filling it with the data to be preserved.
> >>
> >> The usage pattern for the particular case is going to be:
> >> 	Once-only:
> >> 		Allocate
> >> 		Fill
> >> 	Then each time GuC is (re-)initialised:
> >> 		Map to GTT
> >> 		DMA-read from buffer into GuC private memory
> >> 		Unmap
> >> 	Only on unload:
> >> 		Dispose
> >>
> >> So our object is write-once by the CPU (and that's always the first
> >> operation), thereafter read-occasionally by the GuC's DMA engine.
> > 
> > Yup. The problem is more that on atom platforms the objects aren't
> > coherent by default and generally you need to do something. Hence we
> > either have
> > - an explicit set_caching call to document that this is a gpu object which
> >   is always coherent (so also on chv/bxt), even when that's a no-op on big
> >   core
> > - or wrap everything in set_domain calls, even when those are no-ops too.
> > 
> > If either of those lack, reviews tend to freak out preemptively and the
> > reptil brain takes over ;-)
> > 
> > Cheers, Daniel
> 
> We don't need "coherency" as such. The buffer is filled (once only) by
> the CPU (so I should put a set-to-cpu-domain between the allocate and
> fill stages?) Once it's filled, the CPU need not read or write it ever
> again.
> 
> Then before the DMA engine accesses it, we call i915_gem_obj_ggtt_pin,
> which I'm assuming will take care of any coherency issues (making sure
> the data written by the CPU is now visible to the DMA engine) when it
> puts the buffer into the GTT-readable domain. Is that not sufficient?

Pinning is orthogonal to coherency, it'll just make sure the backing
storage is there. set_domain(CPU) before writing and set_domain(GTT)
before each upload to the guc using the hw copy thing would be prudent.
The coherency tracking should no-op out any calls which aren't needed for
you.
-Daniel
Chris Wilson June 24, 2015, 9:40 a.m. UTC | #14
On Thu, Jun 18, 2015 at 07:28:26PM +0100, Dave Gordon wrote:
> We don't need "coherency" as such. The buffer is filled (once only) by
> the CPU (so I should put a set-to-cpu-domain between the allocate and
> fill stages?) Once it's filled, the CPU need not read or write it ever
> again.
> 
> Then before the DMA engine accesses it, we call i915_gem_obj_ggtt_pin,
> which I'm assuming will take care of any coherency issues (making sure
> the data written by the CPU is now visible to the DMA engine) when it
> puts the buffer into the GTT-readable domain. Is that not sufficient?

No. pin just ensures that there is a binding for the object in the
appropriate VM and then increments the vma's pin_count to make sure it
can not be relinquished until we say so. That is we often do want
multiple mappings of an object in different VM, and direct access from
the CPU (i.e. in the CPU domain whilst bound to the GPU).

To ensure that is ready for access by the GPU, you need to set it to the
appropriate domain prior to that access.
-Chris
Dave Gordon June 25, 2015, 12:28 p.m. UTC | #15
On 24/06/15 10:32, Daniel Vetter wrote:
> On Thu, Jun 18, 2015 at 07:28:26PM +0100, Dave Gordon wrote:
>> On 18/06/15 15:31, Daniel Vetter wrote:
>>> On Thu, Jun 18, 2015 at 12:49:55PM +0100, Dave Gordon wrote:
>>>> On 17/06/15 13:02, Daniel Vetter wrote:
>>>>> On Wed, Jun 17, 2015 at 08:23:40AM +0100, Dave Gordon wrote:
>>>>>> On 15/06/15 21:09, Chris Wilson wrote:
>>>>>>> On Mon, Jun 15, 2015 at 07:36:19PM +0100, Dave Gordon wrote:
>>>>>>>> From: Alex Dai <yu.dai@intel.com>
>>>>>>>>
>>>>>>>> i915_gem_object_write() is a generic function to copy data from a plain
>>>>>>>> linear buffer to a paged gem object.
>>>>>>>>
>>>>>>>> We will need this for the microcontroller firmware loading support code.
>>>>>>>>
>>>>>>>> Issue: VIZ-4884
>>>>>>>> Signed-off-by: Alex Dai <yu.dai@intel.com>
>>>>>>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>>>>>>> ---
>>>>>>>>  drivers/gpu/drm/i915/i915_drv.h |    2 ++
>>>>>>>>  drivers/gpu/drm/i915/i915_gem.c |   28 ++++++++++++++++++++++++++++
>>>>>>>>  2 files changed, 30 insertions(+)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>>>>>>> index 611fbd8..9094c06 100644
>>>>>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>>>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>>>>>> @@ -2713,6 +2713,8 @@ void *i915_gem_object_alloc(struct drm_device *dev);
>>>>>>>>  void i915_gem_object_free(struct drm_i915_gem_object *obj);
>>>>>>>>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>>>>>>>>  			 const struct drm_i915_gem_object_ops *ops);
>>>>>>>> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
>>>>>>>> +			  const void *data, size_t size);
>>>>>>>>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>>>>>>>>  						  size_t size);
>>>>>>>>  void i915_init_vm(struct drm_i915_private *dev_priv,
>>>>>>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>>>>>>> index be35f04..75d63c2 100644
>>>>>>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>>>>>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>>>>>>> @@ -5392,3 +5392,31 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
>>>>>>>>  	return false;
>>>>>>>>  }
>>>>>>>>  
>>>>>>>> +/* Fill the @obj with the @size amount of @data */
>>>>>>>> +int i915_gem_object_write(struct drm_i915_gem_object *obj,
>>>>>>>> +			const void *data, size_t size)
>>>>>>>> +{
>>>>>>>> +	struct sg_table *sg;
>>>>>>>> +	size_t bytes;
>>>>>>>> +	int ret;
>>>>>>>> +
>>>>>>>> +	ret = i915_gem_object_get_pages(obj);
>>>>>>>> +	if (ret)
>>>>>>>> +		return ret;
>>>>>>>> +
>>>>>>>> +	i915_gem_object_pin_pages(obj);
>>>>>>>
>>>>>>> You don't set the object into the CPU domain, or instead manually handle
>>>>>>> the domain flushing. You don't handle objects that cannot be written
>>>>>>> directly by the CPU, nor do you handle objects whose representation in
>>>>>>> memory is not linear.
>>>>>>> -Chris
>>>>>>
>>>>>> No we don't handle just any random gem object, but we do return an error
>>>>>> code for any types not supported. However, as we don't really need the
>>>>>> full generality of writing into a gem object of any type, I will replace
>>>>>> this function with one that combines the allocation of a new object
>>>>>> (which will therefore definitely be of the correct type, in the correct
>>>>>> domain, etc) and filling it with the data to be preserved.
>>>>
>>>> The usage pattern for the particular case is going to be:
>>>> 	Once-only:
>>>> 		Allocate
>>>> 		Fill
>>>> 	Then each time GuC is (re-)initialised:
>>>> 		Map to GTT
>>>> 		DMA-read from buffer into GuC private memory
>>>> 		Unmap
>>>> 	Only on unload:
>>>> 		Dispose
>>>>
>>>> So our object is write-once by the CPU (and that's always the first
>>>> operation), thereafter read-occasionally by the GuC's DMA engine.
>>>
>>> Yup. The problem is more that on atom platforms the objects aren't
>>> coherent by default and generally you need to do something. Hence we
>>> either have
>>> - an explicit set_caching call to document that this is a gpu object which
>>>   is always coherent (so also on chv/bxt), even when that's a no-op on big
>>>   core
>>> - or wrap everything in set_domain calls, even when those are no-ops too.
>>>
>>> If either of those lack, reviews tend to freak out preemptively and the
>>> reptil brain takes over ;-)
>>>
>>> Cheers, Daniel
>>
>> We don't need "coherency" as such. The buffer is filled (once only) by
>> the CPU (so I should put a set-to-cpu-domain between the allocate and
>> fill stages?) Once it's filled, the CPU need not read or write it ever
>> again.
>>
>> Then before the DMA engine accesses it, we call i915_gem_obj_ggtt_pin,
>> which I'm assuming will take care of any coherency issues (making sure
>> the data written by the CPU is now visible to the DMA engine) when it
>> puts the buffer into the GTT-readable domain. Is that not sufficient?
> 
> Pinning is orthogonal to coherency, it'll just make sure the backing
> storage is there. set_domain(CPU) before writing and set_domain(GTT)
> before each upload to the guc using the hw copy thing would be prudent.
> The coherency tracking should no-op out any calls which aren't needed for
> you.
> -Daniel

OK, done; I had already added set_domain(CPU) in the allocate-and-fill
code, now I've just added i915_gem_object_set_to_gtt_domain(obj, false)
in the dma-to-h/w code.

.Dave.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 611fbd8..9094c06 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2713,6 +2713,8 @@  void *i915_gem_object_alloc(struct drm_device *dev);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			 const struct drm_i915_gem_object_ops *ops);
+int i915_gem_object_write(struct drm_i915_gem_object *obj,
+			  const void *data, size_t size);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
 void i915_init_vm(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index be35f04..75d63c2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5392,3 +5392,31 @@  bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
 	return false;
 }
 
+/* Fill the @obj with the @size amount of @data */
+int i915_gem_object_write(struct drm_i915_gem_object *obj,
+			const void *data, size_t size)
+{
+	struct sg_table *sg;
+	size_t bytes;
+	int ret;
+
+	ret = i915_gem_object_get_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_pin_pages(obj);
+
+	sg = obj->pages;
+
+	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
+
+	i915_gem_object_unpin_pages(obj);
+
+	if (WARN_ON(bytes != size)) {
+		DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
+		i915_gem_object_put_pages(obj);
+		return -EIO;
+	}
+
+	return 0;
+}