diff mbox series

[1/9] drm/format-helper: Pass destination pitch to drm_fb_memcpy_dstclip()

Message ID 20200625120011.16168-2-tzimmermann@suse.de (mailing list archive)
State New, archived
Headers show
Series drm: Support simple-framebuffer devices and firmware fbs | expand

Commit Message

Thomas Zimmermann June 25, 2020, noon UTC
The memcpy's destination buffer might have a different pitch than the
source. Support different pitches as function argument.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
---
 drivers/gpu/drm/drm_format_helper.c    | 9 +++++----
 drivers/gpu/drm/mgag200/mgag200_mode.c | 2 +-
 drivers/gpu/drm/tiny/cirrus.c          | 2 +-
 include/drm/drm_format_helper.h        | 2 +-
 4 files changed, 8 insertions(+), 7 deletions(-)

Comments

Daniel Vetter June 29, 2020, 8:40 a.m. UTC | #1
On Thu, Jun 25, 2020 at 02:00:03PM +0200, Thomas Zimmermann wrote:
> The memcpy's destination buffer might have a different pitch than the
> source. Support different pitches as function argument.
> 
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

But I do have questions ... why did we allocate a source drm_framebuffer
with mismatching pitch? That sounds backwards, especially for simplekms.

Would be good to add the reasons why we need this to the commit message,
I'm sure I'll discover it later on eventually.
-Daniel

> ---
>  drivers/gpu/drm/drm_format_helper.c    | 9 +++++----
>  drivers/gpu/drm/mgag200/mgag200_mode.c | 2 +-
>  drivers/gpu/drm/tiny/cirrus.c          | 2 +-
>  include/drm/drm_format_helper.h        | 2 +-
>  4 files changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
> index c043ca364c86..8d5a683afea7 100644
> --- a/drivers/gpu/drm/drm_format_helper.c
> +++ b/drivers/gpu/drm/drm_format_helper.c
> @@ -52,6 +52,7 @@ EXPORT_SYMBOL(drm_fb_memcpy);
>  /**
>   * drm_fb_memcpy_dstclip - Copy clip buffer
>   * @dst: Destination buffer (iomem)
> + * @dst_pitch: Number of bytes between two consecutive scanlines within dst
>   * @vaddr: Source buffer
>   * @fb: DRM framebuffer
>   * @clip: Clip rectangle area to copy
> @@ -59,12 +60,12 @@ EXPORT_SYMBOL(drm_fb_memcpy);
>   * This function applies clipping on dst, i.e. the destination is a
>   * full (iomem) framebuffer but only the clip rect content is copied over.
>   */
> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
> -			   struct drm_framebuffer *fb,
> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch,
> +			   void *vaddr, struct drm_framebuffer *fb,
>  			   struct drm_rect *clip)
>  {
>  	unsigned int cpp = fb->format->cpp[0];
> -	unsigned int offset = clip_offset(clip, fb->pitches[0], cpp);
> +	unsigned int offset = clip_offset(clip, dst_pitch, cpp);
>  	size_t len = (clip->x2 - clip->x1) * cpp;
>  	unsigned int y, lines = clip->y2 - clip->y1;
>  
> @@ -73,7 +74,7 @@ void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>  	for (y = 0; y < lines; y++) {
>  		memcpy_toio(dst, vaddr, len);
>  		vaddr += fb->pitches[0];
> -		dst += fb->pitches[0];
> +		dst += dst_pitch;
>  	}
>  }
>  EXPORT_SYMBOL(drm_fb_memcpy_dstclip);
> diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
> index f16bd278ab7e..7d4f3a62d885 100644
> --- a/drivers/gpu/drm/mgag200/mgag200_mode.c
> +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
> @@ -1586,7 +1586,7 @@ mgag200_handle_damage(struct mga_device *mdev, struct drm_framebuffer *fb,
>  	if (drm_WARN_ON(dev, !vmap))
>  		return; /* BUG: SHMEM BO should always be vmapped */
>  
> -	drm_fb_memcpy_dstclip(mdev->vram, vmap, fb, clip);
> +	drm_fb_memcpy_dstclip(mdev->vram, fb->pitches[0], vmap, fb, clip);
>  
>  	drm_gem_shmem_vunmap(fb->obj[0], vmap);
>  
> diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
> index 744a8e337e41..2dd9e5e31e3d 100644
> --- a/drivers/gpu/drm/tiny/cirrus.c
> +++ b/drivers/gpu/drm/tiny/cirrus.c
> @@ -327,7 +327,7 @@ static int cirrus_fb_blit_rect(struct drm_framebuffer *fb,
>  		goto out_dev_exit;
>  
>  	if (cirrus->cpp == fb->format->cpp[0])
> -		drm_fb_memcpy_dstclip(cirrus->vram,
> +		drm_fb_memcpy_dstclip(cirrus->vram, fb->pitches[0],
>  				      vmap, fb, rect);
>  
>  	else if (fb->format->cpp[0] == 4 && cirrus->cpp == 2)
> diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
> index 5f9e37032468..2b5036a5fbe7 100644
> --- a/include/drm/drm_format_helper.h
> +++ b/include/drm/drm_format_helper.h
> @@ -11,7 +11,7 @@ struct drm_rect;
>  
>  void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
>  		   struct drm_rect *clip);
> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr,
>  			   struct drm_framebuffer *fb,
>  			   struct drm_rect *clip);
>  void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
> -- 
> 2.27.0
>
Thomas Zimmermann Sept. 25, 2020, 2:55 p.m. UTC | #2
Hi

Am 29.06.20 um 10:40 schrieb Daniel Vetter:
> On Thu, Jun 25, 2020 at 02:00:03PM +0200, Thomas Zimmermann wrote:
>> The memcpy's destination buffer might have a different pitch than the
>> source. Support different pitches as function argument.
>>
>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> 
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> But I do have questions ... why did we allocate a source drm_framebuffer
> with mismatching pitch? That sounds backwards, especially for simplekms.

There's userspace that allocates framebuffers in tiles of 64x64 pixels.
I think I've seen this with Gnome. So if you have a 800x600 display
mode, the allocated framebuffer has a scanline pitch of 832 pixels and
the final 32 pixels are ignored.

In regular drivers, we can handle this with the VGA offset register [1]
or some equivalent. That's obviously not an option with simplekms, so
the different pitch is required.

Best regards
Thomas

[1]
https://web.stanford.edu/class/cs140/projects/pintos/specs/freevga/vga/crtcreg.htm#13

> 
> Would be good to add the reasons why we need this to the commit message,
> I'm sure I'll discover it later on eventually.
> -Daniel
> 
>> ---
>>  drivers/gpu/drm/drm_format_helper.c    | 9 +++++----
>>  drivers/gpu/drm/mgag200/mgag200_mode.c | 2 +-
>>  drivers/gpu/drm/tiny/cirrus.c          | 2 +-
>>  include/drm/drm_format_helper.h        | 2 +-
>>  4 files changed, 8 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
>> index c043ca364c86..8d5a683afea7 100644
>> --- a/drivers/gpu/drm/drm_format_helper.c
>> +++ b/drivers/gpu/drm/drm_format_helper.c
>> @@ -52,6 +52,7 @@ EXPORT_SYMBOL(drm_fb_memcpy);
>>  /**
>>   * drm_fb_memcpy_dstclip - Copy clip buffer
>>   * @dst: Destination buffer (iomem)
>> + * @dst_pitch: Number of bytes between two consecutive scanlines within dst
>>   * @vaddr: Source buffer
>>   * @fb: DRM framebuffer
>>   * @clip: Clip rectangle area to copy
>> @@ -59,12 +60,12 @@ EXPORT_SYMBOL(drm_fb_memcpy);
>>   * This function applies clipping on dst, i.e. the destination is a
>>   * full (iomem) framebuffer but only the clip rect content is copied over.
>>   */
>> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>> -			   struct drm_framebuffer *fb,
>> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch,
>> +			   void *vaddr, struct drm_framebuffer *fb,
>>  			   struct drm_rect *clip)
>>  {
>>  	unsigned int cpp = fb->format->cpp[0];
>> -	unsigned int offset = clip_offset(clip, fb->pitches[0], cpp);
>> +	unsigned int offset = clip_offset(clip, dst_pitch, cpp);
>>  	size_t len = (clip->x2 - clip->x1) * cpp;
>>  	unsigned int y, lines = clip->y2 - clip->y1;
>>  
>> @@ -73,7 +74,7 @@ void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>>  	for (y = 0; y < lines; y++) {
>>  		memcpy_toio(dst, vaddr, len);
>>  		vaddr += fb->pitches[0];
>> -		dst += fb->pitches[0];
>> +		dst += dst_pitch;
>>  	}
>>  }
>>  EXPORT_SYMBOL(drm_fb_memcpy_dstclip);
>> diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
>> index f16bd278ab7e..7d4f3a62d885 100644
>> --- a/drivers/gpu/drm/mgag200/mgag200_mode.c
>> +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
>> @@ -1586,7 +1586,7 @@ mgag200_handle_damage(struct mga_device *mdev, struct drm_framebuffer *fb,
>>  	if (drm_WARN_ON(dev, !vmap))
>>  		return; /* BUG: SHMEM BO should always be vmapped */
>>  
>> -	drm_fb_memcpy_dstclip(mdev->vram, vmap, fb, clip);
>> +	drm_fb_memcpy_dstclip(mdev->vram, fb->pitches[0], vmap, fb, clip);
>>  
>>  	drm_gem_shmem_vunmap(fb->obj[0], vmap);
>>  
>> diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
>> index 744a8e337e41..2dd9e5e31e3d 100644
>> --- a/drivers/gpu/drm/tiny/cirrus.c
>> +++ b/drivers/gpu/drm/tiny/cirrus.c
>> @@ -327,7 +327,7 @@ static int cirrus_fb_blit_rect(struct drm_framebuffer *fb,
>>  		goto out_dev_exit;
>>  
>>  	if (cirrus->cpp == fb->format->cpp[0])
>> -		drm_fb_memcpy_dstclip(cirrus->vram,
>> +		drm_fb_memcpy_dstclip(cirrus->vram, fb->pitches[0],
>>  				      vmap, fb, rect);
>>  
>>  	else if (fb->format->cpp[0] == 4 && cirrus->cpp == 2)
>> diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
>> index 5f9e37032468..2b5036a5fbe7 100644
>> --- a/include/drm/drm_format_helper.h
>> +++ b/include/drm/drm_format_helper.h
>> @@ -11,7 +11,7 @@ struct drm_rect;
>>  
>>  void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
>>  		   struct drm_rect *clip);
>> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr,
>>  			   struct drm_framebuffer *fb,
>>  			   struct drm_rect *clip);
>>  void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
>> -- 
>> 2.27.0
>>
>
Daniel Vetter Sept. 26, 2020, 4:42 p.m. UTC | #3
On Fri, Sep 25, 2020 at 4:55 PM Thomas Zimmermann <tzimmermann@suse.de> wrote:
>
> Hi
>
> Am 29.06.20 um 10:40 schrieb Daniel Vetter:
> > On Thu, Jun 25, 2020 at 02:00:03PM +0200, Thomas Zimmermann wrote:
> >> The memcpy's destination buffer might have a different pitch than the
> >> source. Support different pitches as function argument.
> >>
> >> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> >
> > Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> >
> > But I do have questions ... why did we allocate a source drm_framebuffer
> > with mismatching pitch? That sounds backwards, especially for simplekms.
>
> There's userspace that allocates framebuffers in tiles of 64x64 pixels.
> I think I've seen this with Gnome. So if you have a 800x600 display
> mode, the allocated framebuffer has a scanline pitch of 832 pixels and
> the final 32 pixels are ignored.

At least with dumb buffer allocation ioctls userspace should not do
that. If it wants 800x600, it needs to allocate 800x600, not something
else. The driver is supposed to apply any rounding necessary for the
size. Or is this a buffer allocated somewhere else and then shared?
-Daniel

> In regular drivers, we can handle this with the VGA offset register [1]
> or some equivalent. That's obviously not an option with simplekms, so
> the different pitch is required.
>
> Best regards
> Thomas
>
> [1]
> https://web.stanford.edu/class/cs140/projects/pintos/specs/freevga/vga/crtcreg.htm#13
>
> >
> > Would be good to add the reasons why we need this to the commit message,
> > I'm sure I'll discover it later on eventually.
> > -Daniel
> >
> >> ---
> >>  drivers/gpu/drm/drm_format_helper.c    | 9 +++++----
> >>  drivers/gpu/drm/mgag200/mgag200_mode.c | 2 +-
> >>  drivers/gpu/drm/tiny/cirrus.c          | 2 +-
> >>  include/drm/drm_format_helper.h        | 2 +-
> >>  4 files changed, 8 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
> >> index c043ca364c86..8d5a683afea7 100644
> >> --- a/drivers/gpu/drm/drm_format_helper.c
> >> +++ b/drivers/gpu/drm/drm_format_helper.c
> >> @@ -52,6 +52,7 @@ EXPORT_SYMBOL(drm_fb_memcpy);
> >>  /**
> >>   * drm_fb_memcpy_dstclip - Copy clip buffer
> >>   * @dst: Destination buffer (iomem)
> >> + * @dst_pitch: Number of bytes between two consecutive scanlines within dst
> >>   * @vaddr: Source buffer
> >>   * @fb: DRM framebuffer
> >>   * @clip: Clip rectangle area to copy
> >> @@ -59,12 +60,12 @@ EXPORT_SYMBOL(drm_fb_memcpy);
> >>   * This function applies clipping on dst, i.e. the destination is a
> >>   * full (iomem) framebuffer but only the clip rect content is copied over.
> >>   */
> >> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
> >> -                       struct drm_framebuffer *fb,
> >> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch,
> >> +                       void *vaddr, struct drm_framebuffer *fb,
> >>                         struct drm_rect *clip)
> >>  {
> >>      unsigned int cpp = fb->format->cpp[0];
> >> -    unsigned int offset = clip_offset(clip, fb->pitches[0], cpp);
> >> +    unsigned int offset = clip_offset(clip, dst_pitch, cpp);
> >>      size_t len = (clip->x2 - clip->x1) * cpp;
> >>      unsigned int y, lines = clip->y2 - clip->y1;
> >>
> >> @@ -73,7 +74,7 @@ void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
> >>      for (y = 0; y < lines; y++) {
> >>              memcpy_toio(dst, vaddr, len);
> >>              vaddr += fb->pitches[0];
> >> -            dst += fb->pitches[0];
> >> +            dst += dst_pitch;
> >>      }
> >>  }
> >>  EXPORT_SYMBOL(drm_fb_memcpy_dstclip);
> >> diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
> >> index f16bd278ab7e..7d4f3a62d885 100644
> >> --- a/drivers/gpu/drm/mgag200/mgag200_mode.c
> >> +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
> >> @@ -1586,7 +1586,7 @@ mgag200_handle_damage(struct mga_device *mdev, struct drm_framebuffer *fb,
> >>      if (drm_WARN_ON(dev, !vmap))
> >>              return; /* BUG: SHMEM BO should always be vmapped */
> >>
> >> -    drm_fb_memcpy_dstclip(mdev->vram, vmap, fb, clip);
> >> +    drm_fb_memcpy_dstclip(mdev->vram, fb->pitches[0], vmap, fb, clip);
> >>
> >>      drm_gem_shmem_vunmap(fb->obj[0], vmap);
> >>
> >> diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
> >> index 744a8e337e41..2dd9e5e31e3d 100644
> >> --- a/drivers/gpu/drm/tiny/cirrus.c
> >> +++ b/drivers/gpu/drm/tiny/cirrus.c
> >> @@ -327,7 +327,7 @@ static int cirrus_fb_blit_rect(struct drm_framebuffer *fb,
> >>              goto out_dev_exit;
> >>
> >>      if (cirrus->cpp == fb->format->cpp[0])
> >> -            drm_fb_memcpy_dstclip(cirrus->vram,
> >> +            drm_fb_memcpy_dstclip(cirrus->vram, fb->pitches[0],
> >>                                    vmap, fb, rect);
> >>
> >>      else if (fb->format->cpp[0] == 4 && cirrus->cpp == 2)
> >> diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
> >> index 5f9e37032468..2b5036a5fbe7 100644
> >> --- a/include/drm/drm_format_helper.h
> >> +++ b/include/drm/drm_format_helper.h
> >> @@ -11,7 +11,7 @@ struct drm_rect;
> >>
> >>  void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
> >>                 struct drm_rect *clip);
> >> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
> >> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr,
> >>                         struct drm_framebuffer *fb,
> >>                         struct drm_rect *clip);
> >>  void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
> >> --
> >> 2.27.0
> >>
> >
>
> --
> Thomas Zimmermann
> Graphics Driver Developer
> SUSE Software Solutions Germany GmbH
> Maxfeldstr. 5, 90409 Nürnberg, Germany
> (HRB 36809, AG Nürnberg)
> Geschäftsführer: Felix Imendörffer
>
Thomas Zimmermann Sept. 28, 2020, 7:22 a.m. UTC | #4
Hi

Am 26.09.20 um 18:42 schrieb Daniel Vetter:
> On Fri, Sep 25, 2020 at 4:55 PM Thomas Zimmermann <tzimmermann@suse.de> wrote:
>>
>> Hi
>>
>> Am 29.06.20 um 10:40 schrieb Daniel Vetter:
>>> On Thu, Jun 25, 2020 at 02:00:03PM +0200, Thomas Zimmermann wrote:
>>>> The memcpy's destination buffer might have a different pitch than the
>>>> source. Support different pitches as function argument.
>>>>
>>>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
>>>
>>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>
>>> But I do have questions ... why did we allocate a source drm_framebuffer
>>> with mismatching pitch? That sounds backwards, especially for simplekms.
>>
>> There's userspace that allocates framebuffers in tiles of 64x64 pixels.
>> I think I've seen this with Gnome. So if you have a 800x600 display
>> mode, the allocated framebuffer has a scanline pitch of 832 pixels and
>> the final 32 pixels are ignored.
> 
> At least with dumb buffer allocation ioctls userspace should not do
> that. If it wants 800x600, it needs to allocate 800x600, not something

That ship has sailed.

> else. The driver is supposed to apply any rounding necessary for the
> size. Or is this a buffer allocated somewhere else and then shared?

I don't quite remember where exactly this was implemented. It was not a
shared buffer, though. IIRC the buffer allocation code in one of the
libs rounded the size towards multiples of 64. I remember thinking that
it was probably done for tiled rendering.

Best regards
Thomas

> -Daniel
> 
>> In regular drivers, we can handle this with the VGA offset register [1]
>> or some equivalent. That's obviously not an option with simplekms, so
>> the different pitch is required.
>>
>> Best regards
>> Thomas
>>
>> [1]
>> https://web.stanford.edu/class/cs140/projects/pintos/specs/freevga/vga/crtcreg.htm#13
>>
>>>
>>> Would be good to add the reasons why we need this to the commit message,
>>> I'm sure I'll discover it later on eventually.
>>> -Daniel
>>>
>>>> ---
>>>>  drivers/gpu/drm/drm_format_helper.c    | 9 +++++----
>>>>  drivers/gpu/drm/mgag200/mgag200_mode.c | 2 +-
>>>>  drivers/gpu/drm/tiny/cirrus.c          | 2 +-
>>>>  include/drm/drm_format_helper.h        | 2 +-
>>>>  4 files changed, 8 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
>>>> index c043ca364c86..8d5a683afea7 100644
>>>> --- a/drivers/gpu/drm/drm_format_helper.c
>>>> +++ b/drivers/gpu/drm/drm_format_helper.c
>>>> @@ -52,6 +52,7 @@ EXPORT_SYMBOL(drm_fb_memcpy);
>>>>  /**
>>>>   * drm_fb_memcpy_dstclip - Copy clip buffer
>>>>   * @dst: Destination buffer (iomem)
>>>> + * @dst_pitch: Number of bytes between two consecutive scanlines within dst
>>>>   * @vaddr: Source buffer
>>>>   * @fb: DRM framebuffer
>>>>   * @clip: Clip rectangle area to copy
>>>> @@ -59,12 +60,12 @@ EXPORT_SYMBOL(drm_fb_memcpy);
>>>>   * This function applies clipping on dst, i.e. the destination is a
>>>>   * full (iomem) framebuffer but only the clip rect content is copied over.
>>>>   */
>>>> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>>>> -                       struct drm_framebuffer *fb,
>>>> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch,
>>>> +                       void *vaddr, struct drm_framebuffer *fb,
>>>>                         struct drm_rect *clip)
>>>>  {
>>>>      unsigned int cpp = fb->format->cpp[0];
>>>> -    unsigned int offset = clip_offset(clip, fb->pitches[0], cpp);
>>>> +    unsigned int offset = clip_offset(clip, dst_pitch, cpp);
>>>>      size_t len = (clip->x2 - clip->x1) * cpp;
>>>>      unsigned int y, lines = clip->y2 - clip->y1;
>>>>
>>>> @@ -73,7 +74,7 @@ void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>>>>      for (y = 0; y < lines; y++) {
>>>>              memcpy_toio(dst, vaddr, len);
>>>>              vaddr += fb->pitches[0];
>>>> -            dst += fb->pitches[0];
>>>> +            dst += dst_pitch;
>>>>      }
>>>>  }
>>>>  EXPORT_SYMBOL(drm_fb_memcpy_dstclip);
>>>> diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
>>>> index f16bd278ab7e..7d4f3a62d885 100644
>>>> --- a/drivers/gpu/drm/mgag200/mgag200_mode.c
>>>> +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
>>>> @@ -1586,7 +1586,7 @@ mgag200_handle_damage(struct mga_device *mdev, struct drm_framebuffer *fb,
>>>>      if (drm_WARN_ON(dev, !vmap))
>>>>              return; /* BUG: SHMEM BO should always be vmapped */
>>>>
>>>> -    drm_fb_memcpy_dstclip(mdev->vram, vmap, fb, clip);
>>>> +    drm_fb_memcpy_dstclip(mdev->vram, fb->pitches[0], vmap, fb, clip);
>>>>
>>>>      drm_gem_shmem_vunmap(fb->obj[0], vmap);
>>>>
>>>> diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
>>>> index 744a8e337e41..2dd9e5e31e3d 100644
>>>> --- a/drivers/gpu/drm/tiny/cirrus.c
>>>> +++ b/drivers/gpu/drm/tiny/cirrus.c
>>>> @@ -327,7 +327,7 @@ static int cirrus_fb_blit_rect(struct drm_framebuffer *fb,
>>>>              goto out_dev_exit;
>>>>
>>>>      if (cirrus->cpp == fb->format->cpp[0])
>>>> -            drm_fb_memcpy_dstclip(cirrus->vram,
>>>> +            drm_fb_memcpy_dstclip(cirrus->vram, fb->pitches[0],
>>>>                                    vmap, fb, rect);
>>>>
>>>>      else if (fb->format->cpp[0] == 4 && cirrus->cpp == 2)
>>>> diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
>>>> index 5f9e37032468..2b5036a5fbe7 100644
>>>> --- a/include/drm/drm_format_helper.h
>>>> +++ b/include/drm/drm_format_helper.h
>>>> @@ -11,7 +11,7 @@ struct drm_rect;
>>>>
>>>>  void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
>>>>                 struct drm_rect *clip);
>>>> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>>>> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr,
>>>>                         struct drm_framebuffer *fb,
>>>>                         struct drm_rect *clip);
>>>>  void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
>>>> --
>>>> 2.27.0
>>>>
>>>
>>
>> --
>> Thomas Zimmermann
>> Graphics Driver Developer
>> SUSE Software Solutions Germany GmbH
>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>> (HRB 36809, AG Nürnberg)
>> Geschäftsführer: Felix Imendörffer
>>
> 
>
Daniel Vetter Sept. 28, 2020, 8:53 a.m. UTC | #5
On Mon, Sep 28, 2020 at 9:22 AM Thomas Zimmermann <tzimmermann@suse.de> wrote:
>
> Hi
>
> Am 26.09.20 um 18:42 schrieb Daniel Vetter:
> > On Fri, Sep 25, 2020 at 4:55 PM Thomas Zimmermann <tzimmermann@suse.de> wrote:
> >>
> >> Hi
> >>
> >> Am 29.06.20 um 10:40 schrieb Daniel Vetter:
> >>> On Thu, Jun 25, 2020 at 02:00:03PM +0200, Thomas Zimmermann wrote:
> >>>> The memcpy's destination buffer might have a different pitch than the
> >>>> source. Support different pitches as function argument.
> >>>>
> >>>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> >>>
> >>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> >>>
> >>> But I do have questions ... why did we allocate a source drm_framebuffer
> >>> with mismatching pitch? That sounds backwards, especially for simplekms.
> >>
> >> There's userspace that allocates framebuffers in tiles of 64x64 pixels.
> >> I think I've seen this with Gnome. So if you have a 800x600 display
> >> mode, the allocated framebuffer has a scanline pitch of 832 pixels and
> >> the final 32 pixels are ignored.
> >
> > At least with dumb buffer allocation ioctls userspace should not do
> > that. If it wants 800x600, it needs to allocate 800x600, not something
>
> That ship has sailed.

Not really, right now that ship is simply leaking and sinking. If we
decide to patch this up from the kernel side, then indeed it has
sailed. And I'm not sure that's a good idea.

> > else. The driver is supposed to apply any rounding necessary for the
> > size. Or is this a buffer allocated somewhere else and then shared?
>
> I don't quite remember where exactly this was implemented. It was not a
> shared buffer, though. IIRC the buffer allocation code in one of the
> libs rounded the size towards multiples of 64. I remember thinking that
> it was probably done for tiled rendering.

Yeah, but you don't do rendering on dumb buffers. Like ever. So this
smells like a userspace bug.

If it's for shared buffers then I think that sounds more reasonable.
-Daniel

>
> Best regards
> Thomas
>
> > -Daniel
> >
> >> In regular drivers, we can handle this with the VGA offset register [1]
> >> or some equivalent. That's obviously not an option with simplekms, so
> >> the different pitch is required.
> >>
> >> Best regards
> >> Thomas
> >>
> >> [1]
> >> https://web.stanford.edu/class/cs140/projects/pintos/specs/freevga/vga/crtcreg.htm#13
> >>
> >>>
> >>> Would be good to add the reasons why we need this to the commit message,
> >>> I'm sure I'll discover it later on eventually.
> >>> -Daniel
> >>>
> >>>> ---
> >>>>  drivers/gpu/drm/drm_format_helper.c    | 9 +++++----
> >>>>  drivers/gpu/drm/mgag200/mgag200_mode.c | 2 +-
> >>>>  drivers/gpu/drm/tiny/cirrus.c          | 2 +-
> >>>>  include/drm/drm_format_helper.h        | 2 +-
> >>>>  4 files changed, 8 insertions(+), 7 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
> >>>> index c043ca364c86..8d5a683afea7 100644
> >>>> --- a/drivers/gpu/drm/drm_format_helper.c
> >>>> +++ b/drivers/gpu/drm/drm_format_helper.c
> >>>> @@ -52,6 +52,7 @@ EXPORT_SYMBOL(drm_fb_memcpy);
> >>>>  /**
> >>>>   * drm_fb_memcpy_dstclip - Copy clip buffer
> >>>>   * @dst: Destination buffer (iomem)
> >>>> + * @dst_pitch: Number of bytes between two consecutive scanlines within dst
> >>>>   * @vaddr: Source buffer
> >>>>   * @fb: DRM framebuffer
> >>>>   * @clip: Clip rectangle area to copy
> >>>> @@ -59,12 +60,12 @@ EXPORT_SYMBOL(drm_fb_memcpy);
> >>>>   * This function applies clipping on dst, i.e. the destination is a
> >>>>   * full (iomem) framebuffer but only the clip rect content is copied over.
> >>>>   */
> >>>> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
> >>>> -                       struct drm_framebuffer *fb,
> >>>> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch,
> >>>> +                       void *vaddr, struct drm_framebuffer *fb,
> >>>>                         struct drm_rect *clip)
> >>>>  {
> >>>>      unsigned int cpp = fb->format->cpp[0];
> >>>> -    unsigned int offset = clip_offset(clip, fb->pitches[0], cpp);
> >>>> +    unsigned int offset = clip_offset(clip, dst_pitch, cpp);
> >>>>      size_t len = (clip->x2 - clip->x1) * cpp;
> >>>>      unsigned int y, lines = clip->y2 - clip->y1;
> >>>>
> >>>> @@ -73,7 +74,7 @@ void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
> >>>>      for (y = 0; y < lines; y++) {
> >>>>              memcpy_toio(dst, vaddr, len);
> >>>>              vaddr += fb->pitches[0];
> >>>> -            dst += fb->pitches[0];
> >>>> +            dst += dst_pitch;
> >>>>      }
> >>>>  }
> >>>>  EXPORT_SYMBOL(drm_fb_memcpy_dstclip);
> >>>> diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
> >>>> index f16bd278ab7e..7d4f3a62d885 100644
> >>>> --- a/drivers/gpu/drm/mgag200/mgag200_mode.c
> >>>> +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
> >>>> @@ -1586,7 +1586,7 @@ mgag200_handle_damage(struct mga_device *mdev, struct drm_framebuffer *fb,
> >>>>      if (drm_WARN_ON(dev, !vmap))
> >>>>              return; /* BUG: SHMEM BO should always be vmapped */
> >>>>
> >>>> -    drm_fb_memcpy_dstclip(mdev->vram, vmap, fb, clip);
> >>>> +    drm_fb_memcpy_dstclip(mdev->vram, fb->pitches[0], vmap, fb, clip);
> >>>>
> >>>>      drm_gem_shmem_vunmap(fb->obj[0], vmap);
> >>>>
> >>>> diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
> >>>> index 744a8e337e41..2dd9e5e31e3d 100644
> >>>> --- a/drivers/gpu/drm/tiny/cirrus.c
> >>>> +++ b/drivers/gpu/drm/tiny/cirrus.c
> >>>> @@ -327,7 +327,7 @@ static int cirrus_fb_blit_rect(struct drm_framebuffer *fb,
> >>>>              goto out_dev_exit;
> >>>>
> >>>>      if (cirrus->cpp == fb->format->cpp[0])
> >>>> -            drm_fb_memcpy_dstclip(cirrus->vram,
> >>>> +            drm_fb_memcpy_dstclip(cirrus->vram, fb->pitches[0],
> >>>>                                    vmap, fb, rect);
> >>>>
> >>>>      else if (fb->format->cpp[0] == 4 && cirrus->cpp == 2)
> >>>> diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
> >>>> index 5f9e37032468..2b5036a5fbe7 100644
> >>>> --- a/include/drm/drm_format_helper.h
> >>>> +++ b/include/drm/drm_format_helper.h
> >>>> @@ -11,7 +11,7 @@ struct drm_rect;
> >>>>
> >>>>  void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
> >>>>                 struct drm_rect *clip);
> >>>> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
> >>>> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr,
> >>>>                         struct drm_framebuffer *fb,
> >>>>                         struct drm_rect *clip);
> >>>>  void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
> >>>> --
> >>>> 2.27.0
> >>>>
> >>>
> >>
> >> --
> >> Thomas Zimmermann
> >> Graphics Driver Developer
> >> SUSE Software Solutions Germany GmbH
> >> Maxfeldstr. 5, 90409 Nürnberg, Germany
> >> (HRB 36809, AG Nürnberg)
> >> Geschäftsführer: Felix Imendörffer
> >>
> >
> >
>
> --
> Thomas Zimmermann
> Graphics Driver Developer
> SUSE Software Solutions Germany GmbH
> Maxfeldstr. 5, 90409 Nürnberg, Germany
> (HRB 36809, AG Nürnberg)
> Geschäftsführer: Felix Imendörffer
>
Thomas Zimmermann Sept. 28, 2020, 9:13 a.m. UTC | #6
Hi

Am 28.09.20 um 10:53 schrieb Daniel Vetter:
> On Mon, Sep 28, 2020 at 9:22 AM Thomas Zimmermann <tzimmermann@suse.de> wrote:
>>
>> Hi
>>
>> Am 26.09.20 um 18:42 schrieb Daniel Vetter:
>>> On Fri, Sep 25, 2020 at 4:55 PM Thomas Zimmermann <tzimmermann@suse.de> wrote:
>>>>
>>>> Hi
>>>>
>>>> Am 29.06.20 um 10:40 schrieb Daniel Vetter:
>>>>> On Thu, Jun 25, 2020 at 02:00:03PM +0200, Thomas Zimmermann wrote:
>>>>>> The memcpy's destination buffer might have a different pitch than the
>>>>>> source. Support different pitches as function argument.
>>>>>>
>>>>>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
>>>>>
>>>>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>>>
>>>>> But I do have questions ... why did we allocate a source drm_framebuffer
>>>>> with mismatching pitch? That sounds backwards, especially for simplekms.
>>>>
>>>> There's userspace that allocates framebuffers in tiles of 64x64 pixels.
>>>> I think I've seen this with Gnome. So if you have a 800x600 display
>>>> mode, the allocated framebuffer has a scanline pitch of 832 pixels and
>>>> the final 32 pixels are ignored.
>>>
>>> At least with dumb buffer allocation ioctls userspace should not do
>>> that. If it wants 800x600, it needs to allocate 800x600, not something
>>
>> That ship has sailed.
> 
> Not really, right now that ship is simply leaking and sinking. If we
> decide to patch this up from the kernel side, then indeed it has
> sailed. And I'm not sure that's a good idea.

We have code in at least cirrus, ast and mgag200 to support this. And
userspace has been behaving like this since at least when I got involved
(2017).

> 
>>> else. The driver is supposed to apply any rounding necessary for the
>>> size. Or is this a buffer allocated somewhere else and then shared?
>>
>> I don't quite remember where exactly this was implemented. It was not a
>> shared buffer, though. IIRC the buffer allocation code in one of the
>> libs rounded the size towards multiples of 64. I remember thinking that
>> it was probably done for tiled rendering.
> 
> Yeah, but you don't do rendering on dumb buffers. Like ever. So this
> smells like a userspace bug.

It's also part of the software rendering. It is not a bug, but
implemented deliberately in one of the userspace components that
allocates framebuffers (but I cannot remember which one.)

Best regards
Thomas

> 
> If it's for shared buffers then I think that sounds more reasonable.
> -Daniel
> 
>>
>> Best regards
>> Thomas
>>
>>> -Daniel
>>>
>>>> In regular drivers, we can handle this with the VGA offset register [1]
>>>> or some equivalent. That's obviously not an option with simplekms, so
>>>> the different pitch is required.
>>>>
>>>> Best regards
>>>> Thomas
>>>>
>>>> [1]
>>>> https://web.stanford.edu/class/cs140/projects/pintos/specs/freevga/vga/crtcreg.htm#13
>>>>
>>>>>
>>>>> Would be good to add the reasons why we need this to the commit message,
>>>>> I'm sure I'll discover it later on eventually.
>>>>> -Daniel
>>>>>
>>>>>> ---
>>>>>>  drivers/gpu/drm/drm_format_helper.c    | 9 +++++----
>>>>>>  drivers/gpu/drm/mgag200/mgag200_mode.c | 2 +-
>>>>>>  drivers/gpu/drm/tiny/cirrus.c          | 2 +-
>>>>>>  include/drm/drm_format_helper.h        | 2 +-
>>>>>>  4 files changed, 8 insertions(+), 7 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
>>>>>> index c043ca364c86..8d5a683afea7 100644
>>>>>> --- a/drivers/gpu/drm/drm_format_helper.c
>>>>>> +++ b/drivers/gpu/drm/drm_format_helper.c
>>>>>> @@ -52,6 +52,7 @@ EXPORT_SYMBOL(drm_fb_memcpy);
>>>>>>  /**
>>>>>>   * drm_fb_memcpy_dstclip - Copy clip buffer
>>>>>>   * @dst: Destination buffer (iomem)
>>>>>> + * @dst_pitch: Number of bytes between two consecutive scanlines within dst
>>>>>>   * @vaddr: Source buffer
>>>>>>   * @fb: DRM framebuffer
>>>>>>   * @clip: Clip rectangle area to copy
>>>>>> @@ -59,12 +60,12 @@ EXPORT_SYMBOL(drm_fb_memcpy);
>>>>>>   * This function applies clipping on dst, i.e. the destination is a
>>>>>>   * full (iomem) framebuffer but only the clip rect content is copied over.
>>>>>>   */
>>>>>> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>>>>>> -                       struct drm_framebuffer *fb,
>>>>>> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch,
>>>>>> +                       void *vaddr, struct drm_framebuffer *fb,
>>>>>>                         struct drm_rect *clip)
>>>>>>  {
>>>>>>      unsigned int cpp = fb->format->cpp[0];
>>>>>> -    unsigned int offset = clip_offset(clip, fb->pitches[0], cpp);
>>>>>> +    unsigned int offset = clip_offset(clip, dst_pitch, cpp);
>>>>>>      size_t len = (clip->x2 - clip->x1) * cpp;
>>>>>>      unsigned int y, lines = clip->y2 - clip->y1;
>>>>>>
>>>>>> @@ -73,7 +74,7 @@ void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>>>>>>      for (y = 0; y < lines; y++) {
>>>>>>              memcpy_toio(dst, vaddr, len);
>>>>>>              vaddr += fb->pitches[0];
>>>>>> -            dst += fb->pitches[0];
>>>>>> +            dst += dst_pitch;
>>>>>>      }
>>>>>>  }
>>>>>>  EXPORT_SYMBOL(drm_fb_memcpy_dstclip);
>>>>>> diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
>>>>>> index f16bd278ab7e..7d4f3a62d885 100644
>>>>>> --- a/drivers/gpu/drm/mgag200/mgag200_mode.c
>>>>>> +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
>>>>>> @@ -1586,7 +1586,7 @@ mgag200_handle_damage(struct mga_device *mdev, struct drm_framebuffer *fb,
>>>>>>      if (drm_WARN_ON(dev, !vmap))
>>>>>>              return; /* BUG: SHMEM BO should always be vmapped */
>>>>>>
>>>>>> -    drm_fb_memcpy_dstclip(mdev->vram, vmap, fb, clip);
>>>>>> +    drm_fb_memcpy_dstclip(mdev->vram, fb->pitches[0], vmap, fb, clip);
>>>>>>
>>>>>>      drm_gem_shmem_vunmap(fb->obj[0], vmap);
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
>>>>>> index 744a8e337e41..2dd9e5e31e3d 100644
>>>>>> --- a/drivers/gpu/drm/tiny/cirrus.c
>>>>>> +++ b/drivers/gpu/drm/tiny/cirrus.c
>>>>>> @@ -327,7 +327,7 @@ static int cirrus_fb_blit_rect(struct drm_framebuffer *fb,
>>>>>>              goto out_dev_exit;
>>>>>>
>>>>>>      if (cirrus->cpp == fb->format->cpp[0])
>>>>>> -            drm_fb_memcpy_dstclip(cirrus->vram,
>>>>>> +            drm_fb_memcpy_dstclip(cirrus->vram, fb->pitches[0],
>>>>>>                                    vmap, fb, rect);
>>>>>>
>>>>>>      else if (fb->format->cpp[0] == 4 && cirrus->cpp == 2)
>>>>>> diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
>>>>>> index 5f9e37032468..2b5036a5fbe7 100644
>>>>>> --- a/include/drm/drm_format_helper.h
>>>>>> +++ b/include/drm/drm_format_helper.h
>>>>>> @@ -11,7 +11,7 @@ struct drm_rect;
>>>>>>
>>>>>>  void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
>>>>>>                 struct drm_rect *clip);
>>>>>> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>>>>>> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr,
>>>>>>                         struct drm_framebuffer *fb,
>>>>>>                         struct drm_rect *clip);
>>>>>>  void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
>>>>>> --
>>>>>> 2.27.0
>>>>>>
>>>>>
>>>>
>>>> --
>>>> Thomas Zimmermann
>>>> Graphics Driver Developer
>>>> SUSE Software Solutions Germany GmbH
>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>> (HRB 36809, AG Nürnberg)
>>>> Geschäftsführer: Felix Imendörffer
>>>>
>>>
>>>
>>
>> --
>> Thomas Zimmermann
>> Graphics Driver Developer
>> SUSE Software Solutions Germany GmbH
>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>> (HRB 36809, AG Nürnberg)
>> Geschäftsführer: Felix Imendörffer
>>
> 
>
Gerd Hoffmann Sept. 28, 2020, 10:24 a.m. UTC | #7
Hi,

> > I don't quite remember where exactly this was implemented. It was not a
> > shared buffer, though. IIRC the buffer allocation code in one of the
> > libs rounded the size towards multiples of 64. I remember thinking that
> > it was probably done for tiled rendering.

Happens when running gnome in wayland mode, so whatever the display
server is in that case (mutter?) or one of the libraries it uses.

> Yeah, but you don't do rendering on dumb buffers. Like ever. So this
> smells like a userspace bug.
> 
> If it's for shared buffers then I think that sounds more reasonable.

Well, wayland can use dma-bufs for buffer sharing between wayland server
and wayland client.  Dunno whenever it also does that for the software
rendering case, and I have absolutely no idea how the buffer allocation
code paths look like.  But possibly it isn't known at buffer allocation
time whenever a given buffer will be touched by a gpu at some point in
the future?

take care,
  Gerd
Pekka Paalanen Sept. 28, 2020, 1:42 p.m. UTC | #8
On Mon, 28 Sep 2020 12:24:28 +0200
Gerd Hoffmann <kraxel@redhat.com> wrote:

>   Hi,
> 
> > > I don't quite remember where exactly this was implemented. It was not a
> > > shared buffer, though. IIRC the buffer allocation code in one of the
> > > libs rounded the size towards multiples of 64. I remember thinking that
> > > it was probably done for tiled rendering.  
> 
> Happens when running gnome in wayland mode, so whatever the display
> server is in that case (mutter?) or one of the libraries it uses.
> 
> > Yeah, but you don't do rendering on dumb buffers. Like ever. So this
> > smells like a userspace bug.
> > 
> > If it's for shared buffers then I think that sounds more reasonable.  
> 
> Well, wayland can use dma-bufs for buffer sharing between wayland server
> and wayland client.  Dunno whenever it also does that for the software
> rendering case, and I have absolutely no idea how the buffer allocation
> code paths look like.  But possibly it isn't known at buffer allocation
> time whenever a given buffer will be touched by a gpu at some point in
> the future?

Hi,

generally, all buffer allocation is in Mesa GBM with Mutter, even for
software rendering, as IIRC Mutter does not have a software renderer at
all, it only has (various?) GL paths.

Mutter does have code for allocating dumb buffers on "secondary GPUs"
that I touched this or last year, and I'm fairly certain that does not
do any tricks with size or stride. You also never touch that code if
you only have one DRM device in use.

Wayland apps OTOH should not have access to allocate dumb buffers in
the first place, AFAIU, unless maybe vgem or something.

Added Jonas to CC.


Thanks,
pq
Daniel Vetter Sept. 29, 2020, 9:19 a.m. UTC | #9
On Mon, Sep 28, 2020 at 11:13:06AM +0200, Thomas Zimmermann wrote:
> Hi
> 
> Am 28.09.20 um 10:53 schrieb Daniel Vetter:
> > On Mon, Sep 28, 2020 at 9:22 AM Thomas Zimmermann <tzimmermann@suse.de> wrote:
> >>
> >> Hi
> >>
> >> Am 26.09.20 um 18:42 schrieb Daniel Vetter:
> >>> On Fri, Sep 25, 2020 at 4:55 PM Thomas Zimmermann <tzimmermann@suse.de> wrote:
> >>>>
> >>>> Hi
> >>>>
> >>>> Am 29.06.20 um 10:40 schrieb Daniel Vetter:
> >>>>> On Thu, Jun 25, 2020 at 02:00:03PM +0200, Thomas Zimmermann wrote:
> >>>>>> The memcpy's destination buffer might have a different pitch than the
> >>>>>> source. Support different pitches as function argument.
> >>>>>>
> >>>>>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> >>>>>
> >>>>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> >>>>>
> >>>>> But I do have questions ... why did we allocate a source drm_framebuffer
> >>>>> with mismatching pitch? That sounds backwards, especially for simplekms.
> >>>>
> >>>> There's userspace that allocates framebuffers in tiles of 64x64 pixels.
> >>>> I think I've seen this with Gnome. So if you have a 800x600 display
> >>>> mode, the allocated framebuffer has a scanline pitch of 832 pixels and
> >>>> the final 32 pixels are ignored.
> >>>
> >>> At least with dumb buffer allocation ioctls userspace should not do
> >>> that. If it wants 800x600, it needs to allocate 800x600, not something
> >>
> >> That ship has sailed.
> > 
> > Not really, right now that ship is simply leaking and sinking. If we
> > decide to patch this up from the kernel side, then indeed it has
> > sailed. And I'm not sure that's a good idea.
> 
> We have code in at least cirrus, ast and mgag200 to support this. And
> userspace has been behaving like this since at least when I got involved
> (2017).

Hm where do these drivers copy stuff around and rematch the stride?

> >>> else. The driver is supposed to apply any rounding necessary for the
> >>> size. Or is this a buffer allocated somewhere else and then shared?
> >>
> >> I don't quite remember where exactly this was implemented. It was not a
> >> shared buffer, though. IIRC the buffer allocation code in one of the
> >> libs rounded the size towards multiples of 64. I remember thinking that
> >> it was probably done for tiled rendering.
> > 
> > Yeah, but you don't do rendering on dumb buffers. Like ever. So this
> > smells like a userspace bug.
> 
> It's also part of the software rendering. It is not a bug, but
> implemented deliberately in one of the userspace components that
> allocates framebuffers (but I cannot remember which one.)

I think it would be good to document this.

We already fake xrgb8888 everywhere because userspace is not flexible
enough, I guess having to fake 64b stride support everywhere isn't that
much worse.

But it's definitely not great either :-/
-Daniel

> 
> Best regards
> Thomas
> 
> > 
> > If it's for shared buffers then I think that sounds more reasonable.
> > -Daniel
> > 
> >>
> >> Best regards
> >> Thomas
> >>
> >>> -Daniel
> >>>
> >>>> In regular drivers, we can handle this with the VGA offset register [1]
> >>>> or some equivalent. That's obviously not an option with simplekms, so
> >>>> the different pitch is required.
> >>>>
> >>>> Best regards
> >>>> Thomas
> >>>>
> >>>> [1]
> >>>> https://web.stanford.edu/class/cs140/projects/pintos/specs/freevga/vga/crtcreg.htm#13
> >>>>
> >>>>>
> >>>>> Would be good to add the reasons why we need this to the commit message,
> >>>>> I'm sure I'll discover it later on eventually.
> >>>>> -Daniel
> >>>>>
> >>>>>> ---
> >>>>>>  drivers/gpu/drm/drm_format_helper.c    | 9 +++++----
> >>>>>>  drivers/gpu/drm/mgag200/mgag200_mode.c | 2 +-
> >>>>>>  drivers/gpu/drm/tiny/cirrus.c          | 2 +-
> >>>>>>  include/drm/drm_format_helper.h        | 2 +-
> >>>>>>  4 files changed, 8 insertions(+), 7 deletions(-)
> >>>>>>
> >>>>>> diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
> >>>>>> index c043ca364c86..8d5a683afea7 100644
> >>>>>> --- a/drivers/gpu/drm/drm_format_helper.c
> >>>>>> +++ b/drivers/gpu/drm/drm_format_helper.c
> >>>>>> @@ -52,6 +52,7 @@ EXPORT_SYMBOL(drm_fb_memcpy);
> >>>>>>  /**
> >>>>>>   * drm_fb_memcpy_dstclip - Copy clip buffer
> >>>>>>   * @dst: Destination buffer (iomem)
> >>>>>> + * @dst_pitch: Number of bytes between two consecutive scanlines within dst
> >>>>>>   * @vaddr: Source buffer
> >>>>>>   * @fb: DRM framebuffer
> >>>>>>   * @clip: Clip rectangle area to copy
> >>>>>> @@ -59,12 +60,12 @@ EXPORT_SYMBOL(drm_fb_memcpy);
> >>>>>>   * This function applies clipping on dst, i.e. the destination is a
> >>>>>>   * full (iomem) framebuffer but only the clip rect content is copied over.
> >>>>>>   */
> >>>>>> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
> >>>>>> -                       struct drm_framebuffer *fb,
> >>>>>> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch,
> >>>>>> +                       void *vaddr, struct drm_framebuffer *fb,
> >>>>>>                         struct drm_rect *clip)
> >>>>>>  {
> >>>>>>      unsigned int cpp = fb->format->cpp[0];
> >>>>>> -    unsigned int offset = clip_offset(clip, fb->pitches[0], cpp);
> >>>>>> +    unsigned int offset = clip_offset(clip, dst_pitch, cpp);
> >>>>>>      size_t len = (clip->x2 - clip->x1) * cpp;
> >>>>>>      unsigned int y, lines = clip->y2 - clip->y1;
> >>>>>>
> >>>>>> @@ -73,7 +74,7 @@ void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
> >>>>>>      for (y = 0; y < lines; y++) {
> >>>>>>              memcpy_toio(dst, vaddr, len);
> >>>>>>              vaddr += fb->pitches[0];
> >>>>>> -            dst += fb->pitches[0];
> >>>>>> +            dst += dst_pitch;
> >>>>>>      }
> >>>>>>  }
> >>>>>>  EXPORT_SYMBOL(drm_fb_memcpy_dstclip);
> >>>>>> diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
> >>>>>> index f16bd278ab7e..7d4f3a62d885 100644
> >>>>>> --- a/drivers/gpu/drm/mgag200/mgag200_mode.c
> >>>>>> +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
> >>>>>> @@ -1586,7 +1586,7 @@ mgag200_handle_damage(struct mga_device *mdev, struct drm_framebuffer *fb,
> >>>>>>      if (drm_WARN_ON(dev, !vmap))
> >>>>>>              return; /* BUG: SHMEM BO should always be vmapped */
> >>>>>>
> >>>>>> -    drm_fb_memcpy_dstclip(mdev->vram, vmap, fb, clip);
> >>>>>> +    drm_fb_memcpy_dstclip(mdev->vram, fb->pitches[0], vmap, fb, clip);
> >>>>>>
> >>>>>>      drm_gem_shmem_vunmap(fb->obj[0], vmap);
> >>>>>>
> >>>>>> diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
> >>>>>> index 744a8e337e41..2dd9e5e31e3d 100644
> >>>>>> --- a/drivers/gpu/drm/tiny/cirrus.c
> >>>>>> +++ b/drivers/gpu/drm/tiny/cirrus.c
> >>>>>> @@ -327,7 +327,7 @@ static int cirrus_fb_blit_rect(struct drm_framebuffer *fb,
> >>>>>>              goto out_dev_exit;
> >>>>>>
> >>>>>>      if (cirrus->cpp == fb->format->cpp[0])
> >>>>>> -            drm_fb_memcpy_dstclip(cirrus->vram,
> >>>>>> +            drm_fb_memcpy_dstclip(cirrus->vram, fb->pitches[0],
> >>>>>>                                    vmap, fb, rect);
> >>>>>>
> >>>>>>      else if (fb->format->cpp[0] == 4 && cirrus->cpp == 2)
> >>>>>> diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
> >>>>>> index 5f9e37032468..2b5036a5fbe7 100644
> >>>>>> --- a/include/drm/drm_format_helper.h
> >>>>>> +++ b/include/drm/drm_format_helper.h
> >>>>>> @@ -11,7 +11,7 @@ struct drm_rect;
> >>>>>>
> >>>>>>  void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
> >>>>>>                 struct drm_rect *clip);
> >>>>>> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
> >>>>>> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr,
> >>>>>>                         struct drm_framebuffer *fb,
> >>>>>>                         struct drm_rect *clip);
> >>>>>>  void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
> >>>>>> --
> >>>>>> 2.27.0
> >>>>>>
> >>>>>
> >>>>
> >>>> --
> >>>> Thomas Zimmermann
> >>>> Graphics Driver Developer
> >>>> SUSE Software Solutions Germany GmbH
> >>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
> >>>> (HRB 36809, AG Nürnberg)
> >>>> Geschäftsführer: Felix Imendörffer
> >>>>
> >>>
> >>>
> >>
> >> --
> >> Thomas Zimmermann
> >> Graphics Driver Developer
> >> SUSE Software Solutions Germany GmbH
> >> Maxfeldstr. 5, 90409 Nürnberg, Germany
> >> (HRB 36809, AG Nürnberg)
> >> Geschäftsführer: Felix Imendörffer
> >>
> > 
> > 
> 
> -- 
> Thomas Zimmermann
> Graphics Driver Developer
> SUSE Software Solutions Germany GmbH
> Maxfeldstr. 5, 90409 Nürnberg, Germany
> (HRB 36809, AG Nürnberg)
> Geschäftsführer: Felix Imendörffer
>
Thomas Zimmermann Sept. 29, 2020, 9:39 a.m. UTC | #10
Hi

Am 29.09.20 um 11:19 schrieb Daniel Vetter:
> On Mon, Sep 28, 2020 at 11:13:06AM +0200, Thomas Zimmermann wrote:
>> Hi
>>
>> Am 28.09.20 um 10:53 schrieb Daniel Vetter:
>>> On Mon, Sep 28, 2020 at 9:22 AM Thomas Zimmermann <tzimmermann@suse.de> wrote:
>>>>
>>>> Hi
>>>>
>>>> Am 26.09.20 um 18:42 schrieb Daniel Vetter:
>>>>> On Fri, Sep 25, 2020 at 4:55 PM Thomas Zimmermann <tzimmermann@suse.de> wrote:
>>>>>>
>>>>>> Hi
>>>>>>
>>>>>> Am 29.06.20 um 10:40 schrieb Daniel Vetter:
>>>>>>> On Thu, Jun 25, 2020 at 02:00:03PM +0200, Thomas Zimmermann wrote:
>>>>>>>> The memcpy's destination buffer might have a different pitch than the
>>>>>>>> source. Support different pitches as function argument.
>>>>>>>>
>>>>>>>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
>>>>>>>
>>>>>>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>>>>>
>>>>>>> But I do have questions ... why did we allocate a source drm_framebuffer
>>>>>>> with mismatching pitch? That sounds backwards, especially for simplekms.
>>>>>>
>>>>>> There's userspace that allocates framebuffers in tiles of 64x64 pixels.
>>>>>> I think I've seen this with Gnome. So if you have a 800x600 display
>>>>>> mode, the allocated framebuffer has a scanline pitch of 832 pixels and
>>>>>> the final 32 pixels are ignored.
>>>>>
>>>>> At least with dumb buffer allocation ioctls userspace should not do
>>>>> that. If it wants 800x600, it needs to allocate 800x600, not something
>>>>
>>>> That ship has sailed.
>>>
>>> Not really, right now that ship is simply leaking and sinking. If we
>>> decide to patch this up from the kernel side, then indeed it has
>>> sailed. And I'm not sure that's a good idea.
>>
>> We have code in at least cirrus, ast and mgag200 to support this. And
>> userspace has been behaving like this since at least when I got involved
>> (2017).
> 
> Hm where do these drivers copy stuff around and rematch the stride?

They don't. These drivers adopt their HW stride to match whatever
userspace framebuffers tell them. [1] And that's because userspace gives
them framebuffer sizes like 832*640. And then they do a memcpy with the
given width.

My sole point here was that userspace already relies on this behavior.

Best regards
Thomas

[1]
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/gpu/drm/tiny/cirrus.c#n285

> 
>>>>> else. The driver is supposed to apply any rounding necessary for the
>>>>> size. Or is this a buffer allocated somewhere else and then shared?
>>>>
>>>> I don't quite remember where exactly this was implemented. It was not a
>>>> shared buffer, though. IIRC the buffer allocation code in one of the
>>>> libs rounded the size towards multiples of 64. I remember thinking that
>>>> it was probably done for tiled rendering.
>>>
>>> Yeah, but you don't do rendering on dumb buffers. Like ever. So this
>>> smells like a userspace bug.
>>
>> It's also part of the software rendering. It is not a bug, but
>> implemented deliberately in one of the userspace components that
>> allocates framebuffers (but I cannot remember which one.)
> 
> I think it would be good to document this.
> 
> We already fake xrgb8888 everywhere because userspace is not flexible
> enough, I guess having to fake 64b stride support everywhere isn't that
> much worse.
> 
> But it's definitely not great either :-/
> -Daniel
> 
>>
>> Best regards
>> Thomas
>>
>>>
>>> If it's for shared buffers then I think that sounds more reasonable.
>>> -Daniel
>>>
>>>>
>>>> Best regards
>>>> Thomas
>>>>
>>>>> -Daniel
>>>>>
>>>>>> In regular drivers, we can handle this with the VGA offset register [1]
>>>>>> or some equivalent. That's obviously not an option with simplekms, so
>>>>>> the different pitch is required.
>>>>>>
>>>>>> Best regards
>>>>>> Thomas
>>>>>>
>>>>>> [1]
>>>>>> https://web.stanford.edu/class/cs140/projects/pintos/specs/freevga/vga/crtcreg.htm#13
>>>>>>
>>>>>>>
>>>>>>> Would be good to add the reasons why we need this to the commit message,
>>>>>>> I'm sure I'll discover it later on eventually.
>>>>>>> -Daniel
>>>>>>>
>>>>>>>> ---
>>>>>>>>  drivers/gpu/drm/drm_format_helper.c    | 9 +++++----
>>>>>>>>  drivers/gpu/drm/mgag200/mgag200_mode.c | 2 +-
>>>>>>>>  drivers/gpu/drm/tiny/cirrus.c          | 2 +-
>>>>>>>>  include/drm/drm_format_helper.h        | 2 +-
>>>>>>>>  4 files changed, 8 insertions(+), 7 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
>>>>>>>> index c043ca364c86..8d5a683afea7 100644
>>>>>>>> --- a/drivers/gpu/drm/drm_format_helper.c
>>>>>>>> +++ b/drivers/gpu/drm/drm_format_helper.c
>>>>>>>> @@ -52,6 +52,7 @@ EXPORT_SYMBOL(drm_fb_memcpy);
>>>>>>>>  /**
>>>>>>>>   * drm_fb_memcpy_dstclip - Copy clip buffer
>>>>>>>>   * @dst: Destination buffer (iomem)
>>>>>>>> + * @dst_pitch: Number of bytes between two consecutive scanlines within dst
>>>>>>>>   * @vaddr: Source buffer
>>>>>>>>   * @fb: DRM framebuffer
>>>>>>>>   * @clip: Clip rectangle area to copy
>>>>>>>> @@ -59,12 +60,12 @@ EXPORT_SYMBOL(drm_fb_memcpy);
>>>>>>>>   * This function applies clipping on dst, i.e. the destination is a
>>>>>>>>   * full (iomem) framebuffer but only the clip rect content is copied over.
>>>>>>>>   */
>>>>>>>> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>>>>>>>> -                       struct drm_framebuffer *fb,
>>>>>>>> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch,
>>>>>>>> +                       void *vaddr, struct drm_framebuffer *fb,
>>>>>>>>                         struct drm_rect *clip)
>>>>>>>>  {
>>>>>>>>      unsigned int cpp = fb->format->cpp[0];
>>>>>>>> -    unsigned int offset = clip_offset(clip, fb->pitches[0], cpp);
>>>>>>>> +    unsigned int offset = clip_offset(clip, dst_pitch, cpp);
>>>>>>>>      size_t len = (clip->x2 - clip->x1) * cpp;
>>>>>>>>      unsigned int y, lines = clip->y2 - clip->y1;
>>>>>>>>
>>>>>>>> @@ -73,7 +74,7 @@ void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>>>>>>>>      for (y = 0; y < lines; y++) {
>>>>>>>>              memcpy_toio(dst, vaddr, len);
>>>>>>>>              vaddr += fb->pitches[0];
>>>>>>>> -            dst += fb->pitches[0];
>>>>>>>> +            dst += dst_pitch;
>>>>>>>>      }
>>>>>>>>  }
>>>>>>>>  EXPORT_SYMBOL(drm_fb_memcpy_dstclip);
>>>>>>>> diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
>>>>>>>> index f16bd278ab7e..7d4f3a62d885 100644
>>>>>>>> --- a/drivers/gpu/drm/mgag200/mgag200_mode.c
>>>>>>>> +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
>>>>>>>> @@ -1586,7 +1586,7 @@ mgag200_handle_damage(struct mga_device *mdev, struct drm_framebuffer *fb,
>>>>>>>>      if (drm_WARN_ON(dev, !vmap))
>>>>>>>>              return; /* BUG: SHMEM BO should always be vmapped */
>>>>>>>>
>>>>>>>> -    drm_fb_memcpy_dstclip(mdev->vram, vmap, fb, clip);
>>>>>>>> +    drm_fb_memcpy_dstclip(mdev->vram, fb->pitches[0], vmap, fb, clip);
>>>>>>>>
>>>>>>>>      drm_gem_shmem_vunmap(fb->obj[0], vmap);
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
>>>>>>>> index 744a8e337e41..2dd9e5e31e3d 100644
>>>>>>>> --- a/drivers/gpu/drm/tiny/cirrus.c
>>>>>>>> +++ b/drivers/gpu/drm/tiny/cirrus.c
>>>>>>>> @@ -327,7 +327,7 @@ static int cirrus_fb_blit_rect(struct drm_framebuffer *fb,
>>>>>>>>              goto out_dev_exit;
>>>>>>>>
>>>>>>>>      if (cirrus->cpp == fb->format->cpp[0])
>>>>>>>> -            drm_fb_memcpy_dstclip(cirrus->vram,
>>>>>>>> +            drm_fb_memcpy_dstclip(cirrus->vram, fb->pitches[0],
>>>>>>>>                                    vmap, fb, rect);
>>>>>>>>
>>>>>>>>      else if (fb->format->cpp[0] == 4 && cirrus->cpp == 2)
>>>>>>>> diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
>>>>>>>> index 5f9e37032468..2b5036a5fbe7 100644
>>>>>>>> --- a/include/drm/drm_format_helper.h
>>>>>>>> +++ b/include/drm/drm_format_helper.h
>>>>>>>> @@ -11,7 +11,7 @@ struct drm_rect;
>>>>>>>>
>>>>>>>>  void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
>>>>>>>>                 struct drm_rect *clip);
>>>>>>>> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>>>>>>>> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr,
>>>>>>>>                         struct drm_framebuffer *fb,
>>>>>>>>                         struct drm_rect *clip);
>>>>>>>>  void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
>>>>>>>> --
>>>>>>>> 2.27.0
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>> --
>>>>>> Thomas Zimmermann
>>>>>> Graphics Driver Developer
>>>>>> SUSE Software Solutions Germany GmbH
>>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>>>> (HRB 36809, AG Nürnberg)
>>>>>> Geschäftsführer: Felix Imendörffer
>>>>>>
>>>>>
>>>>>
>>>>
>>>> --
>>>> Thomas Zimmermann
>>>> Graphics Driver Developer
>>>> SUSE Software Solutions Germany GmbH
>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>> (HRB 36809, AG Nürnberg)
>>>> Geschäftsführer: Felix Imendörffer
>>>>
>>>
>>>
>>
>> -- 
>> Thomas Zimmermann
>> Graphics Driver Developer
>> SUSE Software Solutions Germany GmbH
>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>> (HRB 36809, AG Nürnberg)
>> Geschäftsführer: Felix Imendörffer
>>
> 
> 
> 
>
Daniel Vetter Sept. 29, 2020, 11:32 a.m. UTC | #11
On Tue, Sep 29, 2020 at 11:39:21AM +0200, Thomas Zimmermann wrote:
> Hi
> 
> Am 29.09.20 um 11:19 schrieb Daniel Vetter:
> > On Mon, Sep 28, 2020 at 11:13:06AM +0200, Thomas Zimmermann wrote:
> >> Hi
> >>
> >> Am 28.09.20 um 10:53 schrieb Daniel Vetter:
> >>> On Mon, Sep 28, 2020 at 9:22 AM Thomas Zimmermann <tzimmermann@suse.de> wrote:
> >>>>
> >>>> Hi
> >>>>
> >>>> Am 26.09.20 um 18:42 schrieb Daniel Vetter:
> >>>>> On Fri, Sep 25, 2020 at 4:55 PM Thomas Zimmermann <tzimmermann@suse.de> wrote:
> >>>>>>
> >>>>>> Hi
> >>>>>>
> >>>>>> Am 29.06.20 um 10:40 schrieb Daniel Vetter:
> >>>>>>> On Thu, Jun 25, 2020 at 02:00:03PM +0200, Thomas Zimmermann wrote:
> >>>>>>>> The memcpy's destination buffer might have a different pitch than the
> >>>>>>>> source. Support different pitches as function argument.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> >>>>>>>
> >>>>>>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> >>>>>>>
> >>>>>>> But I do have questions ... why did we allocate a source drm_framebuffer
> >>>>>>> with mismatching pitch? That sounds backwards, especially for simplekms.
> >>>>>>
> >>>>>> There's userspace that allocates framebuffers in tiles of 64x64 pixels.
> >>>>>> I think I've seen this with Gnome. So if you have a 800x600 display
> >>>>>> mode, the allocated framebuffer has a scanline pitch of 832 pixels and
> >>>>>> the final 32 pixels are ignored.
> >>>>>
> >>>>> At least with dumb buffer allocation ioctls userspace should not do
> >>>>> that. If it wants 800x600, it needs to allocate 800x600, not something
> >>>>
> >>>> That ship has sailed.
> >>>
> >>> Not really, right now that ship is simply leaking and sinking. If we
> >>> decide to patch this up from the kernel side, then indeed it has
> >>> sailed. And I'm not sure that's a good idea.
> >>
> >> We have code in at least cirrus, ast and mgag200 to support this. And
> >> userspace has been behaving like this since at least when I got involved
> >> (2017).
> > 
> > Hm where do these drivers copy stuff around and rematch the stride?
> 
> They don't. These drivers adopt their HW stride to match whatever
> userspace framebuffers tell them. [1] And that's because userspace gives
> them framebuffer sizes like 832*640. And then they do a memcpy with the
> given width.
> 
> My sole point here was that userspace already relies on this behavior.

Yeah but the problem is, if we support this through copying, then we rob
userspace of the change to do something better.

I guess the reason that userspace aligns to 64b is because of i915 gbm
(iirc 64b is the requirement for being able to render, or maybe it's the
requirement for scanout). So you get zero-copy buffer sharing.

But if the kernel now does copies on its own, without telling userspace,
then we might end up with a fairly crappy path. It might not matter much
for old devices like this one, but for cases where we really care about
zero-copy it does. And unlike the kernel userspace can perhaps do the
copying using a different gpu for these cases.

So that's kinda why I'm vary of rolling this out at large scale. Since
once we've done it, we cannot ever undo it since that breaks existing
userspace on existing hardware. Which means even if we fix userspace to
have a better/different fallback that uses a correctly sized buffer and
maybe even a gpu copy, that code wont run because the kernel papers over
the problem with a dog-slow re-stride cpu copy.
-Daniel

> 
> Best regards
> Thomas
> 
> [1]
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/gpu/drm/tiny/cirrus.c#n285
> 
> > 
> >>>>> else. The driver is supposed to apply any rounding necessary for the
> >>>>> size. Or is this a buffer allocated somewhere else and then shared?
> >>>>
> >>>> I don't quite remember where exactly this was implemented. It was not a
> >>>> shared buffer, though. IIRC the buffer allocation code in one of the
> >>>> libs rounded the size towards multiples of 64. I remember thinking that
> >>>> it was probably done for tiled rendering.
> >>>
> >>> Yeah, but you don't do rendering on dumb buffers. Like ever. So this
> >>> smells like a userspace bug.
> >>
> >> It's also part of the software rendering. It is not a bug, but
> >> implemented deliberately in one of the userspace components that
> >> allocates framebuffers (but I cannot remember which one.)
> > 
> > I think it would be good to document this.
> > 
> > We already fake xrgb8888 everywhere because userspace is not flexible
> > enough, I guess having to fake 64b stride support everywhere isn't that
> > much worse.
> > 
> > But it's definitely not great either :-/
> > -Daniel
> > 
> >>
> >> Best regards
> >> Thomas
> >>
> >>>
> >>> If it's for shared buffers then I think that sounds more reasonable.
> >>> -Daniel
> >>>
> >>>>
> >>>> Best regards
> >>>> Thomas
> >>>>
> >>>>> -Daniel
> >>>>>
> >>>>>> In regular drivers, we can handle this with the VGA offset register [1]
> >>>>>> or some equivalent. That's obviously not an option with simplekms, so
> >>>>>> the different pitch is required.
> >>>>>>
> >>>>>> Best regards
> >>>>>> Thomas
> >>>>>>
> >>>>>> [1]
> >>>>>> https://web.stanford.edu/class/cs140/projects/pintos/specs/freevga/vga/crtcreg.htm#13
> >>>>>>
> >>>>>>>
> >>>>>>> Would be good to add the reasons why we need this to the commit message,
> >>>>>>> I'm sure I'll discover it later on eventually.
> >>>>>>> -Daniel
> >>>>>>>
> >>>>>>>> ---
> >>>>>>>>  drivers/gpu/drm/drm_format_helper.c    | 9 +++++----
> >>>>>>>>  drivers/gpu/drm/mgag200/mgag200_mode.c | 2 +-
> >>>>>>>>  drivers/gpu/drm/tiny/cirrus.c          | 2 +-
> >>>>>>>>  include/drm/drm_format_helper.h        | 2 +-
> >>>>>>>>  4 files changed, 8 insertions(+), 7 deletions(-)
> >>>>>>>>
> >>>>>>>> diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
> >>>>>>>> index c043ca364c86..8d5a683afea7 100644
> >>>>>>>> --- a/drivers/gpu/drm/drm_format_helper.c
> >>>>>>>> +++ b/drivers/gpu/drm/drm_format_helper.c
> >>>>>>>> @@ -52,6 +52,7 @@ EXPORT_SYMBOL(drm_fb_memcpy);
> >>>>>>>>  /**
> >>>>>>>>   * drm_fb_memcpy_dstclip - Copy clip buffer
> >>>>>>>>   * @dst: Destination buffer (iomem)
> >>>>>>>> + * @dst_pitch: Number of bytes between two consecutive scanlines within dst
> >>>>>>>>   * @vaddr: Source buffer
> >>>>>>>>   * @fb: DRM framebuffer
> >>>>>>>>   * @clip: Clip rectangle area to copy
> >>>>>>>> @@ -59,12 +60,12 @@ EXPORT_SYMBOL(drm_fb_memcpy);
> >>>>>>>>   * This function applies clipping on dst, i.e. the destination is a
> >>>>>>>>   * full (iomem) framebuffer but only the clip rect content is copied over.
> >>>>>>>>   */
> >>>>>>>> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
> >>>>>>>> -                       struct drm_framebuffer *fb,
> >>>>>>>> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch,
> >>>>>>>> +                       void *vaddr, struct drm_framebuffer *fb,
> >>>>>>>>                         struct drm_rect *clip)
> >>>>>>>>  {
> >>>>>>>>      unsigned int cpp = fb->format->cpp[0];
> >>>>>>>> -    unsigned int offset = clip_offset(clip, fb->pitches[0], cpp);
> >>>>>>>> +    unsigned int offset = clip_offset(clip, dst_pitch, cpp);
> >>>>>>>>      size_t len = (clip->x2 - clip->x1) * cpp;
> >>>>>>>>      unsigned int y, lines = clip->y2 - clip->y1;
> >>>>>>>>
> >>>>>>>> @@ -73,7 +74,7 @@ void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
> >>>>>>>>      for (y = 0; y < lines; y++) {
> >>>>>>>>              memcpy_toio(dst, vaddr, len);
> >>>>>>>>              vaddr += fb->pitches[0];
> >>>>>>>> -            dst += fb->pitches[0];
> >>>>>>>> +            dst += dst_pitch;
> >>>>>>>>      }
> >>>>>>>>  }
> >>>>>>>>  EXPORT_SYMBOL(drm_fb_memcpy_dstclip);
> >>>>>>>> diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
> >>>>>>>> index f16bd278ab7e..7d4f3a62d885 100644
> >>>>>>>> --- a/drivers/gpu/drm/mgag200/mgag200_mode.c
> >>>>>>>> +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
> >>>>>>>> @@ -1586,7 +1586,7 @@ mgag200_handle_damage(struct mga_device *mdev, struct drm_framebuffer *fb,
> >>>>>>>>      if (drm_WARN_ON(dev, !vmap))
> >>>>>>>>              return; /* BUG: SHMEM BO should always be vmapped */
> >>>>>>>>
> >>>>>>>> -    drm_fb_memcpy_dstclip(mdev->vram, vmap, fb, clip);
> >>>>>>>> +    drm_fb_memcpy_dstclip(mdev->vram, fb->pitches[0], vmap, fb, clip);
> >>>>>>>>
> >>>>>>>>      drm_gem_shmem_vunmap(fb->obj[0], vmap);
> >>>>>>>>
> >>>>>>>> diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
> >>>>>>>> index 744a8e337e41..2dd9e5e31e3d 100644
> >>>>>>>> --- a/drivers/gpu/drm/tiny/cirrus.c
> >>>>>>>> +++ b/drivers/gpu/drm/tiny/cirrus.c
> >>>>>>>> @@ -327,7 +327,7 @@ static int cirrus_fb_blit_rect(struct drm_framebuffer *fb,
> >>>>>>>>              goto out_dev_exit;
> >>>>>>>>
> >>>>>>>>      if (cirrus->cpp == fb->format->cpp[0])
> >>>>>>>> -            drm_fb_memcpy_dstclip(cirrus->vram,
> >>>>>>>> +            drm_fb_memcpy_dstclip(cirrus->vram, fb->pitches[0],
> >>>>>>>>                                    vmap, fb, rect);
> >>>>>>>>
> >>>>>>>>      else if (fb->format->cpp[0] == 4 && cirrus->cpp == 2)
> >>>>>>>> diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
> >>>>>>>> index 5f9e37032468..2b5036a5fbe7 100644
> >>>>>>>> --- a/include/drm/drm_format_helper.h
> >>>>>>>> +++ b/include/drm/drm_format_helper.h
> >>>>>>>> @@ -11,7 +11,7 @@ struct drm_rect;
> >>>>>>>>
> >>>>>>>>  void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
> >>>>>>>>                 struct drm_rect *clip);
> >>>>>>>> -void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
> >>>>>>>> +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr,
> >>>>>>>>                         struct drm_framebuffer *fb,
> >>>>>>>>                         struct drm_rect *clip);
> >>>>>>>>  void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
> >>>>>>>> --
> >>>>>>>> 2.27.0
> >>>>>>>>
> >>>>>>>
> >>>>>>
> >>>>>> --
> >>>>>> Thomas Zimmermann
> >>>>>> Graphics Driver Developer
> >>>>>> SUSE Software Solutions Germany GmbH
> >>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
> >>>>>> (HRB 36809, AG Nürnberg)
> >>>>>> Geschäftsführer: Felix Imendörffer
> >>>>>>
> >>>>>
> >>>>>
> >>>>
> >>>> --
> >>>> Thomas Zimmermann
> >>>> Graphics Driver Developer
> >>>> SUSE Software Solutions Germany GmbH
> >>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
> >>>> (HRB 36809, AG Nürnberg)
> >>>> Geschäftsführer: Felix Imendörffer
> >>>>
> >>>
> >>>
> >>
> >> -- 
> >> Thomas Zimmermann
> >> Graphics Driver Developer
> >> SUSE Software Solutions Germany GmbH
> >> Maxfeldstr. 5, 90409 Nürnberg, Germany
> >> (HRB 36809, AG Nürnberg)
> >> Geschäftsführer: Felix Imendörffer
> >>
> > 
> > 
> > 
> > 
> 
> -- 
> Thomas Zimmermann
> Graphics Driver Developer
> SUSE Software Solutions Germany GmbH
> Maxfeldstr. 5, 90409 Nürnberg, Germany
> (HRB 36809, AG Nürnberg)
> Geschäftsführer: Felix Imendörffer
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
index c043ca364c86..8d5a683afea7 100644
--- a/drivers/gpu/drm/drm_format_helper.c
+++ b/drivers/gpu/drm/drm_format_helper.c
@@ -52,6 +52,7 @@  EXPORT_SYMBOL(drm_fb_memcpy);
 /**
  * drm_fb_memcpy_dstclip - Copy clip buffer
  * @dst: Destination buffer (iomem)
+ * @dst_pitch: Number of bytes between two consecutive scanlines within dst
  * @vaddr: Source buffer
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
@@ -59,12 +60,12 @@  EXPORT_SYMBOL(drm_fb_memcpy);
  * This function applies clipping on dst, i.e. the destination is a
  * full (iomem) framebuffer but only the clip rect content is copied over.
  */
-void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
-			   struct drm_framebuffer *fb,
+void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch,
+			   void *vaddr, struct drm_framebuffer *fb,
 			   struct drm_rect *clip)
 {
 	unsigned int cpp = fb->format->cpp[0];
-	unsigned int offset = clip_offset(clip, fb->pitches[0], cpp);
+	unsigned int offset = clip_offset(clip, dst_pitch, cpp);
 	size_t len = (clip->x2 - clip->x1) * cpp;
 	unsigned int y, lines = clip->y2 - clip->y1;
 
@@ -73,7 +74,7 @@  void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
 	for (y = 0; y < lines; y++) {
 		memcpy_toio(dst, vaddr, len);
 		vaddr += fb->pitches[0];
-		dst += fb->pitches[0];
+		dst += dst_pitch;
 	}
 }
 EXPORT_SYMBOL(drm_fb_memcpy_dstclip);
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index f16bd278ab7e..7d4f3a62d885 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -1586,7 +1586,7 @@  mgag200_handle_damage(struct mga_device *mdev, struct drm_framebuffer *fb,
 	if (drm_WARN_ON(dev, !vmap))
 		return; /* BUG: SHMEM BO should always be vmapped */
 
-	drm_fb_memcpy_dstclip(mdev->vram, vmap, fb, clip);
+	drm_fb_memcpy_dstclip(mdev->vram, fb->pitches[0], vmap, fb, clip);
 
 	drm_gem_shmem_vunmap(fb->obj[0], vmap);
 
diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
index 744a8e337e41..2dd9e5e31e3d 100644
--- a/drivers/gpu/drm/tiny/cirrus.c
+++ b/drivers/gpu/drm/tiny/cirrus.c
@@ -327,7 +327,7 @@  static int cirrus_fb_blit_rect(struct drm_framebuffer *fb,
 		goto out_dev_exit;
 
 	if (cirrus->cpp == fb->format->cpp[0])
-		drm_fb_memcpy_dstclip(cirrus->vram,
+		drm_fb_memcpy_dstclip(cirrus->vram, fb->pitches[0],
 				      vmap, fb, rect);
 
 	else if (fb->format->cpp[0] == 4 && cirrus->cpp == 2)
diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
index 5f9e37032468..2b5036a5fbe7 100644
--- a/include/drm/drm_format_helper.h
+++ b/include/drm/drm_format_helper.h
@@ -11,7 +11,7 @@  struct drm_rect;
 
 void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
 		   struct drm_rect *clip);
-void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
+void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr,
 			   struct drm_framebuffer *fb,
 			   struct drm_rect *clip);
 void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,