diff mbox series

[RFC] drm/ssd130x: Allocate buffer in the CRTC's .atomic_check() callback

Message ID 20230830062546.720679-1-javierm@redhat.com (mailing list archive)
State Rejected
Headers show
Series [RFC] drm/ssd130x: Allocate buffer in the CRTC's .atomic_check() callback | expand

Commit Message

Javier Martinez Canillas Aug. 30, 2023, 6:25 a.m. UTC
The commit 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's
.atomic_check() callback") moved the allocation of the intermediate and
HW buffers from the encoder's .atomic_enable callback to primary plane's
.atomic_check callback.

This was suggested by Maxime Ripard because drivers aren't allowed to fail
after drm_atomic_helper_swap_state() has been called, and the encoder's
.atomic_enable happens after the new atomic state has been swapped.

But that change caused a performance regression in very slow platforms,
since now the allocation happens for every plane's atomic state commit.
For example, Geert Uytterhoeven reports that this is the case on a VexRiscV
softcore (RISC-V CPU implementation on an FPGA).

To prevent that, move the buffers' allocation and free to the
CRTC's .atomic_check and .atomic_destroy_state callbacks, so that only
happens on a modeset. Since the intermediate buffer is only needed when
not using the controller native format (R1), doing the buffer allocation
at that CRTC's .atomic_check time would be enough.

Fixes: 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's .atomic_check() callback")
Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
---
Hello,

This is a RFC because I'm not sure if there is a net benefit after this
change. I find the current code much cleaner and less error prone, even
when Geert reports that it performs worse on his (very slow) platform.

But I'm still posting it to see what others think. I've tested the patch
on an I2C ssd1306 OLED and found no regressions.

The patch is on top of Geert's latest patch-set that adds support for the
DRM_FORMAT_R1 to the ssd130x driver:

https://lore.kernel.org/dri-devel/cover.1692888745.git.geert@linux-m68k.org

Best regards,
Javier

 drivers/gpu/drm/solomon/ssd130x.c | 106 ++++++++++++++++--------------
 1 file changed, 56 insertions(+), 50 deletions(-)

Comments

Thomas Zimmermann Aug. 30, 2023, 7:08 a.m. UTC | #1
Hi Javier

Am 30.08.23 um 08:25 schrieb Javier Martinez Canillas:
> The commit 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's
> .atomic_check() callback") moved the allocation of the intermediate and
> HW buffers from the encoder's .atomic_enable callback to primary plane's
> .atomic_check callback.
> 
> This was suggested by Maxime Ripard because drivers aren't allowed to fail
> after drm_atomic_helper_swap_state() has been called, and the encoder's
> .atomic_enable happens after the new atomic state has been swapped.
> 
> But that change caused a performance regression in very slow platforms,
> since now the allocation happens for every plane's atomic state commit.
> For example, Geert Uytterhoeven reports that is the case on a VexRiscV
> softcore (RISC-V CPU implementation on an FPGA).
> 
> To prevent that, move the move the buffers' allocation and free to the

Double 'move the'

And maybe buffer's rather than buffers'

> CRTC's .atomic_check and .atomic_destroy_state callbacks, so that only
> happens on a modeset. Since the intermediate buffer is only needed when
> not using the controller native format (R1), doing the buffer allocation
> at that CRTC's .atomic_check time would be enough.
> 
> Fixes: 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's .atomic_check() callback")
> Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
> Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
> ---
> Hello,
> 
> This is a RFC because I'm not sure if there is a net benefit after this
> change. I find the currect code much cleaner and less error prone, even
> when Geert reports that performs worse on his (very slow) platform.
> 
> But I'm still posting it to see what others think. I've tested the patch
> on an I2C ssd1306 OLED and found no regressions.
> 
> The patch is on top on Geert's latest patch-set that adds support for the
> DRM_FORMAT_R1 to the ssd130x driver:
> 
> https://lore.kernel.org/dri-devel/cover.1692888745.git.geert@linux-m68k.org
> 
> Best regards,
> Javier
> 
>   drivers/gpu/drm/solomon/ssd130x.c | 106 ++++++++++++++++--------------
>   1 file changed, 56 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c
> index 0d2b36ba4011..60536cd0c42d 100644
> --- a/drivers/gpu/drm/solomon/ssd130x.c
> +++ b/drivers/gpu/drm/solomon/ssd130x.c
> @@ -650,46 +650,6 @@ static int ssd130x_fb_blit_rect(struct drm_plane_state *state,
>   	return ret;
>   }
>   
> -static int ssd130x_primary_plane_helper_atomic_check(struct drm_plane *plane,
> -						     struct drm_atomic_state *state)
> -{
> -	struct drm_device *drm = plane->dev;
> -	struct ssd130x_device *ssd130x = drm_to_ssd130x(drm);
> -	struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane);
> -	struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state);
> -	unsigned int page_height = ssd130x->device_info->page_height;
> -	unsigned int pages = DIV_ROUND_UP(ssd130x->height, page_height);
> -	const struct drm_format_info *fi;
> -	unsigned int pitch;
> -	int ret;
> -
> -	ret = drm_plane_helper_atomic_check(plane, state);
> -	if (ret)
> -		return ret;
> -
> -	ssd130x_state->data_array = kcalloc(ssd130x->width, pages, GFP_KERNEL);
> -	if (!ssd130x_state->data_array)
> -		return -ENOMEM;
> -
> -	if (plane_state->fb->format->format != DRM_FORMAT_R1) {
> -		fi = drm_format_info(DRM_FORMAT_R1);
> -		if (!fi)
> -			return -EINVAL;
> -
> -		pitch = drm_format_info_min_pitch(fi, 0, ssd130x->width);
> -
> -		ssd130x_state->buffer = kcalloc(pitch, ssd130x->height, GFP_KERNEL);
> -		if (!ssd130x_state->buffer) {
> -			kfree(ssd130x_state->data_array);
> -			/* Set to prevent a double free in .atomic_destroy_state() */
> -			ssd130x_state->data_array = NULL;
> -			return -ENOMEM;
> -		}
> -	}
> -
> -	return 0;
> -}
> -
>   static void ssd130x_primary_plane_helper_atomic_update(struct drm_plane *plane,
>   						       struct drm_atomic_state *state)
>   {
> @@ -762,10 +722,6 @@ static struct drm_plane_state *ssd130x_primary_plane_duplicate_state(struct drm_
>   	if (!ssd130x_state)
>   		return NULL;
>   
> -	/* The buffers are not duplicated and are allocated in .atomic_check */
> -	ssd130x_state->buffer = NULL;
> -	ssd130x_state->data_array = NULL;
> -
>   	new_shadow_plane_state = &ssd130x_state->base;
>   
>   	__drm_gem_duplicate_shadow_plane_state(plane, new_shadow_plane_state);
> @@ -778,9 +734,6 @@ static void ssd130x_primary_plane_destroy_state(struct drm_plane *plane,
>   {
>   	struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(state);
>   
> -	kfree(ssd130x_state->data_array);
> -	kfree(ssd130x_state->buffer);
> -
>   	__drm_gem_destroy_shadow_plane_state(&ssd130x_state->base);
>   
>   	kfree(ssd130x_state);
> @@ -788,7 +741,7 @@ static void ssd130x_primary_plane_destroy_state(struct drm_plane *plane,
>   
>   static const struct drm_plane_helper_funcs ssd130x_primary_plane_helper_funcs = {
>   	DRM_GEM_SHADOW_PLANE_HELPER_FUNCS,
> -	.atomic_check = ssd130x_primary_plane_helper_atomic_check,
> +	.atomic_check = drm_plane_helper_atomic_check,
>   	.atomic_update = ssd130x_primary_plane_helper_atomic_update,
>   	.atomic_disable = ssd130x_primary_plane_helper_atomic_disable,
>   };
> @@ -818,6 +771,59 @@ static enum drm_mode_status ssd130x_crtc_helper_mode_valid(struct drm_crtc *crtc
>   	return MODE_OK;
>   }
>   
> +int ssd130x_crtc_helper_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)
> +{
> +	struct drm_device *drm = crtc->dev;
> +	struct ssd130x_device *ssd130x = drm_to_ssd130x(drm);
> +	struct drm_plane *plane = crtc->primary;

Using 'primary' is deprecated. You can only refer from plane to crtc 
easily. The other direction is complicated.

> +	struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane);
> +	struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state);

I'd store these pointers in the CRTC state and fetch them from within 
ssd130x_primary_plane_helper_atomic_update() during the display update. 
This guarantees that the pointer addresses are always legal.

Besides the pointers, the CRTC state can also store the primary plane 
format, which you update from the plane's atomic check. By doing so, you 
won't need to refer to the plane state from the CRTC's atomic_check. The 
plane's atomic_check runs before the CRTC's atomic_check. [1]

The struct ssd130x_plane_state can then be replaced by standard DRM 
shadow-plane state. It also solves the problem with 'crtc->primary'.

[1] 
https://elixir.bootlin.com/linux/v6.5/source/drivers/gpu/drm/drm_atomic_helper.c#L974

> +	unsigned int page_height = ssd130x->device_info->page_height;
> +	unsigned int pages = DIV_ROUND_UP(ssd130x->height, page_height);
> +	const struct drm_format_info *fi;
> +	unsigned int pitch;
> +	int ret;
> +
> +	ret = drm_crtc_helper_atomic_check(crtc, state);
> +	if (ret)
> +		return ret;
> +
> +	ssd130x_state->data_array = kcalloc(ssd130x->width, pages, GFP_KERNEL);

Do you really need kcalloc here? kmalloc seems good enough.

> +	if (!ssd130x_state->data_array)
> +		return -ENOMEM;
> +
> +	if (plane_state->fb->format->format != DRM_FORMAT_R1) {
> +		fi = drm_format_info(DRM_FORMAT_R1);
> +		if (!fi)
> +			return -EINVAL;
> +
> +		pitch = drm_format_info_min_pitch(fi, 0, ssd130x->width);
> +
> +		ssd130x_state->buffer = kcalloc(pitch, ssd130x->height, GFP_KERNEL);

Same question about kcalloc.

> +		if (!ssd130x_state->buffer) {
> +			kfree(ssd130x_state->data_array);
> +			/* Set to prevent a double free in .atomic_destroy_state() */
> +			ssd130x_state->data_array = NULL;
> +			return -ENOMEM;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static void ssd130x_crtc_destroy_state(struct drm_crtc *crtc,
> +				       struct drm_crtc_state *state)
> +{
> +	struct drm_plane *plane = crtc->primary;
> +	struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state->state, plane);
> +	struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state);
> +
> +	drm_atomic_helper_crtc_destroy_state(crtc, state);
> +
> +	kfree(ssd130x_state->data_array);
> +	kfree(ssd130x_state->buffer);

These cross references among state-helpers have the potential to blow up. 
It's not really clear if there even is a plane state here.

Also see my comment on the allocation of these buffers, which would 
solve this problem as well.

Best regards
Thomas

> +}
> +
>   /*
>    * The CRTC is always enabled. Screen updates are performed by
>    * the primary plane's atomic_update function. Disabling clears
> @@ -825,7 +831,7 @@ static enum drm_mode_status ssd130x_crtc_helper_mode_valid(struct drm_crtc *crtc
>    */
>   static const struct drm_crtc_helper_funcs ssd130x_crtc_helper_funcs = {
>   	.mode_valid = ssd130x_crtc_helper_mode_valid,
> -	.atomic_check = drm_crtc_helper_atomic_check,
> +	.atomic_check = ssd130x_crtc_helper_atomic_check,
>   };
>   
>   static const struct drm_crtc_funcs ssd130x_crtc_funcs = {
> @@ -834,7 +840,7 @@ static const struct drm_crtc_funcs ssd130x_crtc_funcs = {
>   	.set_config = drm_atomic_helper_set_config,
>   	.page_flip = drm_atomic_helper_page_flip,
>   	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
> -	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
> +	.atomic_destroy_state = ssd130x_crtc_destroy_state,
>   };
>   
>   static void ssd130x_encoder_helper_atomic_enable(struct drm_encoder *encoder,
Geert Uytterhoeven Aug. 30, 2023, 7:40 a.m. UTC | #2
Hi Thomas,

On Wed, Aug 30, 2023 at 9:08 AM Thomas Zimmermann <tzimmermann@suse.de> wrote:
> Am 30.08.23 um 08:25 schrieb Javier Martinez Canillas:
> > The commit 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's
> > .atomic_check() callback") moved the allocation of the intermediate and
> > HW buffers from the encoder's .atomic_enable callback to primary plane's
> > .atomic_check callback.
> >
> > This was suggested by Maxime Ripard because drivers aren't allowed to fail
> > after drm_atomic_helper_swap_state() has been called, and the encoder's
> > .atomic_enable happens after the new atomic state has been swapped.
> >
> > But that change caused a performance regression in very slow platforms,
> > since now the allocation happens for every plane's atomic state commit.
> > For example, Geert Uytterhoeven reports that is the case on a VexRiscV
> > softcore (RISC-V CPU implementation on an FPGA).
> >
> > To prevent that, move the move the buffers' allocation and free to the
>
> Double 'move the'
>
> And maybe buffer's rather than buffers'
>
> > CRTC's .atomic_check and .atomic_destroy_state callbacks, so that only
> > happens on a modeset. Since the intermediate buffer is only needed when
> > not using the controller native format (R1), doing the buffer allocation
> > at that CRTC's .atomic_check time would be enough.
> >
> > Fixes: 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's .atomic_check() callback")
> > Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
> > Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>

Javier: thanks for your patch!

> Besides the pointers, the CRTC state can also store the primary plane
> format, which you update from the plane's atomic check. By doing so, you
> wont need to refer to the plane state from the CRTC's atomic_check. The
> plane's atomic_check runs before the CRTC's atomic_check. [1]

I haven't tested Javier's patch yet, but does this mean that his patch
won't help?

The problem I saw was that these buffers were allocated and freed
over and over again on each flash of the cursor of the text console
on top of the emulated frame buffer device.

Gr{oetje,eeting}s,

                        Geert
Thomas Zimmermann Aug. 30, 2023, 7:45 a.m. UTC | #3
Hi Geert

Am 30.08.23 um 09:40 schrieb Geert Uytterhoeven:
> Hi Thomas,
> 
> On Wed, Aug 30, 2023 at 9:08 AM Thomas Zimmermann <tzimmermann@suse.de> wrote:
>> Am 30.08.23 um 08:25 schrieb Javier Martinez Canillas:
>>> The commit 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's
>>> .atomic_check() callback") moved the allocation of the intermediate and
>>> HW buffers from the encoder's .atomic_enable callback to primary plane's
>>> .atomic_check callback.
>>>
>>> This was suggested by Maxime Ripard because drivers aren't allowed to fail
>>> after drm_atomic_helper_swap_state() has been called, and the encoder's
>>> .atomic_enable happens after the new atomic state has been swapped.
>>>
>>> But that change caused a performance regression in very slow platforms,
>>> since now the allocation happens for every plane's atomic state commit.
>>> For example, Geert Uytterhoeven reports that is the case on a VexRiscV
>>> softcore (RISC-V CPU implementation on an FPGA).
>>>
>>> To prevent that, move the move the buffers' allocation and free to the
>>
>> Double 'move the'
>>

>> And maybe buffer's rather than buffers'

Scratch that remark.

>>
>>> CRTC's .atomic_check and .atomic_destroy_state callbacks, so that only
>>> happens on a modeset. Since the intermediate buffer is only needed when
>>> not using the controller native format (R1), doing the buffer allocation
>>> at that CRTC's .atomic_check time would be enough.
>>>
>>> Fixes: 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's .atomic_check() callback")
>>> Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
>>> Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
> 
> Javier: thanks for your patch!
> 
>> Besides the pointers, the CRTC state can also store the primary plane
>> format, which you update from the plane's atomic check. By doing so, you
>> wont need to refer to the plane state from the CRTC's atomic_check. The
>> plane's atomic_check runs before the CRTC's atomic_check. [1]
> 
> I haven't tested Javier's patch yet, but does this mean that his patch
> won't help?
> 
> The problem I saw was that these buffers were allocated and freed
> over and over again on each flash of the cursor of the text console
> on top of the emulated frame buffer device.

Javier's current patch should resolve this problem. The temporary 
buffers are now only allocated on display-mode/format changes, but not 
on each single screen update. My review concerns only the implementation.

Best regards
Thomas

> 
> Gr{oetje,eeting}s,
> 
>                          Geert
>
Thomas Zimmermann Sept. 1, 2023, 6:53 a.m. UTC | #4
Hi Javier,

another idea about this patch: why not just keep the allocation in the 
plane's atomic check, but store the temporary buffers in a plane struct. 
You'd only grow the arrays' length in atomic_check and later fetch the 
pointers in atomic_update. It needs some locking, but nothing complicated.

Best regards
Thomas

Am 30.08.23 um 08:25 schrieb Javier Martinez Canillas:
> The commit 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's
> .atomic_check() callback") moved the allocation of the intermediate and
> HW buffers from the encoder's .atomic_enable callback to primary plane's
> .atomic_check callback.
> 
> This was suggested by Maxime Ripard because drivers aren't allowed to fail
> after drm_atomic_helper_swap_state() has been called, and the encoder's
> .atomic_enable happens after the new atomic state has been swapped.
> 
> But that change caused a performance regression in very slow platforms,
> since now the allocation happens for every plane's atomic state commit.
> For example, Geert Uytterhoeven reports that is the case on a VexRiscV
> softcore (RISC-V CPU implementation on an FPGA).
> 
> To prevent that, move the move the buffers' allocation and free to the
> CRTC's .atomic_check and .atomic_destroy_state callbacks, so that only
> happens on a modeset. Since the intermediate buffer is only needed when
> not using the controller native format (R1), doing the buffer allocation
> at that CRTC's .atomic_check time would be enough.
> 
> Fixes: 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's .atomic_check() callback")
> Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
> Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
> ---
> Hello,
> 
> This is a RFC because I'm not sure if there is a net benefit after this
> change. I find the currect code much cleaner and less error prone, even
> when Geert reports that performs worse on his (very slow) platform.
> 
> But I'm still posting it to see what others think. I've tested the patch
> on an I2C ssd1306 OLED and found no regressions.
> 
> The patch is on top on Geert's latest patch-set that adds support for the
> DRM_FORMAT_R1 to the ssd130x driver:
> 
> https://lore.kernel.org/dri-devel/cover.1692888745.git.geert@linux-m68k.org
> 
> Best regards,
> Javier
> 
>   drivers/gpu/drm/solomon/ssd130x.c | 106 ++++++++++++++++--------------
>   1 file changed, 56 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c
> index 0d2b36ba4011..60536cd0c42d 100644
> --- a/drivers/gpu/drm/solomon/ssd130x.c
> +++ b/drivers/gpu/drm/solomon/ssd130x.c
> @@ -650,46 +650,6 @@ static int ssd130x_fb_blit_rect(struct drm_plane_state *state,
>   	return ret;
>   }
>   
> -static int ssd130x_primary_plane_helper_atomic_check(struct drm_plane *plane,
> -						     struct drm_atomic_state *state)
> -{
> -	struct drm_device *drm = plane->dev;
> -	struct ssd130x_device *ssd130x = drm_to_ssd130x(drm);
> -	struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane);
> -	struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state);
> -	unsigned int page_height = ssd130x->device_info->page_height;
> -	unsigned int pages = DIV_ROUND_UP(ssd130x->height, page_height);
> -	const struct drm_format_info *fi;
> -	unsigned int pitch;
> -	int ret;
> -
> -	ret = drm_plane_helper_atomic_check(plane, state);
> -	if (ret)
> -		return ret;
> -
> -	ssd130x_state->data_array = kcalloc(ssd130x->width, pages, GFP_KERNEL);
> -	if (!ssd130x_state->data_array)
> -		return -ENOMEM;
> -
> -	if (plane_state->fb->format->format != DRM_FORMAT_R1) {
> -		fi = drm_format_info(DRM_FORMAT_R1);
> -		if (!fi)
> -			return -EINVAL;
> -
> -		pitch = drm_format_info_min_pitch(fi, 0, ssd130x->width);
> -
> -		ssd130x_state->buffer = kcalloc(pitch, ssd130x->height, GFP_KERNEL);
> -		if (!ssd130x_state->buffer) {
> -			kfree(ssd130x_state->data_array);
> -			/* Set to prevent a double free in .atomic_destroy_state() */
> -			ssd130x_state->data_array = NULL;
> -			return -ENOMEM;
> -		}
> -	}
> -
> -	return 0;
> -}
> -
>   static void ssd130x_primary_plane_helper_atomic_update(struct drm_plane *plane,
>   						       struct drm_atomic_state *state)
>   {
> @@ -762,10 +722,6 @@ static struct drm_plane_state *ssd130x_primary_plane_duplicate_state(struct drm_
>   	if (!ssd130x_state)
>   		return NULL;
>   
> -	/* The buffers are not duplicated and are allocated in .atomic_check */
> -	ssd130x_state->buffer = NULL;
> -	ssd130x_state->data_array = NULL;
> -
>   	new_shadow_plane_state = &ssd130x_state->base;
>   
>   	__drm_gem_duplicate_shadow_plane_state(plane, new_shadow_plane_state);
> @@ -778,9 +734,6 @@ static void ssd130x_primary_plane_destroy_state(struct drm_plane *plane,
>   {
>   	struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(state);
>   
> -	kfree(ssd130x_state->data_array);
> -	kfree(ssd130x_state->buffer);
> -
>   	__drm_gem_destroy_shadow_plane_state(&ssd130x_state->base);
>   
>   	kfree(ssd130x_state);
> @@ -788,7 +741,7 @@ static void ssd130x_primary_plane_destroy_state(struct drm_plane *plane,
>   
>   static const struct drm_plane_helper_funcs ssd130x_primary_plane_helper_funcs = {
>   	DRM_GEM_SHADOW_PLANE_HELPER_FUNCS,
> -	.atomic_check = ssd130x_primary_plane_helper_atomic_check,
> +	.atomic_check = drm_plane_helper_atomic_check,
>   	.atomic_update = ssd130x_primary_plane_helper_atomic_update,
>   	.atomic_disable = ssd130x_primary_plane_helper_atomic_disable,
>   };
> @@ -818,6 +771,59 @@ static enum drm_mode_status ssd130x_crtc_helper_mode_valid(struct drm_crtc *crtc
>   	return MODE_OK;
>   }
>   
> +int ssd130x_crtc_helper_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)
> +{
> +	struct drm_device *drm = crtc->dev;
> +	struct ssd130x_device *ssd130x = drm_to_ssd130x(drm);
> +	struct drm_plane *plane = crtc->primary;
> +	struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane);
> +	struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state);
> +	unsigned int page_height = ssd130x->device_info->page_height;
> +	unsigned int pages = DIV_ROUND_UP(ssd130x->height, page_height);
> +	const struct drm_format_info *fi;
> +	unsigned int pitch;
> +	int ret;
> +
> +	ret = drm_crtc_helper_atomic_check(crtc, state);
> +	if (ret)
> +		return ret;
> +
> +	ssd130x_state->data_array = kcalloc(ssd130x->width, pages, GFP_KERNEL);
> +	if (!ssd130x_state->data_array)
> +		return -ENOMEM;
> +
> +	if (plane_state->fb->format->format != DRM_FORMAT_R1) {
> +		fi = drm_format_info(DRM_FORMAT_R1);
> +		if (!fi)
> +			return -EINVAL;
> +
> +		pitch = drm_format_info_min_pitch(fi, 0, ssd130x->width);
> +
> +		ssd130x_state->buffer = kcalloc(pitch, ssd130x->height, GFP_KERNEL);
> +		if (!ssd130x_state->buffer) {
> +			kfree(ssd130x_state->data_array);
> +			/* Set to prevent a double free in .atomic_destroy_state() */
> +			ssd130x_state->data_array = NULL;
> +			return -ENOMEM;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static void ssd130x_crtc_destroy_state(struct drm_crtc *crtc,
> +				       struct drm_crtc_state *state)
> +{
> +	struct drm_plane *plane = crtc->primary;
> +	struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state->state, plane);
> +	struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state);
> +
> +	drm_atomic_helper_crtc_destroy_state(crtc, state);
> +
> +	kfree(ssd130x_state->data_array);
> +	kfree(ssd130x_state->buffer);
> +}
> +
>   /*
>    * The CRTC is always enabled. Screen updates are performed by
>    * the primary plane's atomic_update function. Disabling clears
> @@ -825,7 +831,7 @@ static enum drm_mode_status ssd130x_crtc_helper_mode_valid(struct drm_crtc *crtc
>    */
>   static const struct drm_crtc_helper_funcs ssd130x_crtc_helper_funcs = {
>   	.mode_valid = ssd130x_crtc_helper_mode_valid,
> -	.atomic_check = drm_crtc_helper_atomic_check,
> +	.atomic_check = ssd130x_crtc_helper_atomic_check,
>   };
>   
>   static const struct drm_crtc_funcs ssd130x_crtc_funcs = {
> @@ -834,7 +840,7 @@ static const struct drm_crtc_funcs ssd130x_crtc_funcs = {
>   	.set_config = drm_atomic_helper_set_config,
>   	.page_flip = drm_atomic_helper_page_flip,
>   	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
> -	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
> +	.atomic_destroy_state = ssd130x_crtc_destroy_state,
>   };
>   
>   static void ssd130x_encoder_helper_atomic_enable(struct drm_encoder *encoder,
Javier Martinez Canillas Sept. 1, 2023, 7:48 a.m. UTC | #5
Thomas Zimmermann <tzimmermann@suse.de> writes:

> Hi Javier,
>
> another idea about this patch: why not just keep the allocation in the 
> plane's atomic check, but store the temporary buffers in a plane struct. 
> You'd only grow the arrays length in atomic_check and later fetch the 
> pointers in atomic_update. It needs some locking, but nothing complicated.
>

Yes, that would work too. Another option is to just move the buffers to
struct ssd130x_device as it was before commit 45b58669e532 ("drm/ssd130x:
Allocate buffer in the plane's .atomic_check() callback") but just make
them fixed arrays with the size of the biggest format.

That will be some memory wasted but will prevent the problem of trying to
allocate buffers after drm_atomic_helper_swap_state() has been called.

> Best regards
> Thomas
>
> Am 30.08.23 um 08:25 schrieb Javier Martinez Canillas:
Maxime Ripard Sept. 1, 2023, 8:22 a.m. UTC | #6
Hi,

On Wed, Aug 30, 2023 at 08:25:08AM +0200, Javier Martinez Canillas wrote:
> The commit 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's
> .atomic_check() callback") moved the allocation of the intermediate and
> HW buffers from the encoder's .atomic_enable callback to primary plane's
> .atomic_check callback.
> 
> This was suggested by Maxime Ripard because drivers aren't allowed to fail
> after drm_atomic_helper_swap_state() has been called, and the encoder's
> .atomic_enable happens after the new atomic state has been swapped.
> 
> But that change caused a performance regression in very slow platforms,
> since now the allocation happens for every plane's atomic state commit.
> For example, Geert Uytterhoeven reports that is the case on a VexRiscV
> softcore (RISC-V CPU implementation on an FPGA).

I'd like to have numbers on that. It's a bit surprising to me that,
given how many objects we already allocate during a commit, two small
additional allocations affect performances so dramatically, even on a
slow platform.

Maxime
Maxime Ripard Sept. 1, 2023, 8:25 a.m. UTC | #7
On Fri, Sep 01, 2023 at 09:48:09AM +0200, Javier Martinez Canillas wrote:
> Thomas Zimmermann <tzimmermann@suse.de> writes:
> 
> > Hi Javier,
> >
> > another idea about this patch: why not just keep the allocation in the 
> > plane's atomic check, but store the temporary buffers in a plane struct. 
> > You'd only grow the arrays length in atomic_check and later fetch the 
> > pointers in atomic_update. It needs some locking, but nothing complicated.
> >
> 
> Yes, that would work too. Another option is to just move the buffers to
> struct ssd130x_device as it was before commit 45b58669e532 ("drm/ssd130x:
> Allocate buffer in the plane's .atomic_check() callback") but just make
> them fixed arrays with the size of the biggest format.
> 
> That will be some memory wasted but will prevent the problem of trying to
> allocate buffers after drm_atomic_helper_swap_state() has been called.

If we want to go that road, we don't even need an extra allocation, it
can be part of the state or object structure itself.

Maxime
Geert Uytterhoeven Sept. 1, 2023, 8:36 a.m. UTC | #8
Hi Maxime,

On Fri, Sep 1, 2023 at 10:22 AM Maxime Ripard <mripard@kernel.org> wrote:
> On Wed, Aug 30, 2023 at 08:25:08AM +0200, Javier Martinez Canillas wrote:
> > The commit 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's
> > .atomic_check() callback") moved the allocation of the intermediate and
> > HW buffers from the encoder's .atomic_enable callback to primary plane's
> > .atomic_check callback.
> >
> > This was suggested by Maxime Ripard because drivers aren't allowed to fail
> > after drm_atomic_helper_swap_state() has been called, and the encoder's
> > .atomic_enable happens after the new atomic state has been swapped.
> >
> > But that change caused a performance regression in very slow platforms,
> > since now the allocation happens for every plane's atomic state commit.
> > For example, Geert Uytterhoeven reports that is the case on a VexRiscV
> > softcore (RISC-V CPU implementation on an FPGA).
>
> I'd like to have numbers on that. It's a bit surprising to me that,
> given how many objects we already allocate during a commit, two small
> additional allocations affect performances so dramatically, even on a
> slow platform.

To be fair, I didn't benchmark that.  Perhaps it's just too slow due to
all these other allocations (and whatever else happens).

I just find it extremely silly to allocate a buffer over and over again,
while we know that buffer is needed for each and every display update.

Gr{oetje,eeting}s,

                        Geert
Javier Martinez Canillas Sept. 1, 2023, 9:19 a.m. UTC | #9
Maxime Ripard <mripard@kernel.org> writes:

> On Fri, Sep 01, 2023 at 09:48:09AM +0200, Javier Martinez Canillas wrote:
>> Thomas Zimmermann <tzimmermann@suse.de> writes:
>> 
>> > Hi Javier,
>> >
>> > another idea about this patch: why not just keep the allocation in the 
>> > plane's atomic check, but store the temporary buffers in a plane struct. 
>> > You'd only grow the arrays length in atomic_check and later fetch the 
>> > pointers in atomic_update. It needs some locking, but nothing complicated.
>> >
>> 
>> Yes, that would work too. Another option is to just move the buffers to
>> struct ssd130x_device as it was before commit 45b58669e532 ("drm/ssd130x:
>> Allocate buffer in the plane's .atomic_check() callback") but just make
>> them fixed arrays with the size of the biggest format.
>> 
>> That will be some memory wasted but will prevent the problem of trying to
>> allocate buffers after drm_atomic_helper_swap_state() has been called.
>
> If we want to go that road, we don't even need an extra allocation, it
> can be part of the state or object structure itself.
>

Yes, I meant to have it as fixed-length arrays. But still the best option
seems to be to allocate them but in the CRTC's .atomic_check() and store
them in a CRTC private state as Thomas suggested.

Geert will post a v2 of his R1 support patches, I'll wait for those and
after that try to implement Thomas' suggestion.
Javier Martinez Canillas Sept. 1, 2023, 9:23 a.m. UTC | #10
Geert Uytterhoeven <geert@linux-m68k.org> writes:

> Hi Maxime,
>
> On Fri, Sep 1, 2023 at 10:22 AM Maxime Ripard <mripard@kernel.org> wrote:
>> On Wed, Aug 30, 2023 at 08:25:08AM +0200, Javier Martinez Canillas wrote:
>> > The commit 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's
>> > .atomic_check() callback") moved the allocation of the intermediate and
>> > HW buffers from the encoder's .atomic_enable callback to primary plane's
>> > .atomic_check callback.
>> >
>> > This was suggested by Maxime Ripard because drivers aren't allowed to fail
>> > after drm_atomic_helper_swap_state() has been called, and the encoder's
>> > .atomic_enable happens after the new atomic state has been swapped.
>> >
>> > But that change caused a performance regression in very slow platforms,
>> > since now the allocation happens for every plane's atomic state commit.
>> > For example, Geert Uytterhoeven reports that is the case on a VexRiscV
>> > softcore (RISC-V CPU implementation on an FPGA).
>>
>> I'd like to have numbers on that. It's a bit surprising to me that,
>> given how many objects we already allocate during a commit, two small
>> additional allocations affect performances so dramatically, even on a
>> slow platform.
>
> To be fair, I didn't benchmark that.  Perhaps it's just too slow due to
> all these other allocations (and whatever else happens).
>
> I just find it extremely silly to allocate a buffer over and over again,
> while we know that buffer is needed for each and every display update.
>

It's not efficient, that's true, but on the other hand we have other objects
that are destroyed and created on each atomic update.

In the ssd1307fb driver, the buffer is allocated on ssd1307fb_update_rect()
(by calling the ssd1307fb_alloc_array() function), so it also happens for
every display update in the fbdev driver.

> Gr{oetje,eeting}s,
>
Thomas Zimmermann Sept. 1, 2023, 10:59 a.m. UTC | #11
Hi

Am 01.09.23 um 09:48 schrieb Javier Martinez Canillas:
> Thomas Zimmermann <tzimmermann@suse.de> writes:
> 
>> Hi Javier,
>>
>> another idea about this patch: why not just keep the allocation in the
>> plane's atomic check, but store the temporary buffers in a plane struct.
>> You'd only grow the arrays length in atomic_check and later fetch the
>> pointers in atomic_update. It needs some locking, but nothing complicated.
>>
> 
> Yes, that would work too. Another option is to just move the buffers to
> struct ssd130x_device as it was before commit 45b58669e532 ("drm/ssd130x:

Adding something like a struct ssd130x_plane that holds the temporary 
memory has the advantage of making a clear connection between the memory 
and the plane. If nothing else, to the next programmer reading the code.

> Allocate buffer in the plane's .atomic_check() callback") but just make
> them fixed arrays with the size of the biggest format.

What is the size of the biggest format? I haven't read the driver code, 
but a shadow plane can be up to 4096 pixels wide. It's 16 KiB for 
XRGB8888. Not too much, but not nothing either.

To reduce allocation and/or locking overhead, you could try to update 
the pointers in the plane struct with RCU semantics. Plane updates would 
use whatever pointer they saw, while the plane's atomic_check could grow 
the memory buffers as necessary.

Best regards
Thomas

> 
> That will be some memory wasted but will prevent the problem of trying to
> allocate buffers after drm_atomic_helper_swap_state() has been called.
> 
>> Best regards
>> Thomas
>>
>> Am 30.08.23 um 08:25 schrieb Javier Martinez Canillas:
>
Maxime Ripard Sept. 1, 2023, noon UTC | #12
On Fri, Sep 01, 2023 at 10:36:17AM +0200, Geert Uytterhoeven wrote:
> Hi Maxime,
> 
> On Fri, Sep 1, 2023 at 10:22 AM Maxime Ripard <mripard@kernel.org> wrote:
> > On Wed, Aug 30, 2023 at 08:25:08AM +0200, Javier Martinez Canillas wrote:
> > > The commit 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's
> > > .atomic_check() callback") moved the allocation of the intermediate and
> > > HW buffers from the encoder's .atomic_enable callback to primary plane's
> > > .atomic_check callback.
> > >
> > > This was suggested by Maxime Ripard because drivers aren't allowed to fail
> > > after drm_atomic_helper_swap_state() has been called, and the encoder's
> > > .atomic_enable happens after the new atomic state has been swapped.
> > >
> > > But that change caused a performance regression in very slow platforms,
> > > since now the allocation happens for every plane's atomic state commit.
> > > For example, Geert Uytterhoeven reports that is the case on a VexRiscV
> > > softcore (RISC-V CPU implementation on an FPGA).
> >
> > I'd like to have numbers on that. It's a bit surprising to me that,
> > given how many objects we already allocate during a commit, two small
> > additional allocations affect performances so dramatically, even on a
> > slow platform.
> 
> To be fair, I didn't benchmark that.  Perhaps it's just too slow due to
> all these other allocations (and whatever else happens).
> 
> I just find it extremely silly to allocate a buffer over and over again,
> while we know that buffer is needed for each and every display update.

Maybe it's silly, but I guess it depends on what you want to optimize
for. You won't know the size of that buffer before you're in
atomic_check. So it's a different trade-off than you would like, but I
wouldn't call it extremely silly.

Maxime
Geert Uytterhoeven Sept. 1, 2023, 12:08 p.m. UTC | #13
Hi Maxime,

On Fri, Sep 1, 2023 at 2:00 PM Maxime Ripard <mripard@kernel.org> wrote:
> On Fri, Sep 01, 2023 at 10:36:17AM +0200, Geert Uytterhoeven wrote:
> > On Fri, Sep 1, 2023 at 10:22 AM Maxime Ripard <mripard@kernel.org> wrote:
> > > On Wed, Aug 30, 2023 at 08:25:08AM +0200, Javier Martinez Canillas wrote:
> > > > The commit 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's
> > > > .atomic_check() callback") moved the allocation of the intermediate and
> > > > HW buffers from the encoder's .atomic_enable callback to primary plane's
> > > > .atomic_check callback.
> > > >
> > > > This was suggested by Maxime Ripard because drivers aren't allowed to fail
> > > > after drm_atomic_helper_swap_state() has been called, and the encoder's
> > > > .atomic_enable happens after the new atomic state has been swapped.
> > > >
> > > > But that change caused a performance regression in very slow platforms,
> > > > since now the allocation happens for every plane's atomic state commit.
> > > > For example, Geert Uytterhoeven reports that is the case on a VexRiscV
> > > > softcore (RISC-V CPU implementation on an FPGA).
> > >
> > > I'd like to have numbers on that. It's a bit surprising to me that,
> > > given how many objects we already allocate during a commit, two small
> > > additional allocations affect performances so dramatically, even on a
> > > slow platform.
> >
> > To be fair, I didn't benchmark that.  Perhaps it's just too slow due to
> > all these other allocations (and whatever else happens).
> >
> > I just find it extremely silly to allocate a buffer over and over again,
> > while we know that buffer is needed for each and every display update.
>
> Maybe it's silly, but I guess it depends on what you want to optimize
> for. You won't know the size of that buffer before you're in
> atomic_check. So it's a different trade-off than you would like, but I
> wouldn't call it extremely silly.

The size of ssd130x_plane_state.data_array[] is fixed, and depends
on the actual display connected.
The size of ssd130x_plane_state.buffer[]  is also fixed, and depends
on the plane's size (which is currently fixed to the display size).

Gr{oetje,eeting}s,

                        Geert
Javier Martinez Canillas Sept. 1, 2023, 12:21 p.m. UTC | #14
Geert Uytterhoeven <geert@linux-m68k.org> writes:

Hello Geert,

> Hi Maxime,
>
> On Fri, Sep 1, 2023 at 2:00 PM Maxime Ripard <mripard@kernel.org> wrote:
>> On Fri, Sep 01, 2023 at 10:36:17AM +0200, Geert Uytterhoeven wrote:
>> > On Fri, Sep 1, 2023 at 10:22 AM Maxime Ripard <mripard@kernel.org> wrote:
>> > > On Wed, Aug 30, 2023 at 08:25:08AM +0200, Javier Martinez Canillas wrote:
>> > > > The commit 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's
>> > > > .atomic_check() callback") moved the allocation of the intermediate and
>> > > > HW buffers from the encoder's .atomic_enable callback to primary plane's
>> > > > .atomic_check callback.
>> > > >
>> > > > This was suggested by Maxime Ripard because drivers aren't allowed to fail
>> > > > after drm_atomic_helper_swap_state() has been called, and the encoder's
>> > > > .atomic_enable happens after the new atomic state has been swapped.
>> > > >
>> > > > But that change caused a performance regression in very slow platforms,
>> > > > since now the allocation happens for every plane's atomic state commit.
>> > > > For example, Geert Uytterhoeven reports that is the case on a VexRiscV
>> > > > softcore (RISC-V CPU implementation on an FPGA).
>> > >
>> > > I'd like to have numbers on that. It's a bit surprising to me that,
>> > > given how many objects we already allocate during a commit, two small
>> > > additional allocations affect performances so dramatically, even on a
>> > > slow platform.
>> >
>> > To be fair, I didn't benchmark that.  Perhaps it's just too slow due to
>> > all these other allocations (and whatever else happens).
>> >
>> > I just find it extremely silly to allocate a buffer over and over again,
>> > while we know that buffer is needed for each and every display update.
>>
>> Maybe it's silly, but I guess it depends on what you want to optimize
>> for. You won't know the size of that buffer before you're in
>> atomic_check. So it's a different trade-off than you would like, but I
>> wouldn't call it extremely silly.
>
> The size of ssd130x_plane_state.data_array[] is fixed, and depends
> on the actual display connected.

Agreed.

> The size of ssd130x_plane_state.buffer[]  is also fixed, and depends
> on the plane's size (which is currently fixed to the display size).
>

Well, one could say that also depends on the format chosen. That is, if
XRGB8888 is used then its size should be the fixed display size but if R1
is used its size could be 0 (since the shadow plane will already store the
pixels in R1 format).

> Gr{oetje,eeting}s,
>
Maxime Ripard Sept. 4, 2023, 8:04 a.m. UTC | #15
On Fri, Sep 01, 2023 at 02:08:11PM +0200, Geert Uytterhoeven wrote:
> Hi Maxime,
> 
> On Fri, Sep 1, 2023 at 2:00 PM Maxime Ripard <mripard@kernel.org> wrote:
> > On Fri, Sep 01, 2023 at 10:36:17AM +0200, Geert Uytterhoeven wrote:
> > > On Fri, Sep 1, 2023 at 10:22 AM Maxime Ripard <mripard@kernel.org> wrote:
> > > > On Wed, Aug 30, 2023 at 08:25:08AM +0200, Javier Martinez Canillas wrote:
> > > > > The commit 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's
> > > > > .atomic_check() callback") moved the allocation of the intermediate and
> > > > > HW buffers from the encoder's .atomic_enable callback to primary plane's
> > > > > .atomic_check callback.
> > > > >
> > > > > This was suggested by Maxime Ripard because drivers aren't allowed to fail
> > > > > after drm_atomic_helper_swap_state() has been called, and the encoder's
> > > > > .atomic_enable happens after the new atomic state has been swapped.
> > > > >
> > > > > But that change caused a performance regression in very slow platforms,
> > > > > since now the allocation happens for every plane's atomic state commit.
> > > > > For example, Geert Uytterhoeven reports that is the case on a VexRiscV
> > > > > softcore (RISC-V CPU implementation on an FPGA).
> > > >
> > > > I'd like to have numbers on that. It's a bit surprising to me that,
> > > > given how many objects we already allocate during a commit, two small
> > > > additional allocations affect performances so dramatically, even on a
> > > > slow platform.
> > >
> > > To be fair, I didn't benchmark that.  Perhaps it's just too slow due to
> > > all these other allocations (and whatever else happens).
> > >
> > > I just find it extremely silly to allocate a buffer over and over again,
> > > while we know that buffer is needed for each and every display update.
> >
> > Maybe it's silly, but I guess it depends on what you want to optimize
> > for. You won't know the size of that buffer before you're in
> > atomic_check. So it's a different trade-off than you would like, but I
> > wouldn't call it extremely silly.
> 
> The size of ssd130x_plane_state.data_array[] is fixed, and depends
> on the actual display connected.

That one can be tied to the CRTC state if needed. It would only be
allocated on each modeset, so probably once for that kind of device.

> The size of ssd130x_plane_state.buffer[]  is also fixed, and depends
> on the plane's size (which is currently fixed to the display size).

Doesn't it depend on the format as well?

Maxime
Javier Martinez Canillas Sept. 6, 2023, 12:04 p.m. UTC | #16
Maxime Ripard <mripard@kernel.org> writes:

> On Fri, Sep 01, 2023 at 02:08:11PM +0200, Geert Uytterhoeven wrote:
>> Hi Maxime,
>> 
>> On Fri, Sep 1, 2023 at 2:00 PM Maxime Ripard <mripard@kernel.org> wrote:
>> > On Fri, Sep 01, 2023 at 10:36:17AM +0200, Geert Uytterhoeven wrote:
>> > > On Fri, Sep 1, 2023 at 10:22 AM Maxime Ripard <mripard@kernel.org> wrote:
>> > > > On Wed, Aug 30, 2023 at 08:25:08AM +0200, Javier Martinez Canillas wrote:
>> > > > > The commit 45b58669e532 ("drm/ssd130x: Allocate buffer in the plane's
>> > > > > .atomic_check() callback") moved the allocation of the intermediate and
>> > > > > HW buffers from the encoder's .atomic_enable callback to primary plane's
>> > > > > .atomic_check callback.
>> > > > >
>> > > > > This was suggested by Maxime Ripard because drivers aren't allowed to fail
>> > > > > after drm_atomic_helper_swap_state() has been called, and the encoder's
>> > > > > .atomic_enable happens after the new atomic state has been swapped.
>> > > > >
>> > > > > But that change caused a performance regression in very slow platforms,
>> > > > > since now the allocation happens for every plane's atomic state commit.
>> > > > > For example, Geert Uytterhoeven reports that is the case on a VexRiscV
>> > > > > softcore (RISC-V CPU implementation on an FPGA).
>> > > >
>> > > > I'd like to have numbers on that. It's a bit surprising to me that,
>> > > > given how many objects we already allocate during a commit, two small
>> > > > additional allocations affect performances so dramatically, even on a
>> > > > slow platform.
>> > >
>> > > To be fair, I didn't benchmark that.  Perhaps it's just too slow due to
>> > > all these other allocations (and whatever else happens).
>> > >
>> > > I just find it extremely silly to allocate a buffer over and over again,
>> > > while we know that buffer is needed for each and every display update.
>> >
>> > Maybe it's silly, but I guess it depends on what you want to optimize
>> > for. You won't know the size of that buffer before you're in
>> > atomic_check. So it's a different trade-off than you would like, but I
>> > wouldn't call it extremely silly.
>> 
>> The size of ssd130x_plane_state.data_array[] is fixed, and depends
>> on the actual display connected.
>
> That one can be tied to the CRTC state if needed. It would only be
> allocated on each modeset, so probably once for that kind of device.
>

Yes.

>> The size of ssd130x_plane_state.buffer[]  is also fixed, and depends
>> on the plane's size (which is currently fixed to the display size).
>
> Doesn't it depend on the format as well?
>

Yes and no. The buffer[] size is fixed, but whether that intermediate
buffer is needed or not will depend on whether the native format was chosen.

So one could say that is either 0 (not used) or the fixed size needed
to do the format conversion from XRGB8888 to R1.

> Maxime
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c
index 0d2b36ba4011..60536cd0c42d 100644
--- a/drivers/gpu/drm/solomon/ssd130x.c
+++ b/drivers/gpu/drm/solomon/ssd130x.c
@@ -650,46 +650,6 @@  static int ssd130x_fb_blit_rect(struct drm_plane_state *state,
 	return ret;
 }
 
-static int ssd130x_primary_plane_helper_atomic_check(struct drm_plane *plane,
-						     struct drm_atomic_state *state)
-{
-	struct drm_device *drm = plane->dev;
-	struct ssd130x_device *ssd130x = drm_to_ssd130x(drm);
-	struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane);
-	struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state);
-	unsigned int page_height = ssd130x->device_info->page_height;
-	unsigned int pages = DIV_ROUND_UP(ssd130x->height, page_height);
-	const struct drm_format_info *fi;
-	unsigned int pitch;
-	int ret;
-
-	ret = drm_plane_helper_atomic_check(plane, state);
-	if (ret)
-		return ret;
-
-	ssd130x_state->data_array = kcalloc(ssd130x->width, pages, GFP_KERNEL);
-	if (!ssd130x_state->data_array)
-		return -ENOMEM;
-
-	if (plane_state->fb->format->format != DRM_FORMAT_R1) {
-		fi = drm_format_info(DRM_FORMAT_R1);
-		if (!fi)
-			return -EINVAL;
-
-		pitch = drm_format_info_min_pitch(fi, 0, ssd130x->width);
-
-		ssd130x_state->buffer = kcalloc(pitch, ssd130x->height, GFP_KERNEL);
-		if (!ssd130x_state->buffer) {
-			kfree(ssd130x_state->data_array);
-			/* Set to prevent a double free in .atomic_destroy_state() */
-			ssd130x_state->data_array = NULL;
-			return -ENOMEM;
-		}
-	}
-
-	return 0;
-}
-
 static void ssd130x_primary_plane_helper_atomic_update(struct drm_plane *plane,
 						       struct drm_atomic_state *state)
 {
@@ -762,10 +722,6 @@  static struct drm_plane_state *ssd130x_primary_plane_duplicate_state(struct drm_
 	if (!ssd130x_state)
 		return NULL;
 
-	/* The buffers are not duplicated and are allocated in .atomic_check */
-	ssd130x_state->buffer = NULL;
-	ssd130x_state->data_array = NULL;
-
 	new_shadow_plane_state = &ssd130x_state->base;
 
 	__drm_gem_duplicate_shadow_plane_state(plane, new_shadow_plane_state);
@@ -778,9 +734,6 @@  static void ssd130x_primary_plane_destroy_state(struct drm_plane *plane,
 {
 	struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(state);
 
-	kfree(ssd130x_state->data_array);
-	kfree(ssd130x_state->buffer);
-
 	__drm_gem_destroy_shadow_plane_state(&ssd130x_state->base);
 
 	kfree(ssd130x_state);
@@ -788,7 +741,7 @@  static void ssd130x_primary_plane_destroy_state(struct drm_plane *plane,
 
 static const struct drm_plane_helper_funcs ssd130x_primary_plane_helper_funcs = {
 	DRM_GEM_SHADOW_PLANE_HELPER_FUNCS,
-	.atomic_check = ssd130x_primary_plane_helper_atomic_check,
+	.atomic_check = drm_plane_helper_atomic_check,
 	.atomic_update = ssd130x_primary_plane_helper_atomic_update,
 	.atomic_disable = ssd130x_primary_plane_helper_atomic_disable,
 };
@@ -818,6 +771,59 @@  static enum drm_mode_status ssd130x_crtc_helper_mode_valid(struct drm_crtc *crtc
 	return MODE_OK;
 }
 
+int ssd130x_crtc_helper_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)
+{
+	struct drm_device *drm = crtc->dev;
+	struct ssd130x_device *ssd130x = drm_to_ssd130x(drm);
+	struct drm_plane *plane = crtc->primary;
+	struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane);
+	struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state);
+	unsigned int page_height = ssd130x->device_info->page_height;
+	unsigned int pages = DIV_ROUND_UP(ssd130x->height, page_height);
+	const struct drm_format_info *fi;
+	unsigned int pitch;
+	int ret;
+
+	ret = drm_crtc_helper_atomic_check(crtc, state);
+	if (ret)
+		return ret;
+
+	ssd130x_state->data_array = kcalloc(ssd130x->width, pages, GFP_KERNEL);
+	if (!ssd130x_state->data_array)
+		return -ENOMEM;
+
+	if (plane_state->fb->format->format != DRM_FORMAT_R1) {
+		fi = drm_format_info(DRM_FORMAT_R1);
+		if (!fi)
+			return -EINVAL;
+
+		pitch = drm_format_info_min_pitch(fi, 0, ssd130x->width);
+
+		ssd130x_state->buffer = kcalloc(pitch, ssd130x->height, GFP_KERNEL);
+		if (!ssd130x_state->buffer) {
+			kfree(ssd130x_state->data_array);
+			/* Set to prevent a double free in .atomic_destroy_state() */
+			ssd130x_state->data_array = NULL;
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static void ssd130x_crtc_destroy_state(struct drm_crtc *crtc,
+				       struct drm_crtc_state *state)
+{
+	struct drm_plane *plane = crtc->primary;
+	struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state->state, plane);
+	struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state);
+
+	drm_atomic_helper_crtc_destroy_state(crtc, state);
+
+	kfree(ssd130x_state->data_array);
+	kfree(ssd130x_state->buffer);
+}
+
 /*
  * The CRTC is always enabled. Screen updates are performed by
  * the primary plane's atomic_update function. Disabling clears
@@ -825,7 +831,7 @@  static enum drm_mode_status ssd130x_crtc_helper_mode_valid(struct drm_crtc *crtc
  */
 static const struct drm_crtc_helper_funcs ssd130x_crtc_helper_funcs = {
 	.mode_valid = ssd130x_crtc_helper_mode_valid,
-	.atomic_check = drm_crtc_helper_atomic_check,
+	.atomic_check = ssd130x_crtc_helper_atomic_check,
 };
 
 static const struct drm_crtc_funcs ssd130x_crtc_funcs = {
@@ -834,7 +840,7 @@  static const struct drm_crtc_funcs ssd130x_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
 	.page_flip = drm_atomic_helper_page_flip,
 	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
-	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+	.atomic_destroy_state = ssd130x_crtc_destroy_state,
 };
 
 static void ssd130x_encoder_helper_atomic_enable(struct drm_encoder *encoder,