[v2,4/5] drm: Add and export function drm_gem_cma_sync_data

Message ID 20210307202835.253907-5-paul@crapouillou.net (mailing list archive)
State New, archived
Series: Add option to mmap GEM buffers cached

Commit Message

Paul Cercueil March 7, 2021, 8:28 p.m. UTC
This function can be used by drivers that use damage clips and have
CMA GEM objects backed by non-coherent memory. Calling this function
in a plane's .atomic_update ensures that all data in the backing
memory has been written out to RAM.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
---
 drivers/gpu/drm/drm_gem_cma_helper.c | 43 ++++++++++++++++++++++++++++
 include/drm/drm_gem_cma_helper.h     |  5 ++++
 2 files changed, 48 insertions(+)
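
For context, a minimal sketch (not part of the patch) of how a driver
might call the new helper from its plane's .atomic_update; the
example_* name is hypothetical:

/* Sketch only: sync the damaged area before programming the hardware */
static void example_plane_atomic_update(struct drm_plane *plane,
					struct drm_plane_state *old_state)
{
	struct drm_plane_state *state = plane->state;

	if (state && state->fb) {
		/* Write dirty CPU cache lines back to RAM (DMA_TO_DEVICE) */
		drm_gem_cma_sync_data(plane->dev->dev, old_state, state);
	}

	/* ... program the hardware for the new plane state ... */
}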

Comments

Paul Cercueil March 11, 2021, 12:33 p.m. UTC | #1
On Thu, 11 Mar 2021 at 12:28, Christoph Hellwig <hch@infradead.org> wrote:
> On Sun, Mar 07, 2021 at 08:28:34PM +0000, Paul Cercueil wrote:
>>  +	drm_atomic_for_each_plane_damage(&iter, &clip) {
>>  +		for (i = 0; i < finfo->num_planes; i++) {
>>  +			daddr = drm_fb_cma_get_gem_addr(state->fb, state, i);
>>  +
>>  +			/* Ignore x1/x2 values, invalidate complete lines */
>>  +			offset = clip.y1 * state->fb->pitches[i];
>>  +
>>  +			dma_sync_single_for_device(dev, daddr + offset,
>>  +				       (clip.y2 - clip.y1) * state->fb->pitches[i],
>>  +				       DMA_TO_DEVICE);
> 
> Are these helpers only ever used to transfer data to the device and
> never from it?  If so please clearly document that.

Yes. In the DRM world, are there cases where we transfer data from the 
device? I assume these cases are handled by v4l2 instead.

-Paul
Thomas Zimmermann March 15, 2021, 7:43 a.m. UTC | #2
Hi

On 11.03.21 at 13:33, Paul Cercueil wrote:
> 
> 
> On Thu, 11 Mar 2021 at 12:28, Christoph Hellwig <hch@infradead.org> wrote:
>> On Sun, Mar 07, 2021 at 08:28:34PM +0000, Paul Cercueil wrote:
>>>  +    drm_atomic_for_each_plane_damage(&iter, &clip) {
>>>  +        for (i = 0; i < finfo->num_planes; i++) {
>>>  +            daddr = drm_fb_cma_get_gem_addr(state->fb, state, i);
>>>  +
>>>  +            /* Ignore x1/x2 values, invalidate complete lines */
>>>  +            offset = clip.y1 * state->fb->pitches[i];
>>>  +
>>>  +            dma_sync_single_for_device(dev, daddr + offset,
>>>  +                       (clip.y2 - clip.y1) * state->fb->pitches[i],
>>>  +                       DMA_TO_DEVICE);
>>
>> Are these helpers only ever used to transfer data to the device and
>> never from it?  If so please clearly document that.
> 
> Yes. In the DRM world, are there cases where we transfer data from the 
> device? I assume these cases are handled by v4l2 instead.

Software rendering (i.e., anything wrt dumb buffers) likely reads back 
framebuffer content during blit operations. For devices where this is a 
slow operation (e.g., PCI read) we set struct 
drm_mode_config.prefer_shadow to hint renderers to use shadow buffering.

Best regards
Thomas

> 
> -Paul
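
A minimal sketch of the prefer_shadow hint Thomas describes
(illustrative only; the example_* name is hypothetical):

#include <drm/drm_mode_config.h>

static int example_mode_config_init(struct drm_device *dev)
{
	int ret;

	ret = drmm_mode_config_init(dev);
	if (ret)
		return ret;

	/* CPU read-back from this memory is slow (e.g. over PCI), so
	 * hint renderers to draw into a system-memory shadow buffer. */
	dev->mode_config.prefer_shadow = true;

	return 0;
}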
Paul Cercueil March 15, 2021, 11:59 a.m. UTC | #3
Hi Thomas,

On Mon, 15 Mar 2021 at 8:43, Thomas Zimmermann <tzimmermann@suse.de> wrote:
> Hi
> 
> On 11.03.21 at 13:33, Paul Cercueil wrote:
>> 
>> 
>> On Thu, 11 Mar 2021 at 12:28, Christoph Hellwig <hch@infradead.org> wrote:
>>> On Sun, Mar 07, 2021 at 08:28:34PM +0000, Paul Cercueil wrote:
>>>>  +    drm_atomic_for_each_plane_damage(&iter, &clip) {
>>>>  +        for (i = 0; i < finfo->num_planes; i++) {
>>>>  +            daddr = drm_fb_cma_get_gem_addr(state->fb, state, i);
>>>>  +
>>>>  +            /* Ignore x1/x2 values, invalidate complete lines */
>>>>  +            offset = clip.y1 * state->fb->pitches[i];
>>>>  +
>>>>  +            dma_sync_single_for_device(dev, daddr + offset,
>>>>  +                       (clip.y2 - clip.y1) * state->fb->pitches[i],
>>>>  +                       DMA_TO_DEVICE);
>>> 
>>> Are these helpers only ever used to transfer data to the device and
>>> never from it?  If so please clearly document that.
>> 
>> Yes. In the DRM world, are there cases where we transfer data from 
>> the device? I assume these cases are handled by v4l2 instead.
> 
> Software rendering (i.e., anything wrt dumb buffers) likely reads 
> back framebuffer content during blit operations. For devices where 
> this is a slow operation (e.g., PCI read) we set struct 
> drm_mode_config.prefer_shadow to hint renderers to use shadow 
> buffering.

This has been brought up a few times already; I answered it in the 
cover letter. In my case, *writes* (e.g. a plain memcpy) are also 
slower with a write-combine buffer than with a non-coherent buffer 
plus cache sync, so a shadow buffer wouldn't help here.

Cheers,
-Paul
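
To make the trade-off concrete, a rough sketch of the non-coherent
pattern Paul describes: CPU writes go through the cache (fast), then
dirty lines are written back before the device reads the buffer. All
names except the dma_* and memcpy calls are hypothetical:

#include <linux/dma-mapping.h>
#include <linux/string.h>

static void *example_fill_buffer(struct device *dev, size_t size,
				 const void *pixels, dma_addr_t *daddr)
{
	/* Cached (non-coherent) kernel mapping, not write-combine */
	void *vaddr = dma_alloc_noncoherent(dev, size, daddr,
					    DMA_TO_DEVICE, GFP_KERNEL);
	if (!vaddr)
		return NULL;

	memcpy(vaddr, pixels, size);	/* fast: cached writes */

	/* Write the dirty cache lines back so the device sees the data */
	dma_sync_single_for_device(dev, *daddr, size, DMA_TO_DEVICE);

	return vaddr;
}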

Patch

diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index e39b0464e19d..fdae54a18670 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -18,9 +18,14 @@ 
 #include <linux/slab.h>
 
 #include <drm/drm.h>
+#include <drm/drm_damage_helper.h>
 #include <drm/drm_device.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_plane.h>
 #include <drm/drm_vma_manager.h>
 
 /**
@@ -684,3 +689,41 @@  drm_gem_cma_prime_import_sg_table_vmap(struct drm_device *dev,
 	return obj;
 }
 EXPORT_SYMBOL(drm_gem_cma_prime_import_sg_table_vmap);
+
+/**
+ * drm_gem_cma_sync_data - Sync GEM object to non-coherent backing memory
+ * @dev: DRM device
+ * @old_state: Old plane state
+ * @state: New plane state
+ *
+ * This function can be used by drivers that use damage clips and have
+ * CMA GEM objects backed by non-coherent memory. Calling this function
+ * in a plane's .atomic_update ensures that all data in the backing
+ * memory has been written out to RAM.
+ */
+void drm_gem_cma_sync_data(struct device *dev,
+			   struct drm_plane_state *old_state,
+			   struct drm_plane_state *state)
+{
+	const struct drm_format_info *finfo = state->fb->format;
+	struct drm_atomic_helper_damage_iter iter;
+	unsigned int offset, i;
+	struct drm_rect clip;
+	dma_addr_t daddr;
+
+	drm_atomic_helper_damage_iter_init(&iter, old_state, state);
+
+	drm_atomic_for_each_plane_damage(&iter, &clip) {
+		for (i = 0; i < finfo->num_planes; i++) {
+			daddr = drm_fb_cma_get_gem_addr(state->fb, state, i);
+
+			/* Ignore x1/x2 values, invalidate complete lines */
+			offset = clip.y1 * state->fb->pitches[i];
+
+			dma_sync_single_for_device(dev, daddr + offset,
+				       (clip.y2 - clip.y1) * state->fb->pitches[i],
+				       DMA_TO_DEVICE);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(drm_gem_cma_sync_data);
diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h
index 6a3f7e1312cc..cdd3fb456916 100644
--- a/include/drm/drm_gem_cma_helper.h
+++ b/include/drm/drm_gem_cma_helper.h
@@ -7,6 +7,7 @@ 
 #include <drm/drm_gem.h>
 
 struct drm_mode_create_dumb;
+struct drm_plane_state;
 
 /**
  * struct drm_gem_cma_object - GEM object backed by CMA memory allocations
@@ -190,4 +191,8 @@  drm_gem_cma_prime_import_sg_table_vmap(struct drm_device *drm,
 				       struct dma_buf_attachment *attach,
 				       struct sg_table *sgt);
 
+void drm_gem_cma_sync_data(struct device *dev,
+			   struct drm_plane_state *old_state,
+			   struct drm_plane_state *state);
+
 #endif /* __DRM_GEM_CMA_HELPER_H__ */