Message ID | 20200226043041.289764-3-paul@crapouillou.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2,1/3] gpu/drm: ingenic: Add trick to support 16bpp on 24-bit panels | expand |
On Wed, Feb 26, 2020 at 01:30:41AM -0300, Paul Cercueil wrote: > Ingenic SoCs are most notably used in cheap chinese handheld gaming > consoles. There, the games and applications generally render in software > directly in the emulated framebuffer using SDL1. > > Since the emulated framebuffer is mapped as write-combine by default, > these applications start to run really slow as soon as alpha-blending is > used. > > Add a 'cached_gem_buffers' option to the ingenic-drm driver to mmap the > GEM buffers as fully cached to address this issue. > > v2: Use standard noncoherent DMA APIs > > Signed-off-by: Paul Cercueil <paul@crapouillou.net> Dumb buffers don't have any cache flushing controls, so without a ->dirty callback I'm not sure how exactly this works. I think you need a pile more here. Also there's a prefer_shadow bit that you're supposed to set in this case, and which userspace is supposed to obey. Also kinda surprised that fbdev userspace is this bad, since generally all of fbdev is wc. Traditionally at least. 
-Daniel > --- > drivers/gpu/drm/ingenic/ingenic-drm.c | 35 +++++++++++++++++++++++++-- > 1 file changed, 33 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/ingenic/ingenic-drm.c b/drivers/gpu/drm/ingenic/ingenic-drm.c > index 3f8cc98d41fe..e51ac8d62d27 100644 > --- a/drivers/gpu/drm/ingenic/ingenic-drm.c > +++ b/drivers/gpu/drm/ingenic/ingenic-drm.c > @@ -6,6 +6,8 @@ > > #include <linux/clk.h> > #include <linux/dma-mapping.h> > +#include <linux/dma-noncoherent.h> > +#include <linux/io.h> > #include <linux/module.h> > #include <linux/of_device.h> > #include <linux/platform_device.h> > @@ -30,6 +32,11 @@ > #include <drm/drm_probe_helper.h> > #include <drm/drm_vblank.h> > > +static bool ingenic_drm_cached_gem_buf; > +module_param_named(cached_gem_buffers, ingenic_drm_cached_gem_buf, bool, 0400); > +MODULE_PARM_DESC(cached_gem_buffers, > + "Enable fully cached GEM buffers [default=false]"); > + > #define JZ_REG_LCD_CFG 0x00 > #define JZ_REG_LCD_VSYNC 0x04 > #define JZ_REG_LCD_HSYNC 0x08 > @@ -379,15 +386,23 @@ static void ingenic_drm_plane_atomic_update(struct drm_plane *plane, > struct drm_plane_state *state = plane->state; > unsigned int width, height, cpp; > dma_addr_t addr; > + uint32_t len; > > if (state && state->fb) { > addr = drm_fb_cma_get_gem_addr(state->fb, state, 0); > + > width = state->src_w >> 16; > height = state->src_h >> 16; > cpp = state->fb->format->cpp[plane->index]; > + len = width * height * cpp; > + > + if (ingenic_drm_cached_gem_buf) { > + dma_cache_sync(priv->dev, phys_to_virt(addr), > + len, DMA_TO_DEVICE); > + } > > priv->dma_hwdesc->addr = addr; > - priv->dma_hwdesc->cmd = width * height * cpp / 4; > + priv->dma_hwdesc->cmd = len / 4; > priv->dma_hwdesc->cmd |= JZ_LCD_CMD_EOF_IRQ; > } > } > @@ -532,6 +547,22 @@ static void ingenic_drm_disable_vblank(struct drm_crtc *crtc) > > DEFINE_DRM_GEM_CMA_FOPS(ingenic_drm_fops); > > +static int ingenic_drm_gem_mmap(struct drm_gem_object *obj, > + struct vm_area_struct *vma) > +{ > + 
struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); > + struct ingenic_drm *priv = drm_device_get_priv(obj->dev); > + unsigned long attrs = DMA_ATTR_NON_CONSISTENT; > + > + if (!ingenic_drm_cached_gem_buf) > + return drm_gem_cma_prime_mmap(obj, vma); > + > + vma->vm_page_prot = dma_pgprot(priv->dev, vma->vm_page_prot, attrs); > + > + return dma_mmap_attrs(priv->dev, vma, cma_obj->vaddr, cma_obj->paddr, > + vma->vm_end - vma->vm_start, attrs); > +} > + > static struct drm_driver ingenic_drm_driver_data = { > .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, > .name = "ingenic-drm", > @@ -553,7 +584,7 @@ static struct drm_driver ingenic_drm_driver_data = { > .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, > .gem_prime_vmap = drm_gem_cma_prime_vmap, > .gem_prime_vunmap = drm_gem_cma_prime_vunmap, > - .gem_prime_mmap = drm_gem_cma_prime_mmap, > + .gem_prime_mmap = ingenic_drm_gem_mmap, > > .irq_handler = ingenic_drm_irq_handler, > .release = ingenic_drm_release, > -- > 2.25.0 >
diff --git a/drivers/gpu/drm/ingenic/ingenic-drm.c b/drivers/gpu/drm/ingenic/ingenic-drm.c index 3f8cc98d41fe..e51ac8d62d27 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm.c +++ b/drivers/gpu/drm/ingenic/ingenic-drm.c @@ -6,6 +6,8 @@ #include <linux/clk.h> #include <linux/dma-mapping.h> +#include <linux/dma-noncoherent.h> +#include <linux/io.h> #include <linux/module.h> #include <linux/of_device.h> #include <linux/platform_device.h> @@ -30,6 +32,11 @@ #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> +static bool ingenic_drm_cached_gem_buf; +module_param_named(cached_gem_buffers, ingenic_drm_cached_gem_buf, bool, 0400); +MODULE_PARM_DESC(cached_gem_buffers, + "Enable fully cached GEM buffers [default=false]"); + #define JZ_REG_LCD_CFG 0x00 #define JZ_REG_LCD_VSYNC 0x04 #define JZ_REG_LCD_HSYNC 0x08 @@ -379,15 +386,23 @@ static void ingenic_drm_plane_atomic_update(struct drm_plane *plane, struct drm_plane_state *state = plane->state; unsigned int width, height, cpp; dma_addr_t addr; + uint32_t len; if (state && state->fb) { addr = drm_fb_cma_get_gem_addr(state->fb, state, 0); + width = state->src_w >> 16; height = state->src_h >> 16; cpp = state->fb->format->cpp[plane->index]; + len = width * height * cpp; + + if (ingenic_drm_cached_gem_buf) { + dma_cache_sync(priv->dev, phys_to_virt(addr), + len, DMA_TO_DEVICE); + } priv->dma_hwdesc->addr = addr; - priv->dma_hwdesc->cmd = width * height * cpp / 4; + priv->dma_hwdesc->cmd = len / 4; priv->dma_hwdesc->cmd |= JZ_LCD_CMD_EOF_IRQ; } } @@ -532,6 +547,22 @@ static void ingenic_drm_disable_vblank(struct drm_crtc *crtc) DEFINE_DRM_GEM_CMA_FOPS(ingenic_drm_fops); +static int ingenic_drm_gem_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma) +{ + struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); + struct ingenic_drm *priv = drm_device_get_priv(obj->dev); + unsigned long attrs = DMA_ATTR_NON_CONSISTENT; + + if (!ingenic_drm_cached_gem_buf) + return drm_gem_cma_prime_mmap(obj, vma); + + 
vma->vm_page_prot = dma_pgprot(priv->dev, vma->vm_page_prot, attrs); + + return dma_mmap_attrs(priv->dev, vma, cma_obj->vaddr, cma_obj->paddr, + vma->vm_end - vma->vm_start, attrs); +} + static struct drm_driver ingenic_drm_driver_data = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, .name = "ingenic-drm", @@ -553,7 +584,7 @@ static struct drm_driver ingenic_drm_driver_data = { .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, .gem_prime_vmap = drm_gem_cma_prime_vmap, .gem_prime_vunmap = drm_gem_cma_prime_vunmap, - .gem_prime_mmap = drm_gem_cma_prime_mmap, + .gem_prime_mmap = ingenic_drm_gem_mmap, .irq_handler = ingenic_drm_irq_handler, .release = ingenic_drm_release,
Ingenic SoCs are most notably used in cheap Chinese handheld gaming consoles. There, the games and applications generally render in software directly in the emulated framebuffer using SDL1. Since the emulated framebuffer is mapped as write-combine by default, these applications start to run really slowly as soon as alpha-blending is used. Add a 'cached_gem_buffers' option to the ingenic-drm driver to mmap the GEM buffers as fully cached to address this issue. v2: Use standard noncoherent DMA APIs Signed-off-by: Paul Cercueil <paul@crapouillou.net> --- drivers/gpu/drm/ingenic/ingenic-drm.c | 35 +++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-)