| Message ID | 20181127103252.20994-1-andr2000@gmail.com (mailing list archive) |
| --- | --- |
| State | New, archived |
| Series | drm/xen-front: Make shmem backed display buffer coherent |
Daniel, could you please comment?

Thank you

On 11/27/18 12:32 PM, Oleksandr Andrushchenko wrote:
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>
> When GEM backing storage is allocated with drm_gem_get_pages
> the backing pages may be cached, thus making it possible that
> the backend sees only partial content of the buffer which may
> lead to screen artifacts. Make sure that the frontend's
> memory is coherent and the backend always sees correct display
> buffer content.
>
> Fixes: c575b7eeb89f ("drm/xen-front: Add support for Xen PV display frontend")
>
> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
> ---
>  drivers/gpu/drm/xen/xen_drm_front_gem.c | 62 +++++++++++++++++++------
>  1 file changed, 48 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c
> index 47ff019d3aef..c592735e49d2 100644
> --- a/drivers/gpu/drm/xen/xen_drm_front_gem.c
> +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c
> @@ -33,8 +33,11 @@ struct xen_gem_object {
>  	/* set for buffers allocated by the backend */
>  	bool be_alloc;
>
> -	/* this is for imported PRIME buffer */
> -	struct sg_table *sgt_imported;
> +	/*
> +	 * this is for imported PRIME buffer or the one allocated via
> +	 * drm_gem_get_pages.
> +	 */
> +	struct sg_table *sgt;
>  };
>
>  static inline struct xen_gem_object *
> @@ -77,10 +80,21 @@ static struct xen_gem_object *gem_create_obj(struct drm_device *dev,
>  	return xen_obj;
>  }
>
> +struct sg_table *xen_drm_front_gem_get_sg_table(struct drm_gem_object *gem_obj)
> +{
> +	struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj);
> +
> +	if (!xen_obj->pages)
> +		return ERR_PTR(-ENOMEM);
> +
> +	return drm_prime_pages_to_sg(xen_obj->pages, xen_obj->num_pages);
> +}
> +
>  static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size)
>  {
>  	struct xen_drm_front_drm_info *drm_info = dev->dev_private;
>  	struct xen_gem_object *xen_obj;
> +	struct address_space *mapping;
>  	int ret;
>
>  	size = round_up(size, PAGE_SIZE);
> @@ -113,10 +127,14 @@ static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size)
>  		xen_obj->be_alloc = true;
>  		return xen_obj;
>  	}
> +
>  	/*
>  	 * need to allocate backing pages now, so we can share those
>  	 * with the backend
>  	 */
> +	mapping = xen_obj->base.filp->f_mapping;
> +	mapping_set_gfp_mask(mapping, GFP_USER | __GFP_DMA32);
> +
>  	xen_obj->num_pages = DIV_ROUND_UP(size, PAGE_SIZE);
>  	xen_obj->pages = drm_gem_get_pages(&xen_obj->base);
>  	if (IS_ERR_OR_NULL(xen_obj->pages)) {
> @@ -125,8 +143,27 @@ static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size)
>  		goto fail;
>  	}
>
> +	xen_obj->sgt = xen_drm_front_gem_get_sg_table(&xen_obj->base);
> +	if (IS_ERR_OR_NULL(xen_obj->sgt)){
> +		ret = PTR_ERR(xen_obj->sgt);
> +		xen_obj->sgt = NULL;
> +		goto fail_put_pages;
> +	}
> +
> +	if (!dma_map_sg(dev->dev, xen_obj->sgt->sgl, xen_obj->sgt->nents,
> +			DMA_BIDIRECTIONAL)) {
> +		ret = -EFAULT;
> +		goto fail_free_sgt;
> +	}
> +
>  	return xen_obj;
>
> +fail_free_sgt:
> +	sg_free_table(xen_obj->sgt);
> +	xen_obj->sgt = NULL;
> +fail_put_pages:
> +	drm_gem_put_pages(&xen_obj->base, xen_obj->pages, true, false);
> +	xen_obj->pages = NULL;
>  fail:
>  	DRM_ERROR("Failed to allocate buffer with size %zu\n", size);
>  	return ERR_PTR(ret);
> @@ -149,7 +186,7 @@ void xen_drm_front_gem_free_object_unlocked(struct drm_gem_object *gem_obj)
>  	struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj);
>
>  	if (xen_obj->base.import_attach) {
> -		drm_prime_gem_destroy(&xen_obj->base, xen_obj->sgt_imported);
> +		drm_prime_gem_destroy(&xen_obj->base, xen_obj->sgt);
>  		gem_free_pages_array(xen_obj);
>  	} else {
>  		if (xen_obj->pages) {
> @@ -158,6 +195,13 @@ void xen_drm_front_gem_free_object_unlocked(struct drm_gem_object *gem_obj)
>  						  xen_obj->pages);
>  				gem_free_pages_array(xen_obj);
>  			} else {
> +				if (xen_obj->sgt) {
> +					dma_unmap_sg(xen_obj->base.dev->dev,
> +						     xen_obj->sgt->sgl,
> +						     xen_obj->sgt->nents,
> +						     DMA_BIDIRECTIONAL);
> +					sg_free_table(xen_obj->sgt);
> +				}
>  				drm_gem_put_pages(&xen_obj->base,
>  						  xen_obj->pages, true, false);
>  			}
> @@ -174,16 +218,6 @@ struct page **xen_drm_front_gem_get_pages(struct drm_gem_object *gem_obj)
>  	return xen_obj->pages;
>  }
>
> -struct sg_table *xen_drm_front_gem_get_sg_table(struct drm_gem_object *gem_obj)
> -{
> -	struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj);
> -
> -	if (!xen_obj->pages)
> -		return ERR_PTR(-ENOMEM);
> -
> -	return drm_prime_pages_to_sg(xen_obj->pages, xen_obj->num_pages);
> -}
> -
>  struct drm_gem_object *
>  xen_drm_front_gem_import_sg_table(struct drm_device *dev,
>  				  struct dma_buf_attachment *attach,
> @@ -203,7 +237,7 @@ xen_drm_front_gem_import_sg_table(struct drm_device *dev,
>  	if (ret < 0)
>  		return ERR_PTR(ret);
>
> -	xen_obj->sgt_imported = sgt;
> +	xen_obj->sgt = sgt;
>
>  	ret = drm_prime_sg_to_page_addr_arrays(sgt, xen_obj->pages,
>  					       NULL, xen_obj->num_pages);
On Thu, Dec 13, 2018 at 12:17:54PM +0200, Oleksandr Andrushchenko wrote:
> Daniel, could you please comment?

Cross-reviewing someone else's stuff would scale better, I don't think
I'll get around to anything before next year.
-Daniel
On 12/13/18 5:48 PM, Daniel Vetter wrote:
> On Thu, Dec 13, 2018 at 12:17:54PM +0200, Oleksandr Andrushchenko wrote:
> > Daniel, could you please comment?
> Cross-reviewing someone else's stuff would scale better,

fair enough

> I don't think
> I'll get around to anything before next year.

I put you on CC explicitly because you had comments on the other patch [1]
and this one tries to solve the issue raised (I tried to figure out
at [2] if this is the way to go, but it seems I have no alternative here).

While at it, [3] (I hope) addresses your comments and the series just
needs your single ack/nack to get in: all the rest of the acks/r-bs are
already there. Do you mind looking at it?

> -Daniel

Thank you very much for your time,
Oleksandr
[1] https://patchwork.kernel.org/patch/10693787/
[2] https://lists.xen.org/archives/html/xen-devel/2018-11/msg02882.html
[3] https://patchwork.kernel.org/patch/10705853/
On Fri, Dec 14, 2018 at 09:09:45AM +0200, Oleksandr Andrushchenko wrote:
> On 12/13/18 5:48 PM, Daniel Vetter wrote:
> > Cross-reviewing someone else's stuff would scale better,
> fair enough
> > I don't think
> > I'll get around to anything before next year.
>
> I put you on CC explicitly because you had comments on the other patch [1]
> and this one tries to solve the issue raised (I tried to figure out
> at [2] if this is the way to go, but it seems I have no alternative here).
> While at it, [3] (I hope) addresses your comments and the series just
> needs your single ack/nack to get in: all the rest of the acks/r-bs are
> already there. Do you mind looking at it?

As mentioned, much better if you aim for more peer review with others, not
just me. And all that dma coherency stuff isn't something I really
understand all that well (I just know we have lots of pain). For options
maybe work together with Gerd Hoffmann or Noralf Trønnes, I think either
has some patches pending that also need some review.
-Daniel
On 12/14/18 10:35 AM, Daniel Vetter wrote:
> As mentioned, much better if you aim for more peer review with others, not
> just me. And all that dma coherency stuff isn't something I really
> understand all that well (I just know we have lots of pain). For options
> maybe work together with Gerd Hoffmann or Noralf Trønnes, I think either
> has some patches pending that also need some review.

Fair enough, thank you

> -Daniel
On 27.11.2018 11:32, Oleksandr Andrushchenko wrote:
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>
> When GEM backing storage is allocated with drm_gem_get_pages
> the backing pages may be cached, thus making it possible that
> the backend sees only partial content of the buffer which may
> lead to screen artifacts. Make sure that the frontend's
> memory is coherent and the backend always sees correct display
> buffer content.
>
>  	/*
>  	 * need to allocate backing pages now, so we can share those
>  	 * with the backend
>  	 */

Let's see if I understand what you're doing:

Here you say that the pages should be DMA accessible for devices that can
only see 4GB.

> +	mapping = xen_obj->base.filp->f_mapping;
> +	mapping_set_gfp_mask(mapping, GFP_USER | __GFP_DMA32);
>
> +	if (!dma_map_sg(dev->dev, xen_obj->sgt->sgl, xen_obj->sgt->nents,
> +			DMA_BIDIRECTIONAL)) {

Are you using the DMA streaming API as a way to flush the caches?
Does this mean that GFP_USER isn't making the buffer coherent?

Noralf.
On 12/18/18 9:20 PM, Noralf Trønnes wrote:
>>  	/*
>>  	 * need to allocate backing pages now, so we can share those
>>  	 * with the backend
>>  	 */
>
> Let's see if I understand what you're doing:
>
> Here you say that the pages should be DMA accessible for devices that can
> only see 4GB.

Yes, your understanding is correct. As we are a para-virtualized device we
do not have strict requirements for 32-bit DMA. But, via dma-buf export,
the buffer we create can be used by real HW, e.g. one can pass through
real HW devices into a guest domain and they can import our buffer (yes,
they can be IOMMU backed and other conditions may apply).

So, this is why we are limiting to DMA32 here, just to allow more possible
use-cases.

>> +	if (!dma_map_sg(dev->dev, xen_obj->sgt->sgl, xen_obj->sgt->nents,
>> +			DMA_BIDIRECTIONAL)) {
>
> Are you using the DMA streaming API as a way to flush the caches?

Yes

> Does this mean that GFP_USER isn't making the buffer coherent?

No, it didn't help. I had a question [1] asking if there is any better way
to achieve the same, but didn't have any response yet. So, I implemented
it via the DMA API, which helped.

Thank you,
Oleksandr

[1] https://www.mail-archive.com/xen-devel@lists.xenproject.org/msg31745.html
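For context on the exchange above, here is an illustrative sketch (not taken
from the patch) of what "using the streaming DMA API" implies: with a
streaming mapping, cache maintenance happens at explicit ownership hand-over
points, which is exactly the effect being relied on here.

/* CPU is done writing the display buffer: flush caches towards the device */
dma_sync_sg_for_device(dev->dev, xen_obj->sgt->sgl, xen_obj->sgt->nents,
		       DMA_BIDIRECTIONAL);

/* ... backend consumes the buffer ... */

/* hand the buffer back to the CPU before it touches the pages again */
dma_sync_sg_for_cpu(dev->dev, xen_obj->sgt->sgl, xen_obj->sgt->nents,
		    DMA_BIDIRECTIONAL);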
Hi,

> > > +	mapping = xen_obj->base.filp->f_mapping;
> > > +	mapping_set_gfp_mask(mapping, GFP_USER | __GFP_DMA32);
>
> > Let's see if I understand what you're doing:
> >
> > Here you say that the pages should be DMA accessible for devices that can
> > only see 4GB.
>
> Yes, your understanding is correct. As we are a para-virtualized device we
> do not have strict requirements for 32-bit DMA. But, via dma-buf export,
> the buffer we create can be used by real HW, e.g. one can pass through
> real HW devices into a guest domain and they can import our buffer (yes,
> they can be IOMMU backed and other conditions may apply).
> So, this is why we are limiting to DMA32 here, just to allow more possible
> use-cases.

Sure this actually helps? It's below 4G in guest physical address
space, so it can be backed by pages which are actually above 4G in host
physical address space ...

> > > +	if (!dma_map_sg(dev->dev, xen_obj->sgt->sgl, xen_obj->sgt->nents,
> > > +			DMA_BIDIRECTIONAL)) {
> >
> > Are you using the DMA streaming API as a way to flush the caches?
> Yes
> > Does this mean that GFP_USER isn't making the buffer coherent?
>
> No, it didn't help. I had a question [1] asking if there is any better way
> to achieve the same, but didn't have any response yet. So, I implemented
> it via the DMA API, which helped.

set_pages_array_*() ?

See arch/x86/include/asm/set_memory.h

HTH,
Gerd
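For reference, a minimal sketch of what Gerd's set_pages_array_*() hint could
look like; the helper name is hypothetical, and these calls exist only on x86
(arch/x86/include/asm/set_memory.h), which is exactly the limitation raised
in the reply below.

#include <asm/set_memory.h>

/* x86 only: change the caching attribute of the backing pages directly */
static int gem_set_pages_caching(struct xen_gem_object *xen_obj, bool cached)
{
	if (cached)
		/* restore write-back caching before the pages are released */
		return set_pages_array_wb(xen_obj->pages, xen_obj->num_pages);

	/* make the pages write-combined so the CPU does not cache them */
	return set_pages_array_wc(xen_obj->pages, xen_obj->num_pages);
}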
On 12/19/18 3:14 PM, Gerd Hoffmann wrote:
> Hi,
>
>>>> +	mapping = xen_obj->base.filp->f_mapping;
>>>> +	mapping_set_gfp_mask(mapping, GFP_USER | __GFP_DMA32);
>> So, this is why we are limiting to DMA32 here, just to allow more possible
>> use-cases.
> Sure this actually helps? It's below 4G in guest physical address
> space, so it can be backed by pages which are actually above 4G in host
> physical address space ...

Yes, you are right here. This is why I wrote about the IOMMU
and other conditions. E.g. you can have a device which only
expects 32-bit, but thanks to the IOMMU it can access pages above
4GiB seamlessly. So, this is why I *hope* that this code *may* help
such devices. Do you think I don't need that and have to remove it?

>>>> +	if (!dma_map_sg(dev->dev, xen_obj->sgt->sgl, xen_obj->sgt->nents,
>>>> +			DMA_BIDIRECTIONAL)) {
>>> Are you using the DMA streaming API as a way to flush the caches?
>> Yes
>>> Does this mean that GFP_USER isn't making the buffer coherent?
>> No, it didn't help. I had a question [1] asking if there is any better way
>> to achieve the same, but didn't have any response yet. So, I implemented
>> it via the DMA API, which helped.
> set_pages_array_*() ?
>
> See arch/x86/include/asm/set_memory.h

Well, x86... I am on arm which doesn't define that...

> HTH,
> Gerd

Thank you,
Oleksandr
Hi,

> > Sure this actually helps? It's below 4G in guest physical address
> > space, so it can be backed by pages which are actually above 4G in host
> > physical address space ...
>
> Yes, you are right here. This is why I wrote about the IOMMU
> and other conditions. E.g. you can have a device which only
> expects 32-bit, but thanks to the IOMMU it can access pages above
> 4GiB seamlessly. So, this is why I *hope* that this code *may* help
> such devices. Do you think I don't need that and have to remove it?

I would try without that, and maybe add a runtime option (module parameter)
later if it turns out some hardware actually needs that. Devices which can
only do 32-bit DMA are becoming less and less common these days.

> > > set_pages_array_*() ?
> > >
> > > See arch/x86/include/asm/set_memory.h
> > Well, x86... I am on arm which doesn't define that...

Oh, arm. Maybe ask on an arm list then. I know on arm you have to care
about caching a lot more, but that also is where my knowledge ends ...

Using dma_map_sg for cache flushing looks like a sledge hammer approach
to me. But maybe it is needed to make xen flush the caches (xen guests
have their own dma mapping implementation, right? Or is this different
on arm than on x86?).

cheers,
Gerd
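A minimal sketch of the module-parameter idea Gerd suggests; the parameter
name, default and helper are assumptions for illustration, not part of any
posted patch.

#include <linux/module.h>

/* hypothetical knob: only restrict to the first 4GB when asked to */
static bool limit_to_dma32;
module_param(limit_to_dma32, bool, 0444);
MODULE_PARM_DESC(limit_to_dma32,
		 "Allocate backing pages below 4GB for 32-bit DMA-only importers");

static void gem_set_backing_gfp(struct xen_gem_object *xen_obj)
{
	gfp_t gfp = GFP_USER;

	if (limit_to_dma32)
		gfp |= __GFP_DMA32;

	mapping_set_gfp_mask(xen_obj->base.filp->f_mapping, gfp);
}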
On 19.12.2018 09:18, Oleksandr Andrushchenko wrote:
> On 12/18/18 9:20 PM, Noralf Trønnes wrote:
>>> +	if (!dma_map_sg(dev->dev, xen_obj->sgt->sgl, xen_obj->sgt->nents,
>>> +			DMA_BIDIRECTIONAL)) {
>>
>> Are you using the DMA streaming API as a way to flush the caches?
> Yes
>> Does this mean that GFP_USER isn't making the buffer coherent?
>
> No, it didn't help. I had a question [1] asking if there is any better way
> to achieve the same, but didn't have any response yet. So, I implemented
> it via the DMA API, which helped.

As Gerd says, asking on the arm list is probably the best way of finding a
future-proof solution and understanding what's going on.

But if you don't get any help there and you end up with the present
solution, I suggest you add a comment that this is for flushing the caches
on arm. With the current code one can be led to believe that the driver
uses the dma address somewhere.

What about x86, does the problem exist there?

I wonder if you can call dma_unmap_sg() right away since the flushing has
already happened. That would contain this flushing "hack" inside the
gem_create function.

I also suggest calling drm_prime_pages_to_sg() directly to increase
readability, since the check in xen_drm_front_gem_get_sg_table() isn't
necessary for this use case. A sketch of the combined idea follows below.

Noralf.
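A minimal sketch of how the two suggestions above could be combined; this is
an assumption of shape, not an actual follow-up patch. The sg_table is built
with drm_prime_pages_to_sg() directly, the streaming map/unmap is done back
to back purely for cache maintenance, and nothing is kept around afterwards.

static int gem_flush_backing_pages(struct drm_device *dev,
				   struct xen_gem_object *xen_obj)
{
	struct sg_table *sgt;
	int ret = 0;

	sgt = drm_prime_pages_to_sg(xen_obj->pages, xen_obj->num_pages);
	if (IS_ERR(sgt))
		return PTR_ERR(sgt);

	/* map and immediately unmap: only the cache maintenance is wanted */
	if (!dma_map_sg(dev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL))
		ret = -EFAULT;
	else
		dma_unmap_sg(dev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL);

	sg_free_table(sgt);
	kfree(sgt);
	return ret;
}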
On 12/19/18 4:10 PM, Gerd Hoffmann wrote:
> Hi,
>
>> Yes, you are right here. This is why I wrote about the IOMMU
>> and other conditions. E.g. you can have a device which only
>> expects 32-bit, but thanks to the IOMMU it can access pages above
>> 4GiB seamlessly. So, this is why I *hope* that this code *may* help
>> such devices. Do you think I don't need that and have to remove it?
> I would try without that, and maybe add a runtime option (module parameter)
> later if it turns out some hardware actually needs that. Devices which can
> only do 32-bit DMA are becoming less and less common these days.

Good point, will remove it then.

>>> set_pages_array_*() ?
>>>
>>> See arch/x86/include/asm/set_memory.h
>> Well, x86... I am on arm which doesn't define that...
> Oh, arm. Maybe ask on an arm list then. I know on arm you have to care
> about caching a lot more, but that also is where my knowledge ends ...
>
> Using dma_map_sg for cache flushing looks like a sledge hammer approach
> to me.

It is. This is why I am so unsure this is the way to go.

> But maybe it is needed to make xen flush the caches (xen guests
> have their own dma mapping implementation, right? Or is this different
> on arm than on x86?).

I'll try to figure that out.

> cheers,
> Gerd

Thank you,
Oleksandr
On 12/19/18 6:14 PM, Noralf Trønnes wrote:
>
> Den 19.12.2018 09.18, skrev Oleksandr Andrushchenko:
>> On 12/18/18 9:20 PM, Noralf Trønnes wrote:
>>>
>>> Den 27.11.2018 11.32, skrev Oleksandr Andrushchenko:
>>>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>>>
>>>> When GEM backing storage is allocated with drm_gem_get_pages
>>>> the backing pages may be cached, thus making it possible that
>>>> the backend sees only partial content of the buffer which may
>>>> lead to screen artifacts. Make sure that the frontend's
>>>> memory is coherent and the backend always sees correct display
>>>> buffer content.
>>>>
>>>> Fixes: c575b7eeb89f ("drm/xen-front: Add support for Xen PV display
>>>> frontend")
>>>>
>>>> Signed-off-by: Oleksandr Andrushchenko
>>>> <oleksandr_andrushchenko@epam.com>
>>>> ---
>>>> drivers/gpu/drm/xen/xen_drm_front_gem.c | 62
>>>> +++++++++++++++++++------
>>>> 1 file changed, 48 insertions(+), 14 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c
>>>> b/drivers/gpu/drm/xen/xen_drm_front_gem.c
>>>> index 47ff019d3aef..c592735e49d2 100644
>>>> --- a/drivers/gpu/drm/xen/xen_drm_front_gem.c
>>>> +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c
>>>> @@ -33,8 +33,11 @@ struct xen_gem_object {
>>>> /* set for buffers allocated by the backend */
>>>> bool be_alloc;
>>>> - /* this is for imported PRIME buffer */
>>>> - struct sg_table *sgt_imported;
>>>> + /*
>>>> + * this is for imported PRIME buffer or the one allocated via
>>>> + * drm_gem_get_pages.
>>>> + */
>>>> + struct sg_table *sgt;
>>>> };
>>>> static inline struct xen_gem_object *
>>>> @@ -77,10 +80,21 @@ static struct xen_gem_object
>>>> *gem_create_obj(struct drm_device *dev,
>>>> return xen_obj;
>>>> }
>>>> +struct sg_table *xen_drm_front_gem_get_sg_table(struct
>>>> drm_gem_object *gem_obj)
>>>> +{
>>>> + struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj);
>>>> +
>>>> + if (!xen_obj->pages)
>>>> + return ERR_PTR(-ENOMEM);
>>>> +
>>>> + return drm_prime_pages_to_sg(xen_obj->pages, xen_obj->num_pages);
>>>> +}
>>>> +
>>>> static struct xen_gem_object *gem_create(struct drm_device *dev,
>>>> size_t size)
>>>> {
>>>> struct xen_drm_front_drm_info *drm_info = dev->dev_private;
>>>> struct xen_gem_object *xen_obj;
>>>> + struct address_space *mapping;
>>>> int ret;
>>>> size = round_up(size, PAGE_SIZE);
>>>> @@ -113,10 +127,14 @@ static struct xen_gem_object
>>>> *gem_create(struct drm_device *dev, size_t size)
>>>> xen_obj->be_alloc = true;
>>>> return xen_obj;
>>>> }
>>>> +
>>>> /*
>>>> * need to allocate backing pages now, so we can share those
>>>> * with the backend
>>>> */
>>>
>>>
>>> Let's see if I understand what you're doing:
>>>
>>> Here you say that the pages should be DMA accessible for devices
>>> that can
>>> only see 4GB.
>>
>> Yes, your understanding is correct. As we are a para-virtualized
>> device we
>>
>> do not have strict requirements for 32-bit DMA. But, via dma-buf export,
>>
>> the buffer we create can be used by real HW, e.g. one can pass-through
>>
>> real HW devices into a guest domain and they can import our buffer (yes,
>>
>> they can be IOMMU backed and other conditions may apply).
>>
>> So, this is why we are limiting to DMA32 here, just to allow more
>> possible
>>
>> use-cases
>>
>>>
>>>> + mapping = xen_obj->base.filp->f_mapping;
>>>> + mapping_set_gfp_mask(mapping, GFP_USER | __GFP_DMA32);
>>>> +
>>>> xen_obj->num_pages = DIV_ROUND_UP(size, PAGE_SIZE);
>>>> xen_obj->pages = drm_gem_get_pages(&xen_obj->base);
>>>> if (IS_ERR_OR_NULL(xen_obj->pages)) {
>>>> @@ -125,8 +143,27 @@ static struct xen_gem_object
>>>> *gem_create(struct drm_device *dev, size_t size)
>>>> goto fail;
>>>> }
>>>> + xen_obj->sgt = xen_drm_front_gem_get_sg_table(&xen_obj->base);
>>>> + if (IS_ERR_OR_NULL(xen_obj->sgt)){
>>>> + ret = PTR_ERR(xen_obj->sgt);
>>>> + xen_obj->sgt = NULL;
>>>> + goto fail_put_pages;
>>>> + }
>>>> +
>>>> + if (!dma_map_sg(dev->dev, xen_obj->sgt->sgl, xen_obj->sgt->nents,
>>>> + DMA_BIDIRECTIONAL)) {
>>>
>>>
>>> Are you using the DMA streaming API as a way to flush the caches?
>> Yes
>>> Does this mean that GFP_USER isn't making the buffer coherent?
>>
>> No, it didn't help. I had a question [1] if there are any other
>> better way
>>
>> to achieve the same, but didn't have any response yet. So, I implemented
>>
>> it via DMA API which helped.
>
> As Gerd says asking on the arm list is probably the best way of finding a
> future proof solution and understanding what's going on.

Yes, it seems so.

>
> But if you don't get any help there and you end up with the present
> solution I suggest you add a comment that this is for flushing the caches
> on arm. With the current code one can be led to believe that the driver
> uses the dma address somewhere.

Makes sense.

>
> What about x86, does the problem exist there?
>

Yes, but there I could use drm_clflush_pages(), which is not implemented on ARM.

> I wonder if you can call dma_unmap_sg() right away since the flushing has
> already happened. That would contain this flushing "hack" inside the
> gem_create function.

Yes, I was thinking about this "solution" as well.

>
> I also suggest calling drm_prime_pages_to_sg() directly to increase
> readability, since the check in xen_drm_front_gem_get_sg_table() isn't
> necessary for this use case.

This can be done.

>
>
> Noralf.
>
>
>>
>>>
>>> Noralf.
>>>
>>>> + ret = -EFAULT;
>>>> + goto fail_free_sgt;
>>>> + }
>>>> +
>>>> return xen_obj;
>>>> +fail_free_sgt:
>>>> + sg_free_table(xen_obj->sgt);
>>>> + xen_obj->sgt = NULL;
>>>> +fail_put_pages:
>>>> + drm_gem_put_pages(&xen_obj->base, xen_obj->pages, true, false);
>>>> + xen_obj->pages = NULL;
>>>> fail:
>>>> DRM_ERROR("Failed to allocate buffer with size %zu\n", size);
>>>> return ERR_PTR(ret);
>>>> @@ -149,7 +186,7 @@ void
>>>> xen_drm_front_gem_free_object_unlocked(struct drm_gem_object *gem_obj)
>>>> struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj);
>>>> if (xen_obj->base.import_attach) {
>>>> - drm_prime_gem_destroy(&xen_obj->base, xen_obj->sgt_imported);
>>>> + drm_prime_gem_destroy(&xen_obj->base, xen_obj->sgt);
>>>> gem_free_pages_array(xen_obj);
>>>> } else {
>>>> if (xen_obj->pages) {
>>>> @@ -158,6 +195,13 @@ void
>>>> xen_drm_front_gem_free_object_unlocked(struct drm_gem_object *gem_obj)
>>>> xen_obj->pages);
>>>> gem_free_pages_array(xen_obj);
>>>> } else {
>>>> + if (xen_obj->sgt) {
>>>> + dma_unmap_sg(xen_obj->base.dev->dev,
>>>> + xen_obj->sgt->sgl,
>>>> + xen_obj->sgt->nents,
>>>> + DMA_BIDIRECTIONAL);
>>>> + sg_free_table(xen_obj->sgt);
>>>> + }
>>>> drm_gem_put_pages(&xen_obj->base,
>>>> xen_obj->pages, true, false);
>>>> }
>>>> @@ -174,16 +218,6 @@ struct page
>>>> **xen_drm_front_gem_get_pages(struct drm_gem_object *gem_obj)
>>>> return xen_obj->pages;
>>>> }
>>>> -struct sg_table *xen_drm_front_gem_get_sg_table(struct
>>>> drm_gem_object *gem_obj)
>>>> -{
>>>> - struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj);
>>>> -
>>>> - if (!xen_obj->pages)
>>>> - return ERR_PTR(-ENOMEM);
>>>> -
>>>> - return drm_prime_pages_to_sg(xen_obj->pages, xen_obj->num_pages);
>>>> -}
>>>> -
>>>> struct drm_gem_object *
>>>> xen_drm_front_gem_import_sg_table(struct drm_device *dev,
>>>> struct dma_buf_attachment *attach,
>>>> @@ -203,7 +237,7 @@ xen_drm_front_gem_import_sg_table(struct
>>>> drm_device *dev,
>>>> if (ret < 0)
>>>> return ERR_PTR(ret);
>>>> - xen_obj->sgt_imported = sgt;
>>>> + xen_obj->sgt = sgt;
>>>> ret = drm_prime_sg_to_page_addr_arrays(sgt, xen_obj->pages,
>>>> NULL, xen_obj->num_pages);
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>
>> Thank you,
>>
>> Oleksandr
>>
>> [1]
>> https://www.mail-archive.com/xen-devel@lists.xenproject.org/msg31745.html
>>

Thank you,
Oleksandr
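Putting Noralf's two suggestions together, the cache-flush "hack" could be confined entirely to gem_create() with a helper roughly like the one below; gem_flush_pages() is a made-up name for illustration, and drm_prime_pages_to_sg() is called directly instead of going through xen_drm_front_gem_get_sg_table():

/*
 * Map the freshly allocated shmem pages and unmap them right away: the
 * streaming DMA mapping is used purely for its cache-maintenance side
 * effect, so nothing needs to be kept in struct xen_gem_object.
 */
static int gem_flush_pages(struct drm_device *dev, struct page **pages,
			   unsigned int num_pages)
{
	struct sg_table *sgt;
	int ret = 0;

	sgt = drm_prime_pages_to_sg(pages, num_pages);
	if (IS_ERR(sgt))
		return PTR_ERR(sgt);

	if (!dma_map_sg(dev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL)) {
		ret = -EFAULT;
		goto out_free;
	}

	/* The mapping has done its job (flushing), so drop it immediately. */
	dma_unmap_sg(dev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL);

out_free:
	sg_free_table(sgt);
	kfree(sgt);
	return ret;
}

gem_create() would then call this once right after drm_gem_get_pages(), and the non-imported free path would no longer need the dma_unmap_sg()/sg_free_table() branch at all.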
On Tue, Dec 18, 2018 at 08:20:22PM +0100, Noralf Trønnes wrote:
> > + if (!dma_map_sg(dev->dev, xen_obj->sgt->sgl, xen_obj->sgt->nents,
> > + DMA_BIDIRECTIONAL)) {
> >
> Are you using the DMA streaming API as a way to flush the caches?

This looks rather broken. Please send the whole patch series to
the iommu list for proper review.

> Does this mean that GFP_USER isn't making the buffer coherent?

How could GFP_USER make memory coherent in any way?
On Wed, Dec 19, 2018 at 02:14:52PM +0100, Gerd Hoffmann wrote:
>
> > > > + if (!dma_map_sg(dev->dev, xen_obj->sgt->sgl, xen_obj->sgt->nents,
> > > > + DMA_BIDIRECTIONAL)) {
> > >
> > >
> > > Are you using the DMA streaming API as a way to flush the caches?
> > Yes
> > > Does this mean that GFP_USER isn't making the buffer coherent?
> >
> > No, it didn't help. I had a question [1] if there are any other better way
> > to achieve the same, but didn't have any response yet. So, I implemented
> > it via DMA API which helped.
>
> set_pages_array_*() ?
>
> See arch/x86/include/asm/set_memory.h

That sounds even more bogus, don't go there.
On 12/20/18 5:36 PM, Christoph Hellwig wrote:
> On Tue, Dec 18, 2018 at 08:20:22PM +0100, Noralf Trønnes wrote:
>>> + if (!dma_map_sg(dev->dev, xen_obj->sgt->sgl, xen_obj->sgt->nents,
>>> + DMA_BIDIRECTIONAL)) {
>>
>> Are you using the DMA streaming API as a way to flush the caches?
> This looks rather broken. Please send the whole patch series to
> the iommu list for proper review.

This is the only patch [1], no series. And at the moment I think
there is nothing to review as I am not sure how to deal with those
shmem pages: this patch is rather to start a discussion on how shmem
pages can be flushed on ARM (the only workaround I have so far is
in this patch which uses DMA API). This is where I am looking for
some advice, so I can implement the patch the right way.

>> Does this mean that GFP_USER isn't making the buffer coherent?
> How could GFP_USER make memory coherent in any way?

I am by no means an expert here, but other DRM drivers allocate buffers
from shmem and then use the DMA API [2]; see, for example, [3].

[1] https://patchwork.kernel.org/patch/10700089/
[2] https://elixir.bootlin.com/linux/v4.20-rc7/ident/drm_gem_get_pages
[3] https://elixir.bootlin.com/linux/v4.20-rc7/source/drivers/gpu/drm/omapdrm/omap_gem.c#L248
On Thu, Dec 20, 2018 at 05:49:39PM +0200, Oleksandr Andrushchenko wrote:
> This is the only patch [1], no series. And at the moment I think
> there is nothing to review as I am not sure how to deal with those
> shmem pages: this patch is rather to start a discussion on how shmem
> pages can be flushed on ARM (the only workaround I have so far is
> in this patch which uses DMA API). This is where I am looking for
> some advice, so I can implement the patch the right way.

shmem is basically page cache. So you need to use the DMA streaming
API (dma_map_*) to map it for DMA. You need to make sure no one
access the kernel mapping at the same time as you do DMA to it,
so the pages should be locked. This is how the normal file system
I/O path works.
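A minimal sketch of what Christoph describes, assuming the pages came from drm_gem_get_pages() and that sgt was built with drm_prime_pages_to_sg(): the page locks keep everyone away from the kernel mapping while the DMA (or the cache flush) is in flight. Error handling is omitted for brevity.

	/* Needs <linux/pagemap.h> for lock_page()/unlock_page(). */
	unsigned int i;

	for (i = 0; i < xen_obj->num_pages; i++)
		lock_page(xen_obj->pages[i]);

	if (dma_map_sg(dev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL)) {
		/* ... device access or cache maintenance happens here ... */
		dma_unmap_sg(dev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL);
	}

	for (i = 0; i < xen_obj->num_pages; i++)
		unlock_page(xen_obj->pages[i]);

For a long-lived scanout buffer the locking window would of course need more thought than this, which is part of what the discussion below is about.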
On Thu, Dec 20, 2018 at 6:39 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Thu, Dec 20, 2018 at 05:49:39PM +0200, Oleksandr Andrushchenko wrote:
> > This is the only patch [1], no series. And at the moment I think
> > there is nothing to review as I am not sure how to deal with those
> > shmem pages: this patch is rather to start a discussion on how shmem
> > pages can be flushed on ARM (the only workaround I have so far is
> > in this patch which uses DMA API). This is where I am looking for
> > some advice, so I can implement the patch the right way.
>
> shmem is basically page cache. So you need to use the DMA streaming
> API (dma_map_*) to map it for DMA. You need to make sure no one
> access the kernel mapping at the same time as you do DMA to it,
> so the pages should be locked. This is how the normal file system
> I/O path works.

I wasn't around back then, but afaiui drm uses shmem because that was
the only way mm folks let us have swappable memory. We proposed a
gemfs a while ago to be able to mix up our own allocator with that,
wasn't approved.
What we most definitely not want to end up with though is actually
streaming dma, because with that all the zero copy buffer sharing
tricks become pointless. There's pretty epic amounts of hacks to work
around this, I have no idea what's supposed to give here.
-Daniel
On Thu, Dec 20, 2018 at 07:29:37PM +0100, Daniel Vetter wrote:
> What we most definitely not want to end up with though is actually
> streaming dma, because with that all the zero copy buffer sharing
> tricks become pointless. There's pretty epic amounts of hacks to work
> around this, I have no idea what's supposed to give here.

Err, with streaming DMA buffer sharing is trivial. The coherent DMA
allocator is what causes all kinds of horrible hacks that can't actually
work on various platforms.
On Thu, Dec 20, 2018 at 7:33 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Thu, Dec 20, 2018 at 07:29:37PM +0100, Daniel Vetter wrote:
> > What we most definitely not want to end up with though is actually
> > streaming dma, because with that all the zero copy buffer sharing
> > tricks become pointless. There's pretty epic amounts of hacks to work
> > around this, I have no idea what's supposed to give here.
>
> Err, with streaming DMA buffer sharing is trivial. The coherent DMA
> allocator is what causes all kinds of horrible hacks that can't actually
> work on various platforms.

Hm, I thought the streaming dma api is the one that causes bounce
buffers and all that fun. If you're unlucky at least.
-Daniel
On Thu, Dec 20, 2018 at 07:35:15PM +0100, Daniel Vetter wrote:
> > Err, with streaming DMA buffer sharing is trivial. The coherent DMA
> > allocator is what causes all kinds of horrible hacks that can't actually
> > work on various platforms.
>
> Hm, I thought the streaming dma api is the one that causes bounce
> buffers and all that fun. If you're unlucky at least.

Yes it may. But even if that happens everything will actually work,
just slower. While the dma coherent API is simply broken.

But if you don't want bounce buffering you need to use the dma
noncoherent allocator as proposed here:

https://lists.linuxfoundation.org/pipermail/iommu/2018-December/031982.html

which combines allocating memory that doesn't need to be bounce
buffered with a sharing scheme that can actually work.
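The allocator Christoph points to was still a proposal at the time; the closest thing already in the tree is the DMA_ATTR_NON_CONSISTENT attribute, which, on the architectures that implement it, hands back non-coherent memory plus explicit sync points. The fragment below is a rough illustration of that existing interface only, not of the API from the link above:

	/* Illustration of non-coherent DMA memory with explicit syncs. */
	dma_addr_t dma_handle;
	void *vaddr;

	vaddr = dma_alloc_attrs(dev->dev, size, &dma_handle, GFP_KERNEL,
				DMA_ATTR_NON_CONSISTENT);
	if (!vaddr)
		return -ENOMEM;

	/* CPU fills the buffer, then makes it visible to the device. */
	dma_cache_sync(dev->dev, vaddr, size, DMA_TO_DEVICE);

	/* ... */

	dma_free_attrs(dev->dev, size, vaddr, dma_handle,
		       DMA_ATTR_NON_CONSISTENT);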
On 12/20/18 8:38 PM, Christoph Hellwig wrote:
> On Thu, Dec 20, 2018 at 07:35:15PM +0100, Daniel Vetter wrote:
>>> Err, with streaming DMA buffer sharing is trivial. The coherent DMA
>>> allocator is what causes all kinds of horrible hacks that can't actually
>>> work on various platforms.
>> Hm, I thought the streaming dma api is the one that causes bounce
>> buffers and all that fun. If you're unlucky at least.
> Yes it may. But even if that happens everything will actually work,
> just slower. While the dma coherent API is simply broken.
>
> But if you don't want bounce buffering you need to use the dma
> noncoherent allocator as proposed here:
>
> https://lists.linuxfoundation.org/pipermail/iommu/2018-December/031982.html
>
> which combines allocating memory that doesn't need to be bounce
> buffered with a sharing scheme that can actually work.

So, the bottom line will be: I can use the DMA API for what I need, but:

1. I need to remove GFP_USER.
2. There is no need for DMA32 (so no chance for bouncing to step in).
3. I may need to check whether mapping and unmapping the buffer at once
   also helps, i.e. there is no need to keep the buffer mapped until it
   is destroyed.

Did I get it all right?

Thank you,
Oleksandr
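Applied to the patch being discussed, the three points above boil down to an allocation path roughly like this, with gem_flush_pages() being the hypothetical helper sketched earlier in the thread:

	/* 1) + 2): no mapping_set_gfp_mask() call, the shmem defaults are kept */
	xen_obj->num_pages = DIV_ROUND_UP(size, PAGE_SIZE);
	xen_obj->pages = drm_gem_get_pages(&xen_obj->base);
	if (IS_ERR(xen_obj->pages)) {
		ret = PTR_ERR(xen_obj->pages);
		xen_obj->pages = NULL;
		goto fail;
	}

	/* 3): map and unmap right away, purely to flush the CPU caches */
	ret = gem_flush_pages(dev, xen_obj->pages, xen_obj->num_pages);
	if (ret)
		goto fail_put_pages;

	return xen_obj;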
diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c
index 47ff019d3aef..c592735e49d2 100644
--- a/drivers/gpu/drm/xen/xen_drm_front_gem.c
+++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c
@@ -33,8 +33,11 @@ struct xen_gem_object {
 	/* set for buffers allocated by the backend */
 	bool be_alloc;
 
-	/* this is for imported PRIME buffer */
-	struct sg_table *sgt_imported;
+	/*
+	 * this is for imported PRIME buffer or the one allocated via
+	 * drm_gem_get_pages.
+	 */
+	struct sg_table *sgt;
 };
 
 static inline struct xen_gem_object *
@@ -77,10 +80,21 @@ static struct xen_gem_object *gem_create_obj(struct drm_device *dev,
 	return xen_obj;
 }
 
+struct sg_table *xen_drm_front_gem_get_sg_table(struct drm_gem_object *gem_obj)
+{
+	struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj);
+
+	if (!xen_obj->pages)
+		return ERR_PTR(-ENOMEM);
+
+	return drm_prime_pages_to_sg(xen_obj->pages, xen_obj->num_pages);
+}
+
 static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size)
 {
 	struct xen_drm_front_drm_info *drm_info = dev->dev_private;
 	struct xen_gem_object *xen_obj;
+	struct address_space *mapping;
 	int ret;
 
 	size = round_up(size, PAGE_SIZE);
@@ -113,10 +127,14 @@ static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size)
 		xen_obj->be_alloc = true;
 		return xen_obj;
 	}
+
 	/*
 	 * need to allocate backing pages now, so we can share those
 	 * with the backend
 	 */
+	mapping = xen_obj->base.filp->f_mapping;
+	mapping_set_gfp_mask(mapping, GFP_USER | __GFP_DMA32);
+
 	xen_obj->num_pages = DIV_ROUND_UP(size, PAGE_SIZE);
 	xen_obj->pages = drm_gem_get_pages(&xen_obj->base);
 	if (IS_ERR_OR_NULL(xen_obj->pages)) {
@@ -125,8 +143,27 @@ static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size)
 		goto fail;
 	}
 
+	xen_obj->sgt = xen_drm_front_gem_get_sg_table(&xen_obj->base);
+	if (IS_ERR_OR_NULL(xen_obj->sgt)){
+		ret = PTR_ERR(xen_obj->sgt);
+		xen_obj->sgt = NULL;
+		goto fail_put_pages;
+	}
+
+	if (!dma_map_sg(dev->dev, xen_obj->sgt->sgl, xen_obj->sgt->nents,
+			DMA_BIDIRECTIONAL)) {
+		ret = -EFAULT;
+		goto fail_free_sgt;
+	}
+
 	return xen_obj;
 
+fail_free_sgt:
+	sg_free_table(xen_obj->sgt);
+	xen_obj->sgt = NULL;
+fail_put_pages:
+	drm_gem_put_pages(&xen_obj->base, xen_obj->pages, true, false);
+	xen_obj->pages = NULL;
 fail:
 	DRM_ERROR("Failed to allocate buffer with size %zu\n", size);
 	return ERR_PTR(ret);
@@ -149,7 +186,7 @@ void xen_drm_front_gem_free_object_unlocked(struct drm_gem_object *gem_obj)
 	struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj);
 
 	if (xen_obj->base.import_attach) {
-		drm_prime_gem_destroy(&xen_obj->base, xen_obj->sgt_imported);
+		drm_prime_gem_destroy(&xen_obj->base, xen_obj->sgt);
 		gem_free_pages_array(xen_obj);
 	} else {
 		if (xen_obj->pages) {
@@ -158,6 +195,13 @@ void xen_drm_front_gem_free_object_unlocked(struct drm_gem_object *gem_obj)
 						      xen_obj->pages);
 				gem_free_pages_array(xen_obj);
 			} else {
+				if (xen_obj->sgt) {
+					dma_unmap_sg(xen_obj->base.dev->dev,
+						     xen_obj->sgt->sgl,
+						     xen_obj->sgt->nents,
+						     DMA_BIDIRECTIONAL);
+					sg_free_table(xen_obj->sgt);
+				}
 				drm_gem_put_pages(&xen_obj->base,
 						  xen_obj->pages, true, false);
 			}
@@ -174,16 +218,6 @@ struct page **xen_drm_front_gem_get_pages(struct drm_gem_object *gem_obj)
 	return xen_obj->pages;
 }
 
-struct sg_table *xen_drm_front_gem_get_sg_table(struct drm_gem_object *gem_obj)
-{
-	struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj);
-
-	if (!xen_obj->pages)
-		return ERR_PTR(-ENOMEM);
-
-	return drm_prime_pages_to_sg(xen_obj->pages, xen_obj->num_pages);
-}
-
 struct drm_gem_object *
 xen_drm_front_gem_import_sg_table(struct drm_device *dev,
 				  struct dma_buf_attachment *attach,
@@ -203,7 +237,7 @@ xen_drm_front_gem_import_sg_table(struct drm_device *dev,
 	if (ret < 0)
 		return ERR_PTR(ret);
 
-	xen_obj->sgt_imported = sgt;
+	xen_obj->sgt = sgt;
 
 	ret = drm_prime_sg_to_page_addr_arrays(sgt, xen_obj->pages, NULL,
 					       xen_obj->num_pages);