Message ID | 20240614133556.11378-3-tzimmermann@suse.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/{ttm,xe}: Improve ttm_bo_vmap() and update xe | expand |
Am 14.06.24 um 15:21 schrieb Thomas Zimmermann: > For each instances of struct iosys_map set up by ttm_bo_vmap(), store > the type of allocation in the instance. Use this information to unmap > the memory in ttm_bo_vunmap(). This change simplifies the unmap code > and puts the complicated logic entirely into the map code. I'm not sure that's a good idea. The mapping information should already be available in the resource and storing it in the iosys_map structures duplicates that information. So we might run into the issue that the resource has changed and so we need a different approach now, but the iosys_map will say that we should unmap things for example. Regards, Christian. > > Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de> > --- > drivers/gpu/drm/ttm/ttm_bo_util.c | 46 +++++++++++++++++++++---------- > 1 file changed, 31 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c > index 0b3f4267130c4..a9df0deff2deb 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_util.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c > @@ -36,6 +36,7 @@ > #include <drm/ttm/ttm_tt.h> > > #include <drm/drm_cache.h> > +#include <drm/drm_device.h> > > struct ttm_transfer_obj { > struct ttm_buffer_object base; > @@ -479,24 +480,29 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map) > > if (mem->bus.is_iomem) { > void __iomem *vaddr_iomem; > + u16 alloc_flags; > > - if (mem->bus.addr) > + if (mem->bus.addr) { > vaddr_iomem = (void __iomem *)mem->bus.addr; > - else if (mem->bus.caching == ttm_write_combined) > - vaddr_iomem = ioremap_wc(mem->bus.offset, > - bo->base.size); > + alloc_flags = ttm_bo_map_premapped; > + } else if (mem->bus.caching == ttm_write_combined) { > + vaddr_iomem = ioremap_wc(mem->bus.offset, bo->base.size); > + alloc_flags = ttm_bo_map_iomap; > #ifdef CONFIG_X86 > - else if (mem->bus.caching == ttm_cached) > - vaddr_iomem = ioremap_cache(mem->bus.offset, > - bo->base.size); > + } else if 
(mem->bus.caching == ttm_cached) { > + vaddr_iomem = ioremap_cache(mem->bus.offset, bo->base.size); > + alloc_flags = ttm_bo_map_iomap; > #endif > - else > + } else { > vaddr_iomem = ioremap(mem->bus.offset, bo->base.size); > + alloc_flags = ttm_bo_map_iomap; > + } > > if (!vaddr_iomem) > return -ENOMEM; > > iosys_map_set_vaddr_iomem(map, vaddr_iomem); > + map->alloc_flags = alloc_flags; > > } else { > struct ttm_operation_ctx ctx = { > @@ -506,6 +512,7 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map) > struct ttm_tt *ttm = bo->ttm; > pgprot_t prot; > void *vaddr; > + u16 alloc_flags; > > ret = ttm_tt_populate(bo->bdev, ttm, &ctx); > if (ret) > @@ -519,8 +526,10 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map) > vaddr = vmap(ttm->pages, ttm->num_pages, 0, prot); > if (!vaddr) > return -ENOMEM; > + alloc_flags = ttm_bo_map_vmap; > > iosys_map_set_vaddr(map, vaddr); > + map->alloc_flags = alloc_flags; > } > > return 0; > @@ -537,20 +546,27 @@ EXPORT_SYMBOL(ttm_bo_vmap); > */ > void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct iosys_map *map) > { > - struct ttm_resource *mem = bo->resource; > - > dma_resv_assert_held(bo->base.resv); > > if (iosys_map_is_null(map)) > return; > > - if (!map->is_iomem) > - vunmap(map->vaddr); > - else if (!mem->bus.addr) > + switch (map->alloc_flags) { > + case ttm_bo_map_iomap: > iounmap(map->vaddr_iomem); > - iosys_map_clear(map); > - > + break; > + case ttm_bo_map_vmap: > + vunmap(map->vaddr); > + break; > + case ttm_bo_map_premapped: > + break; > + default: > + drm_err(bo->base.dev, "Unsupported alloc_flags 0x%x\n", map->alloc_flags); > + return; > + } > ttm_mem_io_free(bo->bdev, bo->resource); > + > + iosys_map_clear(map); > } > EXPORT_SYMBOL(ttm_bo_vunmap); >
Hi Am 14.06.24 um 16:31 schrieb Christian König: > Am 14.06.24 um 15:21 schrieb Thomas Zimmermann: >> For each instances of struct iosys_map set up by ttm_bo_vmap(), store >> the type of allocation in the instance. Use this information to unmap >> the memory in ttm_bo_vunmap(). This change simplifies the unmap code >> and puts the complicated logic entirely into the map code. > > I'm not sure that's a good idea. > > The mapping information should already be available in the resource > and storing it in the iosys_map structures duplicates that information. > > So we might run into the issue that the resource has changed and so we > need a different approach now, but the iosys_map will say that we > should unmap things for example. Patches 1 and 2 are only here to make patch 4 (add the kmap special case) work. How can I distinguish between vmap'ed and kmap'ed memory? It's not clear to me, whether there is a benefit from patch 4. The xe driver makes it sound like that, but the rest of the kernel appears to disagree. Best regards Thomas > > Regards, > Christian. 
> >> >> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de> >> --- >> drivers/gpu/drm/ttm/ttm_bo_util.c | 46 +++++++++++++++++++++---------- >> 1 file changed, 31 insertions(+), 15 deletions(-) >> >> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c >> b/drivers/gpu/drm/ttm/ttm_bo_util.c >> index 0b3f4267130c4..a9df0deff2deb 100644 >> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c >> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c >> @@ -36,6 +36,7 @@ >> #include <drm/ttm/ttm_tt.h> >> #include <drm/drm_cache.h> >> +#include <drm/drm_device.h> >> struct ttm_transfer_obj { >> struct ttm_buffer_object base; >> @@ -479,24 +480,29 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, >> struct iosys_map *map) >> if (mem->bus.is_iomem) { >> void __iomem *vaddr_iomem; >> + u16 alloc_flags; >> - if (mem->bus.addr) >> + if (mem->bus.addr) { >> vaddr_iomem = (void __iomem *)mem->bus.addr; >> - else if (mem->bus.caching == ttm_write_combined) >> - vaddr_iomem = ioremap_wc(mem->bus.offset, >> - bo->base.size); >> + alloc_flags = ttm_bo_map_premapped; >> + } else if (mem->bus.caching == ttm_write_combined) { >> + vaddr_iomem = ioremap_wc(mem->bus.offset, bo->base.size); >> + alloc_flags = ttm_bo_map_iomap; >> #ifdef CONFIG_X86 >> - else if (mem->bus.caching == ttm_cached) >> - vaddr_iomem = ioremap_cache(mem->bus.offset, >> - bo->base.size); >> + } else if (mem->bus.caching == ttm_cached) { >> + vaddr_iomem = ioremap_cache(mem->bus.offset, >> bo->base.size); >> + alloc_flags = ttm_bo_map_iomap; >> #endif >> - else >> + } else { >> vaddr_iomem = ioremap(mem->bus.offset, bo->base.size); >> + alloc_flags = ttm_bo_map_iomap; >> + } >> if (!vaddr_iomem) >> return -ENOMEM; >> iosys_map_set_vaddr_iomem(map, vaddr_iomem); >> + map->alloc_flags = alloc_flags; >> } else { >> struct ttm_operation_ctx ctx = { >> @@ -506,6 +512,7 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, >> struct iosys_map *map) >> struct ttm_tt *ttm = bo->ttm; >> pgprot_t prot; >> void *vaddr; >> + u16 alloc_flags; >> ret = 
ttm_tt_populate(bo->bdev, ttm, &ctx); >> if (ret) >> @@ -519,8 +526,10 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, >> struct iosys_map *map) >> vaddr = vmap(ttm->pages, ttm->num_pages, 0, prot); >> if (!vaddr) >> return -ENOMEM; >> + alloc_flags = ttm_bo_map_vmap; >> iosys_map_set_vaddr(map, vaddr); >> + map->alloc_flags = alloc_flags; >> } >> return 0; >> @@ -537,20 +546,27 @@ EXPORT_SYMBOL(ttm_bo_vmap); >> */ >> void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct iosys_map >> *map) >> { >> - struct ttm_resource *mem = bo->resource; >> - >> dma_resv_assert_held(bo->base.resv); >> if (iosys_map_is_null(map)) >> return; >> - if (!map->is_iomem) >> - vunmap(map->vaddr); >> - else if (!mem->bus.addr) >> + switch (map->alloc_flags) { >> + case ttm_bo_map_iomap: >> iounmap(map->vaddr_iomem); >> - iosys_map_clear(map); >> - >> + break; >> + case ttm_bo_map_vmap: >> + vunmap(map->vaddr); >> + break; >> + case ttm_bo_map_premapped: >> + break; >> + default: >> + drm_err(bo->base.dev, "Unsupported alloc_flags 0x%x\n", >> map->alloc_flags); >> + return; >> + } >> ttm_mem_io_free(bo->bdev, bo->resource); >> + >> + iosys_map_clear(map); >> } >> EXPORT_SYMBOL(ttm_bo_vunmap); >
Hi, Am 17.06.24 um 14:32 schrieb Thomas Zimmermann: > Hi > > Am 14.06.24 um 16:31 schrieb Christian König: >> Am 14.06.24 um 15:21 schrieb Thomas Zimmermann: >>> For each instances of struct iosys_map set up by ttm_bo_vmap(), store >>> the type of allocation in the instance. Use this information to unmap >>> the memory in ttm_bo_vunmap(). This change simplifies the unmap code >>> and puts the complicated logic entirely into the map code. >> >> I'm not sure that's a good idea. >> >> The mapping information should already be available in the resource >> and storing it in the iosys_map structures duplicates that information. >> >> So we might run into the issue that the resource has changed and so >> we need a different approach now, but the iosys_map will say that we >> should unmap things for example. > > Patches 1 and 2 are only here to make patch 4 (add the kmap special > case) work. How can I distinguish between vmap'ed and kmap'ed memory? > It's not clear to me, whether there is a benefit from patch 4. The xe > driver makes it sound like that, but the rest of the kernel appears to > disagree. Yeah, exactly that's the point. The last time we talked about that we came to the conclusion that the kmap approach of mapping only a single page or range of pages isn't that useful in general. The only use case where you actually need this is the ttm_bo_vm_access_kmap() helper and that is static and internal to TTM. So what exactly is the use case xe tries to handle here? Regards, Christian. > > Best regards > Thomas > >> >> Regards, >> Christian. 
>> >>> >>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de> >>> --- >>> drivers/gpu/drm/ttm/ttm_bo_util.c | 46 >>> +++++++++++++++++++++---------- >>> 1 file changed, 31 insertions(+), 15 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c >>> b/drivers/gpu/drm/ttm/ttm_bo_util.c >>> index 0b3f4267130c4..a9df0deff2deb 100644 >>> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c >>> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c >>> @@ -36,6 +36,7 @@ >>> #include <drm/ttm/ttm_tt.h> >>> #include <drm/drm_cache.h> >>> +#include <drm/drm_device.h> >>> struct ttm_transfer_obj { >>> struct ttm_buffer_object base; >>> @@ -479,24 +480,29 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, >>> struct iosys_map *map) >>> if (mem->bus.is_iomem) { >>> void __iomem *vaddr_iomem; >>> + u16 alloc_flags; >>> - if (mem->bus.addr) >>> + if (mem->bus.addr) { >>> vaddr_iomem = (void __iomem *)mem->bus.addr; >>> - else if (mem->bus.caching == ttm_write_combined) >>> - vaddr_iomem = ioremap_wc(mem->bus.offset, >>> - bo->base.size); >>> + alloc_flags = ttm_bo_map_premapped; >>> + } else if (mem->bus.caching == ttm_write_combined) { >>> + vaddr_iomem = ioremap_wc(mem->bus.offset, bo->base.size); >>> + alloc_flags = ttm_bo_map_iomap; >>> #ifdef CONFIG_X86 >>> - else if (mem->bus.caching == ttm_cached) >>> - vaddr_iomem = ioremap_cache(mem->bus.offset, >>> - bo->base.size); >>> + } else if (mem->bus.caching == ttm_cached) { >>> + vaddr_iomem = ioremap_cache(mem->bus.offset, >>> bo->base.size); >>> + alloc_flags = ttm_bo_map_iomap; >>> #endif >>> - else >>> + } else { >>> vaddr_iomem = ioremap(mem->bus.offset, bo->base.size); >>> + alloc_flags = ttm_bo_map_iomap; >>> + } >>> if (!vaddr_iomem) >>> return -ENOMEM; >>> iosys_map_set_vaddr_iomem(map, vaddr_iomem); >>> + map->alloc_flags = alloc_flags; >>> } else { >>> struct ttm_operation_ctx ctx = { >>> @@ -506,6 +512,7 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, >>> struct iosys_map *map) >>> struct ttm_tt *ttm = bo->ttm; >>> pgprot_t 
prot; >>> void *vaddr; >>> + u16 alloc_flags; >>> ret = ttm_tt_populate(bo->bdev, ttm, &ctx); >>> if (ret) >>> @@ -519,8 +526,10 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, >>> struct iosys_map *map) >>> vaddr = vmap(ttm->pages, ttm->num_pages, 0, prot); >>> if (!vaddr) >>> return -ENOMEM; >>> + alloc_flags = ttm_bo_map_vmap; >>> iosys_map_set_vaddr(map, vaddr); >>> + map->alloc_flags = alloc_flags; >>> } >>> return 0; >>> @@ -537,20 +546,27 @@ EXPORT_SYMBOL(ttm_bo_vmap); >>> */ >>> void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct iosys_map >>> *map) >>> { >>> - struct ttm_resource *mem = bo->resource; >>> - >>> dma_resv_assert_held(bo->base.resv); >>> if (iosys_map_is_null(map)) >>> return; >>> - if (!map->is_iomem) >>> - vunmap(map->vaddr); >>> - else if (!mem->bus.addr) >>> + switch (map->alloc_flags) { >>> + case ttm_bo_map_iomap: >>> iounmap(map->vaddr_iomem); >>> - iosys_map_clear(map); >>> - >>> + break; >>> + case ttm_bo_map_vmap: >>> + vunmap(map->vaddr); >>> + break; >>> + case ttm_bo_map_premapped: >>> + break; >>> + default: >>> + drm_err(bo->base.dev, "Unsupported alloc_flags 0x%x\n", >>> map->alloc_flags); >>> + return; >>> + } >>> ttm_mem_io_free(bo->bdev, bo->resource); >>> + >>> + iosys_map_clear(map); >>> } >>> EXPORT_SYMBOL(ttm_bo_vunmap); >> >
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 0b3f4267130c4..a9df0deff2deb 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -36,6 +36,7 @@ #include <drm/ttm/ttm_tt.h> #include <drm/drm_cache.h> +#include <drm/drm_device.h> struct ttm_transfer_obj { struct ttm_buffer_object base; @@ -479,24 +480,29 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map) if (mem->bus.is_iomem) { void __iomem *vaddr_iomem; + u16 alloc_flags; - if (mem->bus.addr) + if (mem->bus.addr) { vaddr_iomem = (void __iomem *)mem->bus.addr; - else if (mem->bus.caching == ttm_write_combined) - vaddr_iomem = ioremap_wc(mem->bus.offset, - bo->base.size); + alloc_flags = ttm_bo_map_premapped; + } else if (mem->bus.caching == ttm_write_combined) { + vaddr_iomem = ioremap_wc(mem->bus.offset, bo->base.size); + alloc_flags = ttm_bo_map_iomap; #ifdef CONFIG_X86 - else if (mem->bus.caching == ttm_cached) - vaddr_iomem = ioremap_cache(mem->bus.offset, - bo->base.size); + } else if (mem->bus.caching == ttm_cached) { + vaddr_iomem = ioremap_cache(mem->bus.offset, bo->base.size); + alloc_flags = ttm_bo_map_iomap; #endif - else + } else { vaddr_iomem = ioremap(mem->bus.offset, bo->base.size); + alloc_flags = ttm_bo_map_iomap; + } if (!vaddr_iomem) return -ENOMEM; iosys_map_set_vaddr_iomem(map, vaddr_iomem); + map->alloc_flags = alloc_flags; } else { struct ttm_operation_ctx ctx = { @@ -506,6 +512,7 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map) struct ttm_tt *ttm = bo->ttm; pgprot_t prot; void *vaddr; + u16 alloc_flags; ret = ttm_tt_populate(bo->bdev, ttm, &ctx); if (ret) @@ -519,8 +526,10 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map) vaddr = vmap(ttm->pages, ttm->num_pages, 0, prot); if (!vaddr) return -ENOMEM; + alloc_flags = ttm_bo_map_vmap; iosys_map_set_vaddr(map, vaddr); + map->alloc_flags = alloc_flags; } return 0; @@ -537,20 +546,27 @@ 
EXPORT_SYMBOL(ttm_bo_vmap); */ void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct iosys_map *map) { - struct ttm_resource *mem = bo->resource; - dma_resv_assert_held(bo->base.resv); if (iosys_map_is_null(map)) return; - if (!map->is_iomem) - vunmap(map->vaddr); - else if (!mem->bus.addr) + switch (map->alloc_flags) { + case ttm_bo_map_iomap: iounmap(map->vaddr_iomem); - iosys_map_clear(map); - + break; + case ttm_bo_map_vmap: + vunmap(map->vaddr); + break; + case ttm_bo_map_premapped: + break; + default: + drm_err(bo->base.dev, "Unsupported alloc_flags 0x%x\n", map->alloc_flags); + return; + } ttm_mem_io_free(bo->bdev, bo->resource); + + iosys_map_clear(map); } EXPORT_SYMBOL(ttm_bo_vunmap);
For each instance of struct iosys_map set up by ttm_bo_vmap(), store the type of allocation in the instance. Use this information to unmap the memory in ttm_bo_vunmap(). This change simplifies the unmap code and puts the complicated logic entirely into the map code. Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de> --- drivers/gpu/drm/ttm/ttm_bo_util.c | 46 +++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 15 deletions(-)