Message ID | 1499980105-7721-2-git-send-email-Felix.Kuehling@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 17-07-13 05:08 PM, Felix Kuehling wrote:
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -78,6 +78,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>  			struct dma_fence **fence);
>
>  int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
> +int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma,
> +		   struct ttm_bo_device *bdev);
>  bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>  int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem);
>  int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);

Oops, this is a remnant from the old version that hacked the ttm_vm_ops in amdgpu. I'll remove this before I submit.
On 14/07/17 06:23 AM, Felix Kuehling wrote:
> On 17-07-13 05:08 PM, Felix Kuehling wrote:
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -78,6 +78,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>  			struct dma_fence **fence);
>>
>>  int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
>> +int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma,
>> +		   struct ttm_bo_device *bdev);
>>  bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>>  int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem);
>>  int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
> Oops, this is a remnant from the old version that hacked the ttm_vm_ops
> in amdgpu. I'll remove this before I submit.

With that fixed (and possibly excluding driver-private memory types if necessary, per discussion of patch 1),

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Am 13.07.2017 um 23:08 schrieb Felix Kuehling: > Allows gdb to access contents of user mode mapped VRAM BOs. > > Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 +++++++++++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 ++ > 2 files changed, 61 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index ff5614b..d65551d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -1115,6 +1115,64 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, > return ttm_bo_eviction_valuable(bo, place); > } > > +static int amdgpu_ttm_access_vram(struct ttm_buffer_object *bo, > + unsigned long offset, > + void *buf, int len, int write) > +{ > + struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); > + struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); > + struct drm_mm_node *nodes = abo->tbo.mem.mm_node; > + uint32_t value = 0; > + int result = 0; > + uint64_t pos; > + unsigned long flags; > + > + while (offset >= (nodes->size << PAGE_SHIFT)) { > + offset -= nodes->size << PAGE_SHIFT; > + ++nodes; > + } > + pos = (nodes->start << PAGE_SHIFT) + offset; This silently assumes that a read would never cross a node boundary, doesn't it? Christian. 
> + > + while (len && pos < adev->mc.mc_vram_size) { > + uint64_t aligned_pos = pos & ~(uint64_t)3; > + uint32_t bytes = 4 - (pos & 3); > + uint32_t shift = (pos & 3) * 8; > + uint32_t mask = 0xffffffff << shift; > + > + if (len < bytes) { > + mask &= 0xffffffff >> (bytes - len) * 8; > + bytes = len; > + } > + > + spin_lock_irqsave(&adev->mmio_idx_lock, flags); > + WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); > + WREG32(mmMM_INDEX_HI, aligned_pos >> 31); > + if (!write || mask != 0xffffffff) > + value = RREG32(mmMM_DATA); > + if (write) { > + value &= ~mask; > + value |= (*(uint32_t *)buf << shift) & mask; > + WREG32(mmMM_DATA, value); > + } > + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); > + if (!write) { > + value = (value & mask) >> shift; > + memcpy(buf, &value, bytes); > + } > + > + result += bytes; > + buf = (uint8_t *)buf + bytes; > + pos += bytes; > + len -= bytes; > + if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) { > + ++nodes; > + pos = (nodes->start << PAGE_SHIFT); > + } > + } > + > + return result; > +} > + > static struct ttm_bo_driver amdgpu_bo_driver = { > .ttm_tt_create = &amdgpu_ttm_tt_create, > .ttm_tt_populate = &amdgpu_ttm_tt_populate, > @@ -1130,6 +1188,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, > .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, > .io_mem_free = &amdgpu_ttm_io_mem_free, > .io_mem_pfn = amdgpu_ttm_io_mem_pfn, > + .access_vram = &amdgpu_ttm_access_vram > }; > > int amdgpu_ttm_init(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > index f137c24..a22e430 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > @@ -78,6 +78,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, > struct dma_fence **fence); > > int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); > +int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma, > + struct 
ttm_bo_device *bdev); > bool amdgpu_ttm_is_bound(struct ttm_tt *ttm); > int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem); > int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
On 17-07-14 06:08 AM, Christian König wrote: > Am 13.07.2017 um 23:08 schrieb Felix Kuehling: >> Allows gdb to access contents of user mode mapped VRAM BOs. >> >> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 >> +++++++++++++++++++++++++++++++++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 ++ >> 2 files changed, 61 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >> index ff5614b..d65551d 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >> @@ -1115,6 +1115,64 @@ static bool >> amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, >> return ttm_bo_eviction_valuable(bo, place); >> } >> +static int amdgpu_ttm_access_vram(struct ttm_buffer_object *bo, >> + unsigned long offset, >> + void *buf, int len, int write) >> +{ >> + struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); >> + struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); >> + struct drm_mm_node *nodes = abo->tbo.mem.mm_node; >> + uint32_t value = 0; >> + int result = 0; >> + uint64_t pos; >> + unsigned long flags; >> + >> + while (offset >= (nodes->size << PAGE_SHIFT)) { >> + offset -= nodes->size << PAGE_SHIFT; >> + ++nodes; >> + } >> + pos = (nodes->start << PAGE_SHIFT) + offset; > > This silently assumes that a read would never cross a node boundary, > doesn't it? It doesn't. See below ... > > Christian. 
> >> + >> + while (len && pos < adev->mc.mc_vram_size) { >> + uint64_t aligned_pos = pos & ~(uint64_t)3; >> + uint32_t bytes = 4 - (pos & 3); >> + uint32_t shift = (pos & 3) * 8; >> + uint32_t mask = 0xffffffff << shift; >> + >> + if (len < bytes) { >> + mask &= 0xffffffff >> (bytes - len) * 8; >> + bytes = len; >> + } >> + >> + spin_lock_irqsave(&adev->mmio_idx_lock, flags); >> + WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); >> + WREG32(mmMM_INDEX_HI, aligned_pos >> 31); >> + if (!write || mask != 0xffffffff) >> + value = RREG32(mmMM_DATA); >> + if (write) { >> + value &= ~mask; >> + value |= (*(uint32_t *)buf << shift) & mask; >> + WREG32(mmMM_DATA, value); >> + } >> + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); >> + if (!write) { >> + value = (value & mask) >> shift; >> + memcpy(buf, &value, bytes); >> + } >> + >> + result += bytes; >> + buf = (uint8_t *)buf + bytes; >> + pos += bytes; >> + len -= bytes; >> + if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) { >> + ++nodes; >> + pos = (nodes->start << PAGE_SHIFT); ... Here I handle crossing a node boundary. Yes, I actually added this case to my kfdtest unit test and made sure it works, along with all odd alignments that the code above handles. 
Regards, Felix >> + } >> + } >> + >> + return result; >> +} >> + >> static struct ttm_bo_driver amdgpu_bo_driver = { >> .ttm_tt_create = &amdgpu_ttm_tt_create, >> .ttm_tt_populate = &amdgpu_ttm_tt_populate, >> @@ -1130,6 +1188,7 @@ static bool >> amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, >> .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, >> .io_mem_free = &amdgpu_ttm_io_mem_free, >> .io_mem_pfn = amdgpu_ttm_io_mem_pfn, >> + .access_vram = &amdgpu_ttm_access_vram >> }; >> int amdgpu_ttm_init(struct amdgpu_device *adev) >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >> index f137c24..a22e430 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >> @@ -78,6 +78,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, >> struct dma_fence **fence); >> int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); >> +int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma, >> + struct ttm_bo_device *bdev); >> bool amdgpu_ttm_is_bound(struct ttm_tt *ttm); >> int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct >> ttm_mem_reg *bo_mem); >> int amdgpu_ttm_recover_gart(struct amdgpu_device *adev); > >
Am 14.07.2017 um 21:44 schrieb Felix Kuehling:
> On 17-07-14 06:08 AM, Christian König wrote:
>> Am 13.07.2017 um 23:08 schrieb Felix Kuehling:
>> [SNIP]
>>> +        result += bytes;
>>> +        buf = (uint8_t *)buf + bytes;
>>> +        pos += bytes;
>>> +        len -= bytes;
>>> +        if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
>>> +            ++nodes;
>>> +            pos = (nodes->start << PAGE_SHIFT);
> ... Here I handle crossing a node boundary. Yes, I actually added this
> case to my kfdtest unit test and made sure it works, along with all odd
> alignments that the code above handles.

Ah, I see. Sorry totally missed that chunk.

In this case the patch is Acked-by: Christian König <christian.koenig@amd.com>

Regards,
Christian.

>
> Regards,
> Felix
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index ff5614b..d65551d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1115,6 +1115,64 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, return ttm_bo_eviction_valuable(bo, place); } +static int amdgpu_ttm_access_vram(struct ttm_buffer_object *bo, + unsigned long offset, + void *buf, int len, int write) +{ + struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); + struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); + struct drm_mm_node *nodes = abo->tbo.mem.mm_node; + uint32_t value = 0; + int result = 0; + uint64_t pos; + unsigned long flags; + + while (offset >= (nodes->size << PAGE_SHIFT)) { + offset -= nodes->size << PAGE_SHIFT; + ++nodes; + } + pos = (nodes->start << PAGE_SHIFT) + offset; + + while (len && pos < adev->mc.mc_vram_size) { + uint64_t aligned_pos = pos & ~(uint64_t)3; + uint32_t bytes = 4 - (pos & 3); + uint32_t shift = (pos & 3) * 8; + uint32_t mask = 0xffffffff << shift; + + if (len < bytes) { + mask &= 0xffffffff >> (bytes - len) * 8; + bytes = len; + } + + spin_lock_irqsave(&adev->mmio_idx_lock, flags); + WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); + WREG32(mmMM_INDEX_HI, aligned_pos >> 31); + if (!write || mask != 0xffffffff) + value = RREG32(mmMM_DATA); + if (write) { + value &= ~mask; + value |= (*(uint32_t *)buf << shift) & mask; + WREG32(mmMM_DATA, value); + } + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + if (!write) { + value = (value & mask) >> shift; + memcpy(buf, &value, bytes); + } + + result += bytes; + buf = (uint8_t *)buf + bytes; + pos += bytes; + len -= bytes; + if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) { + ++nodes; + pos = (nodes->start << PAGE_SHIFT); + } + } + + return result; +} + static struct ttm_bo_driver amdgpu_bo_driver = { .ttm_tt_create = &amdgpu_ttm_tt_create, .ttm_tt_populate = 
&amdgpu_ttm_tt_populate, @@ -1130,6 +1188,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, .io_mem_free = &amdgpu_ttm_io_mem_free, .io_mem_pfn = amdgpu_ttm_io_mem_pfn, + .access_vram = &amdgpu_ttm_access_vram }; int amdgpu_ttm_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index f137c24..a22e430 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -78,6 +78,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, struct dma_fence **fence); int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); +int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma, + struct ttm_bo_device *bdev); bool amdgpu_ttm_is_bound(struct ttm_tt *ttm); int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem); int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
Allows gdb to access contents of user mode mapped VRAM BOs.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 ++
 2 files changed, 61 insertions(+)