diff mbox

[2/2] drm/amdgpu: Implement ttm_bo_driver.access_vram callback

Message ID 1499980105-7721-2-git-send-email-Felix.Kuehling@amd.com (mailing list archive)
State New, archived
Headers show

Commit Message

Felix Kuehling July 13, 2017, 9:08 p.m. UTC
Allow gdb to read and write the contents of user mode mapped VRAM BOs.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  2 ++
 2 files changed, 61 insertions(+)

Comments

Felix Kuehling July 13, 2017, 9:23 p.m. UTC | #1
On 17-07-13 05:08 PM, Felix Kuehling wrote:
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -78,6 +78,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>  			struct dma_fence **fence);
>  
>  int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
> +int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma,
> +		   struct ttm_bo_device *bdev);
>  bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>  int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem);
>  int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
Oops, this is a remnant from the old version that hacked the ttm_vm_ops
in amdgpu. I'll remove this before I submit.
Michel Dänzer July 14, 2017, 3:26 a.m. UTC | #2
On 14/07/17 06:23 AM, Felix Kuehling wrote:
> On 17-07-13 05:08 PM, Felix Kuehling wrote:
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -78,6 +78,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>  			struct dma_fence **fence);
>>  
>>  int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
>> +int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma,
>> +		   struct ttm_bo_device *bdev);
>>  bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>>  int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem);
>>  int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
> Oops, this is a remnant from the old version that hacked the ttm_vm_ops
> in amdgpu. I'll remove this before I submit.

With that fixed (and possibly excluding driver-private memory types if
necessary, per discussion of patch 1),

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Christian König July 14, 2017, 10:08 a.m. UTC | #3
Am 13.07.2017 um 23:08 schrieb Felix Kuehling:
> Allows gdb to access contents of user mode mapped VRAM BOs.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 +++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  2 ++
>   2 files changed, 61 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index ff5614b..d65551d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1115,6 +1115,64 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>   	return ttm_bo_eviction_valuable(bo, place);
>   }
>   
> +static int amdgpu_ttm_access_vram(struct ttm_buffer_object *bo,
> +				  unsigned long offset,
> +				  void *buf, int len, int write)
> +{
> +	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
> +	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> +	struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
> +	uint32_t value = 0;
> +	int result = 0;
> +	uint64_t pos;
> +	unsigned long flags;
> +
> +	while (offset >= (nodes->size << PAGE_SHIFT)) {
> +		offset -= nodes->size << PAGE_SHIFT;
> +		++nodes;
> +	}
> +	pos = (nodes->start << PAGE_SHIFT) + offset;

This silently assumes that a read would never cross a node boundary, 
doesn't it?

Christian.

> +
> +	while (len && pos < adev->mc.mc_vram_size) {
> +		uint64_t aligned_pos = pos & ~(uint64_t)3;
> +		uint32_t bytes = 4 - (pos & 3);
> +		uint32_t shift = (pos & 3) * 8;
> +		uint32_t mask = 0xffffffff << shift;
> +
> +		if (len < bytes) {
> +			mask &= 0xffffffff >> (bytes - len) * 8;
> +			bytes = len;
> +		}
> +
> +		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> +		WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
> +		WREG32(mmMM_INDEX_HI, aligned_pos >> 31);
> +		if (!write || mask != 0xffffffff)
> +			value = RREG32(mmMM_DATA);
> +		if (write) {
> +			value &= ~mask;
> +			value |= (*(uint32_t *)buf << shift) & mask;
> +			WREG32(mmMM_DATA, value);
> +		}
> +		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> +		if (!write) {
> +			value = (value & mask) >> shift;
> +			memcpy(buf, &value, bytes);
> +		}
> +
> +		result += bytes;
> +		buf = (uint8_t *)buf + bytes;
> +		pos += bytes;
> +		len -= bytes;
> +		if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
> +			++nodes;
> +			pos = (nodes->start << PAGE_SHIFT);
> +		}
> +	}
> +
> +	return result;
> +}
> +
>   static struct ttm_bo_driver amdgpu_bo_driver = {
>   	.ttm_tt_create = &amdgpu_ttm_tt_create,
>   	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
> @@ -1130,6 +1188,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>   	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
>   	.io_mem_free = &amdgpu_ttm_io_mem_free,
>   	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
> +	.access_vram = &amdgpu_ttm_access_vram
>   };
>   
>   int amdgpu_ttm_init(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index f137c24..a22e430 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -78,6 +78,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>   			struct dma_fence **fence);
>   
>   int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
> +int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma,
> +		   struct ttm_bo_device *bdev);
>   bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>   int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem);
>   int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
Felix Kuehling July 14, 2017, 7:44 p.m. UTC | #4
On 17-07-14 06:08 AM, Christian König wrote:
> Am 13.07.2017 um 23:08 schrieb Felix Kuehling:
>> Allows gdb to access contents of user mode mapped VRAM BOs.
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59
>> +++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  2 ++
>>   2 files changed, 61 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index ff5614b..d65551d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -1115,6 +1115,64 @@ static bool
>> amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>>       return ttm_bo_eviction_valuable(bo, place);
>>   }
>>   +static int amdgpu_ttm_access_vram(struct ttm_buffer_object *bo,
>> +                  unsigned long offset,
>> +                  void *buf, int len, int write)
>> +{
>> +    struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>> +    struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
>> +    uint32_t value = 0;
>> +    int result = 0;
>> +    uint64_t pos;
>> +    unsigned long flags;
>> +
>> +    while (offset >= (nodes->size << PAGE_SHIFT)) {
>> +        offset -= nodes->size << PAGE_SHIFT;
>> +        ++nodes;
>> +    }
>> +    pos = (nodes->start << PAGE_SHIFT) + offset;
>
> This silently assumes that a read would never cross a node boundary,
> doesn't it?

It doesn't. See below ...

>
> Christian.
>
>> +
>> +    while (len && pos < adev->mc.mc_vram_size) {
>> +        uint64_t aligned_pos = pos & ~(uint64_t)3;
>> +        uint32_t bytes = 4 - (pos & 3);
>> +        uint32_t shift = (pos & 3) * 8;
>> +        uint32_t mask = 0xffffffff << shift;
>> +
>> +        if (len < bytes) {
>> +            mask &= 0xffffffff >> (bytes - len) * 8;
>> +            bytes = len;
>> +        }
>> +
>> +        spin_lock_irqsave(&adev->mmio_idx_lock, flags);
>> +        WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
>> +        WREG32(mmMM_INDEX_HI, aligned_pos >> 31);
>> +        if (!write || mask != 0xffffffff)
>> +            value = RREG32(mmMM_DATA);
>> +        if (write) {
>> +            value &= ~mask;
>> +            value |= (*(uint32_t *)buf << shift) & mask;
>> +            WREG32(mmMM_DATA, value);
>> +        }
>> +        spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
>> +        if (!write) {
>> +            value = (value & mask) >> shift;
>> +            memcpy(buf, &value, bytes);
>> +        }
>> +
>> +        result += bytes;
>> +        buf = (uint8_t *)buf + bytes;
>> +        pos += bytes;
>> +        len -= bytes;
>> +        if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
>> +            ++nodes;
>> +            pos = (nodes->start << PAGE_SHIFT);

... Here I handle crossing a node boundary. Yes, I actually added this
case to my kfdtest unit test and made sure it works, along with all odd
alignments that the code above handles.

Regards,
  Felix

>> +        }
>> +    }
>> +
>> +    return result;
>> +}
>> +
>>   static struct ttm_bo_driver amdgpu_bo_driver = {
>>       .ttm_tt_create = &amdgpu_ttm_tt_create,
>>       .ttm_tt_populate = &amdgpu_ttm_tt_populate,
>> @@ -1130,6 +1188,7 @@ static bool
>> amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>>       .io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
>>       .io_mem_free = &amdgpu_ttm_io_mem_free,
>>       .io_mem_pfn = amdgpu_ttm_io_mem_pfn,
>> +    .access_vram = &amdgpu_ttm_access_vram
>>   };
>>     int amdgpu_ttm_init(struct amdgpu_device *adev)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index f137c24..a22e430 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -78,6 +78,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>               struct dma_fence **fence);
>>     int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
>> +int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma,
>> +           struct ttm_bo_device *bdev);
>>   bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>>   int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct
>> ttm_mem_reg *bo_mem);
>>   int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
>
>
Christian König July 17, 2017, 5:04 p.m. UTC | #5
Am 14.07.2017 um 21:44 schrieb Felix Kuehling:
> On 17-07-14 06:08 AM, Christian König wrote:
>> Am 13.07.2017 um 23:08 schrieb Felix Kuehling:
>> [SNIP]
>>> +        result += bytes;
>>> +        buf = (uint8_t *)buf + bytes;
>>> +        pos += bytes;
>>> +        len -= bytes;
>>> +        if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
>>> +            ++nodes;
>>> +            pos = (nodes->start << PAGE_SHIFT);
> ... Here I handle crossing a node boundary. Yes, I actually added this
> case to my kfdtest unit test and made sure it works, along with all odd
> alignments that the code above handles.

Ah, I see. Sorry, I totally missed that chunk. In this case the patch is
Acked-by: Christian König <christian.koenig@amd.com>

Regards,
Christian.

>
> Regards,
>    Felix
diff mbox

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ff5614b..d65551d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1115,6 +1115,64 @@  static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	return ttm_bo_eviction_valuable(bo, place);
 }
 
+static int amdgpu_ttm_access_vram(struct ttm_buffer_object *bo,
+				  unsigned long offset,
+				  void *buf, int len, int write)
+{
+	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+	struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+	uint32_t value = 0;
+	int result = 0;
+	uint64_t pos;
+	unsigned long flags;
+
+	while (offset >= (nodes->size << PAGE_SHIFT)) {
+		offset -= nodes->size << PAGE_SHIFT;
+		++nodes;
+	}
+	pos = (nodes->start << PAGE_SHIFT) + offset;
+
+	while (len && pos < adev->mc.mc_vram_size) {
+		uint64_t aligned_pos = pos & ~(uint64_t)3;
+		uint32_t bytes = 4 - (pos & 3);
+		uint32_t shift = (pos & 3) * 8;
+		uint32_t mask = 0xffffffff << shift;
+
+		if (len < bytes) {
+			mask &= 0xffffffff >> (bytes - len) * 8;
+			bytes = len;
+		}
+
+		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+		WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
+		WREG32(mmMM_INDEX_HI, aligned_pos >> 31);
+		if (!write || mask != 0xffffffff)
+			value = RREG32(mmMM_DATA);
+		if (write) {
+			value &= ~mask;
+			value |= (*(uint32_t *)buf << shift) & mask;
+			WREG32(mmMM_DATA, value);
+		}
+		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+		if (!write) {
+			value = (value & mask) >> shift;
+			memcpy(buf, &value, bytes);
+		}
+
+		result += bytes;
+		buf = (uint8_t *)buf + bytes;
+		pos += bytes;
+		len -= bytes;
+		if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
+			++nodes;
+			pos = (nodes->start << PAGE_SHIFT);
+		}
+	}
+
+	return result;
+}
+
 static struct ttm_bo_driver amdgpu_bo_driver = {
 	.ttm_tt_create = &amdgpu_ttm_tt_create,
 	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
@@ -1130,6 +1188,7 @@  static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
 	.io_mem_free = &amdgpu_ttm_io_mem_free,
 	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
+	.access_vram = &amdgpu_ttm_access_vram
 };
 
 int amdgpu_ttm_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index f137c24..a22e430 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -78,6 +78,8 @@  int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			struct dma_fence **fence);
 
 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
+int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma,
+		   struct ttm_bo_device *bdev);
 bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
 int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem);
 int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);