
[5/6] drm/amdgpu: save the PD addr before scheduling the job

Message ID 1465991045-2328-5-git-send-email-deathsimple@vodafone.de (mailing list archive)
State New, archived

Commit Message

Christian König June 15, 2016, 11:44 a.m. UTC
From: Christian König <christian.koenig@amd.com>

When we pipeline evictions the page directory could already be
moving somewhere else when grab_id is called.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++----
 2 files changed, 4 insertions(+), 4 deletions(-)
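
The gist of the change, condensed from the diff below with descriptive
comments added (a sketch of the two key lines, not a standalone hunk):

	/* amdgpu_cs_ib_vm_chunk(), at command-submission time: the page
	 * directory is validated and reserved here, so this offset is the
	 * one the job has to be flushed with. */
	p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);

	/* amdgpu_vm_grab_id(), at scheduling time: consume the snapshot
	 * carried in the job instead of re-reading the BO offset, which a
	 * pipelined eviction may already have re-planned by now. */
	id->pd_gpu_addr = *vm_pd_addr;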

Comments

Chunming Zhou June 16, 2016, 8:33 a.m. UTC | #1
On June 15, 2016 19:44, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> When we pipeline evictions the page directory could already be
> moving somewhere else when grab_id is called.
Isn't the PD BO protected by the job fence?
I think before the job fence is signalled the PD BO is safe; there
shouldn't be a chance to evict it.

Regards,
David Zhou
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++----
>   2 files changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index a3d7d13..850c4dd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -661,6 +661,8 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
>   		}
>   	}
>   
> +	p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
> +
>   	r = amdgpu_bo_vm_update_pte(p, vm);
>   	if (!r)
>   		amdgpu_cs_sync_rings(p);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index d3e0576..82efb40 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -177,7 +177,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>   		      struct amdgpu_sync *sync, struct fence *fence,
>   		      unsigned *vm_id, uint64_t *vm_pd_addr)
>   {
> -	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
>   	struct amdgpu_device *adev = ring->adev;
>   	struct fence *updates = sync->last_vm_update;
>   	struct amdgpu_vm_id *id, *idle;
> @@ -250,7 +249,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>   		if (atomic64_read(&id->owner) != vm->client_id)
>   			continue;
>   
> -		if (pd_addr != id->pd_gpu_addr)
> +		if (*vm_pd_addr != id->pd_gpu_addr)
>   			continue;
>   
>   		if (!same_ring &&
> @@ -298,14 +297,13 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>   	fence_put(id->flushed_updates);
>   	id->flushed_updates = fence_get(updates);
>   
> -	id->pd_gpu_addr = pd_addr;
> +	id->pd_gpu_addr = *vm_pd_addr;
>   
>   	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
>   	atomic64_set(&id->owner, vm->client_id);
>   	vm->ids[ring->idx] = id;
>   
>   	*vm_id = id - adev->vm_manager.ids;
> -	*vm_pd_addr = pd_addr;
>   	trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
>   
>   error:
Christian König June 16, 2016, 9:52 a.m. UTC | #2
On June 16, 2016 10:33, zhoucm1 wrote:
>
>
> On June 15, 2016 19:44, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> When we pipeline evictions the page directory could already be
>> moving somewhere else when grab_id is called.
> Isn't the PD BO protected by the job fence?
> I think before the job fence is signalled the PD BO is safe; there
> shouldn't be a chance to evict it.

The crux here is that we start to pipeline BO evictions (we plan them 
but don't execute them immediately).

E.g. the eviction won't happen before the protecting fence is signaled, 
but we have it planned and so the address returned by 
amdgpu_bo_gpu_offset() is already the new one.
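
Roughly, the ordering is (an illustrative sketch of the race, not driver
code; amdgpu_bo_gpu_offset() at this point simply returns the BO's cached
TTM offset):

	/*
	 * 1. CS ioctl: the page directory is validated and reserved for
	 *    the submission.
	 * 2. An eviction is planned: TTM already rewrites the cached
	 *    offset to the destination, while the copy itself is only
	 *    queued behind the protecting fence.
	 * 3. The job is scheduled: amdgpu_vm_grab_id() runs and, without
	 *    this patch, only reads the offset now:
	 */
	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
	/*
	 * ...which already points at the destination, even though the page
	 * directory still sits at the old location until the fence signals.
	 * Snapshotting the address at CS time avoids exactly that.
	 */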

Regards,
Christian.

>
> Regards,
> David Zhou
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++----
>>   2 files changed, 4 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index a3d7d13..850c4dd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -661,6 +661,8 @@ static int amdgpu_cs_ib_vm_chunk(struct 
>> amdgpu_device *adev,
>>           }
>>       }
>>   +    p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
>> +
>>       r = amdgpu_bo_vm_update_pte(p, vm);
>>       if (!r)
>>           amdgpu_cs_sync_rings(p);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index d3e0576..82efb40 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -177,7 +177,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, 
>> struct amdgpu_ring *ring,
>>                 struct amdgpu_sync *sync, struct fence *fence,
>>                 unsigned *vm_id, uint64_t *vm_pd_addr)
>>   {
>> -    uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
>>       struct amdgpu_device *adev = ring->adev;
>>       struct fence *updates = sync->last_vm_update;
>>       struct amdgpu_vm_id *id, *idle;
>> @@ -250,7 +249,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, 
>> struct amdgpu_ring *ring,
>>           if (atomic64_read(&id->owner) != vm->client_id)
>>               continue;
>>   -        if (pd_addr != id->pd_gpu_addr)
>> +        if (*vm_pd_addr != id->pd_gpu_addr)
>>               continue;
>>             if (!same_ring &&
>> @@ -298,14 +297,13 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, 
>> struct amdgpu_ring *ring,
>>       fence_put(id->flushed_updates);
>>       id->flushed_updates = fence_get(updates);
>>   -    id->pd_gpu_addr = pd_addr;
>> +    id->pd_gpu_addr = *vm_pd_addr;
>>         list_move_tail(&id->list, &adev->vm_manager.ids_lru);
>>       atomic64_set(&id->owner, vm->client_id);
>>       vm->ids[ring->idx] = id;
>>         *vm_id = id - adev->vm_manager.ids;
>> -    *vm_pd_addr = pd_addr;
>>       trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
>>     error:
>
Chunming Zhou June 16, 2016, 9:54 a.m. UTC | #3
On June 16, 2016 17:52, Christian König wrote:
> On June 16, 2016 10:33, zhoucm1 wrote:
>>
>>
>> On June 15, 2016 19:44, Christian König wrote:
>>> From: Christian König <christian.koenig@amd.com>
>>>
>>> When we pipeline evictions the page directory could already be
>>> moving somewhere else when grab_id is called.
>> Isn't the PD BO protected by the job fence?
>> I think before the job fence is signalled the PD BO is safe; there
>> shouldn't be a chance to evict it.
>
> The crux here is that we start to pipeline BO evictions (we plan them 
> but don't execute them immediately).
>
> E.g. the eviction won't happen before the protecting fence is 
> signaled, but we have it planned and so the address returned by 
> amdgpu_bo_gpu_offset() is already the new one.
Thanks for mentioning it; I see this code in ttm_bo_handle_move_mem:
     if (bo->mem.mm_node) {
         bo->offset = (bo->mem.start << PAGE_SHIFT) +
             bdev->man[bo->mem.mem_type].gpu_offset;
         bo->cur_placement = bo->mem.placement;
     } else
         bo->offset = 0;

It seems better to update the offset only after the moving fence is
signalled; should we add a moving-fence callback?
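
For illustration, a rough sketch of that idea (everything below is
invented for the sake of discussion: the structure and function names are
made up, fence_add_callback() is the existing fence API, and the follow-up
explains why the driver doesn't go this way):

	struct pd_move_cb {
		struct fence_cb cb;
		struct ttm_buffer_object *bo;
		uint64_t new_offset;
	};

	static void pd_move_signalled(struct fence *f, struct fence_cb *cb)
	{
		struct pd_move_cb *work = container_of(cb, struct pd_move_cb, cb);

		/* Publish the new GPU offset only once the copy really ran. */
		work->bo->offset = work->new_offset;
		kfree(work);
	}

	/* Called where the move is planned, instead of updating bo->offset
	 * immediately; allocation error handling omitted for brevity. */
	static void pd_move_defer_offset(struct ttm_buffer_object *bo,
					 struct fence *move_fence,
					 uint64_t new_offset)
	{
		struct pd_move_cb *work = kzalloc(sizeof(*work), GFP_KERNEL);

		work->bo = bo;
		work->new_offset = new_offset;
		if (fence_add_callback(move_fence, &work->cb, pd_move_signalled))
			pd_move_signalled(move_fence, &work->cb); /* already signalled */
	}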

Regards,
David Zhou
>
> Regards,
> Christian.
>
>>
>> Regards,
>> David Zhou
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++----
>>>   2 files changed, 4 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index a3d7d13..850c4dd 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -661,6 +661,8 @@ static int amdgpu_cs_ib_vm_chunk(struct 
>>> amdgpu_device *adev,
>>>           }
>>>       }
>>>   +    p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
>>> +
>>>       r = amdgpu_bo_vm_update_pte(p, vm);
>>>       if (!r)
>>>           amdgpu_cs_sync_rings(p);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index d3e0576..82efb40 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -177,7 +177,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, 
>>> struct amdgpu_ring *ring,
>>>                 struct amdgpu_sync *sync, struct fence *fence,
>>>                 unsigned *vm_id, uint64_t *vm_pd_addr)
>>>   {
>>> -    uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
>>>       struct amdgpu_device *adev = ring->adev;
>>>       struct fence *updates = sync->last_vm_update;
>>>       struct amdgpu_vm_id *id, *idle;
>>> @@ -250,7 +249,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, 
>>> struct amdgpu_ring *ring,
>>>           if (atomic64_read(&id->owner) != vm->client_id)
>>>               continue;
>>>   -        if (pd_addr != id->pd_gpu_addr)
>>> +        if (*vm_pd_addr != id->pd_gpu_addr)
>>>               continue;
>>>             if (!same_ring &&
>>> @@ -298,14 +297,13 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, 
>>> struct amdgpu_ring *ring,
>>>       fence_put(id->flushed_updates);
>>>       id->flushed_updates = fence_get(updates);
>>>   -    id->pd_gpu_addr = pd_addr;
>>> +    id->pd_gpu_addr = *vm_pd_addr;
>>>         list_move_tail(&id->list, &adev->vm_manager.ids_lru);
>>>       atomic64_set(&id->owner, vm->client_id);
>>>       vm->ids[ring->idx] = id;
>>>         *vm_id = id - adev->vm_manager.ids;
>>> -    *vm_pd_addr = pd_addr;
>>>       trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
>>>     error:
>>
>
Christian König June 16, 2016, 10:10 a.m. UTC | #4
On June 16, 2016 11:54, zhoucm1 wrote:
>
>
> On June 16, 2016 17:52, Christian König wrote:
>> On June 16, 2016 10:33, zhoucm1 wrote:
>>>
>>>
>>> On June 15, 2016 19:44, Christian König wrote:
>>>> From: Christian König <christian.koenig@amd.com>
>>>>
>>>> When we pipeline evictions the page directory could already be
>>>> moving somewhere else when grab_id is called.
>>> Isn't the PD BO protected by the job fence?
>>> I think before the job fence is signalled the PD BO is safe; there
>>> shouldn't be a chance to evict it.
>>
>> The crux here is that we start to pipeline BO evictions (we plan them 
>> but don't execute them immediately).
>>
>> E.g. the eviction won't happen before the protecting fence is 
>> signaled, but we have it planned and so the address returned by 
>> amdgpu_bo_gpu_offset() is already the new one.
> Thanks for mentioning it; I see this code in ttm_bo_handle_move_mem:
>     if (bo->mem.mm_node) {
>         bo->offset = (bo->mem.start << PAGE_SHIFT) +
>             bdev->man[bo->mem.mem_type].gpu_offset;
>         bo->cur_placement = bo->mem.placement;
>     } else
>         bo->offset = 0;
>
> It seems better to update the offset only after the moving fence is
> signalled; should we add a moving-fence callback?

No, when the next operation wants to move the BO back in, it needs the
already updated offset.

All we need to do is to make sure that all offsets are determined when 
the BO is validated into the domain where the submission needs it.

This is actually a requirement for retrieving all buffer offsets anyway 
because an application could request to move the buffer directly after 
making a submission with it.

The only reason we haven't noticed this previously is that applications
can't affect the PD directly and we didn't pipeline evictions (the only
other reason for moving a buffer).

I should probably take a look at adding a couple of warnings to
amdgpu_bo_gpu_offset() again.
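
For illustration, such a warning could look roughly like this (a sketch
only, assuming amdgpu_bo_gpu_offset() still just returns bo->tbo.offset;
the exact conditions are guesses, not an actual patch):

	u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
	{
		/* A GPU offset is only meaningful while the BO sits in a
		 * GPU-accessible domain... */
		WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);

		/* ...and only while it can't move under us, i.e. while it
		 * is reserved for the current submission or pinned. */
		WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
			     !bo->pin_count);

		return bo->tbo.offset;
	}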

Regards,
Christian.

>
> Regards,
> David Zhou
>>
>> Regards,
>> Christian.
>>
>>>
>>> Regards,
>>> David Zhou
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++----
>>>>   2 files changed, 4 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> index a3d7d13..850c4dd 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> @@ -661,6 +661,8 @@ static int amdgpu_cs_ib_vm_chunk(struct 
>>>> amdgpu_device *adev,
>>>>           }
>>>>       }
>>>>   +    p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
>>>> +
>>>>       r = amdgpu_bo_vm_update_pte(p, vm);
>>>>       if (!r)
>>>>           amdgpu_cs_sync_rings(p);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index d3e0576..82efb40 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -177,7 +177,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, 
>>>> struct amdgpu_ring *ring,
>>>>                 struct amdgpu_sync *sync, struct fence *fence,
>>>>                 unsigned *vm_id, uint64_t *vm_pd_addr)
>>>>   {
>>>> -    uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
>>>>       struct amdgpu_device *adev = ring->adev;
>>>>       struct fence *updates = sync->last_vm_update;
>>>>       struct amdgpu_vm_id *id, *idle;
>>>> @@ -250,7 +249,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, 
>>>> struct amdgpu_ring *ring,
>>>>           if (atomic64_read(&id->owner) != vm->client_id)
>>>>               continue;
>>>>   -        if (pd_addr != id->pd_gpu_addr)
>>>> +        if (*vm_pd_addr != id->pd_gpu_addr)
>>>>               continue;
>>>>             if (!same_ring &&
>>>> @@ -298,14 +297,13 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, 
>>>> struct amdgpu_ring *ring,
>>>>       fence_put(id->flushed_updates);
>>>>       id->flushed_updates = fence_get(updates);
>>>>   -    id->pd_gpu_addr = pd_addr;
>>>> +    id->pd_gpu_addr = *vm_pd_addr;
>>>>         list_move_tail(&id->list, &adev->vm_manager.ids_lru);
>>>>       atomic64_set(&id->owner, vm->client_id);
>>>>       vm->ids[ring->idx] = id;
>>>>         *vm_id = id - adev->vm_manager.ids;
>>>> -    *vm_pd_addr = pd_addr;
>>>>       trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
>>>>     error:
>>>
>>
>

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index a3d7d13..850c4dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -661,6 +661,8 @@  static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 		}
 	}
 
+	p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
+
 	r = amdgpu_bo_vm_update_pte(p, vm);
 	if (!r)
 		amdgpu_cs_sync_rings(p);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d3e0576..82efb40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -177,7 +177,6 @@  int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_sync *sync, struct fence *fence,
 		      unsigned *vm_id, uint64_t *vm_pd_addr)
 {
-	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 	struct amdgpu_device *adev = ring->adev;
 	struct fence *updates = sync->last_vm_update;
 	struct amdgpu_vm_id *id, *idle;
@@ -250,7 +249,7 @@  int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (atomic64_read(&id->owner) != vm->client_id)
 			continue;
 
-		if (pd_addr != id->pd_gpu_addr)
+		if (*vm_pd_addr != id->pd_gpu_addr)
 			continue;
 
 		if (!same_ring &&
@@ -298,14 +297,13 @@  int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	fence_put(id->flushed_updates);
 	id->flushed_updates = fence_get(updates);
 
-	id->pd_gpu_addr = pd_addr;
+	id->pd_gpu_addr = *vm_pd_addr;
 
 	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
 	atomic64_set(&id->owner, vm->client_id);
 	vm->ids[ring->idx] = id;
 
 	*vm_id = id - adev->vm_manager.ids;
-	*vm_pd_addr = pd_addr;
 	trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
 
 error: