Message ID | 1465991045-2328-5-git-send-email-deathsimple@vodafone.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 2016年06月15日 19:44, Christian König wrote: > From: Christian König <christian.koenig@amd.com> > > When we pipeline evictions the page directory could already be > moving somewhere else when grab_id is called. Isn't PD bo protected by job fence? I think before job fence is signalled, the PD bo is safe, there shouldn't be a chance to evict PD bo. Regards, David Zhou > > Signed-off-by: Christian König <christian.koenig@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++---- > 2 files changed, 4 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index a3d7d13..850c4dd 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -661,6 +661,8 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, > } > } > > + p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); > + > r = amdgpu_bo_vm_update_pte(p, vm); > if (!r) > amdgpu_cs_sync_rings(p); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index d3e0576..82efb40 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -177,7 +177,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > struct amdgpu_sync *sync, struct fence *fence, > unsigned *vm_id, uint64_t *vm_pd_addr) > { > - uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); > struct amdgpu_device *adev = ring->adev; > struct fence *updates = sync->last_vm_update; > struct amdgpu_vm_id *id, *idle; > @@ -250,7 +249,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > if (atomic64_read(&id->owner) != vm->client_id) > continue; > > - if (pd_addr != id->pd_gpu_addr) > + if (*vm_pd_addr != id->pd_gpu_addr) > continue; > > if (!same_ring && > @@ -298,14 +297,13 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring 
*ring, > fence_put(id->flushed_updates); > id->flushed_updates = fence_get(updates); > > - id->pd_gpu_addr = pd_addr; > + id->pd_gpu_addr = *vm_pd_addr; > > list_move_tail(&id->list, &adev->vm_manager.ids_lru); > atomic64_set(&id->owner, vm->client_id); > vm->ids[ring->idx] = id; > > *vm_id = id - adev->vm_manager.ids; > - *vm_pd_addr = pd_addr; > trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); > > error:
Am 16.06.2016 um 10:33 schrieb zhoucm1: > > > On 2016年06月15日 19:44, Christian König wrote: >> From: Christian König <christian.koenig@amd.com> >> >> When we pipeline evictions the page directory could already be >> moving somewhere else when grab_id is called. > Isn't PD bo protected by job fence? > I think before job fence is signalled, the PD bo is safe, there > shouldn't be a chance to evict PD bo. The crux here is that we start to pipeline BO evictions (we plan them but don't execute them immediately). I.e. the eviction won't happen before the protecting fence is signaled, but we have it planned and so the address returned by amdgpu_bo_gpu_offset() is already the new one. Regards, Christian. > > Regards, > David Zhou >> >> Signed-off-by: Christian König <christian.koenig@amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++---- >> 2 files changed, 4 insertions(+), 4 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> index a3d7d13..850c4dd 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> @@ -661,6 +661,8 @@ static int amdgpu_cs_ib_vm_chunk(struct >> amdgpu_device *adev, >> } >> } >> + p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); >> + >> r = amdgpu_bo_vm_update_pte(p, vm); >> if (!r) >> amdgpu_cs_sync_rings(p); >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> index d3e0576..82efb40 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> @@ -177,7 +177,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, >> struct amdgpu_ring *ring, >> struct amdgpu_sync *sync, struct fence *fence, >> unsigned *vm_id, uint64_t *vm_pd_addr) >> { >> - uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); >> struct amdgpu_device *adev = ring->adev; >> struct fence *updates = 
sync->last_vm_update; >> struct amdgpu_vm_id *id, *idle; >> @@ -250,7 +249,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, >> struct amdgpu_ring *ring, >> if (atomic64_read(&id->owner) != vm->client_id) >> continue; >> - if (pd_addr != id->pd_gpu_addr) >> + if (*vm_pd_addr != id->pd_gpu_addr) >> continue; >> if (!same_ring && >> @@ -298,14 +297,13 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, >> struct amdgpu_ring *ring, >> fence_put(id->flushed_updates); >> id->flushed_updates = fence_get(updates); >> - id->pd_gpu_addr = pd_addr; >> + id->pd_gpu_addr = *vm_pd_addr; >> list_move_tail(&id->list, &adev->vm_manager.ids_lru); >> atomic64_set(&id->owner, vm->client_id); >> vm->ids[ring->idx] = id; >> *vm_id = id - adev->vm_manager.ids; >> - *vm_pd_addr = pd_addr; >> trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); >> error: > > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel
On 2016年06月16日 17:52, Christian König wrote: > Am 16.06.2016 um 10:33 schrieb zhoucm1: >> >> >> On 2016年06月15日 19:44, Christian König wrote: >>> From: Christian König <christian.koenig@amd.com> >>> >>> When we pipeline evictions the page directory could already be >>> moving somewhere else when grab_id is called. >> Isn't PD bo protected by job fence? >> I think before job fence is signalled, the PD bo is safe, there >> shouldn't be a chance to evict PD bo. > > The crux here is that we start to pipeline BO evictions (we plan them > but don't execute them immediately). > > E.g. the eviction won't happen before the protecting fence is > signaled, but we have it planned and so the address returned by > amdgpu_bo_gpu_offset() is already the new one. Thanks for mentioning that. I see the code in ttm_bo_handle_move_mem: if (bo->mem.mm_node) { bo->offset = (bo->mem.start << PAGE_SHIFT) + bdev->man[bo->mem.mem_type].gpu_offset; bo->cur_placement = bo->mem.placement; } else bo->offset = 0; It seems better to update the offset after the moving-fence is signalled; should we add a moving-fence callback? Regards, David Zhou > > Regards, > Christian. 
> >> >> Regards, >> David Zhou >>> >>> Signed-off-by: Christian König <christian.koenig@amd.com> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++---- >>> 2 files changed, 4 insertions(+), 4 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>> index a3d7d13..850c4dd 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>> @@ -661,6 +661,8 @@ static int amdgpu_cs_ib_vm_chunk(struct >>> amdgpu_device *adev, >>> } >>> } >>> + p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); >>> + >>> r = amdgpu_bo_vm_update_pte(p, vm); >>> if (!r) >>> amdgpu_cs_sync_rings(p); >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> index d3e0576..82efb40 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> @@ -177,7 +177,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, >>> struct amdgpu_ring *ring, >>> struct amdgpu_sync *sync, struct fence *fence, >>> unsigned *vm_id, uint64_t *vm_pd_addr) >>> { >>> - uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); >>> struct amdgpu_device *adev = ring->adev; >>> struct fence *updates = sync->last_vm_update; >>> struct amdgpu_vm_id *id, *idle; >>> @@ -250,7 +249,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, >>> struct amdgpu_ring *ring, >>> if (atomic64_read(&id->owner) != vm->client_id) >>> continue; >>> - if (pd_addr != id->pd_gpu_addr) >>> + if (*vm_pd_addr != id->pd_gpu_addr) >>> continue; >>> if (!same_ring && >>> @@ -298,14 +297,13 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, >>> struct amdgpu_ring *ring, >>> fence_put(id->flushed_updates); >>> id->flushed_updates = fence_get(updates); >>> - id->pd_gpu_addr = pd_addr; >>> + id->pd_gpu_addr = *vm_pd_addr; >>> list_move_tail(&id->list, &adev->vm_manager.ids_lru); >>> 
atomic64_set(&id->owner, vm->client_id); >>> vm->ids[ring->idx] = id; >>> *vm_id = id - adev->vm_manager.ids; >>> - *vm_pd_addr = pd_addr; >>> trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); >>> error: >> >> _______________________________________________ >> dri-devel mailing list >> dri-devel@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/dri-devel >
Am 16.06.2016 um 11:54 schrieb zhoucm1: > > > On 2016年06月16日 17:52, Christian König wrote: >> Am 16.06.2016 um 10:33 schrieb zhoucm1: >>> >>> >>> On 2016年06月15日 19:44, Christian König wrote: >>>> From: Christian König <christian.koenig@amd.com> >>>> >>>> When we pipeline evictions the page directory could already be >>>> moving somewhere else when grab_id is called. >>> Isn't PD bo protected by job fence? >>> I think before job fence is signalled, the PD bo is safe, there >>> shouldn't be a chance to evict PD bo. >> >> The crux here is that we start to pipeline BO evictions (we plan them >> but don't execute them immediately). >> >> E.g. the eviction won't happen before the protecting fence is >> signaled, but we have it planned and so the address returned by >> amdgpu_bo_gpu_offset() is already the new one. > Thanks for mentioned, I see the code in ttm_bo_handle_move_mem: > if (bo->mem.mm_node) { > bo->offset = (bo->mem.start << PAGE_SHIFT) + > bdev->man[bo->mem.mem_type].gpu_offset; > bo->cur_placement = bo->mem.placement; > } else > bo->offset = 0; > > it seems better to update the offset after the moving-fence is > signalled, add a moving-fence callback? No, when the next operation wants to move the BO back in it needs the already updated offset. All we need to do is to make sure that all offsets are determined when the BO is validated into the domain where the submission needs it. This is actually a requirement for retrieving all buffer offsets anyway because an application could request to move the buffer directly after making a submission with it. The only reason we haven't noticed that previously is because applications can't affect the PD directly and we didn't pipeline evictions (the only other reason for moving a buffer). I should probably take a look at adding a couple of warnings to amdgpu_bo_gpu_offset() again. Regards, Christian. > > Regards, > David Zhou >> >> Regards, >> Christian. 
>> >>> >>> Regards, >>> David Zhou >>>> >>>> Signed-off-by: Christian König <christian.koenig@amd.com> >>>> --- >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++---- >>>> 2 files changed, 4 insertions(+), 4 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>>> index a3d7d13..850c4dd 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>>> @@ -661,6 +661,8 @@ static int amdgpu_cs_ib_vm_chunk(struct >>>> amdgpu_device *adev, >>>> } >>>> } >>>> + p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); >>>> + >>>> r = amdgpu_bo_vm_update_pte(p, vm); >>>> if (!r) >>>> amdgpu_cs_sync_rings(p); >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> index d3e0576..82efb40 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> @@ -177,7 +177,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, >>>> struct amdgpu_ring *ring, >>>> struct amdgpu_sync *sync, struct fence *fence, >>>> unsigned *vm_id, uint64_t *vm_pd_addr) >>>> { >>>> - uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); >>>> struct amdgpu_device *adev = ring->adev; >>>> struct fence *updates = sync->last_vm_update; >>>> struct amdgpu_vm_id *id, *idle; >>>> @@ -250,7 +249,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, >>>> struct amdgpu_ring *ring, >>>> if (atomic64_read(&id->owner) != vm->client_id) >>>> continue; >>>> - if (pd_addr != id->pd_gpu_addr) >>>> + if (*vm_pd_addr != id->pd_gpu_addr) >>>> continue; >>>> if (!same_ring && >>>> @@ -298,14 +297,13 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, >>>> struct amdgpu_ring *ring, >>>> fence_put(id->flushed_updates); >>>> id->flushed_updates = fence_get(updates); >>>> - id->pd_gpu_addr = pd_addr; >>>> + id->pd_gpu_addr = *vm_pd_addr; >>>> 
list_move_tail(&id->list, &adev->vm_manager.ids_lru); >>>> atomic64_set(&id->owner, vm->client_id); >>>> vm->ids[ring->idx] = id; >>>> *vm_id = id - adev->vm_manager.ids; >>>> - *vm_pd_addr = pd_addr; >>>> trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); >>>> error: >>> >>> _______________________________________________ >>> dri-devel mailing list >>> dri-devel@lists.freedesktop.org >>> https://lists.freedesktop.org/mailman/listinfo/dri-devel >> >
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index a3d7d13..850c4dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -661,6 +661,8 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, } } + p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); + r = amdgpu_bo_vm_update_pte(p, vm); if (!r) amdgpu_cs_sync_rings(p); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d3e0576..82efb40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -177,7 +177,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_sync *sync, struct fence *fence, unsigned *vm_id, uint64_t *vm_pd_addr) { - uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); struct amdgpu_device *adev = ring->adev; struct fence *updates = sync->last_vm_update; struct amdgpu_vm_id *id, *idle; @@ -250,7 +249,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (atomic64_read(&id->owner) != vm->client_id) continue; - if (pd_addr != id->pd_gpu_addr) + if (*vm_pd_addr != id->pd_gpu_addr) continue; if (!same_ring && @@ -298,14 +297,13 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, fence_put(id->flushed_updates); id->flushed_updates = fence_get(updates); - id->pd_gpu_addr = pd_addr; + id->pd_gpu_addr = *vm_pd_addr; list_move_tail(&id->list, &adev->vm_manager.ids_lru); atomic64_set(&id->owner, vm->client_id); vm->ids[ring->idx] = id; *vm_id = id - adev->vm_manager.ids; - *vm_pd_addr = pd_addr; trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); error: