Message ID | 1349711740-24823-1-git-send-email-deathsimple@vodafone.de (mailing list archive) |
---|---|
State | New, archived |
On Mon, Oct 8, 2012 at 11:55 AM, Christian König <deathsimple@vodafone.de> wrote:
> Based on Dmitry's work, but splitting the code into page
> directory and page table handling makes it far more
> readable and (hopefully) more reliable.
>
> Allocations of page tables are made from the SA on demand;
> that should still work fine since all page tables are of
> the same size.
>
> Also use the fact that allocations from the SA are mostly
> contiguous (except for end-of-buffer wraps and under very
> high memory pressure) to group updates sent to the chipset
> specific code into larger chunks.
>
> v3: mostly a rewrite of Dmitry's previous patch.
> v4: fix some typos and coding style
>
> Signed-off-by: Dmitry Cherkasov <Dmitrii.Cherkasov@amd.com>
> Signed-off-by: Christian König <deathsimple@vodafone.de>
> Tested-by: Michel Dänzer <michel.daenzer@amd.com>

For the series:

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/radeon/ni.c          |   2 +-
>  drivers/gpu/drm/radeon/radeon.h      |  11 +-
>  drivers/gpu/drm/radeon/radeon_gart.c | 322 ++++++++++++++++++++++++++--------
>  3 files changed, 262 insertions(+), 73 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
> index 9a46f7d..48e2337 100644
> --- a/drivers/gpu/drm/radeon/ni.c
> +++ b/drivers/gpu/drm/radeon/ni.c
> @@ -1576,7 +1576,7 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
>  	radeon_ring_write(ring, 0);
>
>  	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (vm->id << 2), 0));
> -	radeon_ring_write(ring, vm->last_pfn);
> +	radeon_ring_write(ring, rdev->vm_manager.max_pfn);
>
>  	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
>  	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index b04c064..bc6b56b 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -663,9 +663,14 @@ struct radeon_vm {
>  	struct list_head list;
>  	struct list_head va;
>  	unsigned id;
> -	unsigned last_pfn;
> -	u64 pd_gpu_addr;
> -	struct radeon_sa_bo *sa_bo;
> +
> +	/* contains the page directory */
> +	struct radeon_sa_bo *page_directory;
> +	uint64_t pd_gpu_addr;
> +
> +	/* array of page tables, one for each page directory entry */
> +	struct radeon_sa_bo **page_tables;
> +
>  	struct mutex mutex;
>  	/* last fence for cs using this vm */
>  	struct radeon_fence *fence;
> diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
> index 753b7ca..b36b615 100644
> --- a/drivers/gpu/drm/radeon/radeon_gart.c
> +++ b/drivers/gpu/drm/radeon/radeon_gart.c
> @@ -423,6 +423,18 @@ void radeon_gart_fini(struct radeon_device *rdev)
>   */
>
>  /**
> + * radeon_vm_num_pde - return the number of page directory entries
> + *
> + * @rdev: radeon_device pointer
> + *
> + * Calculate the number of page directory entries (cayman+).
> + */
> +static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
> +{
> +	return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE;
> +}
> +
> +/**
>   * radeon_vm_directory_size - returns the size of the page directory in bytes
>   *
>   * @rdev: radeon_device pointer
> @@ -431,7 +443,7 @@ void radeon_gart_fini(struct radeon_device *rdev)
>   */
>  static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
>  {
> -	return (rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE) * 8;
> +	return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
>  }
>
>  /**
> @@ -451,11 +463,11 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
>
>  	if (!rdev->vm_manager.enabled) {
>  		/* allocate enough for 2 full VM pts */
> -		size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev));
> -		size += RADEON_GPU_PAGE_ALIGN(rdev->vm_manager.max_pfn * 8);
> +		size = radeon_vm_directory_size(rdev);
> +		size += rdev->vm_manager.max_pfn * 8;
>  		size *= 2;
>  		r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
> -					      size,
> +					      RADEON_GPU_PAGE_ALIGN(size),
>  					      RADEON_GEM_DOMAIN_VRAM);
>  		if (r) {
>  			dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
> @@ -476,7 +488,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
>
>  	/* restore page table */
>  	list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
> -		if (vm->sa_bo == NULL)
> +		if (vm->page_directory == NULL)
>  			continue;
>
>  		list_for_each_entry(bo_va, &vm->va, vm_list) {
> @@ -500,16 +512,25 @@ static void radeon_vm_free_pt(struct radeon_device *rdev,
>  			       struct radeon_vm *vm)
>  {
>  	struct radeon_bo_va *bo_va;
> +	int i;
>
> -	if (!vm->sa_bo)
> +	if (!vm->page_directory)
>  		return;
>
>  	list_del_init(&vm->list);
> -	radeon_sa_bo_free(rdev, &vm->sa_bo, vm->fence);
> +	radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
>
>  	list_for_each_entry(bo_va, &vm->va, vm_list) {
>  		bo_va->valid = false;
>  	}
> +
> +	if (vm->page_tables == NULL)
> +		return;
> +
> +	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
> +		radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);
> +
> +	kfree(vm->page_tables);
>  }
>
>  /**
> @@ -546,6 +567,35 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
>  }
>
>  /**
> + * radeon_vm_evict - evict page table to make room for new one
> + *
> + * @rdev: radeon_device pointer
> + * @vm: VM we want to allocate something for
> + *
> + * Evict a VM from the lru, making sure that it isn't @vm. (cayman+).
> + * Returns 0 for success, -ENOMEM for failure.
> + *
> + * Global and local mutex must be locked!
> + */
> +int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
> +{
> +	struct radeon_vm *vm_evict;
> +
> +	if (list_empty(&rdev->vm_manager.lru_vm))
> +		return -ENOMEM;
> +
> +	vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
> +				    struct radeon_vm, list);
> +	if (vm_evict == vm)
> +		return -ENOMEM;
> +
> +	mutex_lock(&vm_evict->mutex);
> +	radeon_vm_free_pt(rdev, vm_evict);
> +	mutex_unlock(&vm_evict->mutex);
> +	return 0;
> +}
> +
> +/**
>   * radeon_vm_alloc_pt - allocates a page table for a VM
>   *
>   * @rdev: radeon_device pointer
> @@ -559,20 +609,15 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
>   */
>  int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
>  {
> -	struct radeon_vm *vm_evict;
> -	int r;
> +	unsigned pd_size, pts_size;
>  	u64 *pd_addr;
> -	int tables_size;
> +	int r;
>
>  	if (vm == NULL) {
>  		return -EINVAL;
>  	}
>
> -	/* allocate enough to cover the current VM size */
> -	tables_size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev));
> -	tables_size += RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8);
> -
> -	if (vm->sa_bo != NULL) {
> +	if (vm->page_directory != NULL) {
>  		/* update lru */
>  		list_del_init(&vm->list);
>  		list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
> @@ -580,25 +625,34 @@ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
>  	}
>
>  retry:
> -	r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo,
> -			     tables_size, RADEON_GPU_PAGE_SIZE, false);
> +	pd_size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev));
> +	r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
> +			     &vm->page_directory, pd_size,
> +			     RADEON_GPU_PAGE_SIZE, false);
>  	if (r == -ENOMEM) {
> -		if (list_empty(&rdev->vm_manager.lru_vm)) {
> +		r = radeon_vm_evict(rdev, vm);
> +		if (r)
>  			return r;
> -		}
> -		vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list);
> -		mutex_lock(&vm_evict->mutex);
> -		radeon_vm_free_pt(rdev, vm_evict);
> -		mutex_unlock(&vm_evict->mutex);
>  		goto retry;
>
>  	} else if (r) {
>  		return r;
>  	}
>
> -	pd_addr = radeon_sa_bo_cpu_addr(vm->sa_bo);
> -	vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo);
> -	memset(pd_addr, 0, tables_size);
> +	vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);
> +
> +	/* Initially clear the page directory */
> +	pd_addr = radeon_sa_bo_cpu_addr(vm->page_directory);
> +	memset(pd_addr, 0, pd_size);
> +
> +	pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
> +	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
> +
> +	if (vm->page_tables == NULL) {
> +		DRM_ERROR("Cannot allocate memory for page table array\n");
> +		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
> +		return -ENOMEM;
> +	}
>
>  	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
>  	return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
> @@ -793,20 +847,6 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
>  	}
>
>  	mutex_lock(&vm->mutex);
> -	if (last_pfn > vm->last_pfn) {
> -		/* release mutex and lock in right order */
> -		mutex_unlock(&vm->mutex);
> -		mutex_lock(&rdev->vm_manager.lock);
> -		mutex_lock(&vm->mutex);
> -		/* and check again */
> -		if (last_pfn > vm->last_pfn) {
> -			/* grow va space 32M by 32M */
> -			unsigned align = ((32 << 20) >> 12) - 1;
> -			radeon_vm_free_pt(rdev, vm);
> -			vm->last_pfn = (last_pfn + align) & ~align;
> -		}
> -		mutex_unlock(&rdev->vm_manager.lock);
> -	}
>  	head = &vm->va;
>  	last_offset = 0;
>  	list_for_each_entry(tmp, &vm->va, vm_list) {
> @@ -865,6 +905,155 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
>  }
>
>  /**
> + * radeon_vm_update_pdes - make sure that page directory is valid
> + *
> + * @rdev: radeon_device pointer
> + * @vm: requested vm
> + * @start: start of GPU address range
> + * @end: end of GPU address range
> + *
> + * Allocates new page tables if necessary
> + * and updates the page directory (cayman+).
> + * Returns 0 for success, error for failure.
> + *
> + * Global and local mutex must be locked!
> + */
> +static int radeon_vm_update_pdes(struct radeon_device *rdev,
> +				 struct radeon_vm *vm,
> +				 uint64_t start, uint64_t end)
> +{
> +	static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
> +
> +	uint64_t last_pde = ~0, last_pt = ~0;
> +	unsigned count = 0;
> +	uint64_t pt_idx;
> +	int r;
> +
> +	start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
> +	end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
> +
> +	/* walk over the address space and update the page directory */
> +	for (pt_idx = start; pt_idx <= end; ++pt_idx) {
> +		uint64_t pde, pt;
> +
> +		if (vm->page_tables[pt_idx])
> +			continue;
> +
> +retry:
> +		r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
> +				     &vm->page_tables[pt_idx],
> +				     RADEON_VM_PTE_COUNT * 8,
> +				     RADEON_GPU_PAGE_SIZE, false);
> +
> +		if (r == -ENOMEM) {
> +			r = radeon_vm_evict(rdev, vm);
> +			if (r)
> +				return r;
> +			goto retry;
> +		} else if (r) {
> +			return r;
> +		}
> +
> +		pde = vm->pd_gpu_addr + pt_idx * 8;
> +
> +		pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
> +
> +		if (((last_pde + 8 * count) != pde) ||
> +		    ((last_pt + incr * count) != pt)) {
> +
> +			if (count) {
> +				radeon_asic_vm_set_page(rdev, last_pde,
> +							last_pt, count, incr,
> +							RADEON_VM_PAGE_VALID);
> +			}
> +
> +			count = 1;
> +			last_pde = pde;
> +			last_pt = pt;
> +		} else {
> +			++count;
> +		}
> +	}
> +
> +	if (count) {
> +		radeon_asic_vm_set_page(rdev, last_pde, last_pt, count,
> +					incr, RADEON_VM_PAGE_VALID);
> +
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * radeon_vm_update_ptes - make sure that page tables are valid
> + *
> + * @rdev: radeon_device pointer
> + * @vm: requested vm
> + * @start: start of GPU address range
> + * @end: end of GPU address range
> + * @dst: destination address to map to
> + * @flags: mapping flags
> + *
> + * Update the page tables in the range @start - @end (cayman+).
> + *
> + * Global and local mutex must be locked!
> + */
> +static void radeon_vm_update_ptes(struct radeon_device *rdev,
> +				  struct radeon_vm *vm,
> +				  uint64_t start, uint64_t end,
> +				  uint64_t dst, uint32_t flags)
> +{
> +	static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;
> +
> +	uint64_t last_pte = ~0, last_dst = ~0;
> +	unsigned count = 0;
> +	uint64_t addr;
> +
> +	start = start / RADEON_GPU_PAGE_SIZE;
> +	end = end / RADEON_GPU_PAGE_SIZE;
> +
> +	/* walk over the address space and update the page tables */
> +	for (addr = start; addr < end; ) {
> +		uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
> +		unsigned nptes;
> +		uint64_t pte;
> +
> +		if ((addr & ~mask) == (end & ~mask))
> +			nptes = end - addr;
> +		else
> +			nptes = RADEON_VM_PTE_COUNT - (addr & mask);
> +
> +		pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
> +		pte += (addr & mask) * 8;
> +
> +		if (((last_pte + 8 * count) != pte) ||
> +		    ((count + nptes) > 1 << 11)) {
> +
> +			if (count) {
> +				radeon_asic_vm_set_page(rdev, last_pte,
> +							last_dst, count,
> +							RADEON_GPU_PAGE_SIZE,
> +							flags);
> +			}
> +
> +			count = nptes;
> +			last_pte = pte;
> +			last_dst = dst;
> +		} else {
> +			count += nptes;
> +		}
> +
> +		addr += nptes;
> +		dst += nptes * RADEON_GPU_PAGE_SIZE;
> +	}
> +
> +	if (count) {
> +		radeon_asic_vm_set_page(rdev, last_pte, last_dst, count,
> +					RADEON_GPU_PAGE_SIZE, flags);
> +	}
> +}
> +
> +/**
>   * radeon_vm_bo_update_pte - map a bo into the vm page table
>   *
>   * @rdev: radeon_device pointer
> @@ -887,12 +1076,11 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
>  	struct radeon_semaphore *sem = NULL;
>  	struct radeon_bo_va *bo_va;
>  	unsigned nptes, npdes, ndw;
> -	uint64_t pe, addr;
> -	uint64_t pfn;
> +	uint64_t addr;
>  	int r;
>
>  	/* nothing to do if vm isn't bound */
> -	if (vm->sa_bo == NULL)
> +	if (vm->page_directory == NULL)
>  		return 0;
>
>  	bo_va = radeon_vm_bo_find(vm, bo);
> @@ -939,25 +1127,29 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
>  		}
>  	}
>
> -	/* estimate number of dw needed */
> -	/* reserve space for 32-bit padding */
> -	ndw = 32;
> -
>  	nptes = radeon_bo_ngpu_pages(bo);
>
> -	pfn = (bo_va->soffset / RADEON_GPU_PAGE_SIZE);
> +	/* assume two extra pdes in case the mapping overlaps the borders */
> +	npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
> +
> +	/* estimate number of dw needed */
> +	/* semaphore, fence and padding */
> +	ndw = 32;
>
> -	/* handle cases where a bo spans several pdes */
> -	npdes = (ALIGN(pfn + nptes, RADEON_VM_PTE_COUNT) -
> -		 (pfn & ~(RADEON_VM_PTE_COUNT - 1))) >> RADEON_VM_BLOCK_SIZE;
> +	if (RADEON_VM_BLOCK_SIZE > 11)
> +		/* reserve space for one header for every 2k dwords */
> +		ndw += (nptes >> 11) * 3;
> +	else
> +		/* reserve space for one header for
> +		    every (1 << BLOCK_SIZE) entries */
> +		ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 3;
>
> -	/* reserve space for one header for every 2k dwords */
> -	ndw += (nptes >> 11) * 3;
>  	/* reserve space for pte addresses */
>  	ndw += nptes * 2;
>
>  	/* reserve space for one header for every 2k dwords */
>  	ndw += (npdes >> 11) * 3;
> +
>  	/* reserve space for pde addresses */
>  	ndw += npdes * 2;
>
> @@ -971,22 +1163,14 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
>  		radeon_fence_note_sync(vm->fence, ridx);
>  	}
>
> -	/* update page table entries */
> -	pe = vm->pd_gpu_addr;
> -	pe += radeon_vm_directory_size(rdev);
> -	pe += (bo_va->soffset / RADEON_GPU_PAGE_SIZE) * 8;
> -
> -	radeon_asic_vm_set_page(rdev, pe, addr, nptes,
> -				RADEON_GPU_PAGE_SIZE, bo_va->flags);
> -
> -	/* update page directory entries */
> -	addr = pe;
> -
> -	pe = vm->pd_gpu_addr;
> -	pe += ((bo_va->soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE) * 8;
> +	r = radeon_vm_update_pdes(rdev, vm, bo_va->soffset, bo_va->eoffset);
> +	if (r) {
> +		radeon_ring_unlock_undo(rdev, ring);
> +		return r;
> +	}
>
> -	radeon_asic_vm_set_page(rdev, pe, addr, npdes,
> -				RADEON_VM_PTE_COUNT * 8, RADEON_VM_PAGE_VALID);
> +	radeon_vm_update_ptes(rdev, vm, bo_va->soffset, bo_va->eoffset,
> +			      addr, bo_va->flags);
>
>  	radeon_fence_unref(&vm->fence);
>  	r = radeon_fence_emit(rdev, &vm->fence, ridx);
> @@ -997,6 +1181,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
>  	radeon_ring_unlock_commit(rdev, ring);
>  	radeon_semaphore_free(rdev, &sem, vm->fence);
>  	radeon_fence_unref(&vm->last_flush);
> +
>  	return 0;
>  }
>
> @@ -1068,7 +1253,6 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
>
>  	vm->id = 0;
>  	vm->fence = NULL;
> -	vm->last_pfn = 0;
>  	mutex_init(&vm->mutex);
>  	INIT_LIST_HEAD(&vm->list);
>  	INIT_LIST_HEAD(&vm->va);
> --
> 1.7.9.5
>
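The heart of the patch is the batching pattern in radeon_vm_update_pdes() and radeon_vm_update_ptes(): walk a range of entries, and as long as the next entry is contiguous with the running batch just grow the count, otherwise flush the batch with a single call to the chipset-specific hook. The following standalone sketch (not radeon code; set_pages(), ENTRY_SIZE and the sample addresses are made up for illustration) shows that pattern in isolation:

#include <stdio.h>
#include <stdint.h>

#define ENTRY_SIZE 8		/* bytes per page table entry */
#define GPU_PAGE_SIZE 4096ULL	/* 4 KiB pages */

/* stand-in for the chipset-specific radeon_asic_vm_set_page() hook */
static void set_pages(uint64_t pe, uint64_t addr, unsigned count)
{
	printf("flush: %u entries at 0x%llx mapping 0x%llx\n",
	       count, (unsigned long long)pe, (unsigned long long)addr);
}

int main(void)
{
	/* GPU addresses of the PTE slots to write; mostly contiguous */
	uint64_t pte_addr[] = { 0x1000, 0x1008, 0x1010, 0x5000, 0x5008 };
	uint64_t dst = 0x80000000ULL;		/* first destination page */
	uint64_t last_pte = ~0ULL, last_dst = ~0ULL;
	unsigned count = 0;
	unsigned i;

	for (i = 0; i < sizeof(pte_addr) / sizeof(pte_addr[0]); ++i) {
		/* contiguous with the running batch? just grow it */
		if (last_pte + ENTRY_SIZE * count != pte_addr[i]) {
			if (count)
				set_pages(last_pte, last_dst, count);
			count = 1;
			last_pte = pte_addr[i];
			last_dst = dst;
		} else {
			++count;
		}
		dst += GPU_PAGE_SIZE;
	}
	if (count)
		set_pages(last_pte, last_dst, count);	/* final flush */
	return 0;
}

With the sample addresses above this emits one flush of three entries and one of two. The real functions do a little more: radeon_vm_update_ptes() also caps a batch at 2048 entries, and radeon_vm_update_pdes() only merges when both the directory entries and the page tables they point to form contiguous runs.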
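The on-demand page table allocation rests on splitting a GPU virtual address into a page directory index and an index into the selected page table, the same math used by radeon_vm_num_pdes() and the pt_idx computations in the patch. A minimal sketch, assuming a 9-bit block size (512 PTEs per table) purely for illustration; the driver's real values come from RADEON_VM_BLOCK_SIZE and RADEON_VM_PTE_COUNT in radeon.h:

#include <stdio.h>
#include <stdint.h>

#define GPU_PAGE_SIZE	4096ULL			/* 4 KiB GPU pages */
#define VM_BLOCK_SIZE	9			/* assumed: 9 bits per table */
#define VM_PTE_COUNT	(1ULL << VM_BLOCK_SIZE)	/* 512 PTEs per page table */

int main(void)
{
	uint64_t va = 0x12345000ULL;		/* some GPU virtual address */
	uint64_t max_pfn = 1ULL << 20;		/* e.g. 4 GiB of VM space */

	uint64_t pfn = va / GPU_PAGE_SIZE;		/* page frame number */
	uint64_t pde_idx = pfn >> VM_BLOCK_SIZE;	/* which page table */
	uint64_t pte_idx = pfn & (VM_PTE_COUNT - 1);	/* slot inside it */

	/* mirrors radeon_vm_num_pdes(): one PDE per VM_PTE_COUNT pages */
	uint64_t num_pdes = max_pfn >> VM_BLOCK_SIZE;

	printf("va 0x%llx -> pfn %llu, pde %llu, pte %llu (of %llu pdes)\n",
	       (unsigned long long)va, (unsigned long long)pfn,
	       (unsigned long long)pde_idx, (unsigned long long)pte_idx,
	       (unsigned long long)num_pdes);
	return 0;
}

Because every page table covers a fixed VM_PTE_COUNT pages, all tables are the same size, which is why allocating them individually from the SA on demand works as described in the commit message.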