diff mbox

[2/2] drm/radeon: implement dynamic PTs allocation via SA

Message ID 1348584252-22778-2-git-send-email-Dmitrii.Cherkasov@amd.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dmitry Cherkasov Sept. 25, 2012, 2:44 p.m. UTC
make dynamic allocation of page tables on demand in radeon_vm_update_pte

Signed-off-by: Dmitry Cherkasov <Dmitrii.Cherkasov@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h      |   12 ++++
 drivers/gpu/drm/radeon/radeon_gart.c |  106 ++++++++++++++++++++++++++++++----
 2 files changed, 107 insertions(+), 11 deletions(-)

Comments

Michel Dänzer Sept. 27, 2012, 12:03 p.m. UTC | #1
On Die, 2012-09-25 at 18:44 +0400, Dmitry Cherkasov wrote: 
> make dynamic allocation of page tables on demand in radeon_vm_update_pte
> 
> Signed-off-by: Dmitry Cherkasov <Dmitrii.Cherkasov@amd.com>

This change caused many piglit tests to fail for me on SI with

        radeon: The kernel rejected CS, see dmesg for more information.

There was no corresponding dmesg output though.


Afterwards, stopping X resulted in the kernel fault below. Trying to
start X again hung after that.

Sep 27 13:07:28 ruby kernel: [ 1286.402018] general protection fault: 0000 [#1] SMP 
Sep 27 13:07:28 ruby kernel: [ 1286.402155] Modules linked in: ppdev lp bnep rfcomm bluetooth rfkill cpufreq_stats cpufreq_userspace cpufreq_powersave cpufreq_conservative binfmt_misc nfsd auth_rpcgss nfs_acl nfs lockd fscache sunrpc ext2 fuse k8temp loop firewire_sbp2 snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_hwdep snd_pcm_oss snd_mixer_oss snd_pcm snd_page_alloc snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq snd_timer serio_raw pcspkr snd_seq_device powernow_k8 mperf kvm_amd kvm processor parport_pc parport thermal_sys k10temp snd evdev i2c_piix4 soundcore ext4 crc16 jbd2 mbcache dm_mod hid_generic usbhid hid sg sr_mod cdrom sd_mod crc_t10dif microcode firewire_ohci ahci libahci firewire_core ohci_hcd crc_itu_t libata ehci_hcd xhci_hcd scsi_mod radeon(O) r8169 mii usbcore i2c_algo_bit usb_common ttm drm_kms_helper drm i2c_core button
Sep 27 13:07:28 ruby kernel: [ 1286.404249] CPU 0 
Sep 27 13:07:28 ruby kernel: [ 1286.404300] Pid: 21528, comm: Xorg Tainted: G          IO 3.6.0-rc5+ #79 Gigabyte Technology Co., Ltd. GA-A75M-UD2H/GA-A75M-UD2H
Sep 27 13:07:28 ruby kernel: [ 1286.404533] RIP: 0010:[<ffffffff811135cc>]  [<ffffffff811135cc>] kfree+0x5a/0xb1
Sep 27 13:07:28 ruby kernel: [ 1286.404700] RSP: 0018:ffff880201f39b48  EFLAGS: 00010046
Sep 27 13:07:28 ruby kernel: [ 1286.404809] RAX: ffffea00077858c0 RBX: 0000000000000286 RCX: ffff880201f39b28
Sep 27 13:07:28 ruby kernel: [ 1286.404950] RDX: 0200000000000000 RSI: ffff880222628000 RDI: ffff880222628000
Sep 27 13:07:28 ruby kernel: [ 1286.405091] RBP: ffff880201f39b68 R08: 0000000000000002 R09: 0000000000000000
Sep 27 13:07:28 ruby kernel: [ 1286.405232] R10: dead000000200200 R11: ffff8802224d5448 R12: ffff880222628000
Sep 27 13:07:28 ruby kernel: [ 1286.405373] R13: dead000000100100 R14: 0000000000000800 R15: ffff880220b961e0
Sep 27 13:07:28 ruby kernel: [ 1286.405516] FS:  00007f8f59873700(0000) GS:ffff880226600000(0000) knlGS:0000000000000000
Sep 27 13:07:28 ruby kernel: [ 1286.405676] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
Sep 27 13:07:28 ruby kernel: [ 1286.405791] CR2: ffffffffff600400 CR3: 000000000160b000 CR4: 00000000000007f0
Sep 27 13:07:28 ruby kernel: [ 1286.405933] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
Sep 27 13:07:28 ruby kernel: [ 1286.406074] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Sep 27 13:07:28 ruby kernel: [ 1286.406217] Process Xorg (pid: 21528, threadinfo ffff880201f38000, task ffff880220bf4940)
Sep 27 13:07:28 ruby kernel: [ 1286.406376] Stack:
Sep 27 13:07:28 ruby kernel: [ 1286.406422]  0000000000000000 ffff88022281bd80 ffff880224870000 0000000000000800
Sep 27 13:07:28 ruby kernel: [ 1286.406599]  ffff880201f39b98 ffffffffa01532e9 ffff880224871ff0 ffff88022281bd80
Sep 27 13:07:28 ruby kernel: [ 1286.406774]  ffff880224870000 ffff88022281bdb8 ffff880201f39bd8 ffffffffa015474b
Sep 27 13:07:28 ruby kernel: [ 1286.406949] Call Trace:
Sep 27 13:07:28 ruby kernel: [ 1286.407039]  [<ffffffffa01532e9>] radeon_bo_set_tiling_flags+0x149/0x1a4 [radeon]
Sep 27 13:07:28 ruby kernel: [ 1286.407218]  [<ffffffffa015474b>] radeon_vm_fini+0x40/0xce5 [radeon]
Sep 27 13:07:28 ruby kernel: [ 1286.407369]  [<ffffffffa0141409>] radeon_driver_postclose_kms+0x2e/0x47 [radeon]
Sep 27 13:07:28 ruby kernel: [ 1286.407534]  [<ffffffffa001770b>] drm_release+0x4a9/0x552 [drm]
Sep 27 13:07:28 ruby kernel: [ 1286.407660]  [<ffffffff8111ea53>] __fput+0x10f/0x1e5
Sep 27 13:07:28 ruby kernel: [ 1286.407768]  [<ffffffff8111eb32>] ____fput+0x9/0xb
Sep 27 13:07:28 ruby kernel: [ 1286.407872]  [<ffffffff8105a39d>] task_work_run+0x58/0x83
Sep 27 13:07:28 ruby kernel: [ 1286.407987]  [<ffffffff81044c65>] do_exit+0x284/0x7d6
Sep 27 13:07:28 ruby kernel: [ 1286.408096]  [<ffffffff810891b9>] ? lock_release+0x17d/0x1cd
Sep 27 13:07:28 ruby kernel: [ 1286.408214]  [<ffffffff8104543b>] do_group_exit+0x76/0xb5
Sep 27 13:07:28 ruby kernel: [ 1286.408330]  [<ffffffff810518fd>] get_signal_to_deliver+0x502/0x547
Sep 27 13:07:28 ruby kernel: [ 1286.408461]  [<ffffffff8100f11b>] do_signal+0x2f/0x4ac
Sep 27 13:07:28 ruby kernel: [ 1286.408572]  [<ffffffff8108979a>] ? trace_hardirqs_on+0xd/0xf
Sep 27 13:07:28 ruby kernel: [ 1286.408692]  [<ffffffff8108db72>] ? sys_futex+0x11f/0x151
Sep 27 13:07:28 ruby kernel: [ 1286.408806]  [<ffffffff8100f5ed>] do_notify_resume+0x41/0x7e
Sep 27 13:07:28 ruby kernel: [ 1286.408926]  [<ffffffff813a3e32>] int_signal+0x12/0x17
Sep 27 13:07:28 ruby kernel: [ 1286.409030] Code: 3b 00 eb 6d e8 59 ed ff ff 49 83 fc 10 76 6a e8 e6 ec ff ff 48 89 c3 e8 65 24 f7 ff 4c 89 e7 e8 97 f1 ff ff 4c 8b 68 20 4c 89 e7 <49> 63 75 6c e8 c7 61 f7 ff 4c 89 e6 4c 89 ef e8 7f f5 ff ff f6 
Sep 27 13:07:28 ruby kernel: [ 1286.409871] RIP  [<ffffffff811135cc>] kfree+0x5a/0xb1
Sep 27 13:07:28 ruby kernel: [ 1286.409985]  RSP <ffff880201f39b48>
Sep 27 13:07:28 ruby kernel: [ 1286.410062] ---[ end trace 85ca17c4ace45276 ]---
Sep 27 13:07:28 ruby kernel: [ 1286.410157] Fixing recursive fault but reboot is needed!
Dmitry Cherkasov Sept. 27, 2012, 12:18 p.m. UTC | #2
Hi Michel.

2012/9/27 Michel Dänzer <michel@daenzer.net>:
> On Die, 2012-09-25 at 18:44 +0400, Dmitry Cherkasov wrote:
>> make dynamic allocation of page tables on demand in radeon_vm_update_pte
>>
>> Signed-off-by: Dmitry Cherkasov <Dmitrii.Cherkasov@amd.com>
>
> This change caused many piglit tests to fail for me on SI with
>
>         radeon: The kernel rejected CS, see dmesg for more information.
>
> There was no corresponding dmesg output though.
>

Thanks for testing.

So there was no information about GPU hang etc?
Have all failed piglit tests been working without dynamic pte patch?
Michel Dänzer Sept. 27, 2012, 12:30 p.m. UTC | #3
On Don, 2012-09-27 at 16:18 +0400, Dmitry Cherkassov wrote: 
> Hi Michel.
> 
> 2012/9/27 Michel Dänzer <michel@daenzer.net>:
> > On Die, 2012-09-25 at 18:44 +0400, Dmitry Cherkasov wrote:
> >> make dynamic allocation of page tables on demand in radeon_vm_update_pte
> >>
> >> Signed-off-by: Dmitry Cherkasov <Dmitrii.Cherkasov@amd.com>
> >
> > This change caused many piglit tests to fail for me on SI with
> >
> >         radeon: The kernel rejected CS, see dmesg for more information.
> >
> > There was no corresponding dmesg output though.
> >
> 
> Thanks for testing.
> 
> So there was no information about GPU hang etc?

No.

> Have all failed piglit tests been working without dynamic pte patch?

Not sure about all of them, but thousands of them did. Of course, it's
possible that it was just something breaking at some point and all CS
ioctl calls failing after that.
diff mbox

Patch

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index d8a61ed..f1dcdbe 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -659,6 +659,15 @@  struct radeon_ring {
 /* number of entries in page table */
 #define RADEON_VM_PTE_COUNT (1 << RADEON_VM_BLOCK_SIZE)
 
+struct radeon_pt {
+	/* BO containing the page table */
+	/* radeon_sa_bo_gpu_addr(sa_bo); */
+	struct radeon_sa_bo *bo;
+
+	/* GPU address of page table */
+	u64 gpu_addr;
+};
+
 struct radeon_vm {
 	struct list_head		list;
 	struct list_head		va;
@@ -671,6 +680,9 @@  struct radeon_vm {
 	struct radeon_fence		*fence;
 	/* last flush or NULL if we still need to flush */
 	struct radeon_fence		*last_flush;
+
+	/* page tables list */
+	struct radeon_pt *vm_pts;
 };
 
 struct radeon_vm_manager {
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 31d6bfa..ada8471 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -500,6 +500,19 @@  static void radeon_vm_free_pt(struct radeon_device *rdev,
 				    struct radeon_vm *vm)
 {
 	struct radeon_bo_va *bo_va;
+	int i;
+
+	int driver_table_entries = (rdev->vm_manager.max_pfn >>
+				    RADEON_VM_BLOCK_SIZE);
+
+	if (vm->id != 0 && vm->vm_pts) {
+		for (i = 0;i < driver_table_entries; i++) {
+			if (vm->vm_pts->bo)
+				radeon_sa_bo_free(rdev, &vm->vm_pts->bo, vm->fence);
+		}
+
+		kfree (vm->vm_pts);
+	}
 
 	if (!vm->sa_bo)
 		return;
@@ -563,6 +576,9 @@  int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
 	int r;
 	u64 *pd_addr;
 	int tables_size;
+	int driver_table_size = (rdev->vm_manager.max_pfn >>
+				 RADEON_VM_BLOCK_SIZE) *
+		sizeof(struct radeon_pt);
 
 	if (vm == NULL) {
 		return -EINVAL;
@@ -570,7 +586,6 @@  int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
 
 	/* allocate enough to cover the current VM size */
 	tables_size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev));
-	tables_size += RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8);
 
 	if (vm->sa_bo != NULL) {
 		/* update lru */
@@ -600,6 +615,16 @@  retry:
 	vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo);
 	memset(pd_addr, 0, tables_size);
 
+	vm->vm_pts = kmalloc(driver_table_size, GFP_KERNEL);
+	
+	if (vm->vm_pts == NULL) {
+		DRM_ERROR("Cannot allocate space for driver PDE table: %d kb \n",
+			  driver_table_size / 1024);
+		return -ENOMEM;
+	}
+
+	memset(vm->vm_pts, 0, tables_size);
+
 	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
 	return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
 				       &rdev->ring_tmp_bo.bo->tbo.mem);
@@ -864,6 +889,8 @@  uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
 	return result;
 }
 
+
+
 /**
  * radeon_vm_bo_update_pte - map a bo into the vm page table
  *
@@ -886,10 +913,15 @@  int radeon_vm_bo_update_pte(struct radeon_device *rdev,
 	struct radeon_ring *ring = &rdev->ring[ridx];
 	struct radeon_semaphore *sem = NULL;
 	struct radeon_bo_va *bo_va;
-	unsigned nptes, npdes, ndw;
+	struct radeon_pt *pt;
+	unsigned nptes, npdes, ndw, count;
 	uint64_t pe, addr;
 	uint64_t pfn;
+	uint32_t first_pde, pfns_to_pt_edge, pfns_to_end, pde_count;
+	uint64_t *addr_list;
 	int r;
+	uint64_t mem_pfn_offset;
+	uint64_t pfn_idx, last_pfn, pde_num, pte_num;
 
 	/* nothing to do if vm isn't bound */
 	if (vm->sa_bo == NULL)
@@ -947,6 +979,8 @@  int radeon_vm_bo_update_pte(struct radeon_device *rdev,
 
 	pfn = (bo_va->soffset / RADEON_GPU_PAGE_SIZE);
 
+	first_pde = pfn / RADEON_VM_PTE_COUNT;
+
 	/* handle cases where a bo spans several pdes  */
 	npdes = (ALIGN(pfn + nptes, RADEON_VM_PTE_COUNT) -
 		 (pfn & ~(RADEON_VM_PTE_COUNT - 1))) >> RADEON_VM_BLOCK_SIZE;
@@ -971,22 +1005,72 @@  int radeon_vm_bo_update_pte(struct radeon_device *rdev,
 		radeon_fence_note_sync(vm->fence, ridx);
 	}
 
-	/* update page table entries */
-	pe = vm->pd_gpu_addr;
-	pe += radeon_vm_directory_size(rdev);
-	pe += (bo_va->soffset / RADEON_GPU_PAGE_SIZE) * 8;
+	addr_list = kmalloc(npdes * sizeof(uint64_t), GFP_KERNEL);
 
-	radeon_asic_vm_set_page(rdev, pe, addr, nptes,
-				RADEON_GPU_PAGE_SIZE, NULL, bo_va->flags);
+	if (!addr_list) {
+		DRM_ERROR("cannot alloc memory for addr list\n");
+		return -ENOMEM;
+	}
+
+	pfn_idx = pfn;
+	last_pfn = pfn_idx + nptes;
+
+	/*
+	  On each iteration map count ptes.
+	  count is the least of these two numbers:
+	  # of ptes to PDE span boundary (RADEON_VM_PTE_COUNT multiple) or
+	  # of ptes left to map
+	 */
+
+	for (mem_pfn_offset = 0, pde_count = 0; mem_pfn_offset < nptes;) {
+		pfns_to_end = last_pfn - pfn_idx;
+		pfns_to_pt_edge = RADEON_VM_PTE_COUNT -
+			(pfn_idx % RADEON_VM_PTE_COUNT);
+
+		count = pfns_to_pt_edge < pfns_to_end ?
+			pfns_to_pt_edge : pfns_to_end;
+
+		pde_num = pfn_idx / RADEON_VM_PTE_COUNT;
+		pte_num = pfn_idx % RADEON_VM_PTE_COUNT;
+
+		pt = &vm->vm_pts[pde_num];
+		if (pt->bo == NULL) {
+			r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
+					     &pt->bo,
+					     RADEON_VM_PTE_COUNT * 8,
+					     RADEON_GPU_PAGE_SIZE, false);
+			pt->gpu_addr = radeon_sa_bo_gpu_addr(pt->bo);
+
+			if (r == -ENOMEM) {
+				DRM_ERROR("cannot allocate driver page table"
+					  "for vmid = %d", vm->id);
+				return r;
+			}
+		}
+		addr_list[pde_count] = pt->gpu_addr;
+
+		pe = pt->gpu_addr;
+		pe += pte_num * 8;
+
+		radeon_asic_vm_set_page(rdev, pe,
+					addr + mem_pfn_offset * RADEON_GPU_PAGE_SIZE,
+					count,	RADEON_GPU_PAGE_SIZE, NULL, bo_va->flags);
+
+		pfn_idx += count;
+		mem_pfn_offset += count;
+		pde_count++;
+	}
 
 	/* update page directory entries */
-	addr = pe;
+	addr = (uint64_t) &vm->vm_pts[first_pde];
 
 	pe = vm->pd_gpu_addr;
 	pe += ((bo_va->soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE) * 8;
 
-	radeon_asic_vm_set_page(rdev, pe, addr, npdes,
-				RADEON_VM_PTE_COUNT * 8, NULL, RADEON_VM_PAGE_VALID);
+	radeon_asic_vm_set_page(rdev, pe, 0, npdes,
+				0, addr_list, RADEON_VM_PAGE_VALID);
+
+	kfree(addr_list);
 
 	radeon_fence_unref(&vm->fence);
 	r = radeon_fence_emit(rdev, &vm->fence, ridx);