diff mbox series

[2/3] drm/ttm: stop allocating dummy resources during BO creation

Message ID 20220712114605.52369-2-christian.koenig@amd.com (mailing list archive)
State New, archived
Headers show
Series [1/3] drm/i915: audit bo->resource usage | expand

Commit Message

Christian König July 12, 2022, 11:46 a.m. UTC
That should not be necessary any more now that drivers should at least be
able to handle a move without a resource.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 7 -------
 1 file changed, 7 deletions(-)

Comments

kernel test robot July 16, 2022, 1:55 p.m. UTC | #1
Greetings,

FYI, we noticed the following commit (built with gcc-11):

commit: 8e7efa91c9813eaf7149d4c0880afc05d6ee3be9 ("[PATCH 2/3] drm/ttm: stop allocating dummy resources during BO creation")
url: https://github.com/intel-lab-lkp/linux/commits/Christian-K-nig/drm-i915-audit-bo-resource-usage/20220712-194741
base: git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link: https://lore.kernel.org/intel-gfx/20220712114605.52369-2-christian.koenig@amd.com

in testcase: boot

on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

caused the changes below (please refer to the attached dmesg/kmsg for the entire log/backtrace):



If you fix the issue, kindly add the following tag:
Reported-by: kernel test robot <oliver.sang@intel.com>


[    5.978437][  T280] BUG: kernel NULL pointer dereference, address: 0000000000000010
[    5.979888][  T280] #PF: supervisor read access in kernel mode
[    5.980313][  T280] #PF: error_code(0x0000) - not-present page
[    5.980729][  T280] PGD 800000012dff9067 P4D 800000012dff9067 PUD 12df13067 PMD 0
[    5.981243][  T280] Oops: 0000 [#1] SMP PTI
[    5.981532][  T280] CPU: 0 PID: 280 Comm: modprobe Not tainted 5.19.0-rc2-00454-g8e7efa91c981 #1
[    5.982122][  T280] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-4 04/01/2014
[ 5.982808][ T280] RIP: 0010:ttm_bo_validate (drivers/gpu/drm/ttm/ttm_bo.c:909) ttm
[ 5.983208][ T280] Code: 72 48 8b 74 24 08 31 d2 4c 8d 44 24 10 48 89 d9 48 89 ef e8 10 eb ff ff 83 f8 b8 74 d0 85 c0 75 52 48 8b 95 58 01 00 00 31 c0 <8b> 52 10 85 d2 75 9f 48 8b 44 24 20 65 48 2b 04 25 28 00 00 00 75
All code
========
   0:	72 48                	jb     0x4a
   2:	8b 74 24 08          	mov    0x8(%rsp),%esi
   6:	31 d2                	xor    %edx,%edx
   8:	4c 8d 44 24 10       	lea    0x10(%rsp),%r8
   d:	48 89 d9             	mov    %rbx,%rcx
  10:	48 89 ef             	mov    %rbp,%rdi
  13:	e8 10 eb ff ff       	callq  0xffffffffffffeb28
  18:	83 f8 b8             	cmp    $0xffffffb8,%eax
  1b:	74 d0                	je     0xffffffffffffffed
  1d:	85 c0                	test   %eax,%eax
  1f:	75 52                	jne    0x73
  21:	48 8b 95 58 01 00 00 	mov    0x158(%rbp),%rdx
  28:	31 c0                	xor    %eax,%eax
  2a:*	8b 52 10             	mov    0x10(%rdx),%edx		<-- trapping instruction
  2d:	85 d2                	test   %edx,%edx
  2f:	75 9f                	jne    0xffffffffffffffd0
  31:	48 8b 44 24 20       	mov    0x20(%rsp),%rax
  36:	65 48 2b 04 25 28 00 	sub    %gs:0x28,%rax
  3d:	00 00 
  3f:	75                   	.byte 0x75

Code starting with the faulting instruction
===========================================
   0:	8b 52 10             	mov    0x10(%rdx),%edx
   3:	85 d2                	test   %edx,%edx
   5:	75 9f                	jne    0xffffffffffffffa6
   7:	48 8b 44 24 20       	mov    0x20(%rsp),%rax
   c:	65 48 2b 04 25 28 00 	sub    %gs:0x28,%rax
  13:	00 00 
  15:	75                   	.byte 0x75
[    5.984680][  T280] RSP: 0018:ffffa4bdc07d7908 EFLAGS: 00010246
[    5.985115][  T280] RAX: 0000000000000000 RBX: ffffa4bdc07d7998 RCX: ffff8ff4260895a0
[    5.985683][  T280] RDX: 0000000000000000 RSI: ffffa4bdc07d7998 RDI: ffff8ff42b448600
[    5.986271][  T280] RBP: ffff8ff42b448600 R08: ffffa4bdc07d7918 R09: ffff8ff42b5ea000
[    5.986845][  T280] R10: 00000000ffffffff R11: 0000000000000000 R12: ffff8ff42b4487b0
[    5.987436][  T280] R13: 0000000000000000 R14: ffff8ff42bd5c000 R15: ffff8ff596532e80
[    5.987996][  T280] FS:  00007f6e79ea7700(0000) GS:ffff8ff72fc00000(0000) knlGS:0000000000000000
[    5.988626][  T280] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    5.989078][  T280] CR2: 0000000000000010 CR3: 000000012deb0000 CR4: 00000000000406f0
[    5.989630][  T280] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    5.990191][  T280] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    5.990752][  T280] Call Trace:
[    5.990989][  T280]  <TASK>
[ 5.991199][ T280] ? drm_vma_offset_add (include/drm/drm_mm.h:439 include/drm/drm_mm.h:462 drivers/gpu/drm/drm_vma_manager.c:209) drm
[ 5.991632][ T280] ttm_bo_init_reserved (drivers/gpu/drm/ttm/ttm_bo.c:994) ttm
[ 5.992046][ T280] ttm_bo_init_validate (drivers/gpu/drm/ttm/ttm_bo.c:1054) ttm
[ 5.992454][ T280] ? bo_driver_evict_flags (drivers/gpu/drm/drm_gem_vram_helper.c:132) drm_vram_helper
[ 5.992950][ T280] drm_gem_vram_create (drivers/gpu/drm/drm_gem_vram_helper.c:232) drm_vram_helper
[ 5.993415][ T280] ? bo_driver_evict_flags (drivers/gpu/drm/drm_gem_vram_helper.c:132) drm_vram_helper
[ 5.993892][ T280] drm_gem_vram_fill_create_dumb (drivers/gpu/drm/drm_gem_vram_helper.c:525) drm_vram_helper
[ 5.994385][ T280] drm_client_framebuffer_create (drivers/gpu/drm/drm_client.c:269 drivers/gpu/drm/drm_client.c:419) drm
[ 5.994885][ T280] drm_fb_helper_generic_probe (drivers/gpu/drm/drm_fb_helper.c:2436 (discriminator 4)) drm_kms_helper
[ 5.995436][ T280] drm_fb_helper_single_fb_probe (drivers/gpu/drm/drm_fb_helper.c:1754) drm_kms_helper
[ 5.995970][ T280] __drm_fb_helper_initial_config_and_unlock (drivers/gpu/drm/drm_fb_helper.c:1931) drm_kms_helper
[ 5.996564][ T280] drm_fbdev_client_hotplug (drivers/gpu/drm/drm_fb_helper.c:2025 drivers/gpu/drm/drm_fb_helper.c:2017 drivers/gpu/drm/drm_fb_helper.c:2539) drm_kms_helper
[ 5.997028][ T280] drm_fbdev_generic_setup (drivers/gpu/drm/drm_fb_helper.c:2626) drm_kms_helper
[ 5.997481][ T280] bochs_pci_probe (drivers/gpu/drm/tiny/bochs.c:670 drivers/gpu/drm/tiny/bochs.c:635) bochs
[ 5.997842][ T280] local_pci_probe (drivers/pci/pci-driver.c:324) 
[ 5.998155][ T280] pci_call_probe (drivers/pci/pci-driver.c:392) 
[ 5.998460][ T280] ? kernfs_create_link (fs/kernfs/symlink.c:48) 
[ 5.998794][ T280] pci_device_probe (drivers/pci/pci-driver.c:461) 
[ 5.999110][ T280] really_probe (drivers/base/dd.c:555 drivers/base/dd.c:634) 
[    5.999147][  T278] scsi host1: ata_piix
[ 5.999436][ T280] __driver_probe_device (drivers/base/dd.c:764) 
[    5.999933][  T278] ata1: PATA max MWDMA2 cmd 0x1f0 ctl 0x3f6 bmdma 0xc040 irq 14
[ 6.000100][ T280] driver_probe_device (drivers/base/dd.c:794) 
[    6.000645][  T278] ata2: PATA max MWDMA2 cmd 0x170 ctl 0x376 bmdma 0xc048 irq 15
[ 6.000968][ T280] __driver_attach (drivers/base/dd.c:1164) 
[ 6.001782][ T280] ? __device_attach_driver (drivers/base/dd.c:1116) 
[ 6.002148][ T280] ? __device_attach_driver (drivers/base/dd.c:1116) 
[ 6.002513][ T280] bus_for_each_dev (drivers/base/bus.c:301) 
[ 6.002841][ T280] bus_add_driver (drivers/base/bus.c:618) 
[ 6.003174][ T280] driver_register (drivers/base/driver.c:240) 
[    6.003508][  T280]  ? 0xffffffffc0529000
[ 6.003801][ T280] do_one_initcall (init/main.c:1295) 
[ 6.004137][ T280] ? __cond_resched (kernel/sched/core.c:8218) 
[ 6.004480][ T280] ? kmem_cache_alloc_trace (mm/slub.c:3216 mm/slub.c:3222 mm/slub.c:3253) 
[ 6.004846][ T280] do_init_module (kernel/module/main.c:2434) 
[ 6.005154][ T280] __do_sys_init_module (kernel/module/main.c:2894) 
[    6.005159][  T192] result_service: raw_upload, RESULT_MNT: /10.239.97.5/result, RESULT_ROOT: /10.239.97.5/result/boot/1/vm-snb/quantal-x86_64-core-20190426.cgz/x86_64-rhel-8.3/gcc-11/8e7efa91c9813eaf7149d4c0880afc05d6ee3be9/3, TMP_RESULT_ROOT: /tmp/lkp/result
[ 6.005498][ T280] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) 
[ 6.005504][ T280] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:115) 
[    6.007755][  T280] RIP: 0033:0x7f6e799c0bca
[ 6.008070][ T280] Code: 48 8b 0d 79 32 2c 00 31 d2 48 29 c2 64 89 11 48 83 c8 ff eb ea 90 90 90 90 90 90 90 90 90 90 90 49 89 ca b8 af 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 46 32 2c 00 31 d2 48 29 c2 64
All code
========
   0:	48 8b 0d 79 32 2c 00 	mov    0x2c3279(%rip),%rcx        # 0x2c3280
   7:	31 d2                	xor    %edx,%edx
   9:	48 29 c2             	sub    %rax,%rdx
   c:	64 89 11             	mov    %edx,%fs:(%rcx)
   f:	48 83 c8 ff          	or     $0xffffffffffffffff,%rax
  13:	eb ea                	jmp    0xffffffffffffffff
  15:	90                   	nop
  16:	90                   	nop
  17:	90                   	nop
  18:	90                   	nop
  19:	90                   	nop
  1a:	90                   	nop
  1b:	90                   	nop
  1c:	90                   	nop
  1d:	90                   	nop
  1e:	90                   	nop
  1f:	90                   	nop
  20:	49 89 ca             	mov    %rcx,%r10
  23:	b8 af 00 00 00       	mov    $0xaf,%eax
  28:	0f 05                	syscall 
  2a:*	48 3d 01 f0 ff ff    	cmp    $0xfffffffffffff001,%rax		<-- trapping instruction
  30:	73 01                	jae    0x33
  32:	c3                   	retq   
  33:	48 8b 0d 46 32 2c 00 	mov    0x2c3246(%rip),%rcx        # 0x2c3280
  3a:	31 d2                	xor    %edx,%edx
  3c:	48 29 c2             	sub    %rax,%rdx
  3f:	64                   	fs

Code starting with the faulting instruction
===========================================
   0:	48 3d 01 f0 ff ff    	cmp    $0xfffffffffffff001,%rax
   6:	73 01                	jae    0x9
   8:	c3                   	retq   
   9:	48 8b 0d 46 32 2c 00 	mov    0x2c3246(%rip),%rcx        # 0x2c3256
  10:	31 d2                	xor    %edx,%edx
  12:	48 29 c2             	sub    %rax,%rdx
  15:	64                   	fs


To reproduce:

        # build kernel
	cd linux
	cp config-5.19.0-rc2-00454-g8e7efa91c981 .config
	make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 olddefconfig prepare modules_prepare bzImage modules
	make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 INSTALL_MOD_PATH=<mod-install-dir> modules_install
	cd <mod-install-dir>
	find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz


        git clone https://github.com/intel/lkp-tests.git
        cd lkp-tests
        bin/lkp qemu -k <bzImage> -m modules.cgz job-script # job-script is attached in this email

        # if you come across any failure that blocks the test,
        # please remove the ~/.lkp and /lkp directories to run from a clean state.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index c1bd006a5525..a95826be8048 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -960,7 +960,6 @@  int ttm_bo_init_reserved(struct ttm_device *bdev, struct ttm_buffer_object *bo,
 			 struct sg_table *sg, struct dma_resv *resv,
 			 void (*destroy) (struct ttm_buffer_object *))
 {
-	static const struct ttm_place sys_mem = { .mem_type = TTM_PL_SYSTEM };
 	int ret;
 
 	kref_init(&bo->kref);
@@ -978,12 +977,6 @@  int ttm_bo_init_reserved(struct ttm_device *bdev, struct ttm_buffer_object *bo,
 		bo->base.resv = &bo->base._resv;
 	atomic_inc(&ttm_glob.bo_count);
 
-	ret = ttm_resource_alloc(bo, &sys_mem, &bo->resource);
-	if (unlikely(ret)) {
-		ttm_bo_put(bo);
-		return ret;
-	}
-
 	/*
 	 * For ttm_bo_type_device buffers, allocate
 	 * address space from the device.