diff mbox

drm/i915/selftests: Prevent background reaping of active objects

Message ID 20180708123745.3318-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson July 8, 2018, 12:37 p.m. UTC
igt_mmap_offset_exhaustion() wants to test what happens when the mmap
space is filled with zombie objects, objects discarded by userspace but
still active on the GPU. As they are only protected by the active
reference, we have to be certain that active reference is kept while we
peek into our dangling pointer. That active reference should not be
freed until we retire, but we do that retirement from a background
thread. This leaves us with a subtle timing problem, exacerbated and
highlighted by KASAN:

<3>[  132.380399] BUG: KASAN: use-after-free in drm_gem_create_mmap_offset+0x8c/0xd0
<3>[  132.380430] Read of size 8 at addr ffff8801e13245f8 by task drv_selftest/5822

<4>[  132.380470] CPU: 0 PID: 5822 Comm: drv_selftest Tainted: G     U            4.18.0-rc3-g7ae7763aa2be-kasan_48+ #1
<4>[  132.380473] Hardware name: Dell Inc. XPS 8300  /0Y2MRG, BIOS A06 10/17/2011
<4>[  132.380475] Call Trace:
<4>[  132.380481]  dump_stack+0x7c/0xbb
<4>[  132.380487]  print_address_description+0x65/0x270
<4>[  132.380493]  kasan_report+0x25b/0x380
<4>[  132.380497]  ? drm_gem_create_mmap_offset+0x8c/0xd0
<4>[  132.380503]  drm_gem_create_mmap_offset+0x8c/0xd0
<4>[  132.380584]  i915_gem_object_create_mmap_offset+0x6d/0x100 [i915]
<4>[  132.380650]  igt_mmap_offset_exhaustion+0x462/0x940 [i915]
<4>[  132.380714]  ? i915_gem_close_object+0x740/0x740 [i915]
<4>[  132.380784]  ? igt_gem_huge+0x269/0x3d0 [i915]
<4>[  132.380865]  __i915_subtests+0x5a/0x160 [i915]
<4>[  132.380936]  __run_selftests+0x1a2/0x2f0 [i915]
<4>[  132.381008]  i915_live_selftests+0x4e/0x80 [i915]
<4>[  132.381071]  i915_pci_probe+0xd8/0x1b0 [i915]
<4>[  132.381077]  pci_device_probe+0x1c5/0x3a0
<4>[  132.381087]  driver_probe_device+0x6b6/0xcb0
<4>[  132.381094]  __driver_attach+0x22d/0x2c0
<4>[  132.381100]  ? driver_probe_device+0xcb0/0xcb0
<4>[  132.381103]  bus_for_each_dev+0x113/0x1a0
<4>[  132.381108]  ? check_flags.part.24+0x450/0x450
<4>[  132.381112]  ? subsys_dev_iter_exit+0x10/0x10
<4>[  132.381123]  bus_add_driver+0x38b/0x6e0
<4>[  132.381131]  driver_register+0x189/0x400
<4>[  132.381136]  ? 0xffffffffc12d8000
<4>[  132.381140]  do_one_initcall+0xa0/0x4c0
<4>[  132.381145]  ? initcall_blacklisted+0x180/0x180
<4>[  132.381152]  ? do_init_module+0x4a/0x54c
<4>[  132.381156]  ? rcu_lockdep_current_cpu_online+0xdc/0x130
<4>[  132.381161]  ? kasan_unpoison_shadow+0x30/0x40
<4>[  132.381169]  do_init_module+0x1b5/0x54c
<4>[  132.381177]  load_module+0x619e/0x9b70
<4>[  132.381202]  ? module_frob_arch_sections+0x20/0x20
<4>[  132.381211]  ? vfs_read+0x257/0x2f0
<4>[  132.381214]  ? vfs_read+0x257/0x2f0
<4>[  132.381221]  ? kernel_read+0x8b/0x130
<4>[  132.381231]  ? copy_strings_kernel+0x120/0x120
<4>[  132.381244]  ? __se_sys_finit_module+0x17c/0x1a0
<4>[  132.381248]  __se_sys_finit_module+0x17c/0x1a0
<4>[  132.381252]  ? __ia32_sys_init_module+0xa0/0xa0
<4>[  132.381261]  ? __se_sys_newstat+0x77/0xd0
<4>[  132.381265]  ? cp_new_stat+0x590/0x590
<4>[  132.381269]  ? kmem_cache_free+0x2f0/0x340
<4>[  132.381285]  do_syscall_64+0x97/0x400
<4>[  132.381292]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
<4>[  132.381295] RIP: 0033:0x7eff4af46839
<4>[  132.381297] Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 1f f6 2c 00 f7 d8 64 89 01 48
<4>[  132.381426] RSP: 002b:00007ffcd84f4cf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
<4>[  132.381432] RAX: ffffffffffffffda RBX: 000055dfdeb429a0 RCX: 00007eff4af46839
<4>[  132.381435] RDX: 0000000000000000 RSI: 000055dfdeb43670 RDI: 0000000000000004
<4>[  132.381437] RBP: 000055dfdeb43670 R08: 0000000000000004 R09: 0000000000000000
<4>[  132.381440] R10: 00007ffcd84f4e60 R11: 0000000000000246 R12: 0000000000000000
<4>[  132.381442] R13: 000055dfdeb3bec0 R14: 0000000000000000 R15: 000000000000003b

<3>[  132.381466] Allocated by task 5822:
<4>[  132.381485]  kmem_cache_alloc+0xdf/0x2e0
<4>[  132.381546]  i915_gem_object_create_internal+0x24/0x1e0 [i915]
<4>[  132.381609]  igt_mmap_offset_exhaustion+0x257/0x940 [i915]
<4>[  132.381677]  __i915_subtests+0x5a/0x160 [i915]
<4>[  132.381742]  __run_selftests+0x1a2/0x2f0 [i915]
<4>[  132.381806]  i915_live_selftests+0x4e/0x80 [i915]
<4>[  132.381865]  i915_pci_probe+0xd8/0x1b0 [i915]
<4>[  132.381868]  pci_device_probe+0x1c5/0x3a0
<4>[  132.381871]  driver_probe_device+0x6b6/0xcb0
<4>[  132.381874]  __driver_attach+0x22d/0x2c0
<4>[  132.381877]  bus_for_each_dev+0x113/0x1a0
<4>[  132.381880]  bus_add_driver+0x38b/0x6e0
<4>[  132.381884]  driver_register+0x189/0x400
<4>[  132.381886]  do_one_initcall+0xa0/0x4c0
<4>[  132.381889]  do_init_module+0x1b5/0x54c
<4>[  132.381892]  load_module+0x619e/0x9b70
<4>[  132.381895]  __se_sys_finit_module+0x17c/0x1a0
<4>[  132.381898]  do_syscall_64+0x97/0x400
<4>[  132.381901]  entry_SYSCALL_64_after_hwframe+0x49/0xbe

<3>[  132.381914] Freed by task 150:
<4>[  132.381931]  kmem_cache_free+0xb7/0x340
<4>[  132.381995]  __i915_gem_free_objects+0x875/0xf50 [i915]
<4>[  132.382054]  __i915_gem_free_work+0x69/0xb0 [i915]
<4>[  132.382058]  process_one_work+0x78b/0x1740
<4>[  132.382061]  worker_thread+0x82/0xb80
<4>[  132.382064]  kthread+0x30c/0x3d0
<4>[  132.382067]  ret_from_fork+0x3a/0x50

<3>[  132.382081] The buggy address belongs to the object at ffff8801e1324500
                   which belongs to the cache drm_i915_gem_object of size 1168
<3>[  132.382133] The buggy address is located 248 bytes inside of
                   1168-byte region [ffff8801e1324500, ffff8801e1324990)
<3>[  132.382179] The buggy address belongs to the page:
<0>[  132.382202] page:ffffea000784c800 count:1 mapcount:0 mapping:ffff8801dedf6500 index:0xffff8801e1323ec0 compound_mapcount: 0
<0>[  132.382251] flags: 0x8000000000008100(slab|head)
<1>[  132.382274] raw: 8000000000008100 ffff8801d6317440 ffff8801d6317440 ffff8801dedf6500
<1>[  132.382307] raw: ffff8801e1323ec0 0000000000140013 00000001ffffffff 0000000000000000
<1>[  132.382339] page dumped because: kasan: bad access detected

<3>[  132.382373] Memory state around the buggy address:
<3>[  132.382395]  ffff8801e1324480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
<3>[  132.382426]  ffff8801e1324500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
<3>[  132.382457] >ffff8801e1324580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
<3>[  132.382488]                                                                 ^
<3>[  132.382517]  ffff8801e1324600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
<3>[  132.382548]  ffff8801e1324680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb

This patch tricks the system into running without the background retire
thread, until after we finish the test. The only reaping should then be
performed by the mmap offset routine to reclaim the space as required.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/selftests/i915_gem_object.c | 9 +++++++++
 1 file changed, 9 insertions(+)

Comments

kernel test robot July 8, 2018, 3:36 p.m. UTC | #1
Hi Chris,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on v4.18-rc3 next-20180706]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-selftests-Prevent-background-reaping-of-active-objects/20180708-204032
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-randconfig-a1-201827 (attached as .config)
compiler: gcc-4.9 (Debian 4.9.4-2) 4.9.4
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   In file included from drivers/gpu/drm/i915/i915_gem.c:6153:0:
   drivers/gpu/drm/i915/selftests/i915_gem_object.c: In function 'igt_mmap_offset_exhaustion':
>> drivers/gpu/drm/i915/selftests/i915_gem_object.c:555:2: error: incompatible type for argument 1 of 'cancel_delayed_work_sync'
     cancel_delayed_work_sync(i915->gt.retire_work);
     ^
   In file included from include/linux/srcu.h:34:0,
                    from include/linux/notifier.h:16,
                    from include/linux/memory_hotplug.h:7,
                    from include/linux/mmzone.h:777,
                    from include/linux/gfp.h:6,
                    from include/linux/idr.h:16,
                    from include/linux/kernfs.h:14,
                    from include/linux/sysfs.h:16,
                    from include/linux/kobject.h:20,
                    from include/linux/cdev.h:5,
                    from include/drm/drmP.h:36,
                    from drivers/gpu/drm/i915/i915_gem.c:28:
   include/linux/workqueue.h:484:13: note: expected 'struct delayed_work *' but argument is of type 'struct delayed_work'
    extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
                ^
   In file included from drivers/gpu/drm/i915/i915_gem.c:6153:0:
   drivers/gpu/drm/i915/selftests/i915_gem_object.c:556:2: error: incompatible type for argument 1 of 'cancel_delayed_work_sync'
     cancel_delayed_work_sync(i915->gt.idle_work);
     ^
   In file included from include/linux/srcu.h:34:0,
                    from include/linux/notifier.h:16,
                    from include/linux/memory_hotplug.h:7,
                    from include/linux/mmzone.h:777,
                    from include/linux/gfp.h:6,
                    from include/linux/idr.h:16,
                    from include/linux/kernfs.h:14,
                    from include/linux/sysfs.h:16,
                    from include/linux/kobject.h:20,
                    from include/linux/cdev.h:5,
                    from include/drm/drmP.h:36,
                    from drivers/gpu/drm/i915/i915_gem.c:28:
   include/linux/workqueue.h:484:13: note: expected 'struct delayed_work *' but argument is of type 'struct delayed_work'
    extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
                ^

vim +/cancel_delayed_work_sync +555 drivers/gpu/drm/i915/selftests/i915_gem_object.c

   493	
   494	static int igt_mmap_offset_exhaustion(void *arg)
   495	{
   496		struct drm_i915_private *i915 = arg;
   497		struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
   498		struct drm_i915_gem_object *obj;
   499		struct drm_mm_node resv, *hole;
   500		u64 hole_start, hole_end;
   501		int loop, err;
   502	
   503		/* Trim the device mmap space to only a page */
   504		memset(&resv, 0, sizeof(resv));
   505		drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
   506			resv.start = hole_start;
   507			resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
   508			err = drm_mm_reserve_node(mm, &resv);
   509			if (err) {
   510				pr_err("Failed to trim VMA manager, err=%d\n", err);
   511				return err;
   512			}
   513			break;
   514		}
   515	
   516		/* Just fits! */
   517		if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
   518			pr_err("Unable to insert object into single page hole\n");
   519			err = -EINVAL;
   520			goto out;
   521		}
   522	
   523		/* Too large */
   524		if (!assert_mmap_offset(i915, 2*PAGE_SIZE, -ENOSPC)) {
   525			pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
   526			err = -EINVAL;
   527			goto out;
   528		}
   529	
   530		/* Fill the hole, further allocation attempts should then fail */
   531		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
   532		if (IS_ERR(obj)) {
   533			err = PTR_ERR(obj);
   534			goto out;
   535		}
   536	
   537		err = i915_gem_object_create_mmap_offset(obj);
   538		if (err) {
   539			pr_err("Unable to insert object into reclaimed hole\n");
   540			goto err_obj;
   541		}
   542	
   543		if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) {
   544			pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
   545			err = -EINVAL;
   546			goto err_obj;
   547		}
   548	
   549		i915_gem_object_put(obj);
   550	
   551		/* Disable background reaper */
   552		mutex_lock(&i915->drm.struct_mutex);
   553		i915_gem_unpark(i915);
   554		mutex_unlock(&i915->drm.struct_mutex);
 > 555		cancel_delayed_work_sync(i915->gt.retire_work);
   556		cancel_delayed_work_sync(i915->gt.idle_work);
   557		GEM_BUG_ON(!i915->gt.awake);
   558	
   559		/* Now fill with busy dead objects that we expect to reap */
   560		for (loop = 0; loop < 3; loop++) {
   561			if (i915_terminally_wedged(&i915->gpu_error))
   562				break;
   563	
   564			obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
   565			if (IS_ERR(obj)) {
   566				err = PTR_ERR(obj);
   567				goto out;
   568			}
   569	
   570			mutex_lock(&i915->drm.struct_mutex);
   571			intel_runtime_pm_get(i915);
   572			err = make_obj_busy(obj);
   573			intel_runtime_pm_put(i915);
   574			mutex_unlock(&i915->drm.struct_mutex);
   575			if (err) {
   576				pr_err("[loop %d] Failed to busy the object\n", loop);
   577				goto err_obj;
   578			}
   579	
   580			GEM_BUG_ON(!i915_gem_object_is_active(obj));
   581			err = i915_gem_object_create_mmap_offset(obj);
   582			if (err) {
   583				pr_err("[loop %d] i915_gem_object_create_mmap_offset failed with err=%d\n",
   584				       loop, err);
   585				goto out;
   586			}
   587		}
   588	
   589	out:
   590		drm_mm_remove_node(&resv);
   591		queue_delayed_work(i915->wq, &i915->gt.retire_work, 0);
   592		return err;
   593	err_obj:
   594		i915_gem_object_put(obj);
   595		goto out;
   596	}
   597	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 25c2b2d433bd..d80713f32da2 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -548,6 +548,14 @@  static int igt_mmap_offset_exhaustion(void *arg)
 
 	i915_gem_object_put(obj);
 
+	/* Disable background reaper */
+	mutex_lock(&i915->drm.struct_mutex);
+	i915_gem_unpark(i915);
+	mutex_unlock(&i915->drm.struct_mutex);
+	cancel_delayed_work_sync(i915->gt.retire_work);
+	cancel_delayed_work_sync(i915->gt.idle_work);
+	GEM_BUG_ON(!i915->gt.awake);
+
 	/* Now fill with busy dead objects that we expect to reap */
 	for (loop = 0; loop < 3; loop++) {
 		if (i915_terminally_wedged(&i915->gpu_error))
@@ -580,6 +588,7 @@  static int igt_mmap_offset_exhaustion(void *arg)
 
 out:
 	drm_mm_remove_node(&resv);
+	queue_delayed_work(i915->wq, &i915->gt.retire_work, 0);
 	return err;
 err_obj:
 	i915_gem_object_put(obj);