Message ID | 20180708123745.3318-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Chris, Thank you for the patch! Yet something to improve: [auto build test ERROR on drm-intel/for-linux-next] [also build test ERROR on v4.18-rc3 next-20180706] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-selftests-Prevent-background-reaping-of-active-objects/20180708-204032 base: git://anongit.freedesktop.org/drm-intel for-linux-next config: i386-randconfig-a1-201827 (attached as .config) compiler: gcc-4.9 (Debian 4.9.4-2) 4.9.4 reproduce: # save the attached .config to linux build tree make ARCH=i386 All errors (new ones prefixed by >>): In file included from drivers/gpu/drm/i915/i915_gem.c:6153:0: drivers/gpu/drm/i915/selftests/i915_gem_object.c: In function 'igt_mmap_offset_exhaustion': >> drivers/gpu/drm/i915/selftests/i915_gem_object.c:555:2: error: incompatible type for argument 1 of 'cancel_delayed_work_sync' cancel_delayed_work_sync(i915->gt.retire_work); ^ In file included from include/linux/srcu.h:34:0, from include/linux/notifier.h:16, from include/linux/memory_hotplug.h:7, from include/linux/mmzone.h:777, from include/linux/gfp.h:6, from include/linux/idr.h:16, from include/linux/kernfs.h:14, from include/linux/sysfs.h:16, from include/linux/kobject.h:20, from include/linux/cdev.h:5, from include/drm/drmP.h:36, from drivers/gpu/drm/i915/i915_gem.c:28: include/linux/workqueue.h:484:13: note: expected 'struct delayed_work *' but argument is of type 'struct delayed_work' extern bool cancel_delayed_work_sync(struct delayed_work *dwork); ^ In file included from drivers/gpu/drm/i915/i915_gem.c:6153:0: drivers/gpu/drm/i915/selftests/i915_gem_object.c:556:2: error: incompatible type for argument 1 of 'cancel_delayed_work_sync' cancel_delayed_work_sync(i915->gt.idle_work); ^ In file included from include/linux/srcu.h:34:0, from include/linux/notifier.h:16, from include/linux/memory_hotplug.h:7, from include/linux/mmzone.h:777, from include/linux/gfp.h:6, from include/linux/idr.h:16, from include/linux/kernfs.h:14, from include/linux/sysfs.h:16, from include/linux/kobject.h:20, from include/linux/cdev.h:5, from include/drm/drmP.h:36, from drivers/gpu/drm/i915/i915_gem.c:28: include/linux/workqueue.h:484:13: note: expected 'struct delayed_work *' but argument is of type 'struct delayed_work' extern bool cancel_delayed_work_sync(struct delayed_work *dwork); ^ vim +/cancel_delayed_work_sync +555 drivers/gpu/drm/i915/selftests/i915_gem_object.c 493 494 static int igt_mmap_offset_exhaustion(void *arg) 495 { 496 struct drm_i915_private *i915 = arg; 497 struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm; 498 struct drm_i915_gem_object *obj; 499 struct drm_mm_node resv, *hole; 500 u64 hole_start, hole_end; 501 int loop, err; 502 503 /* Trim the device mmap space to only a page */ 504 memset(&resv, 0, sizeof(resv)); 505 drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { 506 resv.start = hole_start; 507 resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */ 508 err = drm_mm_reserve_node(mm, &resv); 509 if (err) { 510 pr_err("Failed to trim VMA manager, err=%d\n", err); 511 return err; 512 } 513 break; 514 } 515 516 /* Just fits! */ 517 if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) { 518 pr_err("Unable to insert object into single page hole\n"); 519 err = -EINVAL; 520 goto out; 521 } 522 523 /* Too large */ 524 if (!assert_mmap_offset(i915, 2*PAGE_SIZE, -ENOSPC)) { 525 pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n"); 526 err = -EINVAL; 527 goto out; 528 } 529 530 /* Fill the hole, further allocation attempts should then fail */ 531 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 532 if (IS_ERR(obj)) { 533 err = PTR_ERR(obj); 534 goto out; 535 } 536 537 err = i915_gem_object_create_mmap_offset(obj); 538 if (err) { 539 pr_err("Unable to insert object into reclaimed hole\n"); 540 goto err_obj; 541 } 542 543 if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) { 544 pr_err("Unexpectedly succeeded in inserting object into no holes!\n"); 545 err = -EINVAL; 546 goto err_obj; 547 } 548 549 i915_gem_object_put(obj); 550 551 /* Disable background reaper */ 552 mutex_lock(&i915->drm.struct_mutex); 553 i915_gem_unpark(i915); 554 mutex_unlock(&i915->drm.struct_mutex); > 555 cancel_delayed_work_sync(i915->gt.retire_work); 556 cancel_delayed_work_sync(i915->gt.idle_work); 557 GEM_BUG_ON(!i915->gt.awake); 558 559 /* Now fill with busy dead objects that we expect to reap */ 560 for (loop = 0; loop < 3; loop++) { 561 if (i915_terminally_wedged(&i915->gpu_error)) 562 break; 563 564 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 565 if (IS_ERR(obj)) { 566 err = PTR_ERR(obj); 567 goto out; 568 } 569 570 mutex_lock(&i915->drm.struct_mutex); 571 intel_runtime_pm_get(i915); 572 err = make_obj_busy(obj); 573 intel_runtime_pm_put(i915); 574 mutex_unlock(&i915->drm.struct_mutex); 575 if (err) { 576 pr_err("[loop %d] Failed to busy the object\n", loop); 577 goto err_obj; 578 } 579 580 GEM_BUG_ON(!i915_gem_object_is_active(obj)); 581 err = i915_gem_object_create_mmap_offset(obj); 582 if (err) { 583 pr_err("[loop %d] i915_gem_object_create_mmap_offset failed with err=%d\n", 584 loop, err); 585 goto out; 586 } 587 } 588 589 out: 590 drm_mm_remove_node(&resv); 591 queue_delayed_work(i915->wq, &i915->gt.retire_work, 0); 592 return err; 593 err_obj: 594 i915_gem_object_put(obj); 595 goto out; 596 } 597 --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 25c2b2d433bd..d80713f32da2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -548,6 +548,14 @@ static int igt_mmap_offset_exhaustion(void *arg) i915_gem_object_put(obj); + /* Disable background reaper */ + mutex_lock(&i915->drm.struct_mutex); + i915_gem_unpark(i915); + mutex_unlock(&i915->drm.struct_mutex); + cancel_delayed_work_sync(i915->gt.retire_work); + cancel_delayed_work_sync(i915->gt.idle_work); + GEM_BUG_ON(!i915->gt.awake); + /* Now fill with busy dead objects that we expect to reap */ for (loop = 0; loop < 3; loop++) { if (i915_terminally_wedged(&i915->gpu_error)) @@ -580,6 +588,7 @@ static int igt_mmap_offset_exhaustion(void *arg) out: drm_mm_remove_node(&resv); + queue_delayed_work(i915->wq, &i915->gt.retire_work, 0); return err; err_obj: i915_gem_object_put(obj);
igt_mmap_offset_exhaustion() wants to test what happens when the mmap space is filled with zombie objects, objects discarded by userspace but still active on the GPU. As they are only protected by the active reference, we have to be certain that active reference is kept while we peek into our dangling pointer. That active reference should not be freed until we retire, but we do that retirement from a background thread. This leaves us with a subtle timing problem, exacerbated and highlighted by KASAN: <3>[ 132.380399] BUG: KASAN: use-after-free in drm_gem_create_mmap_offset+0x8c/0xd0 <3>[ 132.380430] Read of size 8 at addr ffff8801e13245f8 by task drv_selftest/5822 <4>[ 132.380470] CPU: 0 PID: 5822 Comm: drv_selftest Tainted: G U 4.18.0-rc3-g7ae7763aa2be-kasan_48+ #1 <4>[ 132.380473] Hardware name: Dell Inc. XPS 8300 /0Y2MRG, BIOS A06 10/17/2011 <4>[ 132.380475] Call Trace: <4>[ 132.380481] dump_stack+0x7c/0xbb <4>[ 132.380487] print_address_description+0x65/0x270 <4>[ 132.380493] kasan_report+0x25b/0x380 <4>[ 132.380497] ? drm_gem_create_mmap_offset+0x8c/0xd0 <4>[ 132.380503] drm_gem_create_mmap_offset+0x8c/0xd0 <4>[ 132.380584] i915_gem_object_create_mmap_offset+0x6d/0x100 [i915] <4>[ 132.380650] igt_mmap_offset_exhaustion+0x462/0x940 [i915] <4>[ 132.380714] ? i915_gem_close_object+0x740/0x740 [i915] <4>[ 132.380784] ? igt_gem_huge+0x269/0x3d0 [i915] <4>[ 132.380865] __i915_subtests+0x5a/0x160 [i915] <4>[ 132.380936] __run_selftests+0x1a2/0x2f0 [i915] <4>[ 132.381008] i915_live_selftests+0x4e/0x80 [i915] <4>[ 132.381071] i915_pci_probe+0xd8/0x1b0 [i915] <4>[ 132.381077] pci_device_probe+0x1c5/0x3a0 <4>[ 132.381087] driver_probe_device+0x6b6/0xcb0 <4>[ 132.381094] __driver_attach+0x22d/0x2c0 <4>[ 132.381100] ? driver_probe_device+0xcb0/0xcb0 <4>[ 132.381103] bus_for_each_dev+0x113/0x1a0 <4>[ 132.381108] ? check_flags.part.24+0x450/0x450 <4>[ 132.381112] ? subsys_dev_iter_exit+0x10/0x10 <4>[ 132.381123] bus_add_driver+0x38b/0x6e0 <4>[ 132.381131] driver_register+0x189/0x400 <4>[ 132.381136] ? 0xffffffffc12d8000 <4>[ 132.381140] do_one_initcall+0xa0/0x4c0 <4>[ 132.381145] ? initcall_blacklisted+0x180/0x180 <4>[ 132.381152] ? do_init_module+0x4a/0x54c <4>[ 132.381156] ? rcu_lockdep_current_cpu_online+0xdc/0x130 <4>[ 132.381161] ? kasan_unpoison_shadow+0x30/0x40 <4>[ 132.381169] do_init_module+0x1b5/0x54c <4>[ 132.381177] load_module+0x619e/0x9b70 <4>[ 132.381202] ? module_frob_arch_sections+0x20/0x20 <4>[ 132.381211] ? vfs_read+0x257/0x2f0 <4>[ 132.381214] ? vfs_read+0x257/0x2f0 <4>[ 132.381221] ? kernel_read+0x8b/0x130 <4>[ 132.381231] ? copy_strings_kernel+0x120/0x120 <4>[ 132.381244] ? __se_sys_finit_module+0x17c/0x1a0 <4>[ 132.381248] __se_sys_finit_module+0x17c/0x1a0 <4>[ 132.381252] ? __ia32_sys_init_module+0xa0/0xa0 <4>[ 132.381261] ? __se_sys_newstat+0x77/0xd0 <4>[ 132.381265] ? cp_new_stat+0x590/0x590 <4>[ 132.381269] ? kmem_cache_free+0x2f0/0x340 <4>[ 132.381285] do_syscall_64+0x97/0x400 <4>[ 132.381292] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 132.381295] RIP: 0033:0x7eff4af46839 <4>[ 132.381297] Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 1f f6 2c 00 f7 d8 64 89 01 48 <4>[ 132.381426] RSP: 002b:00007ffcd84f4cf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 <4>[ 132.381432] RAX: ffffffffffffffda RBX: 000055dfdeb429a0 RCX: 00007eff4af46839 <4>[ 132.381435] RDX: 0000000000000000 RSI: 000055dfdeb43670 RDI: 0000000000000004 <4>[ 132.381437] RBP: 000055dfdeb43670 R08: 0000000000000004 R09: 0000000000000000 <4>[ 132.381440] R10: 00007ffcd84f4e60 R11: 0000000000000246 R12: 0000000000000000 <4>[ 132.381442] R13: 000055dfdeb3bec0 R14: 0000000000000000 R15: 000000000000003b <3>[ 132.381466] Allocated by task 5822: <4>[ 132.381485] kmem_cache_alloc+0xdf/0x2e0 <4>[ 132.381546] i915_gem_object_create_internal+0x24/0x1e0 [i915] <4>[ 132.381609] igt_mmap_offset_exhaustion+0x257/0x940 [i915] <4>[ 132.381677] __i915_subtests+0x5a/0x160 [i915] <4>[ 132.381742] __run_selftests+0x1a2/0x2f0 [i915] <4>[ 132.381806] i915_live_selftests+0x4e/0x80 [i915] <4>[ 132.381865] i915_pci_probe+0xd8/0x1b0 [i915] <4>[ 132.381868] pci_device_probe+0x1c5/0x3a0 <4>[ 132.381871] driver_probe_device+0x6b6/0xcb0 <4>[ 132.381874] __driver_attach+0x22d/0x2c0 <4>[ 132.381877] bus_for_each_dev+0x113/0x1a0 <4>[ 132.381880] bus_add_driver+0x38b/0x6e0 <4>[ 132.381884] driver_register+0x189/0x400 <4>[ 132.381886] do_one_initcall+0xa0/0x4c0 <4>[ 132.381889] do_init_module+0x1b5/0x54c <4>[ 132.381892] load_module+0x619e/0x9b70 <4>[ 132.381895] __se_sys_finit_module+0x17c/0x1a0 <4>[ 132.381898] do_syscall_64+0x97/0x400 <4>[ 132.381901] entry_SYSCALL_64_after_hwframe+0x49/0xbe <3>[ 132.381914] Freed by task 150: <4>[ 132.381931] kmem_cache_free+0xb7/0x340 <4>[ 132.381995] __i915_gem_free_objects+0x875/0xf50 [i915] <4>[ 132.382054] __i915_gem_free_work+0x69/0xb0 [i915] <4>[ 132.382058] process_one_work+0x78b/0x1740 <4>[ 132.382061] worker_thread+0x82/0xb80 <4>[ 132.382064] kthread+0x30c/0x3d0 <4>[ 132.382067] ret_from_fork+0x3a/0x50 <3>[ 132.382081] The buggy address belongs to the object at ffff8801e1324500 which belongs to the cache drm_i915_gem_object of size 1168 <3>[ 132.382133] The buggy address is located 248 bytes inside of 1168-byte region [ffff8801e1324500, ffff8801e1324990) <3>[ 132.382179] The buggy address belongs to the page: <0>[ 132.382202] page:ffffea000784c800 count:1 mapcount:0 mapping:ffff8801dedf6500 index:0xffff8801e1323ec0 compound_mapcount: 0 <0>[ 132.382251] flags: 0x8000000000008100(slab|head) <1>[ 132.382274] raw: 8000000000008100 ffff8801d6317440 ffff8801d6317440 ffff8801dedf6500 <1>[ 132.382307] raw: ffff8801e1323ec0 0000000000140013 00000001ffffffff 0000000000000000 <1>[ 132.382339] page dumped because: kasan: bad access detected <3>[ 132.382373] Memory state around the buggy address: <3>[ 132.382395] ffff8801e1324480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc <3>[ 132.382426] ffff8801e1324500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb <3>[ 132.382457] >ffff8801e1324580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb <3>[ 132.382488] ^ <3>[ 132.382517] ffff8801e1324600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb <3>[ 132.382548] ffff8801e1324680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb This patch tricks the system into running without the background retire thread, until after we finish the test. The only reaping should then be performed by the mmap offset routine to reclaim the space as required. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 9 +++++++++ 1 file changed, 9 insertions(+)