diff mbox series

[4/5] dma-buf: generalize dma_fence unwrap & merging v2

Message ID 20220506141009.18047-4-christian.koenig@amd.com (mailing list archive)
State New, archived
Headers show
Series [1/5] dma-buf: cleanup dma_fence_unwrap selftest v2 | expand

Commit Message

Christian König May 6, 2022, 2:10 p.m. UTC
Introduce a dma_fence_unwrap_merge() macro which allows to unwrap fences
which potentially can be containers as well and then merge them back
together into a flat dma_fence_array.

v2: rename the function, add some more comments about how the wrapper is
    used, move filtering of signaled fences into the unwrap iterator,
    add complex selftest which covers more cases.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/dma-buf/dma-fence-unwrap.c    |  99 +++++++++++++++++++++
 drivers/dma-buf/st-dma-fence-unwrap.c | 109 +++++++++++++++++++++++
 drivers/dma-buf/sync_file.c           | 119 ++------------------------
 include/linux/dma-fence-unwrap.h      |  24 ++++++
 4 files changed, 238 insertions(+), 113 deletions(-)

Comments

kernel test robot May 12, 2022, 2:56 p.m. UTC | #1
Greeting,

FYI, we noticed the following commit (built with gcc-11):

commit: a9290ca07a36882b114c3cd9bbd8f66ed47508bd ("[PATCH 4/5] dma-buf: generalize dma_fence unwrap & merging v2")
url: https://github.com/intel-lab-lkp/linux/commits/Christian-K-nig/dma-buf-cleanup-dma_fence_unwrap-selftest-v2/20220506-221317
base: git://anongit.freedesktop.org/drm/drm drm-next
patch link: https://lore.kernel.org/dri-devel/20220506141009.18047-4-christian.koenig@amd.com

in testcase: igt
version: igt-x86_64-eddc67c5-1_20220430
with following parameters:

	group: group-04
	ucode: 0xc2



on test machine: 20 threads 1 sockets Commet Lake with 16G memory

caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):



If you fix the issue, kindly add following tag
Reported-by: kernel test robot <oliver.sang@intel.com>


kern :err : [   35.911985] BUG: KASAN: slab-out-of-bounds in __dma_fence_unwrap_merge (drivers/dma-buf/dma-fence-unwrap.c:130) 
kern  :err   : [   35.920255] Write of size 8 at addr ffff888105400508 by task api_intel_bb/1309

kern  :err   : [   35.930379] CPU: 4 PID: 1309 Comm: api_intel_bb Not tainted 5.18.0-rc5-01118-ga9290ca07a36 #1
kern  :err   : [   35.939601] Hardware name: Intel Corporation CometLake Client Platform/CometLake S UDIMM (ERB/CRB), BIOS CMLSFWR1.R00.2212.D00.2104290922 04/29/2021
kern  :err   : [   35.953601] Call Trace:
kern  :err   : [   35.956758]  <TASK>
kern :err : [   35.959564] ? __dma_fence_unwrap_merge (drivers/dma-buf/dma-fence-unwrap.c:130) 
kern :err : [   35.965157] dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) 
kern :err : [   35.969534] print_address_description+0x1f/0x200 
kern :err : [   35.975983] ? __dma_fence_unwrap_merge (drivers/dma-buf/dma-fence-unwrap.c:130) 
kern :err : [   35.981562] print_report.cold (mm/kasan/report.c:430) 
kern :err : [   35.986277] ? _raw_spin_lock_irqsave (arch/x86/include/asm/atomic.h:202 include/linux/atomic/atomic-instrumented.h:543 include/asm-generic/qspinlock.h:82 include/linux/spinlock.h:185 include/linux/spinlock_api_smp.h:111 kernel/locking/spinlock.c:162) 
kern :err : [   35.991606] kasan_report (mm/kasan/report.c:162 mm/kasan/report.c:493) 
kern :err : [   35.995892] ? __dma_fence_unwrap_merge (drivers/dma-buf/dma-fence-unwrap.c:130) 
kern :err : [   36.001474] __dma_fence_unwrap_merge (drivers/dma-buf/dma-fence-unwrap.c:130) 
kern :err : [   36.006878] sync_file_merge+0xf7/0x240 
kern :err : [   36.012465] ? _raw_spin_lock (arch/x86/include/asm/atomic.h:202 include/linux/atomic/atomic-instrumented.h:543 include/asm-generic/qspinlock.h:82 include/linux/spinlock.h:185 include/linux/spinlock_api_smp.h:134 kernel/locking/spinlock.c:154) 
kern :err : [   36.017088] ? sync_file_create (drivers/dma-buf/sync_file.c:159) 
kern :err : [   36.021798] ? __fget_files (arch/x86/include/asm/atomic64_64.h:22 include/linux/atomic/atomic-arch-fallback.h:2293 include/linux/atomic/atomic-arch-fallback.h:2318 include/linux/atomic/atomic-long.h:491 include/linux/atomic/atomic-instrumented.h:1846 fs/file.c:903 fs/file.c:934) 
kern :err : [   36.026342] sync_file_ioctl (drivers/dma-buf/sync_file.c:235 drivers/dma-buf/sync_file.c:360) 
kern :err : [   36.030966] ? sync_file_ioctl_fence_info (drivers/dma-buf/sync_file.c:355) 
kern :err : [   36.036717] ? task_work_run (kernel/task_work.c:167 (discriminator 1)) 
kern :err : [   36.041254] __x64_sys_ioctl (fs/ioctl.c:51 fs/ioctl.c:870 fs/ioctl.c:856 fs/ioctl.c:856) 
kern :err : [   36.045884] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) 
kern :err : [   36.050166] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:115) 
kern  :err   : [   36.055922] RIP: 0033:0x7fd878745e57
kern :err : [ 36.060203] Code: 00 00 90 48 8b 05 39 a0 0c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 09 a0 0c 00 f7 d8 64 89 01 48
All code
========
   0:	00 00                	add    %al,(%rax)
   2:	90                   	nop
   3:	48 8b 05 39 a0 0c 00 	mov    0xca039(%rip),%rax        # 0xca043
   a:	64 c7 00 26 00 00 00 	movl   $0x26,%fs:(%rax)
  11:	48 c7 c0 ff ff ff ff 	mov    $0xffffffffffffffff,%rax
  18:	c3                   	retq   
  19:	66 2e 0f 1f 84 00 00 	nopw   %cs:0x0(%rax,%rax,1)
  20:	00 00 00 
  23:	b8 10 00 00 00       	mov    $0x10,%eax
  28:	0f 05                	syscall 
  2a:*	48 3d 01 f0 ff ff    	cmp    $0xfffffffffffff001,%rax		<-- trapping instruction
  30:	73 01                	jae    0x33
  32:	c3                   	retq   
  33:	48 8b 0d 09 a0 0c 00 	mov    0xca009(%rip),%rcx        # 0xca043
  3a:	f7 d8                	neg    %eax
  3c:	64 89 01             	mov    %eax,%fs:(%rcx)
  3f:	48                   	rex.W

Code starting with the faulting instruction
===========================================
   0:	48 3d 01 f0 ff ff    	cmp    $0xfffffffffffff001,%rax
   6:	73 01                	jae    0x9
   8:	c3                   	retq   
   9:	48 8b 0d 09 a0 0c 00 	mov    0xca009(%rip),%rcx        # 0xca019
  10:	f7 d8                	neg    %eax
  12:	64 89 01             	mov    %eax,%fs:(%rcx)
  15:	48                   	rex.W
kern  :err   : [   36.079659] RSP: 002b:00007ffe4d4d2e88 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
kern  :err   : [   36.087937] RAX: ffffffffffffffda RBX: 00005558619a1940 RCX: 00007fd878745e57
kern  :err   : [   36.095770] RDX: 00007ffe4d4d2e90 RSI: 00000000c0303e03 RDI: 0000000000000008
kern  :err   : [   36.103613] RBP: 0000000000000006 R08: 000000000000000f R09: 00005558619a4c30
kern  :err   : [   36.111444] R10: 0000000000000006 R11: 0000000000000246 R12: 00005558619a1a00
kern  :err   : [   36.119279] R13: 00005558619a46e0 R14: 00007ffe4d4d2ef0 R15: 0000000000000000
kern  :err   : [   36.127113]  </TASK>

kern  :err   : [   36.132209] Allocated by task 1309:
kern :warn : [   36.136405] kasan_save_stack (mm/kasan/common.c:39) 
kern :warn : [   36.140943] __kasan_kmalloc (mm/kasan/common.c:45 mm/kasan/common.c:436 mm/kasan/common.c:515 mm/kasan/common.c:524) 
kern :warn : [   36.145395] __dma_fence_unwrap_merge (include/linux/slab.h:621 drivers/dma-buf/dma-fence-unwrap.c:81) 
kern :warn : [   36.150800] sync_file_merge+0xf7/0x240 
kern :warn : [   36.156386] sync_file_ioctl (drivers/dma-buf/sync_file.c:235 drivers/dma-buf/sync_file.c:360) 
kern :warn : [   36.161010] __x64_sys_ioctl (fs/ioctl.c:51 fs/ioctl.c:870 fs/ioctl.c:856 fs/ioctl.c:856) 
kern :warn : [   36.165643] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) 
kern :warn : [   36.169921] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:115) 

kern  :err   : [   36.177867] The buggy address belongs to the object at ffff888105400500
which belongs to the cache kmalloc-8 of size 8
kern  :err   : [   36.191437] The buggy address is located 0 bytes to the right of
8-byte region [ffff888105400500, ffff888105400508)

kern  :err   : [   36.206942] The buggy address belongs to the physical page:
kern  :warn  : [   36.213220] page:00000000c4ee5dee refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff8881054008c0 pfn:0x105400
kern  :warn  : [   36.224636] flags: 0x17ffffc0000200(slab|node=0|zone=2|lastcpupid=0x1fffff)
kern  :warn  : [   36.232305] raw: 0017ffffc0000200 ffffea0004155e80 dead000000000002 ffff888100042280
kern  :warn  : [   36.240745] raw: ffff8881054008c0 0000000080660035 00000001ffffffff 0000000000000000
kern  :warn  : [   36.249190] page dumped because: kasan: bad access detected

kern  :err   : [   36.257659] Memory state around the buggy address:
kern  :err   : [   36.263155]  ffff888105400400: fc fc fa fc fc fc fc fb fc fc fc fc fb fc fc fc
kern  :err   : [   36.271079]  ffff888105400480: fc fb fc fc fc fc fb fc fc fc fc fb fc fc fc fc
kern  :err   : [   36.279001] >ffff888105400500: 00 fc fc fc fc fb fc fc fc fc fa fc fc fc fc fb
kern  :err   : [   36.286921]                       ^
kern  :err   : [   36.291117]  ffff888105400580: fc fc fc fc fb fc fc fc fc fb fc fc fc fc fb fc
kern  :err   : [   36.299043]  ffff888105400600: fc fc fc fa fc fc fc fc fb fc fc fc fc fb fc fc
kern  :err   : [   36.306970] ==================================================================
kern  :warn  : [   36.314953] Disabling lock debugging due to kernel taint
user  :info  : [   36.321624] [IGT] api_intel_bb: exiting, ret=0
kern  :info  : [   36.381966] Console: switching to colour frame buffer device 160x64
kern  :info  : [   36.448188] Console: switching to colour dummy device 80x25
user  :info  : [   36.454538] [IGT] api_intel_bb: executing
user  :info  : [   36.459757] [IGT] api_intel_bb: starting subtest blit-noreloc-keep-cache-random
user  :info  : [   36.471434] [IGT] api_intel_bb: exiting, ret=0
kern  :info  : [   36.531917] Console: switching to colour frame buffer device 160x64
kern  :info  : [   36.598425] Console: switching to colour dummy device 80x25
user  :info  : [   36.604786] [IGT] api_intel_bb: executing
user  :info  : [   36.609923] [IGT] api_intel_bb: starting subtest blit-noreloc-purge-cache
user  :info  : [   36.621155] [IGT] api_intel_bb: exiting, ret=0
kern  :info  : [   36.681867] Console: switching to colour frame buffer device 160x64
kern  :info  : [   36.748514] Console: switching to colour dummy device 80x25
user  :info  : [   36.755092] [IGT] api_intel_bb: executing
user  :info  : [   36.760433] [IGT] api_intel_bb: starting subtest blit-noreloc-purge-cache-random
user  :info  : [   36.772151] [IGT] api_intel_bb: exiting, ret=0
kern  :info  : [   36.831817] Console: switching to colour frame buffer device 160x64
kern  :info  : [   36.897995] Console: switching to colour dummy device 80x25
user  :info  : [   36.904350] [IGT] api_intel_bb: executing
user  :info  : [   36.909457] [IGT] api_intel_bb: starting subtest blit-reloc-keep-cache
user  :info  : [   36.921693] [IGT] api_intel_bb: exiting, ret=0
kern  :info  : [   36.981895] Console: switching to colour frame buffer device 160x64
kern  :info  : [   37.047892] Console: switching to colour dummy device 80x25
user  :info  : [   37.054232] [IGT] api_intel_bb: executing
user  :info  : [   37.059343] [IGT] api_intel_bb: starting subtest blit-reloc-purge-cache
user  :info  : [   37.071548] [IGT] api_intel_bb: exiting, ret=0
kern  :info  : [   37.131724] Console: switching to colour frame buffer device 160x64
kern  :info  : [   37.197818] Console: switching to colour dummy device 80x25
user  :info  : [   37.204190] [IGT] api_intel_bb: executing
user  :info  : [   37.209296] [IGT] api_intel_bb: starting subtest delta-check
user  :info  : [   37.216856] [IGT] api_intel_bb: exiting, ret=0
user  :notice: [   37.245164] result_service: raw_upload, RESULT_MNT: /internal-lkp-server/result, RESULT_ROOT: /internal-lkp-server/result/igt/group-04-ucode=0xc2/lkp-cml-d02/debian-10.4-x86_64-20200603.cgz/x86_64-rhel-8.3-func/gcc-11/a9290ca07a36882b114c3cd9bbd8f66ed47508bd/1, TMP_RESULT_ROOT: /tmp/lkp/result

user  :notice: [   37.276355] run-job /lkp/jobs/scheduled/lkp-cml-d02/igt-group-04-ucode=0xc2-debian-10.4-x86_64-20200603.cgz-a9290ca07a36882b114c3cd9bbd8f66ed47508bd-20220511-19224-132epq3-1.yaml

kern  :info  : [   37.281678] Console: switching to colour frame buffer device 160x64
kern  :info  : [   37.366074] Console: switching to colour dummy device 80x25
user  :info  : [   37.372429] [IGT] api_intel_bb: executing
user  :info  : [   37.377548] [IGT] api_intel_bb: starting subtest destroy-bb
user  :info  : [   37.388923] [IGT] api_intel_bb: exiting, ret=0
kern  :info  : [   37.431625] Console: switching to colour frame buffer device 160x64
kern  :info  : [   37.497522] Console: switching to colour dummy device 80x25
user  :info  : [   37.503871] [IGT] api_intel_bb: executing
user  :info  : [   37.508999] [IGT] api_intel_bb: starting subtest full-batch
user  :info  : [   37.516733] [IGT] api_intel_bb: exiting, ret=0
kern  :info  : [   37.564907] Console: switching to colour frame buffer device 160x64
kern  :info  : [   37.630954] Console: switching to colour dummy device 80x25
user  :info  : [   37.637306] [IGT] api_intel_bb: executing
user  :info  : [   37.642423] [IGT] api_intel_bb: starting subtest intel-bb-blit-none
user  :notice: [   38.035871] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/cgi-bin/lkp-jobfile-append-var?job_file=/lkp/jobs/scheduled/lkp-cml-d02/igt-group-04-ucode=0xc2-debian-10.4-x86_64-20200603.cgz-a9290ca07a36882b114c3cd9bbd8f66ed47508bd-20220511-19224-132epq3-1.yaml&job_state=running -O /dev/null

user  :notice: [   38.069080] target ucode: 0xc2

user  :notice: [   38.075557] current_version: c2, target_version: c2



To reproduce:

        git clone https://github.com/intel/lkp-tests.git
        cd lkp-tests
        sudo bin/lkp install job.yaml           # job file is attached in this email
        bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
        sudo bin/lkp run generated-yaml-file

        # if come across any failure that blocks the test,
        # please remove ~/.lkp and /lkp dir to run from a clean state.
diff mbox series

Patch

diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c
index 711be125428c..0e51547bbabd 100644
--- a/drivers/dma-buf/dma-fence-unwrap.c
+++ b/drivers/dma-buf/dma-fence-unwrap.c
@@ -11,6 +11,7 @@ 
 #include <linux/dma-fence-array.h>
 #include <linux/dma-fence-chain.h>
 #include <linux/dma-fence-unwrap.h>
+#include <linux/slab.h>
 
 /* Internal helper to start new array iteration, don't use directly */
 static struct dma_fence *
@@ -57,3 +58,101 @@  struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor)
 	return __dma_fence_unwrap_array(cursor);
 }
 EXPORT_SYMBOL_GPL(dma_fence_unwrap_next);
+
+/* Implementation for the dma_fence_merge() marco, don't use directly */
+struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
+					   struct dma_fence **fences,
+					   struct dma_fence_unwrap *iter)
+{
+	struct dma_fence_array *result;
+	struct dma_fence *tmp, **array;
+	unsigned int i;
+	size_t count;
+
+	count = 0;
+	for (i = 0; i < num_fences; ++i) {
+		dma_fence_unwrap_for_each(tmp, &iter[i], fences[i])
+			++count;
+	}
+
+	if (count == 0)
+		return dma_fence_get_stub();
+
+	array = kmalloc_array(count, sizeof(*array), GFP_KERNEL);
+	if (!array)
+		return NULL;
+
+	/*
+	 * This trashes the input fence array and uses it as position for the
+	 * following merge loop. This works because the dma_fence_merge()
+	 * wrapper macro is creating this temporary array on the stack together
+	 * with the iterators.
+	 */
+	for (i = 0; i < num_fences; ++i)
+		fences[i] = dma_fence_unwrap_first(fences[i], &iter[i]);
+
+	count = 0;
+	do {
+		unsigned int sel;
+
+restart:
+		tmp = NULL;
+		for (i = 0; i < num_fences; ++i) {
+			struct dma_fence *next = fences[i];
+
+			if (!next)
+				continue;
+
+			/*
+			 * We can't guarantee that inpute fences are ordered by
+			 * context, but it is still quite likely when this
+			 * function is used multiple times. So attempt to order
+			 * the fences by context as we pass over them and merge
+			 * fences with the same context.
+			 */
+			if (!tmp || tmp->context > next->context) {
+				tmp = next;
+				sel = i;
+
+			} else if (tmp->context < next->context) {
+				continue;
+
+			} else if (dma_fence_is_later(tmp, next)) {
+				fences[i] = dma_fence_unwrap_next(&iter[i]);
+				goto restart;
+			} else {
+				fences[sel] = dma_fence_unwrap_next(&iter[sel]);
+				goto restart;
+			}
+		}
+
+		if (tmp) {
+			array[count++] = dma_fence_get(tmp);
+			fences[sel] = dma_fence_unwrap_next(&iter[sel]);
+		}
+	} while (tmp);
+
+	if (count == 0) {
+		tmp = dma_fence_get_stub();
+		goto return_tmp;
+	}
+
+	if (count == 1) {
+		tmp = array[0];
+		goto return_tmp;
+	}
+
+	result = dma_fence_array_create(count, array,
+					dma_fence_context_alloc(1),
+					1, false);
+	if (!result) {
+		tmp = NULL;
+		goto return_tmp;
+	}
+	return &result->base;
+
+return_tmp:
+	kfree(array);
+	return tmp;
+}
+EXPORT_SYMBOL_GPL(__dma_fence_unwrap_merge);
diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c
index e20c5a7dcfe4..4105d5ea8dde 100644
--- a/drivers/dma-buf/st-dma-fence-unwrap.c
+++ b/drivers/dma-buf/st-dma-fence-unwrap.c
@@ -238,6 +238,113 @@  static int unwrap_chain_array(void *arg)
 	return err;
 }
 
+static int unwrap_merge(void *arg)
+{
+	struct dma_fence *fence, *f1, *f2, *f3;
+	struct dma_fence_unwrap iter;
+	int err = 0;
+
+	f1 = mock_fence();
+	if (!f1)
+		return -ENOMEM;
+
+	f2 = mock_fence();
+	if (!f2) {
+		err = -ENOMEM;
+		goto error_put_f1;
+	}
+
+	f3 = dma_fence_unwrap_merge(f1, f2);
+	if (!f3) {
+		err = -ENOMEM;
+		goto error_put_f2;
+	}
+
+	dma_fence_unwrap_for_each(fence, &iter, f3) {
+		if (fence == f1) {
+			dma_fence_put(f1);
+			f1 = NULL;
+		} else if (fence == f2) {
+			dma_fence_put(f2);
+			f2 = NULL;
+		} else {
+			pr_err("Unexpected fence!\n");
+			err = -EINVAL;
+		}
+	}
+
+	if (f1 || f2) {
+		pr_err("Not all fences seen!\n");
+		err = -EINVAL;
+	}
+
+	dma_fence_put(f3);
+error_put_f2:
+	dma_fence_put(f2);
+error_put_f1:
+	dma_fence_put(f1);
+	return err;
+}
+
+static int unwrap_merge_complex(void *arg)
+{
+	struct dma_fence *fence, *f1, *f2, *f3, *f4, *f5;
+	struct dma_fence_unwrap iter;
+	int err = -ENOMEM;
+
+	f1 = mock_fence();
+	if (!f1)
+		return -ENOMEM;
+
+	f2 = mock_fence();
+	if (!f2)
+		goto error_put_f1;
+
+	f3 = dma_fence_unwrap_merge(f1, f2);
+	if (!f3)
+		goto error_put_f2;
+
+	/* The resulting array has the fences in reverse */
+	f4 = dma_fence_unwrap_merge(f2, f1);
+	if (!f4)
+		goto error_put_f3;
+
+	/* Signaled fences should be filtered, the two arrays merged. */
+	f5 = dma_fence_unwrap_merge(f3, f4, dma_fence_get_stub());
+	if (!f5)
+		goto error_put_f4;
+
+	err = 0;
+	dma_fence_unwrap_for_each(fence, &iter, f5) {
+		if (fence == f1) {
+			dma_fence_put(f1);
+			f1 = NULL;
+		} else if (fence == f2) {
+			dma_fence_put(f2);
+			f2 = NULL;
+		} else {
+			pr_err("Unexpected fence!\n");
+			err = -EINVAL;
+		}
+	}
+
+	if (f1 || f2) {
+		pr_err("Not all fences seen!\n");
+		err = -EINVAL;
+	}
+
+	dma_fence_put(f5);
+error_put_f4:
+	dma_fence_put(f4);
+error_put_f3:
+	dma_fence_put(f3);
+error_put_f2:
+	dma_fence_put(f2);
+error_put_f1:
+	dma_fence_put(f1);
+	return err;
+}
+
 int dma_fence_unwrap(void)
 {
 	static const struct subtest tests[] = {
@@ -245,6 +352,8 @@  int dma_fence_unwrap(void)
 		SUBTEST(unwrap_array),
 		SUBTEST(unwrap_chain),
 		SUBTEST(unwrap_chain_array),
+		SUBTEST(unwrap_merge),
+		SUBTEST(unwrap_merge_complex),
 	};
 
 	return subtests(tests, NULL);
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 0fe564539166..3ebec19a8e02 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -146,50 +146,6 @@  char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len)
 	return buf;
 }
 
-static int sync_file_set_fence(struct sync_file *sync_file,
-			       struct dma_fence **fences, int num_fences)
-{
-	struct dma_fence_array *array;
-
-	/*
-	 * The reference for the fences in the new sync_file and held
-	 * in add_fence() during the merge procedure, so for num_fences == 1
-	 * we already own a new reference to the fence. For num_fence > 1
-	 * we own the reference of the dma_fence_array creation.
-	 */
-
-	if (num_fences == 0) {
-		sync_file->fence = dma_fence_get_stub();
-		kfree(fences);
-
-	} else if (num_fences == 1) {
-		sync_file->fence = fences[0];
-		kfree(fences);
-
-	} else {
-		array = dma_fence_array_create(num_fences, fences,
-					       dma_fence_context_alloc(1),
-					       1, false);
-		if (!array)
-			return -ENOMEM;
-
-		sync_file->fence = &array->base;
-	}
-
-	return 0;
-}
-
-static void add_fence(struct dma_fence **fences,
-		      int *i, struct dma_fence *fence)
-{
-	fences[*i] = fence;
-
-	if (!dma_fence_is_signaled(fence)) {
-		dma_fence_get(fence);
-		(*i)++;
-	}
-}
-
 /**
  * sync_file_merge() - merge two sync_files
  * @name:	name of new fence
@@ -203,84 +159,21 @@  static void add_fence(struct dma_fence **fences,
 static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 					 struct sync_file *b)
 {
-	struct dma_fence *a_fence, *b_fence, **fences;
-	struct dma_fence_unwrap a_iter, b_iter;
-	unsigned int index, num_fences;
 	struct sync_file *sync_file;
+	struct dma_fence *fence;
 
 	sync_file = sync_file_alloc();
 	if (!sync_file)
 		return NULL;
 
-	num_fences = 0;
-	dma_fence_unwrap_for_each(a_fence, &a_iter, a->fence)
-		++num_fences;
-	dma_fence_unwrap_for_each(b_fence, &b_iter, b->fence)
-		++num_fences;
-
-	if (num_fences > INT_MAX)
-		goto err_free_sync_file;
-
-	fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL);
-	if (!fences)
-		goto err_free_sync_file;
-
-	/*
-	 * We can't guarantee that fences in both a and b are ordered, but it is
-	 * still quite likely.
-	 *
-	 * So attempt to order the fences as we pass over them and merge fences
-	 * with the same context.
-	 */
-
-	index = 0;
-	for (a_fence = dma_fence_unwrap_first(a->fence, &a_iter),
-	     b_fence = dma_fence_unwrap_first(b->fence, &b_iter);
-	     a_fence || b_fence; ) {
-
-		if (!b_fence) {
-			add_fence(fences, &index, a_fence);
-			a_fence = dma_fence_unwrap_next(&a_iter);
-
-		} else if (!a_fence) {
-			add_fence(fences, &index, b_fence);
-			b_fence = dma_fence_unwrap_next(&b_iter);
-
-		} else if (a_fence->context < b_fence->context) {
-			add_fence(fences, &index, a_fence);
-			a_fence = dma_fence_unwrap_next(&a_iter);
-
-		} else if (b_fence->context < a_fence->context) {
-			add_fence(fences, &index, b_fence);
-			b_fence = dma_fence_unwrap_next(&b_iter);
-
-		} else if (__dma_fence_is_later(a_fence->seqno, b_fence->seqno,
-						a_fence->ops)) {
-			add_fence(fences, &index, a_fence);
-			a_fence = dma_fence_unwrap_next(&a_iter);
-			b_fence = dma_fence_unwrap_next(&b_iter);
-
-		} else {
-			add_fence(fences, &index, b_fence);
-			a_fence = dma_fence_unwrap_next(&a_iter);
-			b_fence = dma_fence_unwrap_next(&b_iter);
-		}
+	fence = dma_fence_unwrap_merge(a->fence, b->fence);
+	if (!fence) {
+		fput(sync_file->file);
+		return NULL;
 	}
-
-	if (sync_file_set_fence(sync_file, fences, index) < 0)
-		goto err_put_fences;
-
+	sync_file->fence = fence;
 	strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name));
 	return sync_file;
-
-err_put_fences:
-	while (index)
-		dma_fence_put(fences[--index]);
-	kfree(fences);
-
-err_free_sync_file:
-	fput(sync_file->file);
-	return NULL;
 }
 
 static int sync_file_release(struct inode *inode, struct file *file)
diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h
index a4d342fef8e0..390de1ee9d35 100644
--- a/include/linux/dma-fence-unwrap.h
+++ b/include/linux/dma-fence-unwrap.h
@@ -52,4 +52,28 @@  struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor);
 	     fence = dma_fence_unwrap_next(cursor))			\
 		if (!dma_fence_is_signaled(fence))
 
+struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
+					   struct dma_fence **fences,
+					   struct dma_fence_unwrap *cursors);
+
+/**
+ * dma_fence_unwrap_merge - unwrap and merge fences
+ *
+ * All fences given as parameters are unwrapped and merged back together as flat
+ * dma_fence_array. Useful if multiple containers need to be merged together.
+ *
+ * Implemented as a macro to allocate the necessary arrays on the stack and
+ * account the stack frame size to the caller.
+ *
+ * Returns NULL on memory allocation failure, a dma_fence object representing
+ * all the given fences otherwise.
+ */
+#define dma_fence_unwrap_merge(...)					\
+	({								\
+		struct dma_fence *__f[] = { __VA_ARGS__ };		\
+		struct dma_fence_unwrap __c[ARRAY_SIZE(__f)];		\
+									\
+		__dma_fence_unwrap_merge(ARRAY_SIZE(__f), __f, __c);	\
+	})
+
 #endif