diff mbox

[v4] tests/exynos: add fimg2d performance analysis

Message ID 1446457929-8302-1-git-send-email-tjakobi@math.uni-bielefeld.de (mailing list archive)
State New, archived
Headers show

Commit Message

Tobias Jakobi Nov. 2, 2015, 9:52 a.m. UTC
Currently only fast solid color clear performance is measured.
A large buffer is allocated and solid color clear operations
are executed on it with randomly chosen properties (position
and size of the region, clear color). Execution time is
measured and output together with the amount of pixels
processed.

The 'simple' variant only executes one G2D command buffer at
a time, while the 'multi' variant executes multiple ones. This
can be used to measure setup/exec overhead.

The test also serves as a stability check. If clocks/voltages are
too high or low respectively, the test quickly reveals this.

v2: Add GPLv2 header, argument handling and documentation.
    Tool is only installed when requested.
v3: Free images array in fimg2d_perf_multi() as pointed out
    by Hyungwon Hwang.
v4: Include header for error numbers (fixes build).

Signed-off-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
---
 tests/exynos/Makefile.am          |  19 ++-
 tests/exynos/exynos_fimg2d_perf.c | 327 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 344 insertions(+), 2 deletions(-)
 create mode 100644 tests/exynos/exynos_fimg2d_perf.c

Comments

Hyungwon Hwang Nov. 9, 2015, 7:45 a.m. UTC | #1
Hello,

I think this patch should also update .gitignore, so that the built
binary does not show up in the untracked file list.

Also, I want to be clear about the purpose of this test program. What
do you want to learn from this test? This program runs the G2D with a
randomly chosen number of pixels and shows the elapsed time for doing
that. I ran it on my board, but I could not find any meaning in the
results. If you just want to know the execution time of a solid fill,
what about getting the width and height from the user and running the
same test iteratively for a more accurate result? Or, at least, using
an increasing number of pixels?


Best regards,
Hyungwon Hwang


On Mon, 02 Nov 2015 10:52:09 +0100
Tobias Jakobi <tjakobi@math.uni-bielefeld.de> wrote:

> Currently only fast solid color clear performance is measured.
> A large buffer is allocated and solid color clear operations
> are executed on it with randomly chosen properties (position
> and size of the region, clear color). Execution time is
> measured and output together with the amount of pixels
> processed.
> 
> The 'simple' variant only executes one G2D command buffer at
> a time, while the 'multi' variant executes multiple ones. This
> can be used to measure setup/exec overhead.
> 
> The test also serves a stability check. If clocks/voltages are
> too high or low respectively, the test quickly reveals this.
> 
> v2: Add GPLv2 header, argument handling and documentation.
>     Tool is only installed when requested.
> v3: Free images array in fimg2d_perf_multi() as pointed out
>     by Hyungwon Hwang.
> v4: Include header for error numbers (fixes build).
> 
> Signed-off-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
> ---
>  tests/exynos/Makefile.am          |  19 ++-
>  tests/exynos/exynos_fimg2d_perf.c | 327
> ++++++++++++++++++++++++++++++++++++++ 2 files changed, 344
> insertions(+), 2 deletions(-) create mode 100644
> tests/exynos/exynos_fimg2d_perf.c
> 
> diff --git a/tests/exynos/Makefile.am b/tests/exynos/Makefile.am
> index b21d016..e82d199 100644
> --- a/tests/exynos/Makefile.am
> +++ b/tests/exynos/Makefile.am
> @@ -5,16 +5,31 @@ AM_CFLAGS = \
>  	-I $(top_srcdir)/exynos \
>  	-I $(top_srcdir)
>  
> +bin_PROGRAMS =
> +noinst_PROGRAMS =
> +
>  if HAVE_LIBKMS
>  if HAVE_INSTALL_TESTS
> -bin_PROGRAMS = \
> +bin_PROGRAMS += \
>  	exynos_fimg2d_test
>  else
> -noinst_PROGRAMS = \
> +noinst_PROGRAMS += \
>  	exynos_fimg2d_test
>  endif
>  endif
>  
> +if HAVE_INSTALL_TESTS
> +bin_PROGRAMS += \
> +	exynos_fimg2d_perf
> +else
> +noinst_PROGRAMS += \
> +	exynos_fimg2d_perf
> +endif
> +
> +exynos_fimg2d_perf_LDADD = \
> +	$(top_builddir)/libdrm.la \
> +	$(top_builddir)/exynos/libdrm_exynos.la
> +
>  exynos_fimg2d_test_LDADD = \
>  	$(top_builddir)/libdrm.la \
>  	$(top_builddir)/libkms/libkms.la \
> diff --git a/tests/exynos/exynos_fimg2d_perf.c
> b/tests/exynos/exynos_fimg2d_perf.c new file mode 100644
> index 0000000..1699bba
> --- /dev/null
> +++ b/tests/exynos/exynos_fimg2d_perf.c
> @@ -0,0 +1,327 @@
> +/*
> + * Copyright (C) 2015 - Tobias Jakobi
> + *
> + * This is free software: you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published
> + * by the Free Software Foundation, either version 2 of the License,
> + * or (at your option) any later version.
> + *
> + * It is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + * You should have received a copy of the GNU General Public License
> + * along with it. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <time.h>
> +#include <getopt.h>
> +#include <errno.h>
> +
> +#include <xf86drm.h>
> +
> +#include "exynos_drm.h"
> +#include "exynos_drmif.h"
> +#include "exynos_fimg2d.h"
> +
> +static int output_mathematica = 0;
> +
> +static int fimg2d_perf_simple(struct exynos_bo *bo, struct
> g2d_context *ctx,
> +			unsigned buf_width, unsigned buf_height,
> unsigned iterations) +{
> +	struct timespec tspec = { 0 };
> +	struct g2d_image img = { 0 };
> +
> +	unsigned long long g2d_time;
> +	unsigned i;
> +	int ret = 0;
> +
> +	img.width = buf_width;
> +	img.height = buf_height;
> +	img.stride = buf_width * 4;
> +	img.color_mode = G2D_COLOR_FMT_ARGB8888 | G2D_ORDER_AXRGB;
> +	img.buf_type = G2D_IMGBUF_GEM;
> +	img.bo[0] = bo->handle;
> +
> +	srand(time(NULL));
> +
> +	printf("starting simple G2D performance test\n");
> +	printf("buffer width = %u, buffer height = %u, iterations =
> %u\n",
> +		buf_width, buf_height, iterations);
> +
> +	if (output_mathematica)
> +		putchar('{');
> +
> +	for (i = 0; i < iterations; ++i) {
> +		unsigned x, y, w, h;
> +
> +		x = rand() % buf_width;
> +		y = rand() % buf_height;
> +
> +		if (x == (buf_width - 1))
> +			x -= 1;
> +		if (y == (buf_height - 1))
> +			y -= 1;
> +
> +		w = rand() % (buf_width - x);
> +		h = rand() % (buf_height - y);
> +
> +		if (w == 0) w = 1;
> +		if (h == 0) h = 1;
> +
> +		img.color = rand();
> +
> +		ret = g2d_solid_fill(ctx, &img, x, y, w, h);
> +
> +		clock_gettime(CLOCK_MONOTONIC, &tspec);
> +
> +		if (ret == 0)
> +			ret = g2d_exec(ctx);
> +
> +		if (ret != 0) {
> +			fprintf(stderr, "error: iteration %u failed
> (x = %u, y = %u, w = %u, h = %u)\n",
> +				i, x, y, w, h);
> +			break;
> +		} else {
> +			struct timespec end = { 0 };
> +			clock_gettime(CLOCK_MONOTONIC, &end);
> +
> +			g2d_time = (end.tv_sec - tspec.tv_sec) *
> 1000000000ULL;
> +			g2d_time += (end.tv_nsec - tspec.tv_nsec);
> +
> +			if (output_mathematica) {
> +				if (i != 0) putchar(',');
> +				printf("{%u,%llu}", w * h, g2d_time);
> +			} else {
> +				printf("num_pixels = %u, usecs =
> %llu\n", w * h, g2d_time);
> +			}
> +		}
> +	}
> +
> +	if (output_mathematica)
> +		printf("}\n");
> +
> +	return ret;
> +}
> +
> +static int fimg2d_perf_multi(struct exynos_bo *bo, struct
> g2d_context *ctx,
> +			unsigned buf_width, unsigned buf_height,
> unsigned iterations, unsigned batch) +{
> +	struct timespec tspec = { 0 };
> +	struct g2d_image *images;
> +
> +	unsigned long long g2d_time;
> +	unsigned i, j;
> +	int ret = 0;
> +
> +	images = calloc(batch, sizeof(struct g2d_image));
> +	if (images == NULL) {
> +		fprintf(stderr, "error: failed to allocate G2D
> images.\n");
> +		return -ENOMEM;
> +	}
> +
> +	for (i = 0; i < batch; ++i) {
> +		images[i].width = buf_width;
> +		images[i].height = buf_height;
> +		images[i].stride = buf_width * 4;
> +		images[i].color_mode = G2D_COLOR_FMT_ARGB8888 |
> G2D_ORDER_AXRGB;
> +		images[i].buf_type = G2D_IMGBUF_GEM;
> +		images[i].bo[0] = bo->handle;
> +	}
> +
> +	srand(time(NULL));
> +
> +	printf("starting multi G2D performance test (batch size =
> %u)\n", batch);
> +	printf("buffer width = %u, buffer height = %u, iterations =
> %u\n",
> +		buf_width, buf_height, iterations);
> +
> +	if (output_mathematica)
> +		putchar('{');
> +
> +	for (i = 0; i < iterations; ++i) {
> +		unsigned num_pixels = 0;
> +
> +		for (j = 0; j < batch; ++j) {
> +			unsigned x, y, w, h;
> +
> +			x = rand() % buf_width;
> +			y = rand() % buf_height;
> +
> +			if (x == (buf_width - 1))
> +				x -= 1;
> +			if (y == (buf_height - 1))
> +				y -= 1;
> +
> +			w = rand() % (buf_width - x);
> +			h = rand() % (buf_height - y);
> +
> +			if (w == 0) w = 1;
> +			if (h == 0) h = 1;
> +
> +			images[j].color = rand();
> +
> +			num_pixels += w * h;
> +
> +			ret = g2d_solid_fill(ctx, &images[j], x, y,
> w, h);
> +			if (ret != 0)
> +				break;
> +		}
> +
> +		clock_gettime(CLOCK_MONOTONIC, &tspec);
> +
> +		if (ret == 0)
> +			ret = g2d_exec(ctx);
> +
> +		if (ret != 0) {
> +			fprintf(stderr, "error: iteration %u failed
> (num_pixels = %u)\n", i, num_pixels);
> +			break;
> +		} else {
> +			struct timespec end = { 0 };
> +			clock_gettime(CLOCK_MONOTONIC, &end);
> +
> +			g2d_time = (end.tv_sec - tspec.tv_sec) *
> 1000000000ULL;
> +			g2d_time += (end.tv_nsec - tspec.tv_nsec);
> +
> +			if (output_mathematica) {
> +				if (i != 0) putchar(',');
> +				printf("{%u,%llu}", num_pixels,
> g2d_time);
> +			} else {
> +				printf("num_pixels = %u, usecs =
> %llu\n", num_pixels, g2d_time);
> +			}
> +		}
> +	}
> +
> +	if (output_mathematica)
> +		printf("}\n");
> +
> +	free(images);
> +
> +	return ret;
> +}
> +
> +static void usage(const char *name)
> +{
> +	fprintf(stderr, "usage: %s [-ibwh]\n\n", name);
> +
> +	fprintf(stderr, "\t-i <number of iterations>\n");
> +	fprintf(stderr, "\t-b <size of a batch> (default = 3)\n\n");
> +
> +	fprintf(stderr, "\t-w <buffer width> (default = 4096)\n");
> +	fprintf(stderr, "\t-h <buffer height> (default = 4096)\n\n");
> +
> +	fprintf(stderr, "\t-M <enable Mathematica styled output>\n");
> +
> +	exit(0);
> +}
> +
> +int main(int argc, char **argv)
> +{
> +	int fd, ret, c, parsefail;
> +
> +	struct exynos_device *dev;
> +	struct g2d_context *ctx;
> +	struct exynos_bo *bo;
> +
> +	unsigned int iters = 0, batch = 3;
> +	unsigned int bufw = 4096, bufh = 4096;
> +
> +	ret = 0;
> +	parsefail = 0;
> +
> +	while ((c = getopt(argc, argv, "i:b:w:h:M")) != -1) {
> +		switch (c) {
> +		case 'i':
> +			if (sscanf(optarg, "%u", &iters) != 1)
> +				parsefail = 1;
> +			break;
> +		case 'b':
> +			if (sscanf(optarg, "%u", &batch) != 1)
> +				parsefail = 1;
> +			break;
> +		case 'w':
> +			if (sscanf(optarg, "%u", &bufw) != 1)
> +				parsefail = 1;
> +			break;
> +		case 'h':
> +			if (sscanf(optarg, "%u", &bufh) != 1)
> +				parsefail = 1;
> +			break;
> +		case 'M':
> +			output_mathematica = 1;
> +			break;
> +		default:
> +			parsefail = 1;
> +			break;
> +		}
> +	}
> +
> +	if (parsefail || (argc == 1) || (iters == 0))
> +		usage(argv[0]);
> +
> +	if (bufw < 2 || bufw > 4096 || bufh < 2 || bufh > 4096) {
> +		fprintf(stderr, "error: buffer width/height should
> be in the range 2 to 4096.\n");
> +		ret = -1;
> +
> +		goto out;
> +	}
> +
> +	if (bufw == 0 || bufh == 0) {
> +		fprintf(stderr, "error: buffer width/height should
> be non-zero.\n");
> +		ret = -1;
> +
> +		goto out;
> +	}
> +
> +	fd = drmOpen("exynos", NULL);
> +	if (fd < 0) {
> +		fprintf(stderr, "error: failed to open drm\n");
> +		ret = -1;
> +
> +		goto out;
> +	}
> +
> +	dev = exynos_device_create(fd);
> +	if (dev == NULL) {
> +		fprintf(stderr, "error: failed to create device\n");
> +		ret = -2;
> +
> +		goto fail;
> +	}
> +
> +	ctx = g2d_init(fd);
> +	if (ctx == NULL) {
> +		fprintf(stderr, "error: failed to init G2D\n");
> +		ret = -3;
> +
> +		goto g2d_fail;
> +	}
> +
> +	bo = exynos_bo_create(dev, bufw * bufh * 4, 0);
> +	if (bo == NULL) {
> +		fprintf(stderr, "error: failed to create bo\n");
> +		ret = -4;
> +
> +		goto bo_fail;
> +	}
> +
> +	ret = fimg2d_perf_simple(bo, ctx, bufw, bufh, iters);
> +
> +	if (ret == 0)
> +		ret = fimg2d_perf_multi(bo, ctx, bufw, bufh, iters,
> batch); +
> +	exynos_bo_destroy(bo);
> +
> +bo_fail:
> +	g2d_fini(ctx);
> +
> +g2d_fail:
> +	exynos_device_destroy(dev);
> +
> +fail:
> +	drmClose(fd);
> +
> +out:
> +	return ret;
> +}

--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tobias Jakobi Nov. 9, 2015, 9:47 a.m. UTC | #2
Hello Hyungwon,


Hyungwon Hwang wrote:
> Hello,
> 
> I think this patch should update .gitignore, not for adding the built
> binary to untracked file list.
Thanks!


> Also, I want to make clear about the purpose of this test program. What
> do you want to get after this test? This program runs G2D with
> randomly chosen number of pixel and shows the elapsed time to do
> that. I run it on my board. But I could not find any meaning of the
> test. If you just want to know the execution time of solid fill, what
> about get the width and height from user and run the same tests
> iteratively for more accurate result? Or at least, increasing number of
> pixels?
The test is to measure the dependency between the amount of pixels the
G2D has to process and the amount of time it takes the G2D to process them.

It's exactly what a performance test should do, measure the time it
takes for a certain workload to complete.

In particular the test wants to answer the question if the dependency
stated above is of linear type.

Of course it's not, since we have setup time, so at least it should be
affine linear. But even that is not true, since you see subtle
'branching' when doing high density plots (that's why I added export of
the data to Mathematica).


What you ask for (user input) is in fact already implemented. The user
can specify the buffer width and height, which in turn limits the size
of the rectangle that is solid filled.

If you want smaller rectangles filled, decrease buffer width and height,
if you want bigger ones filled, increase.


The second purpose is to stress test the G2D, as already indicated in
the commit description. The G2D can be overclocked quite a lot under
certain conditions. With increased MIF/INT voltages I can run it at
400MHz instead of the default 200MHz. The application can now be used
to check stability. E.g. if voltages are too low the system can quickly
lock up.

In particular one could also check how processing time depends on the
clock rate of the G2D. One interesting question here is how memory
bandwidth limits us.



With best wishes,
Tobias


> 
> 
> Best regards,
> Hyungwon Hwang
> 
> 
> On Mon, 02 Nov 2015 10:52:09 +0100
> Tobias Jakobi <tjakobi@math.uni-bielefeld.de> wrote:
> 
>> Currently only fast solid color clear performance is measured.
>> A large buffer is allocated and solid color clear operations
>> are executed on it with randomly chosen properties (position
>> and size of the region, clear color). Execution time is
>> measured and output together with the amount of pixels
>> processed.
>>
>> The 'simple' variant only executes one G2D command buffer at
>> a time, while the 'multi' variant executes multiple ones. This
>> can be used to measure setup/exec overhead.
>>
>> The test also serves a stability check. If clocks/voltages are
>> too high or low respectively, the test quickly reveals this.
>>
>> v2: Add GPLv2 header, argument handling and documentation.
>>     Tool is only installed when requested.
>> v3: Free images array in fimg2d_perf_multi() as pointed out
>>     by Hyungwon Hwang.
>> v4: Include header for error numbers (fixes build).
>>
>> Signed-off-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
>> ---
>>  tests/exynos/Makefile.am          |  19 ++-
>>  tests/exynos/exynos_fimg2d_perf.c | 327
>> ++++++++++++++++++++++++++++++++++++++ 2 files changed, 344
>> insertions(+), 2 deletions(-) create mode 100644
>> tests/exynos/exynos_fimg2d_perf.c
>>
>> diff --git a/tests/exynos/Makefile.am b/tests/exynos/Makefile.am
>> index b21d016..e82d199 100644
>> --- a/tests/exynos/Makefile.am
>> +++ b/tests/exynos/Makefile.am
>> @@ -5,16 +5,31 @@ AM_CFLAGS = \
>>  	-I $(top_srcdir)/exynos \
>>  	-I $(top_srcdir)
>>  
>> +bin_PROGRAMS =
>> +noinst_PROGRAMS =
>> +
>>  if HAVE_LIBKMS
>>  if HAVE_INSTALL_TESTS
>> -bin_PROGRAMS = \
>> +bin_PROGRAMS += \
>>  	exynos_fimg2d_test
>>  else
>> -noinst_PROGRAMS = \
>> +noinst_PROGRAMS += \
>>  	exynos_fimg2d_test
>>  endif
>>  endif
>>  
>> +if HAVE_INSTALL_TESTS
>> +bin_PROGRAMS += \
>> +	exynos_fimg2d_perf
>> +else
>> +noinst_PROGRAMS += \
>> +	exynos_fimg2d_perf
>> +endif
>> +
>> +exynos_fimg2d_perf_LDADD = \
>> +	$(top_builddir)/libdrm.la \
>> +	$(top_builddir)/exynos/libdrm_exynos.la
>> +
>>  exynos_fimg2d_test_LDADD = \
>>  	$(top_builddir)/libdrm.la \
>>  	$(top_builddir)/libkms/libkms.la \
>> diff --git a/tests/exynos/exynos_fimg2d_perf.c
>> b/tests/exynos/exynos_fimg2d_perf.c new file mode 100644
>> index 0000000..1699bba
>> --- /dev/null
>> +++ b/tests/exynos/exynos_fimg2d_perf.c
>> @@ -0,0 +1,327 @@
>> +/*
>> + * Copyright (C) 2015 - Tobias Jakobi
>> + *
>> + * This is free software: you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published
>> + * by the Free Software Foundation, either version 2 of the License,
>> + * or (at your option) any later version.
>> + *
>> + * It is distributed in the hope that it will be useful, but
>> + * WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>> + * GNU General Public License for more details.
>> + * You should have received a copy of the GNU General Public License
>> + * along with it. If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#include <stdlib.h>
>> +#include <stdio.h>
>> +#include <time.h>
>> +#include <getopt.h>
>> +#include <errno.h>
>> +
>> +#include <xf86drm.h>
>> +
>> +#include "exynos_drm.h"
>> +#include "exynos_drmif.h"
>> +#include "exynos_fimg2d.h"
>> +
>> +static int output_mathematica = 0;
>> +
>> +static int fimg2d_perf_simple(struct exynos_bo *bo, struct
>> g2d_context *ctx,
>> +			unsigned buf_width, unsigned buf_height,
>> unsigned iterations) +{
>> +	struct timespec tspec = { 0 };
>> +	struct g2d_image img = { 0 };
>> +
>> +	unsigned long long g2d_time;
>> +	unsigned i;
>> +	int ret = 0;
>> +
>> +	img.width = buf_width;
>> +	img.height = buf_height;
>> +	img.stride = buf_width * 4;
>> +	img.color_mode = G2D_COLOR_FMT_ARGB8888 | G2D_ORDER_AXRGB;
>> +	img.buf_type = G2D_IMGBUF_GEM;
>> +	img.bo[0] = bo->handle;
>> +
>> +	srand(time(NULL));
>> +
>> +	printf("starting simple G2D performance test\n");
>> +	printf("buffer width = %u, buffer height = %u, iterations =
>> %u\n",
>> +		buf_width, buf_height, iterations);
>> +
>> +	if (output_mathematica)
>> +		putchar('{');
>> +
>> +	for (i = 0; i < iterations; ++i) {
>> +		unsigned x, y, w, h;
>> +
>> +		x = rand() % buf_width;
>> +		y = rand() % buf_height;
>> +
>> +		if (x == (buf_width - 1))
>> +			x -= 1;
>> +		if (y == (buf_height - 1))
>> +			y -= 1;
>> +
>> +		w = rand() % (buf_width - x);
>> +		h = rand() % (buf_height - y);
>> +
>> +		if (w == 0) w = 1;
>> +		if (h == 0) h = 1;
>> +
>> +		img.color = rand();
>> +
>> +		ret = g2d_solid_fill(ctx, &img, x, y, w, h);
>> +
>> +		clock_gettime(CLOCK_MONOTONIC, &tspec);
>> +
>> +		if (ret == 0)
>> +			ret = g2d_exec(ctx);
>> +
>> +		if (ret != 0) {
>> +			fprintf(stderr, "error: iteration %u failed
>> (x = %u, y = %u, w = %u, h = %u)\n",
>> +				i, x, y, w, h);
>> +			break;
>> +		} else {
>> +			struct timespec end = { 0 };
>> +			clock_gettime(CLOCK_MONOTONIC, &end);
>> +
>> +			g2d_time = (end.tv_sec - tspec.tv_sec) *
>> 1000000000ULL;
>> +			g2d_time += (end.tv_nsec - tspec.tv_nsec);
>> +
>> +			if (output_mathematica) {
>> +				if (i != 0) putchar(',');
>> +				printf("{%u,%llu}", w * h, g2d_time);
>> +			} else {
>> +				printf("num_pixels = %u, usecs =
>> %llu\n", w * h, g2d_time);
>> +			}
>> +		}
>> +	}
>> +
>> +	if (output_mathematica)
>> +		printf("}\n");
>> +
>> +	return ret;
>> +}
>> +
>> +static int fimg2d_perf_multi(struct exynos_bo *bo, struct
>> g2d_context *ctx,
>> +			unsigned buf_width, unsigned buf_height,
>> unsigned iterations, unsigned batch) +{
>> +	struct timespec tspec = { 0 };
>> +	struct g2d_image *images;
>> +
>> +	unsigned long long g2d_time;
>> +	unsigned i, j;
>> +	int ret = 0;
>> +
>> +	images = calloc(batch, sizeof(struct g2d_image));
>> +	if (images == NULL) {
>> +		fprintf(stderr, "error: failed to allocate G2D
>> images.\n");
>> +		return -ENOMEM;
>> +	}
>> +
>> +	for (i = 0; i < batch; ++i) {
>> +		images[i].width = buf_width;
>> +		images[i].height = buf_height;
>> +		images[i].stride = buf_width * 4;
>> +		images[i].color_mode = G2D_COLOR_FMT_ARGB8888 |
>> G2D_ORDER_AXRGB;
>> +		images[i].buf_type = G2D_IMGBUF_GEM;
>> +		images[i].bo[0] = bo->handle;
>> +	}
>> +
>> +	srand(time(NULL));
>> +
>> +	printf("starting multi G2D performance test (batch size =
>> %u)\n", batch);
>> +	printf("buffer width = %u, buffer height = %u, iterations =
>> %u\n",
>> +		buf_width, buf_height, iterations);
>> +
>> +	if (output_mathematica)
>> +		putchar('{');
>> +
>> +	for (i = 0; i < iterations; ++i) {
>> +		unsigned num_pixels = 0;
>> +
>> +		for (j = 0; j < batch; ++j) {
>> +			unsigned x, y, w, h;
>> +
>> +			x = rand() % buf_width;
>> +			y = rand() % buf_height;
>> +
>> +			if (x == (buf_width - 1))
>> +				x -= 1;
>> +			if (y == (buf_height - 1))
>> +				y -= 1;
>> +
>> +			w = rand() % (buf_width - x);
>> +			h = rand() % (buf_height - y);
>> +
>> +			if (w == 0) w = 1;
>> +			if (h == 0) h = 1;
>> +
>> +			images[j].color = rand();
>> +
>> +			num_pixels += w * h;
>> +
>> +			ret = g2d_solid_fill(ctx, &images[j], x, y,
>> w, h);
>> +			if (ret != 0)
>> +				break;
>> +		}
>> +
>> +		clock_gettime(CLOCK_MONOTONIC, &tspec);
>> +
>> +		if (ret == 0)
>> +			ret = g2d_exec(ctx);
>> +
>> +		if (ret != 0) {
>> +			fprintf(stderr, "error: iteration %u failed
>> (num_pixels = %u)\n", i, num_pixels);
>> +			break;
>> +		} else {
>> +			struct timespec end = { 0 };
>> +			clock_gettime(CLOCK_MONOTONIC, &end);
>> +
>> +			g2d_time = (end.tv_sec - tspec.tv_sec) *
>> 1000000000ULL;
>> +			g2d_time += (end.tv_nsec - tspec.tv_nsec);
>> +
>> +			if (output_mathematica) {
>> +				if (i != 0) putchar(',');
>> +				printf("{%u,%llu}", num_pixels,
>> g2d_time);
>> +			} else {
>> +				printf("num_pixels = %u, usecs =
>> %llu\n", num_pixels, g2d_time);
>> +			}
>> +		}
>> +	}
>> +
>> +	if (output_mathematica)
>> +		printf("}\n");
>> +
>> +	free(images);
>> +
>> +	return ret;
>> +}
>> +
>> +static void usage(const char *name)
>> +{
>> +	fprintf(stderr, "usage: %s [-ibwh]\n\n", name);
>> +
>> +	fprintf(stderr, "\t-i <number of iterations>\n");
>> +	fprintf(stderr, "\t-b <size of a batch> (default = 3)\n\n");
>> +
>> +	fprintf(stderr, "\t-w <buffer width> (default = 4096)\n");
>> +	fprintf(stderr, "\t-h <buffer height> (default = 4096)\n\n");
>> +
>> +	fprintf(stderr, "\t-M <enable Mathematica styled output>\n");
>> +
>> +	exit(0);
>> +}
>> +
>> +int main(int argc, char **argv)
>> +{
>> +	int fd, ret, c, parsefail;
>> +
>> +	struct exynos_device *dev;
>> +	struct g2d_context *ctx;
>> +	struct exynos_bo *bo;
>> +
>> +	unsigned int iters = 0, batch = 3;
>> +	unsigned int bufw = 4096, bufh = 4096;
>> +
>> +	ret = 0;
>> +	parsefail = 0;
>> +
>> +	while ((c = getopt(argc, argv, "i:b:w:h:M")) != -1) {
>> +		switch (c) {
>> +		case 'i':
>> +			if (sscanf(optarg, "%u", &iters) != 1)
>> +				parsefail = 1;
>> +			break;
>> +		case 'b':
>> +			if (sscanf(optarg, "%u", &batch) != 1)
>> +				parsefail = 1;
>> +			break;
>> +		case 'w':
>> +			if (sscanf(optarg, "%u", &bufw) != 1)
>> +				parsefail = 1;
>> +			break;
>> +		case 'h':
>> +			if (sscanf(optarg, "%u", &bufh) != 1)
>> +				parsefail = 1;
>> +			break;
>> +		case 'M':
>> +			output_mathematica = 1;
>> +			break;
>> +		default:
>> +			parsefail = 1;
>> +			break;
>> +		}
>> +	}
>> +
>> +	if (parsefail || (argc == 1) || (iters == 0))
>> +		usage(argv[0]);
>> +
>> +	if (bufw < 2 || bufw > 4096 || bufh < 2 || bufh > 4096) {
>> +		fprintf(stderr, "error: buffer width/height should
>> be in the range 2 to 4096.\n");
>> +		ret = -1;
>> +
>> +		goto out;
>> +	}
>> +
>> +	if (bufw == 0 || bufh == 0) {
>> +		fprintf(stderr, "error: buffer width/height should
>> be non-zero.\n");
>> +		ret = -1;
>> +
>> +		goto out;
>> +	}
>> +
>> +	fd = drmOpen("exynos", NULL);
>> +	if (fd < 0) {
>> +		fprintf(stderr, "error: failed to open drm\n");
>> +		ret = -1;
>> +
>> +		goto out;
>> +	}
>> +
>> +	dev = exynos_device_create(fd);
>> +	if (dev == NULL) {
>> +		fprintf(stderr, "error: failed to create device\n");
>> +		ret = -2;
>> +
>> +		goto fail;
>> +	}
>> +
>> +	ctx = g2d_init(fd);
>> +	if (ctx == NULL) {
>> +		fprintf(stderr, "error: failed to init G2D\n");
>> +		ret = -3;
>> +
>> +		goto g2d_fail;
>> +	}
>> +
>> +	bo = exynos_bo_create(dev, bufw * bufh * 4, 0);
>> +	if (bo == NULL) {
>> +		fprintf(stderr, "error: failed to create bo\n");
>> +		ret = -4;
>> +
>> +		goto bo_fail;
>> +	}
>> +
>> +	ret = fimg2d_perf_simple(bo, ctx, bufw, bufh, iters);
>> +
>> +	if (ret == 0)
>> +		ret = fimg2d_perf_multi(bo, ctx, bufw, bufh, iters,
>> batch); +
>> +	exynos_bo_destroy(bo);
>> +
>> +bo_fail:
>> +	g2d_fini(ctx);
>> +
>> +g2d_fail:
>> +	exynos_device_destroy(dev);
>> +
>> +fail:
>> +	drmClose(fd);
>> +
>> +out:
>> +	return ret;
>> +}
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hyungwon Hwang Nov. 10, 2015, 2:59 a.m. UTC | #3
Hello Tobias,

On Mon, 09 Nov 2015 10:47:13 +0100
Tobias Jakobi <tjakobi@math.uni-bielefeld.de> wrote:

> Hello Hyungwon,
> 
> 
> Hyungwon Hwang wrote:
> > Hello,
> > 
> > I think this patch should update .gitignore, not for adding the
> > built binary to untracked file list.
> Thanks!
> 
> 
> > Also, I want to make clear about the purpose of this test program.
> > What do you want to get after this test? This program runs G2D with
> > randomly chosen number of pixel and shows the elapsed time to do
> > that. I run it on my board. But I could not find any meaning of the
> > test. If you just want to know the execution time of solid fill,
> > what about get the width and height from user and run the same tests
> > iteratively for more accurate result? Or at least, increasing
> > number of pixels?
> The test is to measure the dependency between amount of pixels the G2D
> has to process and the amount of time for the G2D to process such
> pixels.
> 
> It's exactly what a performance test should do, measure the time it
> takes for a certain workload to complete.
> 
> In particular the test wants to answer the question if the dependency
> stated above is of linear type.
> 
> Of course it's not, since we have setup time, so at least it should be
> affine linear. But even that is not true, since you see subtle
> 'branching' when doing high density plots (that's why I added export
> of the data to Mathematica).
> 
> 
> What you ask for (user input) is in fact already implemented. The user
> can specify the buffer width and height, which in turn limits the size
> of the rectangle that is solid filled.
> 
> If you want smaller rectangles filled, decrease buffer width and
> height, if you want bigger ones filled, increase.
> 
> 
> The second purpose is to stress test the G2D, as already indicated in
> the commit description. The G2D can be overclocked quite a lot under
> certain conditions. With increase MIF/INT voltages I can run it with
> 400MHz instead of the 200MHz defaults. The application can now be used
> to check stability. E.g. if voltages are too low the system can
> quickly lock-up.
> 
> In particular one could also check how processing time depends on the
> clock rate of the G2D. One interesting question here is how memory
> bandwidth limits us.
> 
> 
> 
> With best wishes,
> Tobias

Yes, I agree with the broad view. Please see below; I ran the test
twice in a row.

root@localhost:~# ./exynos_fimg2d_perf  -i 10 -w 1024 -h 1024   
exynos/fimg2d: G2D version (4.1).
starting simple G2D performance test
buffer width = 1024, buffer height = 1024, iterations = 10
num_pixels = 136000, usecs = 236000
num_pixels = 8492, usecs = 47083
num_pixels = 100688, usecs = 200042
num_pixels = 141312, usecs = 216667
num_pixels = 39962, usecs = 92708
num_pixels = 95046, usecs = 156542
num_pixels = 2562, usecs = 34666
num_pixels = 176485, usecs = 326916
num_pixels = 17760, usecs = 56625
num_pixels = 1625, usecs = 31833
starting multi G2D performance test (batch size = 3)
buffer width = 1024, buffer height = 1024, iterations = 10
num_pixels = 245180, usecs = 385083
num_pixels = 276320, usecs = 398625
num_pixels = 196807, usecs = 356666
num_pixels = 305540, usecs = 420458
num_pixels = 65978, usecs = 120250
num_pixels = 265028, usecs = 379417
num_pixels = 139079, usecs = 213667
num_pixels = 24970, usecs = 67625
num_pixels = 46808, usecs = 114125
num_pixels = 100804, usecs = 179750
root@localhost:~# ./exynos_fimg2d_perf  -i 10 -w 1024 -h 1024 
exynos/fimg2d: G2D version (4.1).
starting simple G2D performance test
buffer width = 1024, buffer height = 1024, iterations = 10
num_pixels = 18676, usecs = 95541
num_pixels = 117056, usecs = 218875
num_pixels = 80784, usecs = 137209
num_pixels = 427, usecs = 33209
num_pixels = 238044, usecs = 403041
num_pixels = 4392, usecs = 37709
num_pixels = 19880, usecs = 59750
num_pixels = 3666, usecs = 36542
num_pixels = 4630, usecs = 36166
num_pixels = 70834, usecs = 125917
starting multi G2D performance test (batch size = 3)
buffer width = 1024, buffer height = 1024, iterations = 10
num_pixels = 216516, usecs = 347042
num_pixels = 242863, usecs = 422417
num_pixels = 28176, usecs = 72292
num_pixels = 110713, usecs = 179167
num_pixels = 292266, usecs = 431750
num_pixels = 274127, usecs = 392833
num_pixels = 291659, usecs = 415875
num_pixels = 140202, usecs = 218833
num_pixels = 122400, usecs = 193084
num_pixels = 168647, usecs = 251375

As you said, I can adjust the buffer width and height. But because the
program chooses the number of pixels to process randomly, I can't compare
the results after I modify something (the clock, or something else like
you mentioned). Also, I can only figure out the relationship between the
number of pixels and the processing time after I draw a graph. It is too
hard to see otherwise, because the numbers of pixels are not even in
increasing order. I think that this program would be more useful
if the user could control the whole situation more tightly. But if you
need this kind of test, then it's OK with me.

Best regards,
Hyungwon Hwang

> 
> 
> > 
> > 
> > Best regards,
> > Hyungwon Hwang
> > 
> > 
> > On Mon, 02 Nov 2015 10:52:09 +0100
> > Tobias Jakobi <tjakobi@math.uni-bielefeld.de> wrote:
> > 
> >> Currently only fast solid color clear performance is measured.
> >> A large buffer is allocated and solid color clear operations
> >> are executed on it with randomly chosen properties (position
> >> and size of the region, clear color). Execution time is
> >> measured and output together with the amount of pixels
> >> processed.
> >>
> >> The 'simple' variant only executes one G2D command buffer at
> >> a time, while the 'multi' variant executes multiple ones. This
> >> can be used to measure setup/exec overhead.
> >>
> >> The test also serves a stability check. If clocks/voltages are
> >> too high or low respectively, the test quickly reveals this.
> >>
> >> v2: Add GPLv2 header, argument handling and documentation.
> >>     Tool is only installed when requested.
> >> v3: Free images array in fimg2d_perf_multi() as pointed out
> >>     by Hyungwon Hwang.
> >> v4: Include header for error numbers (fixes build).
> >>
> >> Signed-off-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
> >> ---
> >>  tests/exynos/Makefile.am          |  19 ++-
> >>  tests/exynos/exynos_fimg2d_perf.c | 327
> >> ++++++++++++++++++++++++++++++++++++++ 2 files changed, 344
> >> insertions(+), 2 deletions(-) create mode 100644
> >> tests/exynos/exynos_fimg2d_perf.c
> >>
> >> diff --git a/tests/exynos/Makefile.am b/tests/exynos/Makefile.am
> >> index b21d016..e82d199 100644
> >> --- a/tests/exynos/Makefile.am
> >> +++ b/tests/exynos/Makefile.am
> >> @@ -5,16 +5,31 @@ AM_CFLAGS = \
> >>  	-I $(top_srcdir)/exynos \
> >>  	-I $(top_srcdir)
> >>  
> >> +bin_PROGRAMS =
> >> +noinst_PROGRAMS =
> >> +
> >>  if HAVE_LIBKMS
> >>  if HAVE_INSTALL_TESTS
> >> -bin_PROGRAMS = \
> >> +bin_PROGRAMS += \
> >>  	exynos_fimg2d_test
> >>  else
> >> -noinst_PROGRAMS = \
> >> +noinst_PROGRAMS += \
> >>  	exynos_fimg2d_test
> >>  endif
> >>  endif
> >>  
> >> +if HAVE_INSTALL_TESTS
> >> +bin_PROGRAMS += \
> >> +	exynos_fimg2d_perf
> >> +else
> >> +noinst_PROGRAMS += \
> >> +	exynos_fimg2d_perf
> >> +endif
> >> +
> >> +exynos_fimg2d_perf_LDADD = \
> >> +	$(top_builddir)/libdrm.la \
> >> +	$(top_builddir)/exynos/libdrm_exynos.la
> >> +
> >>  exynos_fimg2d_test_LDADD = \
> >>  	$(top_builddir)/libdrm.la \
> >>  	$(top_builddir)/libkms/libkms.la \
> >> diff --git a/tests/exynos/exynos_fimg2d_perf.c
> >> b/tests/exynos/exynos_fimg2d_perf.c new file mode 100644
> >> index 0000000..1699bba
> >> --- /dev/null
> >> +++ b/tests/exynos/exynos_fimg2d_perf.c
> >> @@ -0,0 +1,327 @@
> >> +/*
> >> + * Copyright (C) 2015 - Tobias Jakobi
> >> + *
> >> + * This is free software: you can redistribute it and/or modify
> >> + * it under the terms of the GNU General Public License as
> >> published
> >> + * by the Free Software Foundation, either version 2 of the
> >> License,
> >> + * or (at your option) any later version.
> >> + *
> >> + * It is distributed in the hope that it will be useful, but
> >> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> >> + * GNU General Public License for more details.
> >> + * You should have received a copy of the GNU General Public
> >> License
> >> + * along with it. If not, see <http://www.gnu.org/licenses/>.
> >> + */
> >> +
> >> +#include <stdlib.h>
> >> +#include <stdio.h>
> >> +#include <time.h>
> >> +#include <getopt.h>
> >> +#include <errno.h>
> >> +
> >> +#include <xf86drm.h>
> >> +
> >> +#include "exynos_drm.h"
> >> +#include "exynos_drmif.h"
> >> +#include "exynos_fimg2d.h"
> >> +
> >> +static int output_mathematica = 0;
> >> +
> >> +static int fimg2d_perf_simple(struct exynos_bo *bo, struct
> >> g2d_context *ctx,
> >> +			unsigned buf_width, unsigned buf_height,
> >> unsigned iterations) +{
> >> +	struct timespec tspec = { 0 };
> >> +	struct g2d_image img = { 0 };
> >> +
> >> +	unsigned long long g2d_time;
> >> +	unsigned i;
> >> +	int ret = 0;
> >> +
> >> +	img.width = buf_width;
> >> +	img.height = buf_height;
> >> +	img.stride = buf_width * 4;
> >> +	img.color_mode = G2D_COLOR_FMT_ARGB8888 | G2D_ORDER_AXRGB;
> >> +	img.buf_type = G2D_IMGBUF_GEM;
> >> +	img.bo[0] = bo->handle;
> >> +
> >> +	srand(time(NULL));
> >> +
> >> +	printf("starting simple G2D performance test\n");
> >> +	printf("buffer width = %u, buffer height = %u, iterations
> >> = %u\n",
> >> +		buf_width, buf_height, iterations);
> >> +
> >> +	if (output_mathematica)
> >> +		putchar('{');
> >> +
> >> +	for (i = 0; i < iterations; ++i) {
> >> +		unsigned x, y, w, h;
> >> +
> >> +		x = rand() % buf_width;
> >> +		y = rand() % buf_height;
> >> +
> >> +		if (x == (buf_width - 1))
> >> +			x -= 1;
> >> +		if (y == (buf_height - 1))
> >> +			y -= 1;
> >> +
> >> +		w = rand() % (buf_width - x);
> >> +		h = rand() % (buf_height - y);
> >> +
> >> +		if (w == 0) w = 1;
> >> +		if (h == 0) h = 1;
> >> +
> >> +		img.color = rand();
> >> +
> >> +		ret = g2d_solid_fill(ctx, &img, x, y, w, h);
> >> +
> >> +		clock_gettime(CLOCK_MONOTONIC, &tspec);
> >> +
> >> +		if (ret == 0)
> >> +			ret = g2d_exec(ctx);
> >> +
> >> +		if (ret != 0) {
> >> +			fprintf(stderr, "error: iteration %u
> >> failed (x = %u, y = %u, w = %u, h = %u)\n",
> >> +				i, x, y, w, h);
> >> +			break;
> >> +		} else {
> >> +			struct timespec end = { 0 };
> >> +			clock_gettime(CLOCK_MONOTONIC, &end);
> >> +
> >> +			g2d_time = (end.tv_sec - tspec.tv_sec) *
> >> 1000000000ULL;
> >> +			g2d_time += (end.tv_nsec - tspec.tv_nsec);
> >> +
> >> +			if (output_mathematica) {
> >> +				if (i != 0) putchar(',');
> >> +				printf("{%u,%llu}", w * h,
> >> g2d_time);
> >> +			} else {
> >> +				printf("num_pixels = %u, usecs =
> >> %llu\n", w * h, g2d_time);
> >> +			}
> >> +		}
> >> +	}
> >> +
> >> +	if (output_mathematica)
> >> +		printf("}\n");
> >> +
> >> +	return ret;
> >> +}
> >> +
> >> +static int fimg2d_perf_multi(struct exynos_bo *bo, struct
> >> g2d_context *ctx,
> >> +			unsigned buf_width, unsigned buf_height,
> >> unsigned iterations, unsigned batch) +{
> >> +	struct timespec tspec = { 0 };
> >> +	struct g2d_image *images;
> >> +
> >> +	unsigned long long g2d_time;
> >> +	unsigned i, j;
> >> +	int ret = 0;
> >> +
> >> +	images = calloc(batch, sizeof(struct g2d_image));
> >> +	if (images == NULL) {
> >> +		fprintf(stderr, "error: failed to allocate G2D
> >> images.\n");
> >> +		return -ENOMEM;
> >> +	}
> >> +
> >> +	for (i = 0; i < batch; ++i) {
> >> +		images[i].width = buf_width;
> >> +		images[i].height = buf_height;
> >> +		images[i].stride = buf_width * 4;
> >> +		images[i].color_mode = G2D_COLOR_FMT_ARGB8888 |
> >> G2D_ORDER_AXRGB;
> >> +		images[i].buf_type = G2D_IMGBUF_GEM;
> >> +		images[i].bo[0] = bo->handle;
> >> +	}
> >> +
> >> +	srand(time(NULL));
> >> +
> >> +	printf("starting multi G2D performance test (batch size =
> >> %u)\n", batch);
> >> +	printf("buffer width = %u, buffer height = %u, iterations
> >> = %u\n",
> >> +		buf_width, buf_height, iterations);
> >> +
> >> +	if (output_mathematica)
> >> +		putchar('{');
> >> +
> >> +	for (i = 0; i < iterations; ++i) {
> >> +		unsigned num_pixels = 0;
> >> +
> >> +		for (j = 0; j < batch; ++j) {
> >> +			unsigned x, y, w, h;
> >> +
> >> +			x = rand() % buf_width;
> >> +			y = rand() % buf_height;
> >> +
> >> +			if (x == (buf_width - 1))
> >> +				x -= 1;
> >> +			if (y == (buf_height - 1))
> >> +				y -= 1;
> >> +
> >> +			w = rand() % (buf_width - x);
> >> +			h = rand() % (buf_height - y);
> >> +
> >> +			if (w == 0) w = 1;
> >> +			if (h == 0) h = 1;
> >> +
> >> +			images[j].color = rand();
> >> +
> >> +			num_pixels += w * h;
> >> +
> >> +			ret = g2d_solid_fill(ctx, &images[j], x,
> >> y, w, h);
> >> +			if (ret != 0)
> >> +				break;
> >> +		}
> >> +
> >> +		clock_gettime(CLOCK_MONOTONIC, &tspec);
> >> +
> >> +		if (ret == 0)
> >> +			ret = g2d_exec(ctx);
> >> +
> >> +		if (ret != 0) {
> >> +			fprintf(stderr, "error: iteration %u
> >> failed (num_pixels = %u)\n", i, num_pixels);
> >> +			break;
> >> +		} else {
> >> +			struct timespec end = { 0 };
> >> +			clock_gettime(CLOCK_MONOTONIC, &end);
> >> +
> >> +			g2d_time = (end.tv_sec - tspec.tv_sec) *
> >> 1000000000ULL;
> >> +			g2d_time += (end.tv_nsec - tspec.tv_nsec);
> >> +
> >> +			if (output_mathematica) {
> >> +				if (i != 0) putchar(',');
> >> +				printf("{%u,%llu}", num_pixels,
> >> g2d_time);
> >> +			} else {
> >> +				printf("num_pixels = %u, usecs =
> >> %llu\n", num_pixels, g2d_time);
> >> +			}
> >> +		}
> >> +	}
> >> +
> >> +	if (output_mathematica)
> >> +		printf("}\n");
> >> +
> >> +	free(images);
> >> +
> >> +	return ret;
> >> +}
> >> +
> >> +static void usage(const char *name)
> >> +{
> >> +	fprintf(stderr, "usage: %s [-ibwh]\n\n", name);
> >> +
> >> +	fprintf(stderr, "\t-i <number of iterations>\n");
> >> +	fprintf(stderr, "\t-b <size of a batch> (default =
> >> 3)\n\n"); +
> >> +	fprintf(stderr, "\t-w <buffer width> (default = 4096)\n");
> >> +	fprintf(stderr, "\t-h <buffer height> (default =
> >> 4096)\n\n"); +
> >> +	fprintf(stderr, "\t-M <enable Mathematica styled
> >> output>\n"); +
> >> +	exit(0);
> >> +}
> >> +
> >> +int main(int argc, char **argv)
> >> +{
> >> +	int fd, ret, c, parsefail;
> >> +
> >> +	struct exynos_device *dev;
> >> +	struct g2d_context *ctx;
> >> +	struct exynos_bo *bo;
> >> +
> >> +	unsigned int iters = 0, batch = 3;
> >> +	unsigned int bufw = 4096, bufh = 4096;
> >> +
> >> +	ret = 0;
> >> +	parsefail = 0;
> >> +
> >> +	while ((c = getopt(argc, argv, "i:b:w:h:M")) != -1) {
> >> +		switch (c) {
> >> +		case 'i':
> >> +			if (sscanf(optarg, "%u", &iters) != 1)
> >> +				parsefail = 1;
> >> +			break;
> >> +		case 'b':
> >> +			if (sscanf(optarg, "%u", &batch) != 1)
> >> +				parsefail = 1;
> >> +			break;
> >> +		case 'w':
> >> +			if (sscanf(optarg, "%u", &bufw) != 1)
> >> +				parsefail = 1;
> >> +			break;
> >> +		case 'h':
> >> +			if (sscanf(optarg, "%u", &bufh) != 1)
> >> +				parsefail = 1;
> >> +			break;
> >> +		case 'M':
> >> +			output_mathematica = 1;
> >> +			break;
> >> +		default:
> >> +			parsefail = 1;
> >> +			break;
> >> +		}
> >> +	}
> >> +
> >> +	if (parsefail || (argc == 1) || (iters == 0))
> >> +		usage(argv[0]);
> >> +
> >> +	if (bufw < 2 || bufw > 4096 || bufh < 2 || bufh > 4096) {
> >> +		fprintf(stderr, "error: buffer width/height should
> >> be in the range 2 to 4096.\n");
> >> +		ret = -1;
> >> +
> >> +		goto out;
> >> +	}
> >> +
> >> +	if (bufw == 0 || bufh == 0) {
> >> +		fprintf(stderr, "error: buffer width/height should
> >> be non-zero.\n");
> >> +		ret = -1;
> >> +
> >> +		goto out;
> >> +	}
> >> +
> >> +	fd = drmOpen("exynos", NULL);
> >> +	if (fd < 0) {
> >> +		fprintf(stderr, "error: failed to open drm\n");
> >> +		ret = -1;
> >> +
> >> +		goto out;
> >> +	}
> >> +
> >> +	dev = exynos_device_create(fd);
> >> +	if (dev == NULL) {
> >> +		fprintf(stderr, "error: failed to create
> >> device\n");
> >> +		ret = -2;
> >> +
> >> +		goto fail;
> >> +	}
> >> +
> >> +	ctx = g2d_init(fd);
> >> +	if (ctx == NULL) {
> >> +		fprintf(stderr, "error: failed to init G2D\n");
> >> +		ret = -3;
> >> +
> >> +		goto g2d_fail;
> >> +	}
> >> +
> >> +	bo = exynos_bo_create(dev, bufw * bufh * 4, 0);
> >> +	if (bo == NULL) {
> >> +		fprintf(stderr, "error: failed to create bo\n");
> >> +		ret = -4;
> >> +
> >> +		goto bo_fail;
> >> +	}
> >> +
> >> +	ret = fimg2d_perf_simple(bo, ctx, bufw, bufh, iters);
> >> +
> >> +	if (ret == 0)
> >> +		ret = fimg2d_perf_multi(bo, ctx, bufw, bufh,
> >> iters, batch); +
> >> +	exynos_bo_destroy(bo);
> >> +
> >> +bo_fail:
> >> +	g2d_fini(ctx);
> >> +
> >> +g2d_fail:
> >> +	exynos_device_destroy(dev);
> >> +
> >> +fail:
> >> +	drmClose(fd);
> >> +
> >> +out:
> >> +	return ret;
> >> +}
> > 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tobias Jakobi Nov. 10, 2015, 1:23 p.m. UTC | #4
Hello Hyungwon,


Hyungwon Hwang wrote:
> Hello Tobias,
> 
> On Mon, 09 Nov 2015 10:47:13 +0100
> Tobias Jakobi <tjakobi@math.uni-bielefeld.de> wrote:
> 
>> Hello Hyungwon,
>>
>>
>> Hyungwon Hwang wrote:
>>> Hello,
>>>
>>> I think this patch should update .gitignore, not for adding the
>>> built binary to untracked file list.
>> Thanks!
>>
>>
>>> Also, I want to make clear about the purpose of this test program.
>>> What do you want to get after this test? This program runs G2D with
>>> randomly chosen number of pixel and shows the elapsed time to do
>>> that. I run it on my board. But I could not find any meaning of the
>>> test. If you just want to know the execution time of solid fill,
>>> what about get the width and height from user and run the same tests
>>> iteratively for more accurate result? Or at least, increasing
>>> number of pixels?
>> The test is to measure the dependency between amount of pixels the G2D
>> has to process and the amount of time for the G2D to process such
>> pixels.
>>
>> It's exactly what a performance test should do, measure the time it
>> takes for a certain workload to complete.
>>
>> In particular the test wants to answer the question if the dependency
>> stated above is of linear type.
>>
>> Of course it's not, since we have setup time, so at least it should be
>> affine linear. But even that is not true, since you see subtle
>> 'branching' when doing high density plots (that's why I added export
>> of the data to Mathematica).
>>
>>
>> What you ask for (user input) is in fact already implemented. The user
>> can specify the buffer width and height, which in turn limits the size
>> of the rectangle that is solid filled.
>>
>> If you want smaller rectangles filled, decrease buffer width and
>> height, if you want bigger ones filled, increase.
>>
>>
>> The second purpose is to stress test the G2D, as already indicated in
>> the commit description. The G2D can be overclocked quite a lot under
>> certain conditions. With increase MIF/INT voltages I can run it with
>> 400MHz instead of the 200MHz defaults. The application can now be used
>> to check stability. E.g. if voltages are too low the system can
>> quickly lock-up.
>>
>> In particular one could also check how processing time depends on the
>> clock rate of the G2D. One interesting question here is how memory
>> bandwidth limits us.
>>
>>
>>
>> With best wishes,
>> Tobias
> 
> Yes. I agree with the broad view. Please see the below, I run the test
> 2 times in a row.
> 
> root@localhost:~# ./exynos_fimg2d_perf  -i 10 -w 1024 -h 1024   
> exynos/fimg2d: G2D version (4.1).
> starting simple G2D performance test
> buffer width = 1024, buffer height = 1024, iterations = 10
> num_pixels = 136000, usecs = 236000
> num_pixels = 8492, usecs = 47083
> num_pixels = 100688, usecs = 200042
> num_pixels = 141312, usecs = 216667
> num_pixels = 39962, usecs = 92708
> num_pixels = 95046, usecs = 156542
> num_pixels = 2562, usecs = 34666
> num_pixels = 176485, usecs = 326916
> num_pixels = 17760, usecs = 56625
> num_pixels = 1625, usecs = 31833
> starting multi G2D performance test (batch size = 3)
> buffer width = 1024, buffer height = 1024, iterations = 10
> num_pixels = 245180, usecs = 385083
> num_pixels = 276320, usecs = 398625
> num_pixels = 196807, usecs = 356666
> num_pixels = 305540, usecs = 420458
> num_pixels = 65978, usecs = 120250
> num_pixels = 265028, usecs = 379417
> num_pixels = 139079, usecs = 213667
> num_pixels = 24970, usecs = 67625
> num_pixels = 46808, usecs = 114125
> num_pixels = 100804, usecs = 179750
> root@localhost:~# ./exynos_fimg2d_perf  -i 10 -w 1024 -h 1024 
> exynos/fimg2d: G2D version (4.1).
> starting simple G2D performance test
> buffer width = 1024, buffer height = 1024, iterations = 10
> num_pixels = 18676, usecs = 95541
> num_pixels = 117056, usecs = 218875
> num_pixels = 80784, usecs = 137209
> num_pixels = 427, usecs = 33209
> num_pixels = 238044, usecs = 403041
> num_pixels = 4392, usecs = 37709
> num_pixels = 19880, usecs = 59750
> num_pixels = 3666, usecs = 36542
> num_pixels = 4630, usecs = 36166
> num_pixels = 70834, usecs = 125917
> starting multi G2D performance test (batch size = 3)
> buffer width = 1024, buffer height = 1024, iterations = 10
> num_pixels = 216516, usecs = 347042
> num_pixels = 242863, usecs = 422417
> num_pixels = 28176, usecs = 72292
> num_pixels = 110713, usecs = 179167
> num_pixels = 292266, usecs = 431750
> num_pixels = 274127, usecs = 392833
> num_pixels = 291659, usecs = 415875
> num_pixels = 140202, usecs = 218833
> num_pixels = 122400, usecs = 193084
> num_pixels = 168647, usecs = 251375
> 
> As you said, I can adjust the buffer width and height. But because the
> program choose the number of pixel to process randomly, I can't compare
> the result after I modified something (clock, or something else like
> you mentioned).
I have trouble following you here. It seems to me that by 'compare' you
mean that you want to compare performance using these numbers by hand.

That's of course a real pain in the ass, and I would never recommend it!



> Also I can figure out the tendency between the number
> of pixels and the processing time after I draw the graph. But it is too
> hard not by doing that, because the number of pixels are not in
> increasing order at least.
That's why you don't do analysis of large data sets yourself.

The intended way is to feed the data in your program of choice and then
apply various methods there.

This e.g. is a plot done via Mathematica:
https://www.math.uni-bielefeld.de/~tjakobi/exynos/g2d_clear_perf.pdf

By fitting a polynomial of degree 1 to the data you can then extract
average setup time and average processing time per pixel.

That's information that actually interests you. The 'raw' data you
posted above are just individual samples and are therefore only of
limited meaning. It's the large amount of samples and the averaging of
these that gives you statistical meaning.


You could argue that the test application should implement all this
itself, but I would strongly disagree with that. We've got GnuPlot,
Maple, MATLAB, Mathematica, Octave, SAGE, and probably a dozen other
tools that do data analysis way better than we ever could.



> I think that this program will be more useful
> if the user can control the whole situation more tightly. But if you
> need this kind of test, then it's OK to me.
I hope the explanation above reveals why you don't want the user to
control the situation 'more tightly' :)



With best wishes,
Tobias


> Best regards,
> Hyungwon Hwang

--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hyungwon Hwang Nov. 11, 2015, 1:24 a.m. UTC | #5
On Tue, 10 Nov 2015 14:23:51 +0100
Tobias Jakobi <tjakobi@math.uni-bielefeld.de> wrote:

> Hello Hyungwon,
> 
> 
> Hyungwon Hwang wrote:
> > Hello Tobias,
> > 
> > On Mon, 09 Nov 2015 10:47:13 +0100
> > Tobias Jakobi <tjakobi@math.uni-bielefeld.de> wrote:
> > 
> >> Hello Hyungwon,
> >>
> >>
> >> Hyungwon Hwang wrote:
> >>> Hello,
> >>>
> >>> I think this patch should update .gitignore, not for adding the
> >>> built binary to untracked file list.
> >> Thanks!
> >>
> >>
> >>> Also, I want to make clear about the purpose of this test program.
> >>> What do you want to get after this test? This program runs G2D
> >>> with randomly chosen number of pixel and shows the elapsed time
> >>> to do that. I run it on my board. But I could not find any
> >>> meaning of the test. If you just want to know the execution time
> >>> of solid fill, what about get the width and height from user and
> >>> run the same tests iteratively for more accurate result? Or at
> >>> least, increasing number of pixels?
> >> The test is to measure the dependency between amount of pixels the
> >> G2D has to process and the amount of time for the G2D to process
> >> such pixels.
> >>
> >> It's exactly what a performance test should do, measure the time it
> >> takes for a certain workload to complete.
> >>
> >> In particular the test wants to answer the question if the
> >> dependency stated above is of linear type.
> >>
> >> Of course it's not, since we have setup time, so at least it
> >> should be affine linear. But even that is not true, since you see
> >> subtle 'branching' when doing high density plots (that's why I
> >> added export of the data to Mathematica).
> >>
> >>
> >> What you ask for (user input) is in fact already implemented. The
> >> user can specify the buffer width and height, which in turn limits
> >> the size of the rectangle that is solid filled.
> >>
> >> If you want smaller rectangles filled, decrease buffer width and
> >> height, if you want bigger ones filled, increase.
> >>
> >>
> >> The second purpose is to stress test the G2D, as already indicated
> >> in the commit description. The G2D can be overclocked quite a lot
> >> under certain conditions. With increase MIF/INT voltages I can run
> >> it with 400MHz instead of the 200MHz defaults. The application can
> >> now be used to check stability. E.g. if voltages are too low the
> >> system can quickly lock-up.
> >>
> >> In particular one could also check how processing time depends on
> >> the clock rate of the G2D. One interesting question here is how
> >> memory bandwidth limits us.
> >>
> >>
> >>
> >> With best wishes,
> >> Tobias
> > 
> > Yes. I agree with the broad view. Please see the below, I run the
> > test 2 times in a row.
> > 
> > root@localhost:~# ./exynos_fimg2d_perf  -i 10 -w 1024 -h 1024   
> > exynos/fimg2d: G2D version (4.1).
> > starting simple G2D performance test
> > buffer width = 1024, buffer height = 1024, iterations = 10
> > num_pixels = 136000, usecs = 236000
> > num_pixels = 8492, usecs = 47083
> > num_pixels = 100688, usecs = 200042
> > num_pixels = 141312, usecs = 216667
> > num_pixels = 39962, usecs = 92708
> > num_pixels = 95046, usecs = 156542
> > num_pixels = 2562, usecs = 34666
> > num_pixels = 176485, usecs = 326916
> > num_pixels = 17760, usecs = 56625
> > num_pixels = 1625, usecs = 31833
> > starting multi G2D performance test (batch size = 3)
> > buffer width = 1024, buffer height = 1024, iterations = 10
> > num_pixels = 245180, usecs = 385083
> > num_pixels = 276320, usecs = 398625
> > num_pixels = 196807, usecs = 356666
> > num_pixels = 305540, usecs = 420458
> > num_pixels = 65978, usecs = 120250
> > num_pixels = 265028, usecs = 379417
> > num_pixels = 139079, usecs = 213667
> > num_pixels = 24970, usecs = 67625
> > num_pixels = 46808, usecs = 114125
> > num_pixels = 100804, usecs = 179750
> > root@localhost:~# ./exynos_fimg2d_perf  -i 10 -w 1024 -h 1024 
> > exynos/fimg2d: G2D version (4.1).
> > starting simple G2D performance test
> > buffer width = 1024, buffer height = 1024, iterations = 10
> > num_pixels = 18676, usecs = 95541
> > num_pixels = 117056, usecs = 218875
> > num_pixels = 80784, usecs = 137209
> > num_pixels = 427, usecs = 33209
> > num_pixels = 238044, usecs = 403041
> > num_pixels = 4392, usecs = 37709
> > num_pixels = 19880, usecs = 59750
> > num_pixels = 3666, usecs = 36542
> > num_pixels = 4630, usecs = 36166
> > num_pixels = 70834, usecs = 125917
> > starting multi G2D performance test (batch size = 3)
> > buffer width = 1024, buffer height = 1024, iterations = 10
> > num_pixels = 216516, usecs = 347042
> > num_pixels = 242863, usecs = 422417
> > num_pixels = 28176, usecs = 72292
> > num_pixels = 110713, usecs = 179167
> > num_pixels = 292266, usecs = 431750
> > num_pixels = 274127, usecs = 392833
> > num_pixels = 291659, usecs = 415875
> > num_pixels = 140202, usecs = 218833
> > num_pixels = 122400, usecs = 193084
> > num_pixels = 168647, usecs = 251375
> > 
> > As you said, I can adjust the buffer width and height. But because
> > the program choose the number of pixel to process randomly, I can't
> > compare the result after I modified something (clock, or something
> > else like you mentioned).
> I have trouble following you here. It seems to be that by 'compare'
> you mean that you want to compare performance using these numbers by
> hand.
> 
> That's of course a real pain in the ass, and I would never recommend
> it!
> 
> 
> 
> > Also I can figure out the tendency between the number
> > of pixels and the processing time after I draw the graph. But it is
> > too hard not by doing that, because the number of pixels are not in
> > increasing order at least.
> That's why you don't do analysis of large data sets yourself.
> 
> The intended way is to feed the data in your program of choice and
> then apply various methods there.
> 
> This e.g. is a plot done via Mathematica:
> https://www.math.uni-bielefeld.de/~tjakobi/exynos/g2d_clear_perf.pdf
> 
> By fitting a polynomial of degree 1 to the data you can then extract
> average setup time and average processing time per pixel.
> 
> That's information that actually interests you. The 'raw' data you
> posted above are just individual samples and are therefore only of
> limited meaning. It's the large amount of samples and the averaging of
> these that gives you statistical meaning.
> 
> 
> You could argue that the test application should implement all this
> itself, but I would strongly disagree with that. We've got GnuPlot,
> Maple, MATLAB, Mathematica, Octave, SAGE, and probably a dozen of
> other tools that do data analysis way better than we ever could.
> 
> 
> 
> > I think that this program will be more useful
> > if the user can control the whole situation more tightly. But if you
> > need this kind of test, then it's OK to me.
> I hope the explanation above reveals why you don't want the user to
> control the situation 'more tightly' :)

Yes. I now understand what you intended and where you stand
on this issue. Thanks for the kind explanation.


Tested-by: Hyungwon Hwang <human.hwang@samsung.com>
Reviewed-by: Hyungwon Hwang <human.hwang@samsung.com>

Best regards,
Hyungwon Hwang


> 
> 
> 
> With best wishes,
> Tobias
> 
> 
> > Best regards,
> > Hyungwon Hwang
> 
> --
> To unsubscribe from this list: send the line "unsubscribe
> linux-samsung-soc" in the body of a message to
> majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/tests/exynos/Makefile.am b/tests/exynos/Makefile.am
index b21d016..e82d199 100644
--- a/tests/exynos/Makefile.am
+++ b/tests/exynos/Makefile.am
@@ -5,16 +5,31 @@  AM_CFLAGS = \
 	-I $(top_srcdir)/exynos \
 	-I $(top_srcdir)
 
+bin_PROGRAMS =
+noinst_PROGRAMS =
+
 if HAVE_LIBKMS
 if HAVE_INSTALL_TESTS
-bin_PROGRAMS = \
+bin_PROGRAMS += \
 	exynos_fimg2d_test
 else
-noinst_PROGRAMS = \
+noinst_PROGRAMS += \
 	exynos_fimg2d_test
 endif
 endif
 
+if HAVE_INSTALL_TESTS
+bin_PROGRAMS += \
+	exynos_fimg2d_perf
+else
+noinst_PROGRAMS += \
+	exynos_fimg2d_perf
+endif
+
+exynos_fimg2d_perf_LDADD = \
+	$(top_builddir)/libdrm.la \
+	$(top_builddir)/exynos/libdrm_exynos.la
+
 exynos_fimg2d_test_LDADD = \
 	$(top_builddir)/libdrm.la \
 	$(top_builddir)/libkms/libkms.la \
diff --git a/tests/exynos/exynos_fimg2d_perf.c b/tests/exynos/exynos_fimg2d_perf.c
new file mode 100644
index 0000000..1699bba
--- /dev/null
+++ b/tests/exynos/exynos_fimg2d_perf.c
@@ -0,0 +1,327 @@ 
+/*
+ * Copyright (C) 2015 - Tobias Jakobi
+ *
+ * This is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * It is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * along with it. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <getopt.h>
+#include <errno.h>
+
+#include <xf86drm.h>
+
+#include "exynos_drm.h"
+#include "exynos_drmif.h"
+#include "exynos_fimg2d.h"
+
+static int output_mathematica = 0; /* set to 1 by -M: results are printed as a {pixels,time} list */
+
+/*
+ * Measure fast solid color clears, one g2d_solid_fill() per g2d_exec().
+ * Position, size and color of each region are random. The clock starts
+ * after g2d_solid_fill() returns, so only g2d_exec() is timed; the result
+ * is in nanoseconds. buf_width/buf_height must be >= 2. Returns 0, or the
+ * first non-zero g2d_solid_fill()/g2d_exec() result, which ends the run.
+ */
+static int fimg2d_perf_simple(struct exynos_bo *bo, struct g2d_context *ctx,
+			unsigned buf_width, unsigned buf_height, unsigned iterations)
+{
+	struct timespec tspec = { 0 };
+	struct g2d_image img = { 0 };
+	unsigned long long g2d_time;
+	unsigned i;
+	int ret = 0;
+
+	img.width = buf_width;
+	img.height = buf_height;
+	img.stride = buf_width * 4;
+	img.color_mode = G2D_COLOR_FMT_ARGB8888 | G2D_ORDER_AXRGB;
+	img.buf_type = G2D_IMGBUF_GEM;
+	img.bo[0] = bo->handle;
+
+	srand(time(NULL));
+
+	printf("starting simple G2D performance test\n");
+	printf("buffer width = %u, buffer height = %u, iterations = %u\n",
+		buf_width, buf_height, iterations);
+
+	if (output_mathematica)
+		putchar('{');
+
+	for (i = 0; i < iterations; ++i) {
+		unsigned x, y, w, h;
+		/* Random origin, but never in the last column/row. */
+		x = rand() % buf_width;
+		y = rand() % buf_height;
+		if (x == (buf_width - 1))
+			x -= 1;
+		if (y == (buf_height - 1))
+			y -= 1;
+
+		/* Random extent that stays inside the buffer, at least 1x1. */
+		w = rand() % (buf_width - x);
+		h = rand() % (buf_height - y);
+		if (w == 0) w = 1;
+		if (h == 0) h = 1;
+
+		img.color = rand();
+		ret = g2d_solid_fill(ctx, &img, x, y, w, h);
+		clock_gettime(CLOCK_MONOTONIC, &tspec);
+		if (ret == 0)
+			ret = g2d_exec(ctx);
+
+		if (ret != 0) {
+			fprintf(stderr, "error: iteration %u failed (x = %u, y = %u, w = %u, h = %u)\n",
+				i, x, y, w, h);
+			break;
+		} else {
+			struct timespec end = { 0 };
+			clock_gettime(CLOCK_MONOTONIC, &end);
+			g2d_time = (end.tv_sec - tspec.tv_sec) * 1000000000ULL;
+			g2d_time += (end.tv_nsec - tspec.tv_nsec);
+			if (output_mathematica) {
+				if (i != 0) putchar(',');
+				printf("{%u,%llu}", w * h, g2d_time);
+			} else {
+				printf("num_pixels = %u, nsecs = %llu\n", w * h, g2d_time);
+			}
+		}
+	}
+
+	if (output_mathematica)
+		printf("}\n");
+
+	return ret;
+}
+
+/*
+ * Measure fast solid color clears with 'batch' g2d_solid_fill() calls
+ * issued before each g2d_exec(). Regions and colors are random; the clock
+ * starts after the last fill call, so only g2d_exec() is timed (in
+ * nanoseconds). buf_width/buf_height must be >= 2. Returns 0, -ENOMEM if
+ * the image array cannot be allocated, or the first non-zero fill/exec result.
+ */
+static int fimg2d_perf_multi(struct exynos_bo *bo, struct g2d_context *ctx,
+			unsigned buf_width, unsigned buf_height, unsigned iterations, unsigned batch)
+{
+	struct timespec tspec = { 0 };
+	struct g2d_image *images;
+	unsigned long long g2d_time;
+	unsigned i, j;
+	int ret = 0;
+
+	images = calloc(batch, sizeof *images);
+	if (images == NULL) {
+		fprintf(stderr, "error: failed to allocate G2D images.\n");
+		return -ENOMEM;
+	}
+
+	/* Every image describes the same buffer object. */
+	for (i = 0; i < batch; ++i) {
+		images[i].width = buf_width;
+		images[i].height = buf_height;
+		images[i].stride = buf_width * 4;
+		images[i].color_mode = G2D_COLOR_FMT_ARGB8888 | G2D_ORDER_AXRGB;
+		images[i].buf_type = G2D_IMGBUF_GEM;
+		images[i].bo[0] = bo->handle;
+	}
+
+	srand(time(NULL));
+
+	printf("starting multi G2D performance test (batch size = %u)\n", batch);
+	printf("buffer width = %u, buffer height = %u, iterations = %u\n",
+		buf_width, buf_height, iterations);
+
+	if (output_mathematica)
+		putchar('{');
+
+	for (i = 0; i < iterations; ++i) {
+		unsigned num_pixels = 0;
+
+		for (j = 0; j < batch; ++j) {
+			unsigned x, y, w, h;
+			/* Random origin, but never in the last column/row. */
+			x = rand() % buf_width;
+			y = rand() % buf_height;
+			if (x == (buf_width - 1))
+				x -= 1;
+			if (y == (buf_height - 1))
+				y -= 1;
+
+			/* Random extent that stays inside the buffer, at least 1x1. */
+			w = rand() % (buf_width - x);
+			h = rand() % (buf_height - y);
+			if (w == 0) w = 1;
+			if (h == 0) h = 1;
+
+			images[j].color = rand();
+			num_pixels += w * h;
+			ret = g2d_solid_fill(ctx, &images[j], x, y, w, h);
+			if (ret != 0)
+				break;
+		}
+
+		clock_gettime(CLOCK_MONOTONIC, &tspec);
+		if (ret == 0)
+			ret = g2d_exec(ctx);
+
+		if (ret != 0) {
+			fprintf(stderr, "error: iteration %u failed (num_pixels = %u)\n", i, num_pixels);
+			break;
+		} else {
+			struct timespec end = { 0 };
+			clock_gettime(CLOCK_MONOTONIC, &end);
+			g2d_time = (end.tv_sec - tspec.tv_sec) * 1000000000ULL;
+			g2d_time += (end.tv_nsec - tspec.tv_nsec);
+			if (output_mathematica) {
+				if (i != 0) putchar(',');
+				printf("{%u,%llu}", num_pixels, g2d_time);
+			} else {
+				printf("num_pixels = %u, nsecs = %llu\n", num_pixels, g2d_time);
+			}
+		}
+	}
+
+	if (output_mathematica)
+		printf("}\n");
+
+	free(images);
+	return ret;
+}
+
+/* Print the option summary to stderr and exit with a failure status. */
+static void usage(const char *name)
+{
+	fprintf(stderr, "usage: %s [-ibwhM]\n\n", name);
+	fprintf(stderr, "\t-i <number of iterations>\n");
+	fprintf(stderr, "\t-b <size of a batch> (default = 3)\n\n");
+
+	fprintf(stderr, "\t-w <buffer width> (default = 4096)\n");
+	fprintf(stderr, "\t-h <buffer height> (default = 4096)\n\n");
+
+	fprintf(stderr, "\t-M <enable Mathematica styled output>\n");
+
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Parse the command line, open the exynos DRM device, create a G2D context
+ * and a buffer object of bufw * bufh * 4 bytes, then run the simple test and,
+ * if that succeeded, the multi test. Returns 0 on success, non-zero otherwise.
+ */
+int main(int argc, char **argv)
+{
+	int fd, ret, c, parsefail;
+
+	struct exynos_device *dev;
+	struct g2d_context *ctx;
+	struct exynos_bo *bo;
+
+	unsigned int iters = 0, batch = 3;
+	unsigned int bufw = 4096, bufh = 4096;
+
+	ret = 0;
+	parsefail = 0;
+
+	while ((c = getopt(argc, argv, "i:b:w:h:M")) != -1) {
+		switch (c) {
+		case 'i':
+			if (sscanf(optarg, "%u", &iters) != 1)
+				parsefail = 1;
+			break;
+		case 'b':
+			if (sscanf(optarg, "%u", &batch) != 1)
+				parsefail = 1;
+			break;
+		case 'w':
+			if (sscanf(optarg, "%u", &bufw) != 1)
+				parsefail = 1;
+			break;
+		case 'h':
+			if (sscanf(optarg, "%u", &bufh) != 1)
+				parsefail = 1;
+			break;
+		case 'M':
+			output_mathematica = 1;
+			break;
+		default:
+			parsefail = 1;
+			break;
+		}
+	}
+
+	if (parsefail || (argc == 1) || (iters == 0))
+		usage(argv[0]);
+
+	if (bufw < 2 || bufw > 4096 || bufh < 2 || bufh > 4096) {
+		fprintf(stderr, "error: buffer width/height should be in the range 2 to 4096.\n");
+		ret = -1;
+		goto out;
+	}
+
+	/* batch == 0 leaves fimg2d_perf_multi() with nothing to queue. */
+	if (batch == 0) {
+		fprintf(stderr, "error: batch size should be non-zero.\n");
+		ret = -1;
+		goto out;
+	}
+
+	fd = drmOpen("exynos", NULL);
+	if (fd < 0) {
+		fprintf(stderr, "error: failed to open drm\n");
+		ret = -1;
+		goto out;
+	}
+
+	dev = exynos_device_create(fd);
+	if (dev == NULL) {
+		fprintf(stderr, "error: failed to create device\n");
+		ret = -2;
+		goto fail;
+	}
+
+	ctx = g2d_init(fd);
+	if (ctx == NULL) {
+		fprintf(stderr, "error: failed to init G2D\n");
+		ret = -3;
+		goto g2d_fail;
+	}
+
+	bo = exynos_bo_create(dev, bufw * bufh * 4, 0);
+	if (bo == NULL) {
+		fprintf(stderr, "error: failed to create bo\n");
+		ret = -4;
+		goto bo_fail;
+	}
+
+	ret = fimg2d_perf_simple(bo, ctx, bufw, bufh, iters);
+
+	if (ret == 0)
+		ret = fimg2d_perf_multi(bo, ctx, bufw, bufh, iters, batch);
+
+	exynos_bo_destroy(bo);
+
+bo_fail:
+	g2d_fini(ctx);
+
+g2d_fail:
+	exynos_device_destroy(dev);
+
+fail:
+	drmClose(fd);
+
+out:
+	return ret;
+}