Message ID | 1395227586-18623-4-git-send-email-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Mar 19, 2014 at 04:13:06AM -0700, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > This adds a small benchmark for the new userptr functionality. > > Apart from basic surface creation and destruction, also tested is the > impact of having userptr surfaces in the process address space. Reason > for that is the impact of MMU notifiers on common address space > operations like munmap() which is per process. > > v2: > * Moved to benchmarks. > * Added pointer read/write tests. > * Changed output to say iterations per second instead of > operations per second. > * Multiply result by batch size for multi-create* tests > for a more comparable number with create-destroy test. > > v3: > * Fixed ioctl detection on kernels without MMU_NOTIFIERs. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > --- > Android.mk | 3 +- > benchmarks/.gitignore | 1 + > benchmarks/Android.mk | 36 +++ > benchmarks/Makefile.am | 7 +- > benchmarks/Makefile.sources | 6 + > benchmarks/gem_userptr_benchmark.c | 513 +++++++++++++++++++++++++++++++++++++ > 6 files changed, 558 insertions(+), 8 deletions(-) > create mode 100644 benchmarks/Android.mk > create mode 100644 benchmarks/Makefile.sources > create mode 100644 benchmarks/gem_userptr_benchmark.c > > diff --git a/Android.mk b/Android.mk > index 8aeb2d4..0c969b8 100644 > --- a/Android.mk > +++ b/Android.mk > @@ -1,2 +1 @@ > -include $(call all-named-subdir-makefiles, lib tests tools) > - > +include $(call all-named-subdir-makefiles, lib tests tools benchmarks) > diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore > index ddea6f7..09e5bd8 100644 > --- a/benchmarks/.gitignore > +++ b/benchmarks/.gitignore > @@ -1,3 +1,4 @@ > +gem_userptr_benchmark > intel_upload_blit_large > intel_upload_blit_large_gtt > intel_upload_blit_large_map > diff --git a/benchmarks/Android.mk b/benchmarks/Android.mk > new file mode 100644 > index 0000000..5bb8ef5 > --- /dev/null > +++ b/benchmarks/Android.mk > @@ -0,0 +1,36 @@ > +LOCAL_PATH := $(call my-dir) > + > +include $(LOCAL_PATH)/Makefile.sources > + > +#================# > + > +define add_benchmark > + include $(CLEAR_VARS) > + > + LOCAL_SRC_FILES := $1.c > + > + LOCAL_CFLAGS += -DHAVE_STRUCT_SYSINFO_TOTALRAM > + LOCAL_CFLAGS += -DANDROID -UNDEBUG -include "check-ndebug.h" > + LOCAL_CFLAGS += -std=c99 > + # FIXME: drop once Bionic correctly annotates "noreturn" on pthread_exit > + LOCAL_CFLAGS += -Wno-error=return-type > + # Excessive complaining for established cases. Rely on the Linux version warnings. > + LOCAL_CFLAGS += -Wno-sign-compare > + > + LOCAL_MODULE := $1 > + LOCAL_MODULE_TAGS := optional > + > + LOCAL_STATIC_LIBRARIES := libintel_gpu_tools > + > + LOCAL_SHARED_LIBRARIES := libpciaccess \ > + libdrm \ > + libdrm_intel > + > + include $(BUILD_EXECUTABLE) > +endef > + > +#================# > + > +benchmark_list := $(bin_PROGRAMS) > + > +$(foreach item,$(benchmark_list),$(eval $(call add_benchmark,$(item)))) > diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am > index e2ad784..d173bf4 100644 > --- a/benchmarks/Makefile.am > +++ b/benchmarks/Makefile.am > @@ -1,9 +1,4 @@ > - > -bin_PROGRAMS = \ > - intel_upload_blit_large \ > - intel_upload_blit_large_gtt \ > - intel_upload_blit_large_map \ > - intel_upload_blit_small > +include Makefile.sources > > AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib > AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) > diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources > new file mode 100644 > index 0000000..fd6c107 > --- /dev/null > +++ b/benchmarks/Makefile.sources > @@ -0,0 +1,6 @@ > +bin_PROGRAMS = \ > + intel_upload_blit_large \ > + intel_upload_blit_large_gtt \ > + intel_upload_blit_large_map \ > + intel_upload_blit_small \ > + gem_userptr_benchmark You might split the makefile cleanup aspect of this into a separate patch, but I'm fine either way. > diff --git a/benchmarks/gem_userptr_benchmark.c b/benchmarks/gem_userptr_benchmark.c > new file mode 100644 > index 0000000..218f6f1 > --- /dev/null > +++ b/benchmarks/gem_userptr_benchmark.c > @@ -0,0 +1,513 @@ > +/* > + * Copyright © 2014 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + * > + * Authors: > + * Tvrtko Ursulin <tvrtko.ursulin@intel.com> > + * > + */ > + > +/** @file gem_userptr_benchmark.c > + * > + * Benchmark the userptr code and impact of having userptr surfaces > + * in process address space on some normal operations. > + * > + */ > + > +#include <stdlib.h> > +#include <stdio.h> > +#include <string.h> > +#include <fcntl.h> > +#include <inttypes.h> > +#include <errno.h> > +#include <sys/stat.h> > +#include <sys/time.h> > +#include <sys/mman.h> > +#include "drm.h" > +#include "i915_drm.h" > +#include "drmtest.h" > +#include "intel_bufmgr.h" > +#include "intel_batchbuffer.h" > +#include "intel_gpu_tools.h" > + > +#define WIDTH 128 > +#define HEIGHT 128 > +#define PAGE_SIZE 4096 > + > +#define LOCAL_I915_GEM_USERPTR 0x34 > +#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr) > +struct local_i915_gem_userptr { > + uint64_t user_ptr; > + uint64_t user_size; > + uint32_t flags; > +#define I915_USERPTR_READ_ONLY (1<<0) > +#define I915_USERPTR_UNSYNCHRONIZED (1<<31) > + uint32_t handle; > +}; > + > +static uint32_t userptr_flags = I915_USERPTR_UNSYNCHRONIZED; > + > +static uint32_t linear[WIDTH*HEIGHT]; I may have missed it, but I don't think we use this variable except to do sizeof(linear). If that's the case, a constant for the buffer size might make the code more readable. > + > +static void gem_userptr_test_unsynchronized(void) > +{ > + userptr_flags = I915_USERPTR_UNSYNCHRONIZED; > +} > + > +static void gem_userptr_test_synchronized(void) > +{ > + userptr_flags = 0; > +} > + > +static int gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t *handle) > +{ > + struct local_i915_gem_userptr userptr; > + int ret; > + > + userptr.user_ptr = (uintptr_t)ptr; > + userptr.user_size = size; > + userptr.flags = userptr_flags; > + if (read_only) > + userptr.flags |= I915_USERPTR_READ_ONLY; > + > + ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr); > + if (ret) > + ret = errno; > + igt_skip_on_f(ret == ENODEV && > + (userptr_flags & I915_USERPTR_UNSYNCHRONIZED) == 0, > + "Skipping, synchronized mappings with no kernel CONFIG_MMU_NOTIFIER?"); > + if (ret == 0) > + *handle = userptr.handle; > + > + return ret; > +} > + > +static uint32_t > +create_userptr(int fd, uint32_t val, uint32_t *ptr) > +{ > + uint32_t handle; > + int i, ret; > + > + ret = gem_userptr(fd, ptr, sizeof(linear), 0, &handle); > + igt_assert(ret == 0); > + igt_assert(handle != 0); > + > + /* Fill the BO with dwords starting at val */ > + for (i = 0; i < WIDTH*HEIGHT; i++) > + ptr[i] = val++; > + > + return handle; > +} I don't see that this function is used in this test. > + > +static void **handle_ptr_map; > +static unsigned int num_handle_ptr_map; I'd prefer that we explicitly initialize at least num_handle_ptr_map. > + > +static void add_handle_ptr(uint32_t handle, void *ptr) > +{ > + if (handle >= num_handle_ptr_map) { > + handle_ptr_map = realloc(handle_ptr_map, > + (handle + 1000) * sizeof(void*)); > + num_handle_ptr_map = handle + 1000; > + } > + > + handle_ptr_map[handle] = ptr; > +} > + > +static void *get_handle_ptr(uint32_t handle) > +{ > + return handle_ptr_map[handle]; > +} > + > +static void free_handle_ptr(uint32_t handle) > +{ > + igt_assert(handle < num_handle_ptr_map); > + igt_assert(handle_ptr_map[handle]); > + > + free(handle_ptr_map[handle]); > + handle_ptr_map[handle] = NULL; > +} > + > +static uint32_t create_userptr_bo(int fd, int size) > +{ > + void *ptr; > + uint32_t handle; > + int ret; > + > + ret = posix_memalign(&ptr, PAGE_SIZE, size); > + igt_assert(ret == 0); > + > + ret = gem_userptr(fd, (uint32_t *)ptr, size, 0, &handle); > + igt_assert(ret == 0); > + add_handle_ptr(handle, ptr); > + > + return handle; > +} > + > +static void free_userptr_bo(int fd, uint32_t handle) > +{ > + gem_close(fd, handle); > + free_handle_ptr(handle); > +} > + > +static int has_userptr(int fd) > +{ > + uint32_t handle = 0; > + void *ptr; > + uint32_t oldflags; > + int ret; > + > + assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0); > + oldflags = userptr_flags; > + gem_userptr_test_unsynchronized(); > + ret = gem_userptr(fd, ptr, PAGE_SIZE, 0, &handle); > + userptr_flags = oldflags; > + if (ret != 0) { > + free(ptr); > + return 0; > + } > + > + gem_close(fd, handle); > + free(ptr); > + > + return handle != 0; > +} > + > +static const unsigned int nr_bos[] = {0, 1, 10, 100, 1000}; > +static const unsigned int test_duration_sec = 3; > + > +static volatile unsigned int run_test; > + > +static void alarm_handler(int sig) > +{ > + assert(run_test == 1); > + run_test = 0; > +} > + > +static void start_test(unsigned int duration) > +{ > + run_test = 1; > + if (duration == 0) > + duration = test_duration_sec; > + signal(SIGALRM, alarm_handler); > + alarm(duration); > +} > + > +static void exchange_ptr(void *array, unsigned i, unsigned j) > +{ > + void **arr, *tmp; > + arr = (void **)array; > + > + tmp = arr[i]; > + arr[i] = arr[j]; > + arr[j] = tmp; > +} > + > +static void test_malloc_free(int random) > +{ > + unsigned long iter = 0; > + unsigned int i, tot = 1000; > + void *ptr[tot]; > + > + start_test(test_duration_sec); > + > + while (run_test) { > + for (i = 0; i < tot; i++) { > + ptr[i] = malloc(1000); > + assert(ptr[i]); > + } > + if (random) > + igt_permute_array(ptr, tot, exchange_ptr); > + for (i = 0; i < tot; i++) > + free(ptr[i]); > + iter++; > + } > + > + printf("%8lu iter/s\n", iter / test_duration_sec); > +} > + > +static void test_malloc_realloc_free(int random) > +{ > + unsigned long iter = 0; > + unsigned int i, tot = 1000; > + void *ptr[tot]; > + > + start_test(test_duration_sec); > + > + while (run_test) { > + for (i = 0; i < tot; i++) { > + ptr[i] = malloc(1000); > + assert(ptr[i]); > + } > + if (random) > + igt_permute_array(ptr, tot, exchange_ptr); > + for (i = 0; i < tot; i++) { > + ptr[i] = realloc(ptr[i], 2000); > + assert(ptr[i]); > + } > + if (random) > + igt_permute_array(ptr, tot, exchange_ptr); > + for (i = 0; i < tot; i++) > + free(ptr[i]); > + iter++; > + } > + > + printf("%8lu iter/s\n", iter / test_duration_sec); > +} > + > +static void test_mmap_unmap(int random) > +{ > + unsigned long iter = 0; > + unsigned int i, tot = 1000; > + void *ptr[tot]; > + > + start_test(test_duration_sec); > + > + while (run_test) { > + for (i = 0; i < tot; i++) { > + ptr[i] = mmap(NULL, 1000, PROT_READ | PROT_WRITE, > + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); > + assert(ptr[i] != MAP_FAILED); > + } > + if (random) > + igt_permute_array(ptr, tot, exchange_ptr); > + for (i = 0; i < tot; i++) > + munmap(ptr[i], 1000); > + iter++; > + } > + > + printf("%8lu iter/s\n", iter / test_duration_sec); > +} > + > +static void test_ptr_read(void *ptr) > +{ > + unsigned long iter = 0; > + volatile unsigned long *p; > + unsigned long i, loops; > + register unsigned long v; > + > + loops = sizeof(linear) / sizeof(unsigned long) / 4; > + > + start_test(test_duration_sec); > + > + while (run_test) { > + p = (unsigned long *)ptr; > + for (i = 0; i < loops; i++) { > + v = *p++; > + v = *p++; > + v = *p++; > + v = *p++; > + } > + iter++; > + } > + > + printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000000); > +} > + > +static void test_ptr_write(void *ptr) > +{ > + unsigned long iter = 0; > + volatile unsigned long *p; > + register unsigned long i, loops; > + > + loops = sizeof(linear) / sizeof(unsigned long) / 4; > + > + start_test(test_duration_sec); > + > + while (run_test) { > + p = (unsigned long *)ptr; > + for (i = 0; i < loops; i++) { > + *p++ = i; > + *p++ = i; > + *p++ = i; > + *p++ = i; > + } > + iter++; > + } > + > + printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000000); > +} > + > +static void test_impact(int fd) > +{ > + unsigned int total = sizeof(nr_bos) / sizeof(nr_bos[0]); > + unsigned int subtest, i; > + uint32_t handles[nr_bos[total-1]]; > + void *ptr; > + char buffer[sizeof(linear)]; > + > + for (subtest = 0; subtest < total; subtest++) { > + for (i = 0; i < nr_bos[subtest]; i++) > + handles[i] = create_userptr_bo(fd, sizeof(linear)); > + > + if (nr_bos[subtest] > 0) > + ptr = get_handle_ptr(handles[0]); > + else > + ptr = buffer; > + > + printf("ptr-read, %5u bos = ", nr_bos[subtest]); > + test_ptr_read(ptr); > + > + printf("ptr-write %5u bos = ", nr_bos[subtest]); > + test_ptr_write(ptr); > + > + printf("malloc-free, %5u bos = ", nr_bos[subtest]); > + test_malloc_free(0); > + printf("malloc-free-random %5u bos = ", nr_bos[subtest]); > + test_malloc_free(1); > + > + printf("malloc-realloc-free, %5u bos = ", nr_bos[subtest]); > + test_malloc_realloc_free(0); > + printf("malloc-realloc-free-random, %5u bos = ", nr_bos[subtest]); > + test_malloc_realloc_free(1); > + > + printf("mmap-unmap, %5u bos = ", nr_bos[subtest]); > + test_mmap_unmap(0); > + printf("mmap-unmap-random, %5u bos = ", nr_bos[subtest]); > + test_mmap_unmap(1); > + > + for (i = 0; i < nr_bos[subtest]; i++) > + free_userptr_bo(fd, handles[i]); > + } > +} > + > +static void test_single(int fd) > +{ > + char *ptr, *bo_ptr; > + uint32_t handle = 0; > + unsigned long iter = 0; > + int ret; > + unsigned long map_size = sizeof(linear) + PAGE_SIZE - 1; > + > + ptr = mmap(NULL, map_size, PROT_READ | PROT_WRITE, > + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); > + assert(ptr != MAP_FAILED); > + > + bo_ptr = (char *)(((unsigned long)ptr + (PAGE_SIZE - 1)) > + & ~(PAGE_SIZE - 1)); You might add an ALIGN macro in this file or a suitable header. A couple of .c files in lib/ individually define one already. Brad > + > + start_test(test_duration_sec); > + > + while (run_test) { > + ret = gem_userptr(fd, bo_ptr, sizeof(linear), 0, &handle); > + assert(ret == 0); > + gem_close(fd, handle); > + iter++; > + } > + > + munmap(ptr, map_size); > + > + printf("%8lu iter/s\n", iter / test_duration_sec); > +} > + > +static void test_multiple(int fd, unsigned int batch, int random) > +{ > + char *ptr, *bo_ptr; > + uint32_t handles[10000]; > + int map[10000]; > + unsigned long iter = 0; > + int ret; > + int i; > + unsigned long map_size = batch * sizeof(linear) + PAGE_SIZE - 1; > + > + assert(batch < (sizeof(handles) / sizeof(handles[0]))); > + assert(batch < (sizeof(map) / sizeof(map[0]))); > + > + ptr = mmap(NULL, map_size, PROT_READ | PROT_WRITE, > + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); > + assert(ptr != MAP_FAILED); > + > + bo_ptr = (char *)(((unsigned long)ptr + (PAGE_SIZE - 1)) > + & ~(PAGE_SIZE - 1)); > + > + for (i = 0; i < batch; i++) > + map[i] = i; > + > + start_test(test_duration_sec); > + > + while (run_test) { > + if (random) > + igt_permute_array(map, batch, igt_exchange_int); > + for (i = 0; i < batch; i++) { > + ret = gem_userptr(fd, bo_ptr + map[i] * sizeof(linear), > + sizeof(linear), > + 0, &handles[i]); > + assert(ret == 0); > + } > + if (random) > + igt_permute_array(map, batch, igt_exchange_int); > + for (i = 0; i < batch; i++) > + gem_close(fd, handles[map[i]]); > + iter++; > + } > + > + munmap(ptr, map_size); > + > + printf("%8lu iter/s\n", iter * batch / test_duration_sec); > +} > + > +static void test_userptr(int fd) > +{ > + printf("create-destroy = "); > + test_single(fd); > + > + printf("multi-create-destroy = "); > + test_multiple(fd, 100, 0); > + > + printf("multi-create-destroy-random = "); > + test_multiple(fd, 100, 1); > +} > + > +int main(int argc, char **argv) > +{ > + int fd = -1, ret; > + > + igt_skip_on_simulation(); > + > + igt_subtest_init(argc, argv); > + > + fd = drm_open_any(); > + igt_assert(fd >= 0); > + > + ret = has_userptr(fd); > + igt_skip_on_f(ret == 0, "No userptr support - %s (%d)\n", > + strerror(errno), ret); > + > + > + gem_userptr_test_unsynchronized(); > + > + igt_subtest("userptr-unsync") > + test_userptr(fd); > + > + igt_subtest("userptr-impact-unsync") > + test_impact(fd); > + > + gem_userptr_test_synchronized(); > + > + igt_subtest("userptr-sync") > + test_userptr(fd); > + > + igt_subtest("userptr-impact-sync") > + test_impact(fd); > + > + igt_exit(); > + > + return 0; > +} > -- > 1.9.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Thu, Apr 17, 2014 at 04:18:46PM -0700, Volkin, Bradley D wrote: > On Wed, Mar 19, 2014 at 04:13:06AM -0700, Tvrtko Ursulin wrote: > > + bo_ptr = (char *)(((unsigned long)ptr + (PAGE_SIZE - 1)) > > + & ~(PAGE_SIZE - 1)); > > You might add an ALIGN macro in this file or a suitable header. A couple of > .c files in lib/ individually define one already. lib/drmtest.h (with the gtkdoc for it include) is the current grab-bag for such random useful pieces. We already have an ARRAY_SIZE in there. -Daniel
Hi Brad, On 04/18/2014 12:18 AM, Volkin, Bradley D wrote: > On Wed, Mar 19, 2014 at 04:13:06AM -0700, Tvrtko Ursulin wrote: >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> >> This adds a small benchmark for the new userptr functionality. >> >> Apart from basic surface creation and destruction, also tested is the >> impact of having userptr surfaces in the process address space. Reason >> for that is the impact of MMU notifiers on common address space >> operations like munmap() which is per process. >> >> v2: >> * Moved to benchmarks. >> * Added pointer read/write tests. >> * Changed output to say iterations per second instead of >> operations per second. >> * Multiply result by batch size for multi-create* tests >> for a more comparable number with create-destroy test. >> >> v3: >> * Fixed ioctl detection on kernels without MMU_NOTIFIERs. >> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> Cc: Chris Wilson <chris@chris-wilson.co.uk> >> --- >> Android.mk | 3 +- >> benchmarks/.gitignore | 1 + >> benchmarks/Android.mk | 36 +++ >> benchmarks/Makefile.am | 7 +- >> benchmarks/Makefile.sources | 6 + >> benchmarks/gem_userptr_benchmark.c | 513 +++++++++++++++++++++++++++++++++++++ >> 6 files changed, 558 insertions(+), 8 deletions(-) >> create mode 100644 benchmarks/Android.mk >> create mode 100644 benchmarks/Makefile.sources >> create mode 100644 benchmarks/gem_userptr_benchmark.c >> >> diff --git a/Android.mk b/Android.mk >> index 8aeb2d4..0c969b8 100644 >> --- a/Android.mk >> +++ b/Android.mk >> @@ -1,2 +1 @@ >> -include $(call all-named-subdir-makefiles, lib tests tools) >> - >> +include $(call all-named-subdir-makefiles, lib tests tools benchmarks) >> diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore >> index ddea6f7..09e5bd8 100644 >> --- a/benchmarks/.gitignore >> +++ b/benchmarks/.gitignore >> @@ -1,3 +1,4 @@ >> +gem_userptr_benchmark >> intel_upload_blit_large >> intel_upload_blit_large_gtt >> intel_upload_blit_large_map >> diff --git a/benchmarks/Android.mk b/benchmarks/Android.mk >> new file mode 100644 >> index 0000000..5bb8ef5 >> --- /dev/null >> +++ b/benchmarks/Android.mk >> @@ -0,0 +1,36 @@ >> +LOCAL_PATH := $(call my-dir) >> + >> +include $(LOCAL_PATH)/Makefile.sources >> + >> +#================# >> + >> +define add_benchmark >> + include $(CLEAR_VARS) >> + >> + LOCAL_SRC_FILES := $1.c >> + >> + LOCAL_CFLAGS += -DHAVE_STRUCT_SYSINFO_TOTALRAM >> + LOCAL_CFLAGS += -DANDROID -UNDEBUG -include "check-ndebug.h" >> + LOCAL_CFLAGS += -std=c99 >> + # FIXME: drop once Bionic correctly annotates "noreturn" on pthread_exit >> + LOCAL_CFLAGS += -Wno-error=return-type >> + # Excessive complaining for established cases. Rely on the Linux version warnings. >> + LOCAL_CFLAGS += -Wno-sign-compare >> + >> + LOCAL_MODULE := $1 >> + LOCAL_MODULE_TAGS := optional >> + >> + LOCAL_STATIC_LIBRARIES := libintel_gpu_tools >> + >> + LOCAL_SHARED_LIBRARIES := libpciaccess \ >> + libdrm \ >> + libdrm_intel >> + >> + include $(BUILD_EXECUTABLE) >> +endef >> + >> +#================# >> + >> +benchmark_list := $(bin_PROGRAMS) >> + >> +$(foreach item,$(benchmark_list),$(eval $(call add_benchmark,$(item)))) >> diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am >> index e2ad784..d173bf4 100644 >> --- a/benchmarks/Makefile.am >> +++ b/benchmarks/Makefile.am >> @@ -1,9 +1,4 @@ >> - >> -bin_PROGRAMS = \ >> - intel_upload_blit_large \ >> - intel_upload_blit_large_gtt \ >> - intel_upload_blit_large_map \ >> - intel_upload_blit_small >> +include Makefile.sources >> >> AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib >> AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) >> diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources >> new file mode 100644 >> index 0000000..fd6c107 >> --- /dev/null >> +++ b/benchmarks/Makefile.sources >> @@ -0,0 +1,6 @@ >> +bin_PROGRAMS = \ >> + intel_upload_blit_large \ >> + intel_upload_blit_large_gtt \ >> + intel_upload_blit_large_map \ >> + intel_upload_blit_small \ >> + gem_userptr_benchmark > > You might split the makefile cleanup aspect of this into a separate > patch, but I'm fine either way. Good idea, will try to squeeze that in. >> diff --git a/benchmarks/gem_userptr_benchmark.c b/benchmarks/gem_userptr_benchmark.c >> new file mode 100644 >> index 0000000..218f6f1 >> --- /dev/null >> +++ b/benchmarks/gem_userptr_benchmark.c >> @@ -0,0 +1,513 @@ >> +/* >> + * Copyright © 2014 Intel Corporation >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice (including the next >> + * paragraph) shall be included in all copies or substantial portions of the >> + * Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS >> + * IN THE SOFTWARE. >> + * >> + * Authors: >> + * Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> + * >> + */ >> + >> +/** @file gem_userptr_benchmark.c >> + * >> + * Benchmark the userptr code and impact of having userptr surfaces >> + * in process address space on some normal operations. >> + * >> + */ >> + >> +#include <stdlib.h> >> +#include <stdio.h> >> +#include <string.h> >> +#include <fcntl.h> >> +#include <inttypes.h> >> +#include <errno.h> >> +#include <sys/stat.h> >> +#include <sys/time.h> >> +#include <sys/mman.h> >> +#include "drm.h" >> +#include "i915_drm.h" >> +#include "drmtest.h" >> +#include "intel_bufmgr.h" >> +#include "intel_batchbuffer.h" >> +#include "intel_gpu_tools.h" >> + >> +#define WIDTH 128 >> +#define HEIGHT 128 >> +#define PAGE_SIZE 4096 >> + >> +#define LOCAL_I915_GEM_USERPTR 0x34 >> +#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr) >> +struct local_i915_gem_userptr { >> + uint64_t user_ptr; >> + uint64_t user_size; >> + uint32_t flags; >> +#define I915_USERPTR_READ_ONLY (1<<0) >> +#define I915_USERPTR_UNSYNCHRONIZED (1<<31) >> + uint32_t handle; >> +}; >> + >> +static uint32_t userptr_flags = I915_USERPTR_UNSYNCHRONIZED; >> + >> +static uint32_t linear[WIDTH*HEIGHT]; > > I may have missed it, but I don't think we use this variable except > to do sizeof(linear). If that's the case, a constant for the buffer > size might make the code more readable. You're right, too much copy & paste. >> + >> +static void gem_userptr_test_unsynchronized(void) >> +{ >> + userptr_flags = I915_USERPTR_UNSYNCHRONIZED; >> +} >> + >> +static void gem_userptr_test_synchronized(void) >> +{ >> + userptr_flags = 0; >> +} >> + >> +static int gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t *handle) >> +{ >> + struct local_i915_gem_userptr userptr; >> + int ret; >> + >> + userptr.user_ptr = (uintptr_t)ptr; >> + userptr.user_size = size; >> + userptr.flags = userptr_flags; >> + if (read_only) >> + userptr.flags |= I915_USERPTR_READ_ONLY; >> + >> + ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr); >> + if (ret) >> + ret = errno; >> + igt_skip_on_f(ret == ENODEV && >> + (userptr_flags & I915_USERPTR_UNSYNCHRONIZED) == 0, >> + "Skipping, synchronized mappings with no kernel CONFIG_MMU_NOTIFIER?"); >> + if (ret == 0) >> + *handle = userptr.handle; >> + >> + return ret; >> +} >> + >> +static uint32_t >> +create_userptr(int fd, uint32_t val, uint32_t *ptr) >> +{ >> + uint32_t handle; >> + int i, ret; >> + >> + ret = gem_userptr(fd, ptr, sizeof(linear), 0, &handle); >> + igt_assert(ret == 0); >> + igt_assert(handle != 0); >> + >> + /* Fill the BO with dwords starting at val */ >> + for (i = 0; i < WIDTH*HEIGHT; i++) >> + ptr[i] = val++; >> + >> + return handle; >> +} > > I don't see that this function is used in this test. You are right, artifact of this file starting from gem_userptr_blits.c and then removing stuff. Obviously not enough removed. >> + >> +static void **handle_ptr_map; >> +static unsigned int num_handle_ptr_map; > > I'd prefer that we explicitly initialize at least num_handle_ptr_map. To zero, why? >> + >> +static void add_handle_ptr(uint32_t handle, void *ptr) >> +{ >> + if (handle >= num_handle_ptr_map) { >> + handle_ptr_map = realloc(handle_ptr_map, >> + (handle + 1000) * sizeof(void*)); >> + num_handle_ptr_map = handle + 1000; >> + } >> + >> + handle_ptr_map[handle] = ptr; >> +} >> + >> +static void *get_handle_ptr(uint32_t handle) >> +{ >> + return handle_ptr_map[handle]; >> +} >> + >> +static void free_handle_ptr(uint32_t handle) >> +{ >> + igt_assert(handle < num_handle_ptr_map); >> + igt_assert(handle_ptr_map[handle]); >> + >> + free(handle_ptr_map[handle]); >> + handle_ptr_map[handle] = NULL; >> +} >> + >> +static uint32_t create_userptr_bo(int fd, int size) >> +{ >> + void *ptr; >> + uint32_t handle; >> + int ret; >> + >> + ret = posix_memalign(&ptr, PAGE_SIZE, size); >> + igt_assert(ret == 0); >> + >> + ret = gem_userptr(fd, (uint32_t *)ptr, size, 0, &handle); >> + igt_assert(ret == 0); >> + add_handle_ptr(handle, ptr); >> + >> + return handle; >> +} >> + >> +static void free_userptr_bo(int fd, uint32_t handle) >> +{ >> + gem_close(fd, handle); >> + free_handle_ptr(handle); >> +} >> + >> +static int has_userptr(int fd) >> +{ >> + uint32_t handle = 0; >> + void *ptr; >> + uint32_t oldflags; >> + int ret; >> + >> + assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0); >> + oldflags = userptr_flags; >> + gem_userptr_test_unsynchronized(); >> + ret = gem_userptr(fd, ptr, PAGE_SIZE, 0, &handle); >> + userptr_flags = oldflags; >> + if (ret != 0) { >> + free(ptr); >> + return 0; >> + } >> + >> + gem_close(fd, handle); >> + free(ptr); >> + >> + return handle != 0; >> +} >> + >> +static const unsigned int nr_bos[] = {0, 1, 10, 100, 1000}; >> +static const unsigned int test_duration_sec = 3; >> + >> +static volatile unsigned int run_test; >> + >> +static void alarm_handler(int sig) >> +{ >> + assert(run_test == 1); >> + run_test = 0; >> +} >> + >> +static void start_test(unsigned int duration) >> +{ >> + run_test = 1; >> + if (duration == 0) >> + duration = test_duration_sec; >> + signal(SIGALRM, alarm_handler); >> + alarm(duration); >> +} >> + >> +static void exchange_ptr(void *array, unsigned i, unsigned j) >> +{ >> + void **arr, *tmp; >> + arr = (void **)array; >> + >> + tmp = arr[i]; >> + arr[i] = arr[j]; >> + arr[j] = tmp; >> +} >> + >> +static void test_malloc_free(int random) >> +{ >> + unsigned long iter = 0; >> + unsigned int i, tot = 1000; >> + void *ptr[tot]; >> + >> + start_test(test_duration_sec); >> + >> + while (run_test) { >> + for (i = 0; i < tot; i++) { >> + ptr[i] = malloc(1000); >> + assert(ptr[i]); >> + } >> + if (random) >> + igt_permute_array(ptr, tot, exchange_ptr); >> + for (i = 0; i < tot; i++) >> + free(ptr[i]); >> + iter++; >> + } >> + >> + printf("%8lu iter/s\n", iter / test_duration_sec); >> +} >> + >> +static void test_malloc_realloc_free(int random) >> +{ >> + unsigned long iter = 0; >> + unsigned int i, tot = 1000; >> + void *ptr[tot]; >> + >> + start_test(test_duration_sec); >> + >> + while (run_test) { >> + for (i = 0; i < tot; i++) { >> + ptr[i] = malloc(1000); >> + assert(ptr[i]); >> + } >> + if (random) >> + igt_permute_array(ptr, tot, exchange_ptr); >> + for (i = 0; i < tot; i++) { >> + ptr[i] = realloc(ptr[i], 2000); >> + assert(ptr[i]); >> + } >> + if (random) >> + igt_permute_array(ptr, tot, exchange_ptr); >> + for (i = 0; i < tot; i++) >> + free(ptr[i]); >> + iter++; >> + } >> + >> + printf("%8lu iter/s\n", iter / test_duration_sec); >> +} >> + >> +static void test_mmap_unmap(int random) >> +{ >> + unsigned long iter = 0; >> + unsigned int i, tot = 1000; >> + void *ptr[tot]; >> + >> + start_test(test_duration_sec); >> + >> + while (run_test) { >> + for (i = 0; i < tot; i++) { >> + ptr[i] = mmap(NULL, 1000, PROT_READ | PROT_WRITE, >> + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); >> + assert(ptr[i] != MAP_FAILED); >> + } >> + if (random) >> + igt_permute_array(ptr, tot, exchange_ptr); >> + for (i = 0; i < tot; i++) >> + munmap(ptr[i], 1000); >> + iter++; >> + } >> + >> + printf("%8lu iter/s\n", iter / test_duration_sec); >> +} >> + >> +static void test_ptr_read(void *ptr) >> +{ >> + unsigned long iter = 0; >> + volatile unsigned long *p; >> + unsigned long i, loops; >> + register unsigned long v; >> + >> + loops = sizeof(linear) / sizeof(unsigned long) / 4; >> + >> + start_test(test_duration_sec); >> + >> + while (run_test) { >> + p = (unsigned long *)ptr; >> + for (i = 0; i < loops; i++) { >> + v = *p++; >> + v = *p++; >> + v = *p++; >> + v = *p++; >> + } >> + iter++; >> + } >> + >> + printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000000); >> +} >> + >> +static void test_ptr_write(void *ptr) >> +{ >> + unsigned long iter = 0; >> + volatile unsigned long *p; >> + register unsigned long i, loops; >> + >> + loops = sizeof(linear) / sizeof(unsigned long) / 4; >> + >> + start_test(test_duration_sec); >> + >> + while (run_test) { >> + p = (unsigned long *)ptr; >> + for (i = 0; i < loops; i++) { >> + *p++ = i; >> + *p++ = i; >> + *p++ = i; >> + *p++ = i; >> + } >> + iter++; >> + } >> + >> + printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000000); >> +} >> + >> +static void test_impact(int fd) >> +{ >> + unsigned int total = sizeof(nr_bos) / sizeof(nr_bos[0]); >> + unsigned int subtest, i; >> + uint32_t handles[nr_bos[total-1]]; >> + void *ptr; >> + char buffer[sizeof(linear)]; >> + >> + for (subtest = 0; subtest < total; subtest++) { >> + for (i = 0; i < nr_bos[subtest]; i++) >> + handles[i] = create_userptr_bo(fd, sizeof(linear)); >> + >> + if (nr_bos[subtest] > 0) >> + ptr = get_handle_ptr(handles[0]); >> + else >> + ptr = buffer; >> + >> + printf("ptr-read, %5u bos = ", nr_bos[subtest]); >> + test_ptr_read(ptr); >> + >> + printf("ptr-write %5u bos = ", nr_bos[subtest]); >> + test_ptr_write(ptr); >> + >> + printf("malloc-free, %5u bos = ", nr_bos[subtest]); >> + test_malloc_free(0); >> + printf("malloc-free-random %5u bos = ", nr_bos[subtest]); >> + test_malloc_free(1); >> + >> + printf("malloc-realloc-free, %5u bos = ", nr_bos[subtest]); >> + test_malloc_realloc_free(0); >> + printf("malloc-realloc-free-random, %5u bos = ", nr_bos[subtest]); >> + test_malloc_realloc_free(1); >> + >> + printf("mmap-unmap, %5u bos = ", nr_bos[subtest]); >> + test_mmap_unmap(0); >> + printf("mmap-unmap-random, %5u bos = ", nr_bos[subtest]); >> + test_mmap_unmap(1); >> + >> + for (i = 0; i < nr_bos[subtest]; i++) >> + free_userptr_bo(fd, handles[i]); >> + } >> +} >> + >> +static void test_single(int fd) >> +{ >> + char *ptr, *bo_ptr; >> + uint32_t handle = 0; >> + unsigned long iter = 0; >> + int ret; >> + unsigned long map_size = sizeof(linear) + PAGE_SIZE - 1; >> + >> + ptr = mmap(NULL, map_size, PROT_READ | PROT_WRITE, >> + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); >> + assert(ptr != MAP_FAILED); >> + >> + bo_ptr = (char *)(((unsigned long)ptr + (PAGE_SIZE - 1)) >> + & ~(PAGE_SIZE - 1)); > > You might add an ALIGN macro in this file or a suitable header. A couple of > .c files in lib/ individually define one already. OK, will add that in the patch series. Thanks for the review! Regards, Tvrtko
[snip] On Wed, Apr 23, 2014 at 06:28:54AM -0700, Tvrtko Ursulin wrote: > On 04/18/2014 12:18 AM, Volkin, Bradley D wrote: > > On Wed, Mar 19, 2014 at 04:13:06AM -0700, Tvrtko Ursulin wrote: > >> +static void **handle_ptr_map; > >> +static unsigned int num_handle_ptr_map; > > > > I'd prefer that we explicitly initialize at least num_handle_ptr_map. > > To zero, why? Partly because I just like explicitly initialized variables and partly because I forgot that static variables are *guaranteed* to be initialized to zero vs just-happen-to-be-zero :) You can leave it as is or change it, I'm fine either way. Thanks, Brad > > >> + > >> +static void add_handle_ptr(uint32_t handle, void *ptr) > >> +{ > >> + if (handle >= num_handle_ptr_map) { > >> + handle_ptr_map = realloc(handle_ptr_map, > >> + (handle + 1000) * sizeof(void*)); > >> + num_handle_ptr_map = handle + 1000; > >> + } > >> + > >> + handle_ptr_map[handle] = ptr; > >> +} > >> + > >> +static void *get_handle_ptr(uint32_t handle) > >> +{ > >> + return handle_ptr_map[handle]; > >> +} > >> + > >> +static void free_handle_ptr(uint32_t handle) > >> +{ > >> + igt_assert(handle < num_handle_ptr_map); > >> + igt_assert(handle_ptr_map[handle]); > >> + > >> + free(handle_ptr_map[handle]); > >> + handle_ptr_map[handle] = NULL; > >> +} > >> + > >> +static uint32_t create_userptr_bo(int fd, int size) > >> +{ > >> + void *ptr; > >> + uint32_t handle; > >> + int ret; > >> + > >> + ret = posix_memalign(&ptr, PAGE_SIZE, size); > >> + igt_assert(ret == 0); > >> + > >> + ret = gem_userptr(fd, (uint32_t *)ptr, size, 0, &handle); > >> + igt_assert(ret == 0); > >> + add_handle_ptr(handle, ptr); > >> + > >> + return handle; > >> +} > >> + > >> +static void free_userptr_bo(int fd, uint32_t handle) > >> +{ > >> + gem_close(fd, handle); > >> + free_handle_ptr(handle); > >> +} > >> + > >> +static int has_userptr(int fd) > >> +{ > >> + uint32_t handle = 0; > >> + void *ptr; > >> + uint32_t oldflags; > >> + int ret; > >> + > >> + assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0); > >> + oldflags = userptr_flags; > >> + gem_userptr_test_unsynchronized(); > >> + ret = gem_userptr(fd, ptr, PAGE_SIZE, 0, &handle); > >> + userptr_flags = oldflags; > >> + if (ret != 0) { > >> + free(ptr); > >> + return 0; > >> + } > >> + > >> + gem_close(fd, handle); > >> + free(ptr); > >> + > >> + return handle != 0; > >> +} > >> + > >> +static const unsigned int nr_bos[] = {0, 1, 10, 100, 1000}; > >> +static const unsigned int test_duration_sec = 3; > >> + > >> +static volatile unsigned int run_test; > >> + > >> +static void alarm_handler(int sig) > >> +{ > >> + assert(run_test == 1); > >> + run_test = 0; > >> +} > >> + > >> +static void start_test(unsigned int duration) > >> +{ > >> + run_test = 1; > >> + if (duration == 0) > >> + duration = test_duration_sec; > >> + signal(SIGALRM, alarm_handler); > >> + alarm(duration); > >> +} > >> + > >> +static void exchange_ptr(void *array, unsigned i, unsigned j) > >> +{ > >> + void **arr, *tmp; > >> + arr = (void **)array; > >> + > >> + tmp = arr[i]; > >> + arr[i] = arr[j]; > >> + arr[j] = tmp; > >> +} > >> + > >> +static void test_malloc_free(int random) > >> +{ > >> + unsigned long iter = 0; > >> + unsigned int i, tot = 1000; > >> + void *ptr[tot]; > >> + > >> + start_test(test_duration_sec); > >> + > >> + while (run_test) { > >> + for (i = 0; i < tot; i++) { > >> + ptr[i] = malloc(1000); > >> + assert(ptr[i]); > >> + } > >> + if (random) > >> + igt_permute_array(ptr, tot, exchange_ptr); > >> + for (i = 0; i < tot; i++) > >> + free(ptr[i]); > >> + iter++; > >> + } > >> + > >> + printf("%8lu iter/s\n", iter / test_duration_sec); > >> +} > >> + > >> +static void test_malloc_realloc_free(int random) > >> +{ > >> + unsigned long iter = 0; > >> + unsigned int i, tot = 1000; > >> + void *ptr[tot]; > >> + > >> + start_test(test_duration_sec); > >> + > >> + while (run_test) { > >> + for (i = 0; i < tot; i++) { > >> + ptr[i] = malloc(1000); > >> + assert(ptr[i]); > >> + } > >> + if (random) > >> + igt_permute_array(ptr, tot, exchange_ptr); > >> + for (i = 0; i < tot; i++) { > >> + ptr[i] = realloc(ptr[i], 2000); > >> + assert(ptr[i]); > >> + } > >> + if (random) > >> + igt_permute_array(ptr, tot, exchange_ptr); > >> + for (i = 0; i < tot; i++) > >> + free(ptr[i]); > >> + iter++; > >> + } > >> + > >> + printf("%8lu iter/s\n", iter / test_duration_sec); > >> +} > >> + > >> +static void test_mmap_unmap(int random) > >> +{ > >> + unsigned long iter = 0; > >> + unsigned int i, tot = 1000; > >> + void *ptr[tot]; > >> + > >> + start_test(test_duration_sec); > >> + > >> + while (run_test) { > >> + for (i = 0; i < tot; i++) { > >> + ptr[i] = mmap(NULL, 1000, PROT_READ | PROT_WRITE, > >> + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); > >> + assert(ptr[i] != MAP_FAILED); > >> + } > >> + if (random) > >> + igt_permute_array(ptr, tot, exchange_ptr); > >> + for (i = 0; i < tot; i++) > >> + munmap(ptr[i], 1000); > >> + iter++; > >> + } > >> + > >> + printf("%8lu iter/s\n", iter / test_duration_sec); > >> +} > >> + > >> +static void test_ptr_read(void *ptr) > >> +{ > >> + unsigned long iter = 0; > >> + volatile unsigned long *p; > >> + unsigned long i, loops; > >> + register unsigned long v; > >> + > >> + loops = sizeof(linear) / sizeof(unsigned long) / 4; > >> + > >> + start_test(test_duration_sec); > >> + > >> + while (run_test) { > >> + p = (unsigned long *)ptr; > >> + for (i = 0; i < loops; i++) { > >> + v = *p++; > >> + v = *p++; > >> + v = *p++; > >> + v = *p++; > >> + } > >> + iter++; > >> + } > >> + > >> + printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000000); > >> +} > >> + > >> +static void test_ptr_write(void *ptr) > >> +{ > >> + unsigned long iter = 0; > >> + volatile unsigned long *p; > >> + register unsigned long i, loops; > >> + > >> + loops = sizeof(linear) / sizeof(unsigned long) / 4; > >> + > >> + start_test(test_duration_sec); > >> + > >> + while (run_test) { > >> + p = (unsigned long *)ptr; > >> + for (i = 0; i < loops; i++) { > >> + *p++ = i; > >> + *p++ = i; > >> + *p++ = i; > >> + *p++ = i; > >> + } > >> + iter++; > >> + } > >> + > >> + printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000000); > >> +} > >> + > >> +static void test_impact(int fd) > >> +{ > >> + unsigned int total = sizeof(nr_bos) / sizeof(nr_bos[0]); > >> + unsigned int subtest, i; > >> + uint32_t handles[nr_bos[total-1]]; > >> + void *ptr; > >> + char buffer[sizeof(linear)]; > >> + > >> + for (subtest = 0; subtest < total; subtest++) { > >> + for (i = 0; i < nr_bos[subtest]; i++) > >> + handles[i] = create_userptr_bo(fd, sizeof(linear)); > >> + > >> + if (nr_bos[subtest] > 0) > >> + ptr = get_handle_ptr(handles[0]); > >> + else > >> + ptr = buffer; > >> + > >> + printf("ptr-read, %5u bos = ", nr_bos[subtest]); > >> + test_ptr_read(ptr); > >> + > >> + printf("ptr-write %5u bos = ", nr_bos[subtest]); > >> + test_ptr_write(ptr); > >> + > >> + printf("malloc-free, %5u bos = ", nr_bos[subtest]); > >> + test_malloc_free(0); > >> + printf("malloc-free-random %5u bos = ", nr_bos[subtest]); > >> + test_malloc_free(1); > >> + > >> + printf("malloc-realloc-free, %5u bos = ", nr_bos[subtest]); > >> + test_malloc_realloc_free(0); > >> + printf("malloc-realloc-free-random, %5u bos = ", nr_bos[subtest]); > >> + test_malloc_realloc_free(1); > >> + > >> + printf("mmap-unmap, %5u bos = ", nr_bos[subtest]); > >> + test_mmap_unmap(0); > >> + printf("mmap-unmap-random, %5u bos = ", nr_bos[subtest]); > >> + test_mmap_unmap(1); > >> + > >> + for (i = 0; i < nr_bos[subtest]; i++) > >> + free_userptr_bo(fd, handles[i]); > >> + } > >> +} > >> + > >> +static void test_single(int fd) > >> +{ > >> + char *ptr, *bo_ptr; > >> + uint32_t handle = 0; > >> + unsigned long iter = 0; > >> + int ret; > >> + unsigned long map_size = sizeof(linear) + PAGE_SIZE - 1; > >> + > >> + ptr = mmap(NULL, map_size, PROT_READ | PROT_WRITE, > >> + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); > >> + assert(ptr != MAP_FAILED); > >> + > >> + bo_ptr = (char *)(((unsigned long)ptr + (PAGE_SIZE - 1)) > >> + & ~(PAGE_SIZE - 1)); > > > > You might add an ALIGN macro in this file or a suitable header. A couple of > > .c files in lib/ individually define one already. > > OK, will add that in the patch series. > > Thanks for the review! > > Regards, > > Tvrtko >
diff --git a/Android.mk b/Android.mk index 8aeb2d4..0c969b8 100644 --- a/Android.mk +++ b/Android.mk @@ -1,2 +1 @@ -include $(call all-named-subdir-makefiles, lib tests tools) - +include $(call all-named-subdir-makefiles, lib tests tools benchmarks) diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore index ddea6f7..09e5bd8 100644 --- a/benchmarks/.gitignore +++ b/benchmarks/.gitignore @@ -1,3 +1,4 @@ +gem_userptr_benchmark intel_upload_blit_large intel_upload_blit_large_gtt intel_upload_blit_large_map diff --git a/benchmarks/Android.mk b/benchmarks/Android.mk new file mode 100644 index 0000000..5bb8ef5 --- /dev/null +++ b/benchmarks/Android.mk @@ -0,0 +1,36 @@ +LOCAL_PATH := $(call my-dir) + +include $(LOCAL_PATH)/Makefile.sources + +#================# + +define add_benchmark + include $(CLEAR_VARS) + + LOCAL_SRC_FILES := $1.c + + LOCAL_CFLAGS += -DHAVE_STRUCT_SYSINFO_TOTALRAM + LOCAL_CFLAGS += -DANDROID -UNDEBUG -include "check-ndebug.h" + LOCAL_CFLAGS += -std=c99 + # FIXME: drop once Bionic correctly annotates "noreturn" on pthread_exit + LOCAL_CFLAGS += -Wno-error=return-type + # Excessive complaining for established cases. Rely on the Linux version warnings. + LOCAL_CFLAGS += -Wno-sign-compare + + LOCAL_MODULE := $1 + LOCAL_MODULE_TAGS := optional + + LOCAL_STATIC_LIBRARIES := libintel_gpu_tools + + LOCAL_SHARED_LIBRARIES := libpciaccess \ + libdrm \ + libdrm_intel + + include $(BUILD_EXECUTABLE) +endef + +#================# + +benchmark_list := $(bin_PROGRAMS) + +$(foreach item,$(benchmark_list),$(eval $(call add_benchmark,$(item)))) diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index e2ad784..d173bf4 100644 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -1,9 +1,4 @@ - -bin_PROGRAMS = \ - intel_upload_blit_large \ - intel_upload_blit_large_gtt \ - intel_upload_blit_large_map \ - intel_upload_blit_small +include Makefile.sources AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources new file mode 100644 index 0000000..fd6c107 --- /dev/null +++ b/benchmarks/Makefile.sources @@ -0,0 +1,6 @@ +bin_PROGRAMS = \ + intel_upload_blit_large \ + intel_upload_blit_large_gtt \ + intel_upload_blit_large_map \ + intel_upload_blit_small \ + gem_userptr_benchmark diff --git a/benchmarks/gem_userptr_benchmark.c b/benchmarks/gem_userptr_benchmark.c new file mode 100644 index 0000000..218f6f1 --- /dev/null +++ b/benchmarks/gem_userptr_benchmark.c @@ -0,0 +1,513 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Tvrtko Ursulin <tvrtko.ursulin@intel.com> + * + */ + +/** @file gem_userptr_benchmark.c + * + * Benchmark the userptr code and impact of having userptr surfaces + * in process address space on some normal operations. + * + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <fcntl.h> +#include <inttypes.h> +#include <errno.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/mman.h> +#include "drm.h" +#include "i915_drm.h" +#include "drmtest.h" +#include "intel_bufmgr.h" +#include "intel_batchbuffer.h" +#include "intel_gpu_tools.h" + +#define WIDTH 128 +#define HEIGHT 128 +#define PAGE_SIZE 4096 + +#define LOCAL_I915_GEM_USERPTR 0x34 +#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr) +struct local_i915_gem_userptr { + uint64_t user_ptr; + uint64_t user_size; + uint32_t flags; +#define I915_USERPTR_READ_ONLY (1<<0) +#define I915_USERPTR_UNSYNCHRONIZED (1<<31) + uint32_t handle; +}; + +static uint32_t userptr_flags = I915_USERPTR_UNSYNCHRONIZED; + +static uint32_t linear[WIDTH*HEIGHT]; + +static void gem_userptr_test_unsynchronized(void) +{ + userptr_flags = I915_USERPTR_UNSYNCHRONIZED; +} + +static void gem_userptr_test_synchronized(void) +{ + userptr_flags = 0; +} + +static int gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t *handle) +{ + struct local_i915_gem_userptr userptr; + int ret; + + userptr.user_ptr = (uintptr_t)ptr; + userptr.user_size = size; + userptr.flags = userptr_flags; + if (read_only) + userptr.flags |= I915_USERPTR_READ_ONLY; + + ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr); + if (ret) + ret = errno; + igt_skip_on_f(ret == ENODEV && + (userptr_flags & I915_USERPTR_UNSYNCHRONIZED) == 0, + "Skipping, synchronized mappings with no kernel CONFIG_MMU_NOTIFIER?"); + if (ret == 0) + *handle = userptr.handle; + + return ret; +} + +static uint32_t +create_userptr(int fd, uint32_t val, uint32_t *ptr) +{ + uint32_t handle; + int i, ret; + + ret = gem_userptr(fd, ptr, sizeof(linear), 0, &handle); + igt_assert(ret == 0); + igt_assert(handle != 0); + + /* Fill the BO with dwords starting at val */ + for (i = 0; i < WIDTH*HEIGHT; i++) + ptr[i] = val++; + + return handle; +} + +static void **handle_ptr_map; +static unsigned int num_handle_ptr_map; + +static void add_handle_ptr(uint32_t handle, void *ptr) +{ + if (handle >= num_handle_ptr_map) { + handle_ptr_map = realloc(handle_ptr_map, + (handle + 1000) * sizeof(void*)); + num_handle_ptr_map = handle + 1000; + } + + handle_ptr_map[handle] = ptr; +} + +static void *get_handle_ptr(uint32_t handle) +{ + return handle_ptr_map[handle]; +} + +static void free_handle_ptr(uint32_t handle) +{ + igt_assert(handle < num_handle_ptr_map); + igt_assert(handle_ptr_map[handle]); + + free(handle_ptr_map[handle]); + handle_ptr_map[handle] = NULL; +} + +static uint32_t create_userptr_bo(int fd, int size) +{ + void *ptr; + uint32_t handle; + int ret; + + ret = posix_memalign(&ptr, PAGE_SIZE, size); + igt_assert(ret == 0); + + ret = gem_userptr(fd, (uint32_t *)ptr, size, 0, &handle); + igt_assert(ret == 0); + add_handle_ptr(handle, ptr); + + return handle; +} + +static void free_userptr_bo(int fd, uint32_t handle) +{ + gem_close(fd, handle); + free_handle_ptr(handle); +} + +static int has_userptr(int fd) +{ + uint32_t handle = 0; + void *ptr; + uint32_t oldflags; + int ret; + + assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0); + oldflags = userptr_flags; + gem_userptr_test_unsynchronized(); + ret = gem_userptr(fd, ptr, PAGE_SIZE, 0, &handle); + userptr_flags = oldflags; + if (ret != 0) { + free(ptr); + return 0; + } + + gem_close(fd, handle); + free(ptr); + + return handle != 0; +} + +static const unsigned int nr_bos[] = {0, 1, 10, 100, 1000}; +static const unsigned int test_duration_sec = 3; + +static volatile unsigned int run_test; + +static void alarm_handler(int sig) +{ + assert(run_test == 1); + run_test = 0; +} + +static void start_test(unsigned int duration) +{ + run_test = 1; + if (duration == 0) + duration = test_duration_sec; + signal(SIGALRM, alarm_handler); + alarm(duration); +} + +static void exchange_ptr(void *array, unsigned i, unsigned j) +{ + void **arr, *tmp; + arr = (void **)array; + + tmp = arr[i]; + arr[i] = arr[j]; + arr[j] = tmp; +} + +static void test_malloc_free(int random) +{ + unsigned long iter = 0; + unsigned int i, tot = 1000; + void *ptr[tot]; + + start_test(test_duration_sec); + + while (run_test) { + for (i = 0; i < tot; i++) { + ptr[i] = malloc(1000); + assert(ptr[i]); + } + if (random) + igt_permute_array(ptr, tot, exchange_ptr); + for (i = 0; i < tot; i++) + free(ptr[i]); + iter++; + } + + printf("%8lu iter/s\n", iter / test_duration_sec); +} + +static void test_malloc_realloc_free(int random) +{ + unsigned long iter = 0; + unsigned int i, tot = 1000; + void *ptr[tot]; + + start_test(test_duration_sec); + + while (run_test) { + for (i = 0; i < tot; i++) { + ptr[i] = malloc(1000); + assert(ptr[i]); + } + if (random) + igt_permute_array(ptr, tot, exchange_ptr); + for (i = 0; i < tot; i++) { + ptr[i] = realloc(ptr[i], 2000); + assert(ptr[i]); + } + if (random) + igt_permute_array(ptr, tot, exchange_ptr); + for (i = 0; i < tot; i++) + free(ptr[i]); + iter++; + } + + printf("%8lu iter/s\n", iter / test_duration_sec); +} + +static void test_mmap_unmap(int random) +{ + unsigned long iter = 0; + unsigned int i, tot = 1000; + void *ptr[tot]; + + start_test(test_duration_sec); + + while (run_test) { + for (i = 0; i < tot; i++) { + ptr[i] = mmap(NULL, 1000, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + assert(ptr[i] != MAP_FAILED); + } + if (random) + igt_permute_array(ptr, tot, exchange_ptr); + for (i = 0; i < tot; i++) + munmap(ptr[i], 1000); + iter++; + } + + printf("%8lu iter/s\n", iter / test_duration_sec); +} + +static void test_ptr_read(void *ptr) +{ + unsigned long iter = 0; + volatile unsigned long *p; + unsigned long i, loops; + register unsigned long v; + + loops = sizeof(linear) / sizeof(unsigned long) / 4; + + start_test(test_duration_sec); + + while (run_test) { + p = (unsigned long *)ptr; + for (i = 0; i < loops; i++) { + v = *p++; + v = *p++; + v = *p++; + v = *p++; + } + iter++; + } + + printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000000); +} + +static void test_ptr_write(void *ptr) +{ + unsigned long iter = 0; + volatile unsigned long *p; + register unsigned long i, loops; + + loops = sizeof(linear) / sizeof(unsigned long) / 4; + + start_test(test_duration_sec); + + while (run_test) { + p = (unsigned long *)ptr; + for (i = 0; i < loops; i++) { + *p++ = i; + *p++ = i; + *p++ = i; + *p++ = i; + } + iter++; + } + + printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000000); +} + +static void test_impact(int fd) +{ + unsigned int total = sizeof(nr_bos) / sizeof(nr_bos[0]); + unsigned int subtest, i; + uint32_t handles[nr_bos[total-1]]; + void *ptr; + char buffer[sizeof(linear)]; + + for (subtest = 0; subtest < total; subtest++) { + for (i = 0; i < nr_bos[subtest]; i++) + handles[i] = create_userptr_bo(fd, sizeof(linear)); + + if (nr_bos[subtest] > 0) + ptr = get_handle_ptr(handles[0]); + else + ptr = buffer; + + printf("ptr-read, %5u bos = ", nr_bos[subtest]); + test_ptr_read(ptr); + + printf("ptr-write %5u bos = ", nr_bos[subtest]); + test_ptr_write(ptr); + + printf("malloc-free, %5u bos = ", nr_bos[subtest]); + test_malloc_free(0); + printf("malloc-free-random %5u bos = ", nr_bos[subtest]); + test_malloc_free(1); + + printf("malloc-realloc-free, %5u bos = ", nr_bos[subtest]); + test_malloc_realloc_free(0); + printf("malloc-realloc-free-random, %5u bos = ", nr_bos[subtest]); + test_malloc_realloc_free(1); + + printf("mmap-unmap, %5u bos = ", nr_bos[subtest]); + test_mmap_unmap(0); + printf("mmap-unmap-random, %5u bos = ", nr_bos[subtest]); + test_mmap_unmap(1); + + for (i = 0; i < nr_bos[subtest]; i++) + free_userptr_bo(fd, handles[i]); + } +} + +static void test_single(int fd) +{ + char *ptr, *bo_ptr; + uint32_t handle = 0; + unsigned long iter = 0; + int ret; + unsigned long map_size = sizeof(linear) + PAGE_SIZE - 1; + + ptr = mmap(NULL, map_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + assert(ptr != MAP_FAILED); + + bo_ptr = (char *)(((unsigned long)ptr + (PAGE_SIZE - 1)) + & ~(PAGE_SIZE - 1)); + + start_test(test_duration_sec); + + while (run_test) { + ret = gem_userptr(fd, bo_ptr, sizeof(linear), 0, &handle); + assert(ret == 0); + gem_close(fd, handle); + iter++; + } + + munmap(ptr, map_size); + + printf("%8lu iter/s\n", iter / test_duration_sec); +} + +static void test_multiple(int fd, unsigned int batch, int random) +{ + char *ptr, *bo_ptr; + uint32_t handles[10000]; + int map[10000]; + unsigned long iter = 0; + int ret; + int i; + unsigned long map_size = batch * sizeof(linear) + PAGE_SIZE - 1; + + assert(batch < (sizeof(handles) / sizeof(handles[0]))); + assert(batch < (sizeof(map) / sizeof(map[0]))); + + ptr = mmap(NULL, map_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + assert(ptr != MAP_FAILED); + + bo_ptr = (char *)(((unsigned long)ptr + (PAGE_SIZE - 1)) + & ~(PAGE_SIZE - 1)); + + for (i = 0; i < batch; i++) + map[i] = i; + + start_test(test_duration_sec); + + while (run_test) { + if (random) + igt_permute_array(map, batch, igt_exchange_int); + for (i = 0; i < batch; i++) { + ret = gem_userptr(fd, bo_ptr + map[i] * sizeof(linear), + sizeof(linear), + 0, &handles[i]); + assert(ret == 0); + } + if (random) + igt_permute_array(map, batch, igt_exchange_int); + for (i = 0; i < batch; i++) + gem_close(fd, handles[map[i]]); + iter++; + } + + munmap(ptr, map_size); + + printf("%8lu iter/s\n", iter * batch / test_duration_sec); +} + +static void test_userptr(int fd) +{ + printf("create-destroy = "); + test_single(fd); + + printf("multi-create-destroy = "); + test_multiple(fd, 100, 0); + + printf("multi-create-destroy-random = "); + test_multiple(fd, 100, 1); +} + +int main(int argc, char **argv) +{ + int fd = -1, ret; + + igt_skip_on_simulation(); + + igt_subtest_init(argc, argv); + + fd = drm_open_any(); + igt_assert(fd >= 0); + + ret = has_userptr(fd); + igt_skip_on_f(ret == 0, "No userptr support - %s (%d)\n", + strerror(errno), ret); + + + gem_userptr_test_unsynchronized(); + + igt_subtest("userptr-unsync") + test_userptr(fd); + + igt_subtest("userptr-impact-unsync") + test_impact(fd); + + gem_userptr_test_synchronized(); + + igt_subtest("userptr-sync") + test_userptr(fd); + + igt_subtest("userptr-impact-sync") + test_impact(fd); + + igt_exit(); + + return 0; +}