diff mbox series

[i-g-t,v5,3/6] lib: Add function to hash a framebuffer

Message ID 20190115174747.3138-4-liviu.dudau@arm.com (mailing list archive)
State New, archived
Headers show
Series igt: Add support for testing writeback connectors | expand

Commit Message

Liviu Dudau Jan. 15, 2019, 5:47 p.m. UTC
From: Brian Starkey <brian.starkey@arm.com>

To use writeback buffers as a CRC source, we need to be able to hash
them. Implement a simple FVA-1a hashing routine for this purpose.

Doing a bytewise hash on the framebuffer directly can be very slow if
the memory is noncached. By making a copy of each line in the FB first
(which can take advantage of word-access speedup), we can do the hash
on a cached copy, which is much faster (10x speedup on my platform).

Signed-off-by: Brian Starkey <brian.starkey@arm.com>
[rebased and updated to the most recent API]
Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
---
 lib/igt_fb.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/igt_fb.h |  3 +++
 2 files changed, 69 insertions(+)

Comments

Chris Wilson Jan. 15, 2019, 6:47 p.m. UTC | #1
Quoting Liviu Dudau (2019-01-15 17:47:44)
> +int igt_fb_get_crc(struct igt_fb *fb, igt_crc_t *crc)
> +{
> +#define FNV1a_OFFSET_BIAS 2166136261
> +#define FNV1a_PRIME 16777619
> +       uint32_t hash;
> +       void *map;
> +       char *ptr, *line = NULL;
> +       int x, y, cpp = igt_drm_format_to_bpp(fb->drm_format) / 8;
> +       uint32_t stride = calc_plane_stride(fb, 0);
> +
> +       if (fb->is_dumb)
> +               map = kmstest_dumb_map_buffer(fb->fd, fb->gem_handle, fb->size,
> +                                             PROT_READ);
> +       else
> +               map = gem_mmap__gtt(fb->fd, fb->gem_handle, fb->size,
> +                                   PROT_READ);
> +       ptr = map;
> +
> +       /*
> +        * Framebuffers are often uncached, which can make byte-wise accesses
> +        * very slow. We copy each line of the FB into a local buffer to speed
> +        * up the hashing.
> +        */
> +       line = malloc(stride);
> +       if (!line) {
> +               munmap(map, fb->size);
> +               return -ENOMEM;
> +       }
> +
> +       hash = FNV1a_OFFSET_BIAS;
> +
> +       for (y = 0; y < fb->height; y++, ptr += stride) {
> +
> +               memcpy(line, ptr, stride);

igt_memcpy_from_wc() for the reasons cited above.
-Chris
Liviu Dudau Jan. 16, 2019, 11:20 a.m. UTC | #2
On Tue, Jan 15, 2019 at 06:47:47PM +0000, Chris Wilson wrote:
> Quoting Liviu Dudau (2019-01-15 17:47:44)
> > +int igt_fb_get_crc(struct igt_fb *fb, igt_crc_t *crc)
> > +{
> > +#define FNV1a_OFFSET_BIAS 2166136261
> > +#define FNV1a_PRIME 16777619
> > +       uint32_t hash;
> > +       void *map;
> > +       char *ptr, *line = NULL;
> > +       int x, y, cpp = igt_drm_format_to_bpp(fb->drm_format) / 8;
> > +       uint32_t stride = calc_plane_stride(fb, 0);
> > +
> > +       if (fb->is_dumb)
> > +               map = kmstest_dumb_map_buffer(fb->fd, fb->gem_handle, fb->size,
> > +                                             PROT_READ);
> > +       else
> > +               map = gem_mmap__gtt(fb->fd, fb->gem_handle, fb->size,
> > +                                   PROT_READ);
> > +       ptr = map;
> > +
> > +       /*
> > +        * Framebuffers are often uncached, which can make byte-wise accesses
> > +        * very slow. We copy each line of the FB into a local buffer to speed
> > +        * up the hashing.
> > +        */
> > +       line = malloc(stride);
> > +       if (!line) {
> > +               munmap(map, fb->size);
> > +               return -ENOMEM;
> > +       }
> > +
> > +       hash = FNV1a_OFFSET_BIAS;
> > +
> > +       for (y = 0; y < fb->height; y++, ptr += stride) {
> > +
> > +               memcpy(line, ptr, stride);
> 
> igt_memcpy_from_wc() for the reasons cited above.
> -Chris

Hi Chris,

Thanks for pointing out the function, I was not aware of it!

Now, looking at the implementation, and ignoring the fact that it is in a
file called igt_x86.c, it looks to me that it will end up calling memcpy
anyway for Arm drivers. Not being a GCC expert, I am wondering if the
ifunc() wrapper around resolve_memcpy_from_wc() will not actually
prevent the compiler from choosing an optimised version of memcpy for Arm.

I can refresh the patch if you think it is safe to use igt_memcpy_from_wc() for
non-x86 architectures.

Best regards,
Liviu
Chris Wilson Jan. 16, 2019, 11:50 a.m. UTC | #3
Quoting Liviu Dudau (2019-01-16 11:20:09)
> On Tue, Jan 15, 2019 at 06:47:47PM +0000, Chris Wilson wrote:
> > Quoting Liviu Dudau (2019-01-15 17:47:44)
> > > +int igt_fb_get_crc(struct igt_fb *fb, igt_crc_t *crc)
> > > +{
> > > +#define FNV1a_OFFSET_BIAS 2166136261
> > > +#define FNV1a_PRIME 16777619
> > > +       uint32_t hash;
> > > +       void *map;
> > > +       char *ptr, *line = NULL;
> > > +       int x, y, cpp = igt_drm_format_to_bpp(fb->drm_format) / 8;
> > > +       uint32_t stride = calc_plane_stride(fb, 0);
> > > +
> > > +       if (fb->is_dumb)
> > > +               map = kmstest_dumb_map_buffer(fb->fd, fb->gem_handle, fb->size,
> > > +                                             PROT_READ);
> > > +       else
> > > +               map = gem_mmap__gtt(fb->fd, fb->gem_handle, fb->size,
> > > +                                   PROT_READ);
> > > +       ptr = map;
> > > +
> > > +       /*
> > > +        * Framebuffers are often uncached, which can make byte-wise accesses
> > > +        * very slow. We copy each line of the FB into a local buffer to speed
> > > +        * up the hashing.
> > > +        */
> > > +       line = malloc(stride);
> > > +       if (!line) {
> > > +               munmap(map, fb->size);
> > > +               return -ENOMEM;
> > > +       }
> > > +
> > > +       hash = FNV1a_OFFSET_BIAS;
> > > +
> > > +       for (y = 0; y < fb->height; y++, ptr += stride) {
> > > +
> > > +               memcpy(line, ptr, stride);
> > 
> > igt_memcpy_from_wc() for the reasons cited above.
> > -Chris
> 
> Hi Chris,
> 
> Thanks for pointing out the function, I was not aware of it!
> 
> Now, looking at the implementation, and ignoring the fact that it is in a
> file called igt_x86.c, it looks to me that it will end up calling memcpy
> anyway for Arm drivers. Not being a GCC expert, I am wondering if the
> ifunc() wrapper around resolve_memcpy_from_wc() will not actually
> prevent the compiler from choosing an optimised version of memcpy for Arm.
> 
> I can refresh the patch if you think it is safe to use igt_memcpy_from_wc() for
> non-x86 architectures.

You are memcpy'ing from uncached, there is no general purpose optimised
memcpy! :) The compiler builtin (e.g. rep movb for x86) may be the worst
thing you could do ;) For Arm, it will fallback to the general memcpy
routine which will do it own ifunc, just will not be inlined.

It should be safe, and it should be no worse than a call to memcpy with
variable arguments.
-Chris
diff mbox series

Patch

diff --git a/lib/igt_fb.c b/lib/igt_fb.c
index 5cd1829a3..11e200b53 100644
--- a/lib/igt_fb.c
+++ b/lib/igt_fb.c
@@ -2379,6 +2379,72 @@  bool igt_fb_supported_format(uint32_t drm_format)
 	return false;
 }
 
+/*
+ * This implements the FNV-1a hashing algorithm instead of CRC, for
+ * simplicity
+ * http://www.isthe.com/chongo/tech/comp/fnv/index.html
+ *
+ * hash = offset_basis
+ * for each octet_of_data to be hashed
+ *         hash = hash xor octet_of_data
+ *         hash = hash * FNV_prime
+ * return hash
+ *
+ * 32 bit offset_basis = 2166136261
+ * 32 bit FNV_prime = 224 + 28 + 0x93 = 16777619
+ */
+int igt_fb_get_crc(struct igt_fb *fb, igt_crc_t *crc)
+{
+#define FNV1a_OFFSET_BIAS 2166136261
+#define FNV1a_PRIME 16777619
+	uint32_t hash;
+	void *map;
+	char *ptr, *line = NULL;
+	int x, y, cpp = igt_drm_format_to_bpp(fb->drm_format) / 8;
+	uint32_t stride = calc_plane_stride(fb, 0);
+
+	if (fb->is_dumb)
+		map = kmstest_dumb_map_buffer(fb->fd, fb->gem_handle, fb->size,
+					      PROT_READ);
+	else
+		map = gem_mmap__gtt(fb->fd, fb->gem_handle, fb->size,
+				    PROT_READ);
+	ptr = map;
+
+	/*
+	 * Framebuffers are often uncached, which can make byte-wise accesses
+	 * very slow. We copy each line of the FB into a local buffer to speed
+	 * up the hashing.
+	 */
+	line = malloc(stride);
+	if (!line) {
+		munmap(map, fb->size);
+		return -ENOMEM;
+	}
+
+	hash = FNV1a_OFFSET_BIAS;
+
+	for (y = 0; y < fb->height; y++, ptr += stride) {
+
+		memcpy(line, ptr, stride);
+
+		for (x = 0; x < fb->width * cpp; x++) {
+			hash ^= line[x];
+			hash *= FNV1a_PRIME;
+		}
+	}
+
+	crc->n_words = 1;
+	crc->crc[0] = hash;
+
+	free(line);
+	munmap(map, fb->size);
+
+	return 0;
+#undef FNV1a_OFFSET_BIAS
+#undef FNV1a_PRIME
+}
+
 /**
  * igt_format_is_yuv:
  * @drm_format: drm fourcc
diff --git a/lib/igt_fb.h b/lib/igt_fb.h
index 9f027deba..948c5380c 100644
--- a/lib/igt_fb.h
+++ b/lib/igt_fb.h
@@ -37,6 +37,7 @@ 
 #include <i915_drm.h>
 
 #include "igt_color_encoding.h"
+#include "igt_debugfs.h"
 
 /**
  * igt_fb_t:
@@ -173,5 +174,7 @@  const char *igt_format_str(uint32_t drm_format);
 bool igt_fb_supported_format(uint32_t drm_format);
 bool igt_format_is_yuv(uint32_t drm_format);
 
+int igt_fb_get_crc(struct igt_fb *fb, igt_crc_t *crc);
+
 #endif /* __IGT_FB_H__ */