diff mbox series

drm/i915: Add fallback inside memcpy_from_wc functions

Message ID 20220203162703.352447-1-balasubramani.vivekanandan@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: Add fallback inside memcpy_from_wc functions | expand

Commit Message

Vivekanandan, Balasubramani Feb. 3, 2022, 4:27 p.m. UTC
memcpy_from_wc functions can fail if SSE4.1 is not supported or the
supplied addresses are not 16-byte aligned. It was then upto to the
caller to use memcpy as fallback.
Now fallback to memcpy is implemented inside memcpy_from_wc functions
relieving the user from checking the return value of i915_memcpy_from_wc
and doing fallback.

When doing copying from io memory address memcpy_fromio should be used
as fallback. So a new function is added to the family of memcpy_to_wc
functions which should be used while copying from io memory.

This change is implemented also with an intention to perpare for porting
memcpy_from_wc code to ARM64. Since SSE4.1 is not valid for ARM,
accelerated reads will not be supported and the driver should rely on
fallback always.
So there would be few more places in the code where fallback should be
introduced. For e.g. GuC log relay is currently not using fallback since
a GPU supporting GuC submission will mostly have SSE4.1 enabled CPU.
This is no more valid with Discrete GPU and with enabling support for
ARM64.
With fallback moved inside memcpy_from_wc function, call sites would
look neat and fallback can be implemented in a uniform way.

Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  3 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c   |  8 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c      |  9 ++-
 drivers/gpu/drm/i915/i915_memcpy.c         | 78 ++++++++++++++++------
 drivers/gpu/drm/i915/i915_memcpy.h         | 18 ++---
 5 files changed, 77 insertions(+), 39 deletions(-)

Comments

kernel test robot Feb. 4, 2022, 1:30 a.m. UTC | #1
Hi Balasubramani,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-intel/for-linux-next]
[also build test WARNING on drm-tip/drm-tip drm-exynos/exynos-drm-next drm/drm-next tegra-drm/drm/tegra/for-next v5.17-rc2 next-20220203]
[cannot apply to airlied/drm-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Balasubramani-Vivekanandan/drm-i915-Add-fallback-inside-memcpy_from_wc-functions/20220204-002704
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-randconfig-s001 (https://download.01.org/0day-ci/archive/20220204/202202040609.oSW2rFIL-lkp@intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.4-dirty
        # https://github.com/0day-ci/linux/commit/66de634b392157effc065df388510df67de59f2b
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Balasubramani-Vivekanandan/drm-i915-Add-fallback-inside-memcpy_from_wc-functions/20220204-002704
        git checkout 66de634b392157effc065df388510df67de59f2b
        # save the config file to linux build tree
        mkdir build_dir
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=i386 SHELL=/bin/bash drivers/gpu/drm/i915/ net/ipv6/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)
>> drivers/gpu/drm/i915/i915_memcpy.c:189:42: sparse: sparse: incorrect type in argument 2 (different address spaces) @@     expected void const *src @@     got void const [noderef] __iomem *src @@
   drivers/gpu/drm/i915/i915_memcpy.c:189:42: sparse:     expected void const *src
   drivers/gpu/drm/i915/i915_memcpy.c:189:42: sparse:     got void const [noderef] __iomem *src
>> drivers/gpu/drm/i915/i915_memcpy.c:191:45: sparse: sparse: incorrect type in argument 2 (different address spaces) @@     expected void const *[assigned] src @@     got void const [noderef] __iomem *src @@
   drivers/gpu/drm/i915/i915_memcpy.c:191:45: sparse:     expected void const *[assigned] src
   drivers/gpu/drm/i915/i915_memcpy.c:191:45: sparse:     got void const [noderef] __iomem *src
   drivers/gpu/drm/i915/i915_memcpy.c:187:6: sparse: sparse: symbol 'i915_io_memcpy_from_wc' redeclared with different type (incompatible argument 2 (different address spaces)):
>> drivers/gpu/drm/i915/i915_memcpy.c:187:6: sparse:    void extern [addressable] [toplevel] i915_io_memcpy_from_wc( ... )
   drivers/gpu/drm/i915/i915_memcpy.c: note: in included file:
   drivers/gpu/drm/i915/i915_memcpy.h:17:6: sparse: note: previously declared as:
>> drivers/gpu/drm/i915/i915_memcpy.h:17:6: sparse:    void extern [addressable] [toplevel] i915_io_memcpy_from_wc( ... )
--
>> drivers/gpu/drm/i915/gem/i915_gem_object.c:449:37: sparse: sparse: incorrect type in argument 2 (different address spaces) @@     expected void const *src @@     got void [noderef] __iomem *[assigned] src_ptr @@
   drivers/gpu/drm/i915/gem/i915_gem_object.c:449:37: sparse:     expected void const *src
   drivers/gpu/drm/i915/gem/i915_gem_object.c:449:37: sparse:     got void [noderef] __iomem *[assigned] src_ptr

vim +189 drivers/gpu/drm/i915/i915_memcpy.c

   175	
   176	/**
   177	 * i915_io_memcpy_from_wc: perform an accelerated *aligned* read from WC
   178	 * @dst: destination pointer
   179	 * @src: source pointer
   180	 * @len: how many bytes to copy
   181	 *
   182	 * To be used when the when copying from io memory.
   183	 *
   184	 * memcpy_fromio() is used as fallback otherewise no difference to
   185	 * i915_memcpy_from_wc()
   186	 */
 > 187	void i915_io_memcpy_from_wc(void *dst, const void __iomem *src, unsigned long len)
   188	{
 > 189		if (i915_can_memcpy_from_wc(dst, src, len)) {
   190			if (likely(len))
 > 191				__memcpy_ntdqa(dst, src, len >> 4);
   192			return;
   193		}
   194	
   195		/* Fallback */
   196		memcpy_fromio(dst, src, len);
   197	}
   198	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index e03e362d320b..b139a88fce70 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -452,8 +452,7 @@  i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset
 				    PAGE_SIZE);
 
 	src_ptr = src_map + offset_in_page(offset);
-	if (!i915_memcpy_from_wc(dst, (void __force *)src_ptr, size))
-		memcpy_fromio(dst, src_ptr, size);
+	i915_io_memcpy_from_wc(dst, src_ptr, size);
 
 	io_mapping_unmap(src_map);
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 37c38bdd5f47..64b8521a8b28 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -99,8 +99,10 @@  __igt_reset_stolen(struct intel_gt *gt,
 			memset_io(s, STACK_MAGIC, PAGE_SIZE);
 
 		in = (void __force *)s;
-		if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
+		if (i915_can_memcpy_from_wc(tmp, in, PAGE_SIZE)) {
+			i915_io_memcpy_from_wc(tmp, in, PAGE_SIZE);
 			in = tmp;
+		}
 		crc[page] = crc32_le(0, in, PAGE_SIZE);
 
 		io_mapping_unmap(s);
@@ -135,8 +137,10 @@  __igt_reset_stolen(struct intel_gt *gt,
 				      PAGE_SIZE);
 
 		in = (void __force *)s;
-		if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
+		if (i915_can_memcpy_from_wc(tmp, in, PAGE_SIZE)) {
+			i915_io_memcpy_from_wc(tmp, in, PAGE_SIZE);
 			in = tmp;
+		}
 		x = crc32_le(0, in, PAGE_SIZE);
 
 		if (x != crc[page] &&
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index aee42eae4729..90db5de86c25 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -296,8 +296,10 @@  static int compress_page(struct i915_vma_compress *c,
 	struct z_stream_s *zstream = &c->zstream;
 
 	zstream->next_in = src;
-	if (wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
+	if (wc && c->tmp && i915_can_memcpy_from_wc(c->tmp, src, PAGE_SIZE)) {
+		i915_io_memcpy_from_wc(c->tmp, src, PAGE_SIZE);
 		zstream->next_in = c->tmp;
+	}
 	zstream->avail_in = PAGE_SIZE;
 
 	do {
@@ -396,8 +398,11 @@  static int compress_page(struct i915_vma_compress *c,
 	if (!ptr)
 		return -ENOMEM;
 
-	if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE)))
+	if (wc)
+		i915_io_memcpy_from_wc(ptr, src, PAGE_SIZE);
+	else
 		memcpy(ptr, src, PAGE_SIZE);
+
 	list_add_tail(&virt_to_page(ptr)->lru, &dst->page_list);
 	cond_resched();
 
diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c
index 1b021a4902de..b1f8abf35452 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.c
+++ b/drivers/gpu/drm/i915/i915_memcpy.c
@@ -24,15 +24,10 @@ 
 
 #include <linux/kernel.h>
 #include <asm/fpu/api.h>
+#include <linux/io.h>
 
 #include "i915_memcpy.h"
 
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
-#define CI_BUG_ON(expr) BUG_ON(expr)
-#else
-#define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
-#endif
-
 static DEFINE_STATIC_KEY_FALSE(has_movntdqa);
 
 static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
@@ -93,6 +88,26 @@  static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len)
 	kernel_fpu_end();
 }
 
+/* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment,
+ * as well as SSE4.1 support. To check beforehand, pass in the parameters to
+ * i915_can_memcpy_from_wc() - since we only care about the low 4 bits,
+ * you only need to pass in the minor offsets, page-aligned pointers are
+ * always valid.
+ *
+ * For just checking for SSE4.1, in the foreknowledge that the future use
+ * will be correctly aligned, just use i915_has_memcpy_from_wc().
+ */
+bool i915_can_memcpy_from_wc(void *dst, const void *src, unsigned long len)
+{
+	if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
+		return false;
+
+	if (static_branch_likely(&has_movntdqa))
+		return true;
+
+	return false;
+}
+
 /**
  * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC
  * @dst: destination pointer
@@ -104,24 +119,18 @@  static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len)
  * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
  * of 16.
  *
- * To test whether accelerated reads from WC are supported, use
- * i915_memcpy_from_wc(NULL, NULL, 0);
- *
- * Returns true if the copy was successful, false if the preconditions
- * are not met.
+ * If the acccelerated read from WC is not possible fallback to memcpy
  */
-bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
+void i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
 {
-	if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
-		return false;
-
-	if (static_branch_likely(&has_movntdqa)) {
+	if (i915_can_memcpy_from_wc(dst, src, len)) {
 		if (likely(len))
 			__memcpy_ntdqa(dst, src, len >> 4);
-		return true;
+		return;
 	}
 
-	return false;
+	/* Fallback */
+	memcpy(dst, src, len);
 }
 
 /**
@@ -134,12 +143,15 @@  bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
  * @src to @dst using * non-temporal instructions where available, but
  * accepts that its arguments may not be aligned, but are valid for the
  * potential 16-byte read past the end.
+ *
+ * Fallback to memcpy if accelerated read is not supported
  */
 void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len)
 {
 	unsigned long addr;
 
-	CI_BUG_ON(!i915_has_memcpy_from_wc());
+	if (!i915_has_memcpy_from_wc())
+		goto fallback;
 
 	addr = (unsigned long)src;
 	if (!IS_ALIGNED(addr, 16)) {
@@ -154,6 +166,34 @@  void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len
 
 	if (likely(len))
 		__memcpy_ntdqu(dst, src, DIV_ROUND_UP(len, 16));
+
+	return;
+
+fallback:
+	memcpy(dst, src, len);
+}
+
+/**
+ * i915_io_memcpy_from_wc: perform an accelerated *aligned* read from WC
+ * @dst: destination pointer
+ * @src: source pointer
+ * @len: how many bytes to copy
+ *
+ * To be used when the when copying from io memory.
+ *
+ * memcpy_fromio() is used as fallback otherewise no difference to
+ * i915_memcpy_from_wc()
+ */
+void i915_io_memcpy_from_wc(void *dst, const void __iomem *src, unsigned long len)
+{
+	if (i915_can_memcpy_from_wc(dst, src, len)) {
+		if (likely(len))
+			__memcpy_ntdqa(dst, src, len >> 4);
+		return;
+	}
+
+	/* Fallback */
+	memcpy_fromio(dst, src, len);
 }
 
 void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_memcpy.h b/drivers/gpu/drm/i915/i915_memcpy.h
index 3df063a3293b..3ae71ba9e3e5 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.h
+++ b/drivers/gpu/drm/i915/i915_memcpy.h
@@ -12,23 +12,13 @@  struct drm_i915_private;
 
 void i915_memcpy_init_early(struct drm_i915_private *i915);
 
-bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
+void i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
 void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len);
+void i915_io_memcpy_from_wc(void *dst, const void *src, unsigned long len);
 
-/* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment,
- * as well as SSE4.1 support. i915_memcpy_from_wc() will report if it cannot
- * perform the operation. To check beforehand, pass in the parameters to
- * to i915_can_memcpy_from_wc() - since we only care about the low 4 bits,
- * you only need to pass in the minor offsets, page-aligned pointers are
- * always valid.
- *
- * For just checking for SSE4.1, in the foreknowledge that the future use
- * will be correctly aligned, just use i915_has_memcpy_from_wc().
- */
-#define i915_can_memcpy_from_wc(dst, src, len) \
-	i915_memcpy_from_wc((void *)((unsigned long)(dst) | (unsigned long)(src) | (len)), NULL, 0)
+bool i915_can_memcpy_from_wc(void *dst, const void *src, unsigned long len);
 
 #define i915_has_memcpy_from_wc() \
-	i915_memcpy_from_wc(NULL, NULL, 0)
+	i915_can_memcpy_from_wc(NULL, NULL, 0)
 
 #endif /* __I915_MEMCPY_H__ */