diff mbox series

[v2,1/1] riscv: __asm_copy_to-from_user: Improve using word copy, if size is < 9*SZREG

Message ID 747e611a-2225-0685-b1e6-8b45ef45042d@gmail.com (mailing list archive)
State New, archived
Headers show
Series __asm_copy_to-from_user: Reduce more byte_copy | expand

Commit Message

Akira Tsukamoto Nov. 11, 2021, 8:13 a.m. UTC
Reduce the number of slow byte_copy being used.

Currently byte_copy is used in all cases where the size is smaller than
9*SZREG. When the size is between 2*SZREG and 9*SZREG, use the faster
non-unrolled word_copy instead.

Signed-off-by: Akira Tsukamoto <akira.tsukamoto@gmail.com>
---
 arch/riscv/lib/uaccess.S | 46 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 42 insertions(+), 4 deletions(-)

Comments

kernel test robot Nov. 11, 2021, 11:04 p.m. UTC | #1
Hi Akira,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.15 next-20211111]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Akira-Tsukamoto/__asm_copy_to-from_user-Reduce-more-byte_copy/20211111-161445
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git debe436e77c72fcee804fb867f275e6d31aa999c
config: riscv-buildonly-randconfig-r002-20211111 (attached as .config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 63ef0e17e28827eae53133b3467bdac7d9729318)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv64-linux-gnu
        # https://github.com/0day-ci/linux/commit/cf2e8e9c4e9dc65552ca5ac0c85c198785f5d91c
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Akira-Tsukamoto/__asm_copy_to-from_user-Reduce-more-byte_copy/20211111-161445
        git checkout cf2e8e9c4e9dc65552ca5ac0c85c198785f5d91c
        # save the attached .config to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> <instantiation>:5:14: error: unknown token in expression
    .word 100b, 
                ^
   arch/riscv/lib/uaccess.S:92:2: note: while in macro instantiation
    fixup lw a5, 0(a1)
    ^
>> <instantiation>:5:14: error: unknown token in expression
    .word 100b, 
                ^
   arch/riscv/lib/uaccess.S:94:2: note: while in macro instantiation
    fixup sw a5, 0(a0)
    ^

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot Nov. 12, 2021, 4:23 a.m. UTC | #2
Hi Akira,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v5.15 next-20211111]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Akira-Tsukamoto/__asm_copy_to-from_user-Reduce-more-byte_copy/20211111-161445
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git debe436e77c72fcee804fb867f275e6d31aa999c
config: riscv-allyesconfig (attached as .config)
compiler: riscv64-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/cf2e8e9c4e9dc65552ca5ac0c85c198785f5d91c
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Akira-Tsukamoto/__asm_copy_to-from_user-Reduce-more-byte_copy/20211111-161445
        git checkout cf2e8e9c4e9dc65552ca5ac0c85c198785f5d91c
        # save the attached .config to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=riscv SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   arch/riscv/lib/uaccess.S: Assembler messages:
>> arch/riscv/lib/uaccess.S:92: Warning: zero assumed for missing expression
   arch/riscv/lib/uaccess.S:94: Warning: zero assumed for missing expression


vim +92 arch/riscv/lib/uaccess.S

    17	
    18		/* Enable access to user memory */
    19		li t6, SR_SUM
    20		csrs CSR_STATUS, t6
    21	
    22		/* Save for return value */
    23		mv	t5, a2
    24	
    25		/*
    26		 * Register allocation for code below:
    27		 * a0 - start of uncopied dst
    28		 * a1 - start of uncopied src
    29		 * a2 - size
    30		 * t0 - end of uncopied dst
    31		 */
    32		add	t0, a0, a2
    33	
    34		/*
    35		 * Use byte copy only if too small.
    36		 * SZREG holds 4 for RV32 and 8 for RV64
    37		 * a3 - 2*SZREG is minimum size for word_copy
    38		 *      1*SZREG for aligning dst + 1*SZREG for word_copy
    39		 */
    40		li	a3, 2*SZREG
    41		bltu	a2, a3, .Lbyte_copy_tail
    42	
    43		/*
    44		 * Copy first bytes until dst is aligned to word boundary.
    45		 * a0 - start of dst
    46		 * t1 - start of aligned dst
    47		 */
    48		addi	t1, a0, SZREG-1
    49		andi	t1, t1, ~(SZREG-1)
    50		/* dst is already aligned, skip */
    51		beq	a0, t1, .Lskip_align_dst
    52	1:
    53		/* a5 - one byte for copying data */
    54		fixup lb      a5, 0(a1), 10f
    55		addi	a1, a1, 1	/* src */
    56		fixup sb      a5, 0(a0), 10f
    57		addi	a0, a0, 1	/* dst */
    58		bltu	a0, t1, 1b	/* t1 - start of aligned dst */
    59	
    60	.Lskip_align_dst:
    61		/*
    62		 * Now dst is aligned.
    63		 * Use shift-copy if src is misaligned.
    64		 * Use word-copy if both src and dst are aligned because
    65		 * can not use shift-copy which do not require shifting
    66		 */
    67		/* a1 - start of src */
    68		andi	a3, a1, SZREG-1
    69		bnez	a3, .Lshift_copy
    70	
    71	.Lcheck_size_bulk:
    72		/*
    73		 * Evaluate the size if possible to use unrolled.
    74		 * The word_copy_unlrolled requires larger than 8*SZREG
    75		 */
    76		li	a3, 8*SZREG
    77		add	a4, a0, a3
    78		bltu	a4, t0, .Lword_copy_unlrolled
    79	
    80	.Lword_copy:
    81		/*
    82		 * Both src and dst are aligned
    83		 * Not unrolled word copy with every 1*SZREG iteration
    84		 *
    85		 * a0 - start of aligned dst
    86		 * a1 - start of aligned src
    87		 * t0 - end of aligned dst
    88		 */
    89		bgeu	a0, t0, .Lbyte_copy_tail /* check if end of copy */
    90		addi	t0, t0, -(SZREG) /* not to over run */
    91	1:
  > 92		fixup REG_L   a5, 0(a1)
    93		addi	a1, a1, SZREG
    94		fixup REG_S   a5, 0(a0)
    95		addi	a0, a0, SZREG
    96		bltu	a0, t0, 1b
    97	
    98		addi	t0, t0, SZREG /* revert to original value */
    99		j	.Lbyte_copy_tail
   100	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 63bc691cff91..50013479cb86 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -34,8 +34,10 @@  ENTRY(__asm_copy_from_user)
 	/*
 	 * Use byte copy only if too small.
 	 * SZREG holds 4 for RV32 and 8 for RV64
+	 * a3 - 2*SZREG is minimum size for word_copy
+	 *      1*SZREG for aligning dst + 1*SZREG for word_copy
 	 */
-	li	a3, 9*SZREG /* size must be larger than size in word_copy */
+	li	a3, 2*SZREG
 	bltu	a2, a3, .Lbyte_copy_tail
 
 	/*
@@ -66,9 +68,40 @@  ENTRY(__asm_copy_from_user)
 	andi	a3, a1, SZREG-1
 	bnez	a3, .Lshift_copy
 
+.Lcheck_size_bulk:
+	/*
+	 * Check whether enough bytes remain to use the unrolled copy.
+	 * word_copy_unlrolled requires more than 8*SZREG bytes.
+	 */
+	li	a3, 8*SZREG
+	add	a4, a0, a3	/* a4 = start of uncopied dst + 8*SZREG */
+	bltu	a4, t0, .Lword_copy_unlrolled	/* taken if a4 is below end of dst */
+
 .Lword_copy:
-        /*
-	 * Both src and dst are aligned, unrolled word copy
+	/*
+	 * Both src and dst are aligned: plain (not unrolled) word copy,
+	 * one 1*SZREG word per iteration. Each fixup passes 10f as its
+	 * label, like the other fixup uses in this routine.
+	 *
+	 * a0 - start of aligned dst, a1 - start of aligned src
+	 * t0 - end of dst; lowered by SZREG while looping
+	 */
+	addi	t0, t0, -(SZREG) /* last dst address that fits a word */
+	bltu	t0, a0, .Lword_copy_end /* less than 1*SZREG left */
+1:
+	fixup REG_L   a5, 0(a1), 10f
+	addi	a1, a1, SZREG
+	fixup REG_S   a5, 0(a0), 10f
+	addi	a0, a0, SZREG
+	bltu	a0, t0, 1b
+.Lword_copy_end:
+	addi	t0, t0, SZREG /* revert to original value */
+	j	.Lbyte_copy_tail
+
+.Lword_copy_unlrolled:
+	/*
+	 * Both src and dst are aligned
+	 * Unrolled word copy with every 8*SZREG iteration
 	 *
 	 * a0 - start of aligned dst
 	 * a1 - start of aligned src
@@ -97,7 +130,12 @@  ENTRY(__asm_copy_from_user)
 	bltu	a0, t0, 2b
 
 	addi	t0, t0, 8*SZREG /* revert to original value */
-	j	.Lbyte_copy_tail
+
+	/*
+	 * The remainder might be large enough for word_copy, which avoids
+	 * falling back to the slow byte copy
+	 */
+	j	.Lcheck_size_bulk
 
 .Lshift_copy: