
Fixed: Misaligned memory access. Fixed pointer comparison.

Message ID 20220120233427.262098-1-michael@michaelkloos.com (mailing list archive)
State New, archived
Series Fixed: Misaligned memory access. Fixed pointer comparison.

Commit Message

Michael T. Kloos Jan. 20, 2022, 11:34 p.m. UTC
Rewrote the riscv memmove() assembly implementation.  The
previous implementation did not check memory alignment and it
compared 2 pointers with a signed comparison.  The misaligned
memory access would cause the kernel to crash on systems that
did not emulate it in firmware and did not support it in hardware.
Firmware emulation is slow and may not exist.  Additionally,
hardware support may not exist and would likely still run slower
than aligned accesses even if it did.  The RISC-V spec does not
guarantee that support for misaligned memory accesses will exist.
It should not be depended on.
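
For illustration only (not part of the patch): a small C sketch of why a
signed interpretation of addresses misorders pointers whose top bit is
set, such as kernel virtual addresses on 64-bit RISC-V.  This is what
the old implementation's slt-based comparison effectively did; the
rewrite branches with the unsigned bltu instead.  The example addresses
are hypothetical.

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t low_addr  = 0x0000000080000000ULL; /* example low address          */
		uint64_t high_addr = 0xffffffe000000000ULL; /* example kernel-range address */

		/* Unsigned comparison (what bltu does): correct ordering. */
		printf("unsigned: %d\n", low_addr < high_addr);  /* prints 1 */

		/* Signed comparison (what slt does): high_addr looks negative. */
		printf("signed:   %d\n",
		       (int64_t)low_addr < (int64_t)high_addr); /* prints 0 */

		return 0;
	}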

This patch now checks for the maximum granularity of co-alignment
between the pointers and copies with that granularity, using
single-byte copies for any unaligned data at either end.  It also
now uses unsigned comparison for the pointers.
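
Again for illustration only: a minimal C sketch of the co-alignment
check, mirroring the xor/andi tests in the new assembly.  The function
name is hypothetical; the real code keeps everything in registers.

	#include <stddef.h>
	#include <stdint.h>

	/*
	 * XOR the two pointers and test the low bits: a clear bit means
	 * src and dest share that alignment offset, so copies of that
	 * width stay aligned on both sides once the leading unaligned
	 * bytes have been handled.
	 */
	static size_t coalign_granularity(const void *dest, const void *src)
	{
		uintptr_t diff = (uintptr_t)dest ^ (uintptr_t)src;

		if ((diff & 0x7) == 0)
			return 8;  /* doubleword copy (64-bit, SZREG >= 8) */
		if ((diff & 0x3) == 0)
			return 4;  /* word copy */
		if ((diff & 0x1) == 0)
			return 2;  /* half-word copy */
		return 1;          /* fall back to byte copy */
	}

Any leading or trailing bytes not aligned to the chosen width are still
copied one byte at a time, as the copy_bytes_until_aligned_* loops in
the patch do.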

Added half-word and, if built for 64-bit, double-word copy.

Migrated to the newer assembler annotations from the now deprecated
ones.

Signed-off-by: Michael T. Kloos <michael@michaelkloos.com>
---
 arch/riscv/lib/memmove.S | 264 +++++++++++++++++++++++++++++++--------
 1 file changed, 210 insertions(+), 54 deletions(-)

Comments

kernel test robot Jan. 22, 2022, 8:15 p.m. UTC | #1
Hi "Michael,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.16 next-20220121]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Michael-T-Kloos/Fixed-Misaligned-memory-access-Fixed-pointer-comparison/20220121-074148
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 2c271fe77d52a0555161926c232cd5bc07178b39
config: riscv-randconfig-r021-20220120 (https://download.01.org/0day-ci/archive/20220123/202201230123.GN7Kvrlq-lkp@intel.com/config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 7b3d30728816403d1fd73cc5082e9fb761262bce)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv64-linux-gnu
        # https://github.com/0day-ci/linux/commit/c1930afcc76babc4e2313f67d0fe103a26f07712
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Michael-T-Kloos/Fixed-Misaligned-memory-access-Fixed-pointer-comparison/20220121-074148
        git checkout c1930afcc76babc4e2313f67d0fe103a26f07712
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> arch/riscv/lib/memmove.S:113:16: error: operand must be a symbol with %lo/%pcrel_lo/%tprel_lo modifier or an integer in the range [-2048, 2047]
    1: jalr zero, (t0)
                  ^


vim +113 arch/riscv/lib/memmove.S

     8	
     9	SYM_FUNC_START(__memmove)
    10	SYM_FUNC_START_ALIAS(memmove)
    11		/*
    12		 * Returns
    13		 *   a0 - dest
    14		 *
    15		 * Parameters
    16		 *   a0 - Inclusive first byte of dest
    17		 *   a1 - Inclusive first byte of src
    18		 *   a2 - Length of copy
    19		 *
    20		 * Because the return matches the parameter register a0,
    21		 * we will not clobber or modify that register.
    22		 */
    23	
    24		/* Return if nothing to do */
    25		beq a0, a1, exit_memmove
    26		beqz a2, exit_memmove
    27	
    28		/*
    29		 * Register Uses
    30		 *   a3 - Inclusive first multibyte of src
    31		 *   a4 - Non-inclusive last multibyte of src
    32		 *   a5 - Non-inclusive last byte of src
    33		 *
    34		 * During the copy
    35		 *      Forward Copy: a1 - Index counter of src
    36		 *      Reverse Copy: a5 - Index counter of src
    37		 *   Both Copy Modes: t2 - Index counter of dest
    38		 *   Both Copy Modes: t1 - Temporary for load-store
    39		 *   Both Copy Modes: t0 - Link
    40		 */
    41	
    42		/*
    43		 * Solve for last byte now.  We will solve the rest when
    44		 * they are needed for the copy because either byte copy
    45		 * does not require any of the others (Wasted effort if
    46		 * byte copy gets used) or we do not yet have enough
    47		 * information to solve them.
    48		 */
    49		add  a5, a1, a2
    50	
    51		/*
    52		 * Byte copy if copying less than SZREG bytes.
    53		 * This can cause problems with the bulk copy
    54		 * implementation below and is small enough not
    55		 * to bother.
    56		 */
    57		andi t0, a2, -SZREG
    58		beqz t0, byte_copy
    59	
    60		/* Determine the maximum granularity of co-alignment. */
    61		xor  t0, a0, a1
    62	#if   SZREG >= 8
    63		andi t1, t0, 0x7
    64		beqz t1, doubleword_copy
    65	#endif
    66		andi t1, t0, 0x3
    67		beqz t1, word_copy
    68		andi t1, t0, 0x1
    69		beqz t1, halfword_copy
    70		/* Fall through to byte copy if nothing larger is found. */
    71	
    72	byte_copy:
    73		bltu a1, a0, byte_copy_reverse
    74	
    75	byte_copy_forward:
    76		add  t2, a0, zero
    77	byte_copy_fw_callin:
    78		beq  a1, a5, exit_memmove
    79		lb   t1, (a1)
    80		sb   t1, (t2)
    81		addi a1, a1, 1
    82		addi t2, t2, 1
    83		j byte_copy_fw_callin
    84	
    85	byte_copy_reverse:
    86		add  t2, a0, a2
    87	byte_copy_rv_callin:
    88		beq  a1, a5, exit_memmove
    89		addi a5, a5, -1
    90		addi t2, t2, -1
    91		lb   t1, (a5)
    92		sb   t1, (t2)
    93		j byte_copy_rv_callin
    94	
    95	exit_memmove:
    96		ret
    97	
    98	copy_bytes_until_aligned_fw:
    99		beq  a1, a3, 1f /* Reuse the return from the other copy loop */
   100		lb   t1, (a1)
   101		sb   t1, (t2)
   102		addi a1, a1, 1
   103		addi t2, t2, 1
   104		j copy_bytes_until_aligned_fw
   105	
   106	copy_bytes_until_aligned_rv:
   107		beq  a4, a5, 1f
   108		addi a5, a5, -1
   109		addi t2, t2, -1
   110		lb   t1, (a5)
   111		sb   t1, (t2)
   112		j copy_bytes_until_aligned_rv
 > 113		1: jalr zero, (t0) /* Return */
   114	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

Patch

diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S
index 07d1d2152ba5..df7cd7a559ae 100644
--- a/arch/riscv/lib/memmove.S
+++ b/arch/riscv/lib/memmove.S
@@ -1,64 +1,220 @@ 
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 Michael T. Kloos <michael@michaelkloos.com>
+ */
 
 #include <linux/linkage.h>
 #include <asm/asm.h>
 
-ENTRY(__memmove)
-WEAK(memmove)
-        move    t0, a0
-        move    t1, a1
+SYM_FUNC_START(__memmove)
+SYM_FUNC_START_ALIAS(memmove)
+	/*
+	 * Returns
+	 *   a0 - dest
+	 *
+	 * Parameters
+	 *   a0 - Inclusive first byte of dest
+	 *   a1 - Inclusive first byte of src
+	 *   a2 - Length of copy
+	 *
+	 * Because the return matches the parameter register a0,
+	 * we will not clobber or modify that register.
+	 */
 
-        beq     a0, a1, exit_memcpy
-        beqz    a2, exit_memcpy
-        srli    t2, a2, 0x2
+	/* Return if nothing to do */
+	beq a0, a1, exit_memmove
+	beqz a2, exit_memmove
 
-        slt     t3, a0, a1
-        beqz    t3, do_reverse
+	/*
+	 * Register Uses
+	 *   a3 - Inclusive first multibyte of src
+	 *   a4 - Non-inclusive last multibyte of src
+	 *   a5 - Non-inclusive last byte of src
+	 *
+	 * During the copy
+	 *      Forward Copy: a1 - Index counter of src
+	 *      Reverse Copy: a5 - Index counter of src
+	 *   Both Copy Modes: t2 - Index counter of dest
+	 *   Both Copy Modes: t1 - Temporary for load-store
+	 *   Both Copy Modes: t0 - Link
+	 */
 
-        andi    a2, a2, 0x3
-        li      t4, 1
-        beqz    t2, byte_copy
+	/*
+	 * Solve for last byte now.  We will solve the rest when
+	 * they are needed for the copy because either byte copy
+	 * does not require any of the others (Wasted effort if
+	 * byte copy gets used) or we do not yet have enough
+	 * information to solve them.
+	 */
+	add  a5, a1, a2
 
-word_copy:
-        lw      t3, 0(a1)
-        addi    t2, t2, -1
-        addi    a1, a1, 4
-        sw      t3, 0(a0)
-        addi    a0, a0, 4
-        bnez    t2, word_copy
-        beqz    a2, exit_memcpy
-        j       byte_copy
-
-do_reverse:
-        add     a0, a0, a2
-        add     a1, a1, a2
-        andi    a2, a2, 0x3
-        li      t4, -1
-        beqz    t2, reverse_byte_copy
-
-reverse_word_copy:
-        addi    a1, a1, -4
-        addi    t2, t2, -1
-        lw      t3, 0(a1)
-        addi    a0, a0, -4
-        sw      t3, 0(a0)
-        bnez    t2, reverse_word_copy
-        beqz    a2, exit_memcpy
-
-reverse_byte_copy:
-        addi    a0, a0, -1
-        addi    a1, a1, -1
+	/*
+	 * Byte copy if copying less than SZREG bytes.
+	 * This can cause problems with the bulk copy
+	 * implementation below and is small enough not
+	 * to bother.
+	 */
+	andi t0, a2, -SZREG
+	beqz t0, byte_copy
+
+	/* Determine the maximum granularity of co-alignment. */
+	xor  t0, a0, a1
+#if   SZREG >= 8
+	andi t1, t0, 0x7
+	beqz t1, doubleword_copy
+#endif
+	andi t1, t0, 0x3
+	beqz t1, word_copy
+	andi t1, t0, 0x1
+	beqz t1, halfword_copy
+	/* Fall through to byte copy if nothing larger is found. */
 
 byte_copy:
-        lb      t3, 0(a1)
-        addi    a2, a2, -1
-        sb      t3, 0(a0)
-        add     a1, a1, t4
-        add     a0, a0, t4
-        bnez    a2, byte_copy
-
-exit_memcpy:
-        move a0, t0
-        move a1, t1
-        ret
-END(__memmove)
+	bltu a1, a0, byte_copy_reverse
+
+byte_copy_forward:
+	add  t2, a0, zero
+byte_copy_fw_callin:
+	beq  a1, a5, exit_memmove
+	lb   t1, (a1)
+	sb   t1, (t2)
+	addi a1, a1, 1
+	addi t2, t2, 1
+	j byte_copy_fw_callin
+
+byte_copy_reverse:
+	add  t2, a0, a2
+byte_copy_rv_callin:
+	beq  a1, a5, exit_memmove
+	addi a5, a5, -1
+	addi t2, t2, -1
+	lb   t1, (a5)
+	sb   t1, (t2)
+	j byte_copy_rv_callin
+
+exit_memmove:
+	ret
+
+copy_bytes_until_aligned_fw:
+	beq  a1, a3, 1f /* Reuse the return from the other copy loop */
+	lb   t1, (a1)
+	sb   t1, (t2)
+	addi a1, a1, 1
+	addi t2, t2, 1
+	j copy_bytes_until_aligned_fw
+
+copy_bytes_until_aligned_rv:
+	beq  a4, a5, 1f
+	addi a5, a5, -1
+	addi t2, t2, -1
+	lb   t1, (a5)
+	sb   t1, (t2)
+	j copy_bytes_until_aligned_rv
+	1: jalr zero, (t0) /* Return */
+
+#if   SZREG >= 8
+doubleword_copy:
+	andi a3, a1, -8
+	andi a4, a5, -8
+	beq  a3, a1, 1f
+	addi a3, a3, 8
+	1:
+	bltu a1, a0, doubleword_copy_reverse
+
+doubleword_copy_forward:
+	add  t2, a0, zero
+
+	jal t0, copy_bytes_until_aligned_fw
+
+	1:
+	beq  a1, a4, byte_copy_fw_callin
+	ld   t1, (a1)
+	sd   t1, (t2)
+	addi a1, a1, 8
+	addi t2, t2, 8
+	j 1b
+
+doubleword_copy_reverse:
+	add  t2, a0, a2
+
+	jal t0, copy_bytes_until_aligned_rv
+
+	1:
+	beq  a3, a5, byte_copy_rv_callin
+	addi a5, a5, -8
+	addi t2, t2, -8
+	ld   t1, (a5)
+	sd   t1, (t2)
+	j 1b
+#endif
+
+word_copy:
+	andi a3, a1, -4
+	andi a4, a5, -4
+	beq  a3, a1, 1f
+	addi a3, a3, 4
+	1:
+	bltu a1, a0, word_copy_reverse
+
+word_copy_forward:
+	add  t2, a0, zero
+
+	jal t0, copy_bytes_until_aligned_fw
+
+	1:
+	beq  a1, a4, byte_copy_fw_callin
+	lw   t1, (a1)
+	sw   t1, (t2)
+	addi a1, a1, 4
+	addi t2, t2, 4
+	j 1b
+
+word_copy_reverse:
+	add  t2, a0, a2
+
+	jal t0, copy_bytes_until_aligned_rv
+
+	1:
+	beq  a3, a5, byte_copy_rv_callin
+	addi a5, a5, -4
+	addi t2, t2, -4
+	lw   t1, (a5)
+	sw   t1, (t2)
+	j 1b
+
+halfword_copy:
+	andi a3, a1, -2
+	andi a4, a5, -2
+	beq  a3, a1, 1f
+	addi a3, a3, 2
+	1:
+	bltu a1, a0, halfword_reverse
+
+halfword_forward:
+	add  t2, a0, zero
+
+	jal t0, copy_bytes_until_aligned_fw
+
+	1:
+	beq  a1, a4, byte_copy_fw_callin
+	lh   t1, (a1)
+	sh   t1, (t2)
+	addi a1, a1, 2
+	addi t2, t2, 2
+	j 1b
+
+halfword_reverse:
+	add  t2, a0, a2
+
+	jal t0, copy_bytes_until_aligned_rv
+
+	1:
+	beq  a3, a5, byte_copy_rv_callin
+	addi a5, a5, -2
+	addi t2, t2, -2
+	lh   t1, (a5)
+	sh   t1, (t2)
+	j 1b
+
+SYM_FUNC_END_ALIAS(memmove)
+SYM_FUNC_END(__memmove)
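
For reference, a rough C model (not the kernel code) of the overlap
handling the assembly implements: the bltu a1, a0 branches switch to a
reverse copy only when src sits below dest, so overlapping bytes are
read before they are overwritten.  Co-alignment handling is omitted
here; this model does byte copies only.

	#include <stddef.h>
	#include <stdint.h>

	static void *memmove_model(void *dest, const void *src, size_t n)
	{
		unsigned char *d = dest;
		const unsigned char *s = src;

		if ((uintptr_t)s < (uintptr_t)d) {
			/* src below dest: copy backwards so the overlapping
			 * tail of src is read before it is clobbered. */
			while (n--)
				d[n] = s[n];
		} else {
			/* src at or above dest: a forward copy is safe. */
			size_t i;

			for (i = 0; i < n; i++)
				d[i] = s[i];
		}
		return dest;
	}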