diff mbox series

MIPS: Introduce aligned IO memory operations

Message ID 20200114122343.163685-1-jiaxun.yang@flygoat.com (mailing list archive)
State Deferred
Headers show
Series MIPS: Introduce aligned IO memory operations | expand

Commit Message

Jiaxun Yang Jan. 14, 2020, 12:23 p.m. UTC
Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
instructions like lwl or lwr in IO memory access. However, our current
IO memcpy/memset is wired to the generic implementation, which leads
to a fatal result.

Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
---
 arch/mips/Kconfig          |  4 ++
 arch/mips/include/asm/io.h | 10 ++++
 arch/mips/kernel/Makefile  |  2 +-
 arch/mips/kernel/io.c      | 98 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 arch/mips/kernel/io.c

Comments

Philippe Mathieu-Daudé Jan. 18, 2020, 2:41 p.m. UTC | #1
Hi Jiaxun,

On Tue, Jan 14, 2020 at 1:24 PM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
>
> Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
> instructions like lwl or lwr in IO memory access. However, our current
> IO memcpy/memset is wired to the generic implementation, which leads
> to a fatal result.

Do you have a handy reproducer to try with QEMU/KVM?

> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> ---
>  arch/mips/Kconfig          |  4 ++
>  arch/mips/include/asm/io.h | 10 ++++
>  arch/mips/kernel/Makefile  |  2 +-
>  arch/mips/kernel/io.c      | 98 ++++++++++++++++++++++++++++++++++++++
>  4 files changed, 113 insertions(+), 1 deletion(-)
>  create mode 100644 arch/mips/kernel/io.c
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index 8b0cd692a43f..15a331aa23a2 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -1450,6 +1450,7 @@ config CPU_LOONGSON64
>         select CPU_SUPPORTS_HIGHMEM
>         select CPU_SUPPORTS_HUGEPAGES
>         select CPU_SUPPORTS_MSA
> +       select CPU_NEEDS_ALIGNED_IO
>         select CPU_HAS_LOAD_STORE_LR
>         select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
>         select CPU_MIPSR2_IRQ_VI
> @@ -2598,6 +2599,9 @@ config CPU_HAS_LOAD_STORE_LR
>           LWL, LWR, SWL, SWR (Load/store word left/right).
>           LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems).
>
> +config CPU_NEEDS_ALIGNED_IO
> +       bool
> +
>  #
>  # Vectored interrupt mode is an R2 feature
>  #
> diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
> index 3f6ce74335b4..3b0eb4941f23 100644
> --- a/arch/mips/include/asm/io.h
> +++ b/arch/mips/include/asm/io.h
> @@ -577,6 +577,15 @@ BUILDSTRING(l, u32)
>  BUILDSTRING(q, u64)
>  #endif
>
> +#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
> +extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
> +extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
> +extern void __memset_io(volatile void __iomem *, int, size_t);
> +
> +#define memset_io(c, v, l)     __memset_io((c), (v), (l))
> +#define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l))
> +#define memcpy_toio(c, a, l)   __memcpy_toio((c), (a), (l))
> +#else
>  static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
>  {
>         memset((void __force *) addr, val, count);
> @@ -589,6 +598,7 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int
>  {
>         memcpy((void __force *) dst, src, count);
>  }
> +#endif
>
>  /*
>   * The caches on some architectures aren't dma-coherent and have need to
> diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
> index d6e97df51cfb..b07b97b9385e 100644
> --- a/arch/mips/kernel/Makefile
> +++ b/arch/mips/kernel/Makefile
> @@ -8,7 +8,7 @@ extra-y         := head.o vmlinux.lds
>  obj-y          += cmpxchg.o cpu-probe.o branch.o elf.o entry.o genex.o idle.o irq.o \
>                    process.o prom.o ptrace.o reset.o setup.o signal.o \
>                    syscall.o time.o topology.o traps.o unaligned.o watch.o \
> -                  vdso.o cacheinfo.o
> +                  vdso.o cacheinfo.o io.o
>
>  ifdef CONFIG_FUNCTION_TRACER
>  CFLAGS_REMOVE_ftrace.o = -pg
> diff --git a/arch/mips/kernel/io.c b/arch/mips/kernel/io.c
> new file mode 100644
> index 000000000000..ca105aa76d4d
> --- /dev/null
> +++ b/arch/mips/kernel/io.c
> @@ -0,0 +1,98 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +#include <linux/export.h>
> +#include <linux/types.h>
> +#include <linux/io.h>
> +
> +#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
> +
> +#if defined(CONFIG_64BIT)
> +#define IO_LONG_READ   __raw_readq
> +#define IO_LONG_WRITE  __raw_writeq
> +#define IO_LONG_SIZE   8
> +#else
> +#define IO_LONG_READ   __raw_readl
> +#define IO_LONG_WRITE  __raw_writel
> +#define IO_LONG_SIZE   4
> +#endif
> +
> +void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
> +{
> +       while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
> +               !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
> +               *(u8 *)to = __raw_readb(from);
> +               from++;
> +               to++;
> +               count--;
> +       }
> +
> +       while (count >= IO_LONG_SIZE) {
> +               *(unsigned long *)to = IO_LONG_READ(from);
> +               from += IO_LONG_SIZE;
> +               to += IO_LONG_SIZE;
> +               count -= IO_LONG_SIZE;
> +       }
> +
> +       while (count) {
> +               *(u8 *)to = __raw_readb(from);
> +               from++;
> +               to++;
> +               count--;
> +       }
> +}
> +EXPORT_SYMBOL(__memcpy_fromio);
> +
> +void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
> +{
> +       while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
> +               !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
> +               __raw_writeb(*(u8 *)from, to);
> +               from++;
> +               to++;
> +               count--;
> +       }
> +
> +       while (count >= IO_LONG_SIZE) {
> +               IO_LONG_WRITE(*(unsigned long *)from, to);
> +               from += IO_LONG_SIZE;
> +               to += IO_LONG_SIZE;
> +               count -= IO_LONG_SIZE;
> +       }
> +
> +       while (count) {
> +               __raw_writeb(*(u8 *)from, to);
> +               from++;
> +               to++;
> +               count--;
> +       }
> +}
> +EXPORT_SYMBOL(__memcpy_toio);
> +
> +void __memset_io(volatile void __iomem *dst, int c, size_t count)
> +{
> +       unsigned long lc = (u8)c;
> +       int i;
> +
> +       for (i = 1; i < IO_LONG_SIZE; i++)
> +               lc |= (u8)c << (i * BITS_PER_BYTE);
> +
> +       while (count && !IS_ALIGNED((unsigned long)dst, IO_LONG_SIZE)) {
> +               __raw_writeb((u8)c, dst);
> +               dst++;
> +               count--;
> +       }
> +
> +       while (count >= IO_LONG_SIZE) {
> +               IO_LONG_WRITE(lc, dst);
> +               dst += IO_LONG_SIZE;
> +               count -= IO_LONG_SIZE;
> +       }
> +
> +       while (count) {
> +               __raw_writeb(c, dst);
> +               dst++;
> +               count--;
> +       }
> +}
> +EXPORT_SYMBOL(__memset_io);
> +#endif
> --
> 2.24.1
>
Jiaxun Yang Jan. 18, 2020, 3:13 p.m. UTC | #2
18.01.2020, 22:41, "Philippe Mathieu-Daudé" <f4bug@amsat.org>:
> Hi Jiaxun,
>
> On Tue, Jan 14, 2020 at 1:24 PM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
>>  Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
>>  instructions like lwl or lwr in IO memory access. However, our current
>>  IO memcpy/memset is wired to the generic implementation, which leads
>>  to a fatal result.
>
> Do you have a handy reproducer to try with QEMU/KVM?

It was triggered by QXL DRM driver when I was working on KVM for Loongson
with Huacai.

See arch/mips/kvm/emulate.c: we didn't have trap emulation for unaligned
instructions on MMIO. You can construct a simple unaligned memcpy_fromio
case to reproduce it.

Thanks.

>
>>  Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
>>  ---
>>   arch/mips/Kconfig | 4 ++
>>   arch/mips/include/asm/io.h | 10 ++++
>>   arch/mips/kernel/Makefile | 2 +-
>>   arch/mips/kernel/io.c | 98 ++++++++++++++++++++++++++++++++++++++
>>   4 files changed, 113 insertions(+), 1 deletion(-)
>>   create mode 100644 arch/mips/kernel/io.c
>>
>>  diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
>>  index 8b0cd692a43f..15a331aa23a2 100644
>>  --- a/arch/mips/Kconfig
>>  +++ b/arch/mips/Kconfig
>>  @@ -1450,6 +1450,7 @@ config CPU_LOONGSON64
>>          select CPU_SUPPORTS_HIGHMEM
>>          select CPU_SUPPORTS_HUGEPAGES
>>          select CPU_SUPPORTS_MSA
>>  + select CPU_NEEDS_ALIGNED_IO
>>          select CPU_HAS_LOAD_STORE_LR
>>          select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
>>          select CPU_MIPSR2_IRQ_VI
>>  @@ -2598,6 +2599,9 @@ config CPU_HAS_LOAD_STORE_LR
>>            LWL, LWR, SWL, SWR (Load/store word left/right).
>>            LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems).
>>
>>  +config CPU_NEEDS_ALIGNED_IO
>>  + bool
>>  +
>>   #
>>   # Vectored interrupt mode is an R2 feature
>>   #
>>  diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
>>  index 3f6ce74335b4..3b0eb4941f23 100644
>>  --- a/arch/mips/include/asm/io.h
>>  +++ b/arch/mips/include/asm/io.h
>>  @@ -577,6 +577,15 @@ BUILDSTRING(l, u32)
>>   BUILDSTRING(q, u64)
>>   #endif
>>
>>  +#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
>>  +extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
>>  +extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
>>  +extern void __memset_io(volatile void __iomem *, int, size_t);
>>  +
>>  +#define memset_io(c, v, l) __memset_io((c), (v), (l))
>>  +#define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l))
>>  +#define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l))
>>  +#else
>>   static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
>>   {
>>          memset((void __force *) addr, val, count);
>>  @@ -589,6 +598,7 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int
>>   {
>>          memcpy((void __force *) dst, src, count);
>>   }
>>  +#endif
>>
>>   /*
>>    * The caches on some architectures aren't dma-coherent and have need to
>>  diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
>>  index d6e97df51cfb..b07b97b9385e 100644
>>  --- a/arch/mips/kernel/Makefile
>>  +++ b/arch/mips/kernel/Makefile
>>  @@ -8,7 +8,7 @@ extra-y := head.o vmlinux.lds
>>   obj-y += cmpxchg.o cpu-probe.o branch.o elf.o entry.o genex.o idle.o irq.o \
>>                     process.o prom.o ptrace.o reset.o setup.o signal.o \
>>                     syscall.o time.o topology.o traps.o unaligned.o watch.o \
>>  - vdso.o cacheinfo.o
>>  + vdso.o cacheinfo.o io.o
>>
>>   ifdef CONFIG_FUNCTION_TRACER
>>   CFLAGS_REMOVE_ftrace.o = -pg
>>  diff --git a/arch/mips/kernel/io.c b/arch/mips/kernel/io.c
>>  new file mode 100644
>>  index 000000000000..ca105aa76d4d
>>  --- /dev/null
>>  +++ b/arch/mips/kernel/io.c
>>  @@ -0,0 +1,98 @@
>>  +// SPDX-License-Identifier: GPL-2.0-or-later
>>  +
>>  +#include <linux/export.h>
>>  +#include <linux/types.h>
>>  +#include <linux/io.h>
>>  +
>>  +#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
>>  +
>>  +#if defined(CONFIG_64BIT)
>>  +#define IO_LONG_READ __raw_readq
>>  +#define IO_LONG_WRITE __raw_writeq
>>  +#define IO_LONG_SIZE 8
>>  +#else
>>  +#define IO_LONG_READ __raw_readl
>>  +#define IO_LONG_WRITE __raw_writel
>>  +#define IO_LONG_SIZE 4
>>  +#endif
>>  +
>>  +void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
>>  +{
>>  + while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
>>  + !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
>>  + *(u8 *)to = __raw_readb(from);
>>  + from++;
>>  + to++;
>>  + count--;
>>  + }
>>  +
>>  + while (count >= IO_LONG_SIZE) {
>>  + *(unsigned long *)to = IO_LONG_READ(from);
>>  + from += IO_LONG_SIZE;
>>  + to += IO_LONG_SIZE;
>>  + count -= IO_LONG_SIZE;
>>  + }
>>  +
>>  + while (count) {
>>  + *(u8 *)to = __raw_readb(from);
>>  + from++;
>>  + to++;
>>  + count--;
>>  + }
>>  +}
>>  +EXPORT_SYMBOL(__memcpy_fromio);
>>  +
>>  +void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
>>  +{
>>  + while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
>>  + !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
>>  + __raw_writeb(*(u8 *)from, to);
>>  + from++;
>>  + to++;
>>  + count--;
>>  + }
>>  +
>>  + while (count >= IO_LONG_SIZE) {
>>  + IO_LONG_WRITE(*(unsigned long *)from, to);
>>  + from += IO_LONG_SIZE;
>>  + to += IO_LONG_SIZE;
>>  + count -= IO_LONG_SIZE;
>>  + }
>>  +
>>  + while (count) {
>>  + __raw_writeb(*(u8 *)from, to);
>>  + from++;
>>  + to++;
>>  + count--;
>>  + }
>>  +}
>>  +EXPORT_SYMBOL(__memcpy_toio);
>>  +
>>  +void __memset_io(volatile void __iomem *dst, int c, size_t count)
>>  +{
>>  + unsigned long lc = (u8)c;
>>  + int i;
>>  +
>>  + for (i = 1; i < IO_LONG_SIZE; i++)
>>  + lc |= (u8)c << (i * BITS_PER_BYTE);
>>  +
>>  + while (count && !IS_ALIGNED((unsigned long)dst, IO_LONG_SIZE)) {
>>  + __raw_writeb((u8)c, dst);
>>  + dst++;
>>  + count--;
>>  + }
>>  +
>>  + while (count >= IO_LONG_SIZE) {
>>  + IO_LONG_WRITE(lc, dst);
>>  + dst += IO_LONG_SIZE;
>>  + count -= IO_LONG_SIZE;
>>  + }
>>  +
>>  + while (count) {
>>  + __raw_writeb(c, dst);
>>  + dst++;
>>  + count--;
>>  + }
>>  +}
>>  +EXPORT_SYMBOL(__memset_io);
>>  +#endif
>>  --
>>  2.24.1
Paul Burton Jan. 22, 2020, 6:45 p.m. UTC | #3
Hi Jiaxun,

On Tue, Jan 14, 2020 at 08:23:43PM +0800, Jiaxun Yang wrote:
> Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
> instructions like lwl or lwr in IO memory access. However, our current
> IO memcpy/memset is wired to the generic implementation, which leads
> to a fatal result.

Hmm, I wonder if we should just do this unconditionally on all systems.
I can't think of a reason it'd ever be a good idea to use lwl/lwr on an
MMIO device. Any thoughts on that?

Thanks,
    Paul

> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> ---
>  arch/mips/Kconfig          |  4 ++
>  arch/mips/include/asm/io.h | 10 ++++
>  arch/mips/kernel/Makefile  |  2 +-
>  arch/mips/kernel/io.c      | 98 ++++++++++++++++++++++++++++++++++++++
>  4 files changed, 113 insertions(+), 1 deletion(-)
>  create mode 100644 arch/mips/kernel/io.c
> 
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index 8b0cd692a43f..15a331aa23a2 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -1450,6 +1450,7 @@ config CPU_LOONGSON64
>  	select CPU_SUPPORTS_HIGHMEM
>  	select CPU_SUPPORTS_HUGEPAGES
>  	select CPU_SUPPORTS_MSA
> +	select CPU_NEEDS_ALIGNED_IO
>  	select CPU_HAS_LOAD_STORE_LR
>  	select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
>  	select CPU_MIPSR2_IRQ_VI
> @@ -2598,6 +2599,9 @@ config CPU_HAS_LOAD_STORE_LR
>  	  LWL, LWR, SWL, SWR (Load/store word left/right).
>  	  LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems).
>  
> +config CPU_NEEDS_ALIGNED_IO
> +	bool
> +
>  #
>  # Vectored interrupt mode is an R2 feature
>  #
> diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
> index 3f6ce74335b4..3b0eb4941f23 100644
> --- a/arch/mips/include/asm/io.h
> +++ b/arch/mips/include/asm/io.h
> @@ -577,6 +577,15 @@ BUILDSTRING(l, u32)
>  BUILDSTRING(q, u64)
>  #endif
>  
> +#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
> +extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
> +extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
> +extern void __memset_io(volatile void __iomem *, int, size_t);
> +
> +#define memset_io(c, v, l)	__memset_io((c), (v), (l))
> +#define memcpy_fromio(a, c, l)	__memcpy_fromio((a), (c), (l))
> +#define memcpy_toio(c, a, l)	__memcpy_toio((c), (a), (l))
> +#else
>  static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
>  {
>  	memset((void __force *) addr, val, count);
> @@ -589,6 +598,7 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int
>  {
>  	memcpy((void __force *) dst, src, count);
>  }
> +#endif
>  
>  /*
>   * The caches on some architectures aren't dma-coherent and have need to
> diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
> index d6e97df51cfb..b07b97b9385e 100644
> --- a/arch/mips/kernel/Makefile
> +++ b/arch/mips/kernel/Makefile
> @@ -8,7 +8,7 @@ extra-y		:= head.o vmlinux.lds
>  obj-y		+= cmpxchg.o cpu-probe.o branch.o elf.o entry.o genex.o idle.o irq.o \
>  		   process.o prom.o ptrace.o reset.o setup.o signal.o \
>  		   syscall.o time.o topology.o traps.o unaligned.o watch.o \
> -		   vdso.o cacheinfo.o
> +		   vdso.o cacheinfo.o io.o
>  
>  ifdef CONFIG_FUNCTION_TRACER
>  CFLAGS_REMOVE_ftrace.o = -pg
> diff --git a/arch/mips/kernel/io.c b/arch/mips/kernel/io.c
> new file mode 100644
> index 000000000000..ca105aa76d4d
> --- /dev/null
> +++ b/arch/mips/kernel/io.c
> @@ -0,0 +1,98 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +#include <linux/export.h>
> +#include <linux/types.h>
> +#include <linux/io.h>
> +
> +#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
> +
> +#if defined(CONFIG_64BIT)
> +#define IO_LONG_READ	__raw_readq
> +#define IO_LONG_WRITE	__raw_writeq
> +#define IO_LONG_SIZE	8
> +#else
> +#define IO_LONG_READ	__raw_readl
> +#define IO_LONG_WRITE	__raw_writel
> +#define IO_LONG_SIZE	4
> +#endif
> +
> +void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
> +{
> +	while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
> +		!IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
> +		*(u8 *)to = __raw_readb(from);
> +		from++;
> +		to++;
> +		count--;
> +	}
> +
> +	while (count >= IO_LONG_SIZE) {
> +		*(unsigned long *)to = IO_LONG_READ(from);
> +		from += IO_LONG_SIZE;
> +		to += IO_LONG_SIZE;
> +		count -= IO_LONG_SIZE;
> +	}
> +
> +	while (count) {
> +		*(u8 *)to = __raw_readb(from);
> +		from++;
> +		to++;
> +		count--;
> +	}
> +}
> +EXPORT_SYMBOL(__memcpy_fromio);
> +
> +void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
> +{
> +	while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
> +		!IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
> +		__raw_writeb(*(u8 *)from, to);
> +		from++;
> +		to++;
> +		count--;
> +	}
> +
> +	while (count >= IO_LONG_SIZE) {
> +		IO_LONG_WRITE(*(unsigned long *)from, to);
> +		from += IO_LONG_SIZE;
> +		to += IO_LONG_SIZE;
> +		count -= IO_LONG_SIZE;
> +	}
> +
> +	while (count) {
> +		__raw_writeb(*(u8 *)from, to);
> +		from++;
> +		to++;
> +		count--;
> +	}
> +}
> +EXPORT_SYMBOL(__memcpy_toio);
> +
> +void __memset_io(volatile void __iomem *dst, int c, size_t count)
> +{
> +	unsigned long lc = (u8)c;
> +	int i;
> +
> +	for (i = 1; i < IO_LONG_SIZE; i++)
> +		lc |= (u8)c << (i * BITS_PER_BYTE);
> +
> +	while (count && !IS_ALIGNED((unsigned long)dst, IO_LONG_SIZE)) {
> +		__raw_writeb((u8)c, dst);
> +		dst++;
> +		count--;
> +	}
> +
> +	while (count >= IO_LONG_SIZE) {
> +		IO_LONG_WRITE(lc, dst);
> +		dst += IO_LONG_SIZE;
> +		count -= IO_LONG_SIZE;
> +	}
> +
> +	while (count) {
> +		__raw_writeb(c, dst);
> +		dst++;
> +		count--;
> +	}
> +}
> +EXPORT_SYMBOL(__memset_io);
> +#endif
> -- 
> 2.24.1
>
Thomas Bogendoerfer Jan. 24, 2020, 2:07 p.m. UTC | #4
On Wed, Jan 22, 2020 at 10:45:06AM -0800, Paul Burton wrote:
> Hi Jiaxun,
> 
> On Tue, Jan 14, 2020 at 08:23:43PM +0800, Jiaxun Yang wrote:
> > Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
> > instructions like lwl or lwr in IO memory access. However, our current
> > IO memcpy/memset is wired to the generic implementation, which leads
> > to a fatal result.
> 
> Hmm, I wonder if we should just do this unconditionally on all systems.
> I can't think of a reason it'd ever be a good idea to use lwl/lwr on an
> MMIO device. Any thoughts on that?

depends on the type of device. I can see benefits for framebuffers
and memory devices since memset/memcpy are more optimised than the
function in this patch.

Thomas.
Jiaxun Yang Jan. 25, 2020, 3:31 a.m. UTC | #5
于 2020年1月24日 GMT+08:00 下午10:07:51, Thomas Bogendoerfer <tsbogend@alpha.franken.de> 写到:
>On Wed, Jan 22, 2020 at 10:45:06AM -0800, Paul Burton wrote:
>> Hi Jiaxun,
>> 
>> On Tue, Jan 14, 2020 at 08:23:43PM +0800, Jiaxun Yang wrote:
>> > Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
>> > instructions like lwl or lwr in IO memory access. However, our current
>> > IO memcpy/memset is wired to the generic implementation, which leads
>> > to a fatal result.
>> 
>> Hmm, I wonder if we should just do this unconditionally on all systems.
>> I can't think of a reason it'd ever be a good idea to use lwl/lwr on an
>> MMIO device. Any thoughts on that?
>
>depends on the type of device. I can see benefits for framebuffers
>and memory devices since memset/memcpy are more optimised than the
>function in this patch.

Is lwl/lwr slower than this implementation on your system?
I thought that other platforms, which do support unaligned requests, could benefit from the speed-up these instructions provide.

>
>Thomas.
diff mbox series

Patch

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 8b0cd692a43f..15a331aa23a2 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1450,6 +1450,7 @@  config CPU_LOONGSON64
 	select CPU_SUPPORTS_HIGHMEM
 	select CPU_SUPPORTS_HUGEPAGES
 	select CPU_SUPPORTS_MSA
+	select CPU_NEEDS_ALIGNED_IO
 	select CPU_HAS_LOAD_STORE_LR
 	select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
 	select CPU_MIPSR2_IRQ_VI
@@ -2598,6 +2599,9 @@  config CPU_HAS_LOAD_STORE_LR
 	  LWL, LWR, SWL, SWR (Load/store word left/right).
 	  LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems).
 
+config CPU_NEEDS_ALIGNED_IO
+	bool
+
 #
 # Vectored interrupt mode is an R2 feature
 #
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index 3f6ce74335b4..3b0eb4941f23 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -577,6 +577,15 @@  BUILDSTRING(l, u32)
 BUILDSTRING(q, u64)
 #endif
 
+#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
+extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
+extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
+extern void __memset_io(volatile void __iomem *, int, size_t);
+
+#define memset_io(c, v, l)	__memset_io((c), (v), (l))
+#define memcpy_fromio(a, c, l)	__memcpy_fromio((a), (c), (l))
+#define memcpy_toio(c, a, l)	__memcpy_toio((c), (a), (l))
+#else
 static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
 {
 	memset((void __force *) addr, val, count);
@@ -589,6 +598,7 @@  static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int
 {
 	memcpy((void __force *) dst, src, count);
 }
+#endif
 
 /*
  * The caches on some architectures aren't dma-coherent and have need to
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index d6e97df51cfb..b07b97b9385e 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -8,7 +8,7 @@  extra-y		:= head.o vmlinux.lds
 obj-y		+= cmpxchg.o cpu-probe.o branch.o elf.o entry.o genex.o idle.o irq.o \
 		   process.o prom.o ptrace.o reset.o setup.o signal.o \
 		   syscall.o time.o topology.o traps.o unaligned.o watch.o \
-		   vdso.o cacheinfo.o
+		   vdso.o cacheinfo.o io.o
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_ftrace.o = -pg
diff --git a/arch/mips/kernel/io.c b/arch/mips/kernel/io.c
new file mode 100644
index 000000000000..ca105aa76d4d
--- /dev/null
+++ b/arch/mips/kernel/io.c
@@ -0,0 +1,98 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
+
+/*
+ * Accessors used by the bulk loops below: the 64-bit raw accessors
+ * (__raw_readq/__raw_writeq) when CONFIG_64BIT is set, otherwise the
+ * 32-bit ones (__raw_readl/__raw_writel). IO_LONG_SIZE is the width of
+ * one such access in bytes and doubles as the alignment the loops enforce.
+ */
+#if defined(CONFIG_64BIT)
+#define IO_LONG_READ	__raw_readq
+#define IO_LONG_WRITE	__raw_writeq
+#define IO_LONG_SIZE	8
+#else
+#define IO_LONG_READ	__raw_readl
+#define IO_LONG_WRITE	__raw_writel
+#define IO_LONG_SIZE	4
+#endif
+
+/**
+ * __memcpy_fromio - copy from MMIO space using only aligned device accesses
+ * @to: destination buffer in normal memory; may have any alignment
+ * @from: source address in MMIO space; may have any alignment
+ * @count: number of bytes to copy
+ *
+ * Every access to @from is either a single byte read or an
+ * IO_LONG_SIZE-byte read at an IO_LONG_SIZE-aligned address.
+ */
+void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
+{
+	unsigned long val;
+
+	/*
+	 * Head: byte reads until the MMIO pointer is aligned. Only @from
+	 * decides when the head ends; stopping because @to happens to be
+	 * aligned would leave @from unaligned for the long reads below.
+	 */
+	while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE)) {
+		*(u8 *)to = __raw_readb(from);
+		from++;
+		to++;
+		count--;
+	}
+
+	/* Body: aligned long-sized reads from the device. */
+	while (count >= IO_LONG_SIZE) {
+		val = IO_LONG_READ(from);
+		/* @to may still be unaligned, so do not store through a cast. */
+		memcpy(to, &val, sizeof(val));
+		from += IO_LONG_SIZE;
+		to += IO_LONG_SIZE;
+		count -= IO_LONG_SIZE;
+	}
+
+	/* Tail: fewer than IO_LONG_SIZE bytes remain. */
+	while (count) {
+		*(u8 *)to = __raw_readb(from);
+		from++;
+		to++;
+		count--;
+	}
+}
+EXPORT_SYMBOL(__memcpy_fromio);
+
+/**
+ * __memcpy_toio - copy to MMIO space using only aligned device accesses
+ * @to: destination address in MMIO space; may have any alignment
+ * @from: source buffer in normal memory; may have any alignment
+ * @count: number of bytes to copy
+ *
+ * Every access to @to is either a single byte write or an
+ * IO_LONG_SIZE-byte write at an IO_LONG_SIZE-aligned address.
+ */
+void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
+{
+	unsigned long val;
+
+	/*
+	 * Head: byte writes until the MMIO pointer is aligned. Only @to
+	 * decides when the head ends; stopping because @from happens to be
+	 * aligned would leave @to unaligned for the long writes below.
+	 */
+	while (count && !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
+		__raw_writeb(*(const u8 *)from, to);
+		from++;
+		to++;
+		count--;
+	}
+
+	/* Body: aligned long-sized writes to the device. */
+	while (count >= IO_LONG_SIZE) {
+		/* @from may still be unaligned, so do not load through a cast. */
+		memcpy(&val, from, sizeof(val));
+		IO_LONG_WRITE(val, to);
+		from += IO_LONG_SIZE;
+		to += IO_LONG_SIZE;
+		count -= IO_LONG_SIZE;
+	}
+
+	/* Tail: fewer than IO_LONG_SIZE bytes remain. */
+	while (count) {
+		__raw_writeb(*(const u8 *)from, to);
+		from++;
+		to++;
+		count--;
+	}
+}
+EXPORT_SYMBOL(__memcpy_toio);
+
+/**
+ * __memset_io - fill MMIO space with a byte using only aligned accesses
+ * @dst: start address in MMIO space; may have any alignment
+ * @c: fill value; only the low 8 bits are used
+ * @count: number of bytes to fill
+ *
+ * Every access to @dst is either a single byte write or an
+ * IO_LONG_SIZE-byte write at an IO_LONG_SIZE-aligned address.
+ */
+void __memset_io(volatile void __iomem *dst, int c, size_t count)
+{
+	const u8 byte = c;
+	/*
+	 * Replicate the byte into every byte lane of a long. ~0UL / 0xff is
+	 * 0x01...01, and the multiply is done in unsigned long, so nothing is
+	 * shifted as an int (which breaks for lanes 3 and up).
+	 */
+	const unsigned long lc = byte * (~0UL / 0xff);
+
+	/* Head: byte writes until the MMIO pointer is aligned. */
+	while (count && !IS_ALIGNED((unsigned long)dst, IO_LONG_SIZE)) {
+		__raw_writeb(byte, dst);
+		dst++;
+		count--;
+	}
+
+	/* Body: aligned long-sized writes of the replicated pattern. */
+	while (count >= IO_LONG_SIZE) {
+		IO_LONG_WRITE(lc, dst);
+		dst += IO_LONG_SIZE;
+		count -= IO_LONG_SIZE;
+	}
+
+	/* Tail: fewer than IO_LONG_SIZE bytes remain. */
+	while (count) {
+		__raw_writeb(byte, dst);
+		dst++;
+		count--;
+	}
+}
+EXPORT_SYMBOL(__memset_io);
+#endif