diff mbox series

[RFC,v8,15/21] riscv: Add vector extension XOR implementation

Message ID 33c485ee595aff1e19a0e43074da59779f58d105.1631121222.git.greentime.hu@sifive.com (mailing list archive)
State New, archived
Headers show
Series riscv: Add vector ISA support | expand

Commit Message

Greentime Hu Sept. 8, 2021, 5:45 p.m. UTC
This patch adds support for vector optimized XOR it is tested in spike and
qemu.

Logs in spike:
[    0.008365] xor: measuring software checksum speed
[    0.048885]    8regs     :  1719.000 MB/sec
[    0.089080]    32regs    :  1717.000 MB/sec
[    0.129275]    rvv       :  7043.000 MB/sec
[    0.129525] xor: using function: rvv (7043.000 MB/sec)

Logs in qemu:
[    0.098943] xor: measuring software checksum speed
[    0.139391]    8regs     :  2911.000 MB/sec
[    0.181079]    32regs    :  2813.000 MB/sec
[    0.224260]    rvv       :    45.000 MB/sec
[    0.225586] xor: using function: 8regs (2911.000 MB/sec)

Co-developed-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
---
 arch/riscv/include/asm/xor.h | 74 ++++++++++++++++++++++++++++++++
 arch/riscv/lib/Makefile      |  1 +
 arch/riscv/lib/xor.S         | 81 ++++++++++++++++++++++++++++++++++++
 3 files changed, 156 insertions(+)
 create mode 100644 arch/riscv/include/asm/xor.h
 create mode 100644 arch/riscv/lib/xor.S

Comments

Christoph Hellwig Sept. 9, 2021, 6:12 a.m. UTC | #1
On Thu, Sep 09, 2021 at 01:45:27AM +0800, Greentime Hu wrote:
> +extern void xor_regs_2_(unsigned long bytes, unsigned long *p1,
> +			unsigned long *p2);
> +extern void xor_regs_3_(unsigned long bytes, unsigned long *p1,
> +			unsigned long *p2, unsigned long *p3);
> +extern void xor_regs_4_(unsigned long bytes, unsigned long *p1,
> +			unsigned long *p2, unsigned long *p3,
> +			unsigned long *p4);
> +extern void xor_regs_5_(unsigned long bytes, unsigned long *p1,
> +			unsigned long *p2, unsigned long *p3, unsigned long *p4,
> +			unsigned long *p5);

There is no need for externs on function declarations ever.

> +static void xor_rvv_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
> +{
> +	kernel_rvv_begin();
> +	xor_regs_2_(bytes, p1, p2);
> +	kernel_rvv_end();
> +}

This looks strange.  Why these wrappers?
Ley Foon Tan Sept. 14, 2021, 8:29 a.m. UTC | #2
On Thu, Sep 9, 2021 at 1:49 AM Greentime Hu <greentime.hu@sifive.com> wrote:
>
> This patch adds support for vector optimized XOR it is tested in spike and
> qemu.
>
> Logs in spike:
> [    0.008365] xor: measuring software checksum speed
> [    0.048885]    8regs     :  1719.000 MB/sec
> [    0.089080]    32regs    :  1717.000 MB/sec
> [    0.129275]    rvv       :  7043.000 MB/sec
> [    0.129525] xor: using function: rvv (7043.000 MB/sec)
>
> Logs in qemu:
> [    0.098943] xor: measuring software checksum speed
> [    0.139391]    8regs     :  2911.000 MB/sec
> [    0.181079]    32regs    :  2813.000 MB/sec
> [    0.224260]    rvv       :    45.000 MB/sec
> [    0.225586] xor: using function: 8regs (2911.000 MB/sec)
>
> Co-developed-by: Han-Kuan Chen <hankuan.chen@sifive.com>
> Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com>
> Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
> ---
>  arch/riscv/include/asm/xor.h | 74 ++++++++++++++++++++++++++++++++
>  arch/riscv/lib/Makefile      |  1 +
>  arch/riscv/lib/xor.S         | 81 ++++++++++++++++++++++++++++++++++++
>  3 files changed, 156 insertions(+)
>  create mode 100644 arch/riscv/include/asm/xor.h
>  create mode 100644 arch/riscv/lib/xor.S
>
> diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
> new file mode 100644
> index 000000000000..60ee0224913d
> --- /dev/null
> +++ b/arch/riscv/include/asm/xor.h
> @@ -0,0 +1,74 @@


[...]

>
> +extern bool has_vector;
> +#undef XOR_TRY_TEMPLATES
> +#define XOR_TRY_TEMPLATES           \
> +       do {        \
> +               xor_speed(&xor_block_8regs);    \
> +               xor_speed(&xor_block_32regs);    \
> +               if (has_vector) { \
> +                       xor_speed(&xor_block_rvv);\
> +               } \
> +       } while (0)
> +#endif
>
bool has_vector is changed to has_vector() function now, should this
change as well?


Regards
Ley Foon
Greentime Hu Sept. 28, 2021, 7 a.m. UTC | #3
Christoph Hellwig <hch@infradead.org> 於 2021年9月9日 週四 下午2:12寫道:
>
> On Thu, Sep 09, 2021 at 01:45:27AM +0800, Greentime Hu wrote:
> > +extern void xor_regs_2_(unsigned long bytes, unsigned long *p1,
> > +                     unsigned long *p2);
> > +extern void xor_regs_3_(unsigned long bytes, unsigned long *p1,
> > +                     unsigned long *p2, unsigned long *p3);
> > +extern void xor_regs_4_(unsigned long bytes, unsigned long *p1,
> > +                     unsigned long *p2, unsigned long *p3,
> > +                     unsigned long *p4);
> > +extern void xor_regs_5_(unsigned long bytes, unsigned long *p1,
> > +                     unsigned long *p2, unsigned long *p3, unsigned long *p4,
> > +                     unsigned long *p5);
>
> There is no need for externs on function declarations ever.
>
Ok, I'll remove it.

> > +static void xor_rvv_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
> > +{
> > +     kernel_rvv_begin();
> > +     xor_regs_2_(bytes, p1, p2);
> > +     kernel_rvv_end();
> > +}
>
> This looks strange.  Why these wrappers?

We don't use rvv in kernel space generally. If we want to use it, we
need to save all vector registers first.
Just like arm64/x86 implementation in
arch/arm64/include/asm/xor.h
arch/x86/include/asm/xor.h
Greentime Hu Sept. 28, 2021, 7:01 a.m. UTC | #4
Ley Foon Tan <lftan.linux@gmail.com> 於 2021年9月14日 週二 下午4:30寫道:
>
> On Thu, Sep 9, 2021 at 1:49 AM Greentime Hu <greentime.hu@sifive.com> wrote:
> >
> > This patch adds support for vector optimized XOR it is tested in spike and
> > qemu.
> >
> > Logs in spike:
> > [    0.008365] xor: measuring software checksum speed
> > [    0.048885]    8regs     :  1719.000 MB/sec
> > [    0.089080]    32regs    :  1717.000 MB/sec
> > [    0.129275]    rvv       :  7043.000 MB/sec
> > [    0.129525] xor: using function: rvv (7043.000 MB/sec)
> >
> > Logs in qemu:
> > [    0.098943] xor: measuring software checksum speed
> > [    0.139391]    8regs     :  2911.000 MB/sec
> > [    0.181079]    32regs    :  2813.000 MB/sec
> > [    0.224260]    rvv       :    45.000 MB/sec
> > [    0.225586] xor: using function: 8regs (2911.000 MB/sec)
> >
> > Co-developed-by: Han-Kuan Chen <hankuan.chen@sifive.com>
> > Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com>
> > Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
> > ---
> >  arch/riscv/include/asm/xor.h | 74 ++++++++++++++++++++++++++++++++
> >  arch/riscv/lib/Makefile      |  1 +
> >  arch/riscv/lib/xor.S         | 81 ++++++++++++++++++++++++++++++++++++
> >  3 files changed, 156 insertions(+)
> >  create mode 100644 arch/riscv/include/asm/xor.h
> >  create mode 100644 arch/riscv/lib/xor.S
> >
> > diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
> > new file mode 100644
> > index 000000000000..60ee0224913d
> > --- /dev/null
> > +++ b/arch/riscv/include/asm/xor.h
> > @@ -0,0 +1,74 @@
>
>
> [...]
>
> >
> > +extern bool has_vector;
> > +#undef XOR_TRY_TEMPLATES
> > +#define XOR_TRY_TEMPLATES           \
> > +       do {        \
> > +               xor_speed(&xor_block_8regs);    \
> > +               xor_speed(&xor_block_32regs);    \
> > +               if (has_vector) { \
> > +                       xor_speed(&xor_block_rvv);\
> > +               } \
> > +       } while (0)
> > +#endif
> >
> bool has_vector is changed to has_vector() function now, should this
> change as well?

That's right. Thank you, LeyFoon.
I'll merge the patch to fix the has_vector() issue in next version patchset.
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
new file mode 100644
index 000000000000..60ee0224913d
--- /dev/null
+++ b/arch/riscv/include/asm/xor.h
@@ -0,0 +1,74 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 SiFive
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#ifdef CONFIG_VECTOR
+#include <asm/vector.h>
+
+extern void xor_regs_2_(unsigned long bytes, unsigned long *p1,
+			unsigned long *p2);
+extern void xor_regs_3_(unsigned long bytes, unsigned long *p1,
+			unsigned long *p2, unsigned long *p3);
+extern void xor_regs_4_(unsigned long bytes, unsigned long *p1,
+			unsigned long *p2, unsigned long *p3,
+			unsigned long *p4);
+extern void xor_regs_5_(unsigned long bytes, unsigned long *p1,
+			unsigned long *p2, unsigned long *p3, unsigned long *p4,
+			unsigned long *p5);
+
+static void xor_rvv_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+	kernel_rvv_begin();
+	xor_regs_2_(bytes, p1, p2);
+	kernel_rvv_end();
+}
+
+static void
+xor_rvv_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3)
+{
+	kernel_rvv_begin();
+	xor_regs_3_(bytes, p1, p2, p3);
+	kernel_rvv_end();
+}
+
+static void
+xor_rvv_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3, unsigned long *p4)
+{
+	kernel_rvv_begin();
+	xor_regs_4_(bytes, p1, p2, p3, p4);
+	kernel_rvv_end();
+}
+
+static void
+xor_rvv_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+	kernel_rvv_begin();
+	xor_regs_5_(bytes, p1, p2, p3, p4, p5);
+	kernel_rvv_end();
+}
+
+static struct xor_block_template xor_block_rvv = {
+	.name = "rvv",
+	.do_2 = xor_rvv_2,
+	.do_3 = xor_rvv_3,
+	.do_4 = xor_rvv_4,
+	.do_5 = xor_rvv_5
+};
+
+extern bool has_vector;
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES           \
+	do {        \
+		xor_speed(&xor_block_8regs);    \
+		xor_speed(&xor_block_32regs);    \
+		if (has_vector) { \
+			xor_speed(&xor_block_rvv);\
+		} \
+	} while (0)
+#endif
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 25d5c9664e57..acd87ac86d24 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -7,3 +7,4 @@  lib-$(CONFIG_MMU)	+= uaccess.o
 lib-$(CONFIG_64BIT)	+= tishift.o
 
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+lib-$(CONFIG_VECTOR)	+= xor.o
diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S
new file mode 100644
index 000000000000..de2e234c39ed
--- /dev/null
+++ b/arch/riscv/lib/xor.S
@@ -0,0 +1,81 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 SiFive
+ */
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+
+ENTRY(xor_regs_2_)
+	vsetvli a3, a0, e8, m8
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a3
+	vxor.vv v16, v0, v8
+	add a2, a2, a3
+	vse8.v v16, (a1)
+	add a1, a1, a3
+	bnez a0, xor_regs_2_
+	ret
+END(xor_regs_2_)
+EXPORT_SYMBOL(xor_regs_2_)
+
+ENTRY(xor_regs_3_)
+	vsetvli a4, a0, e8, m8
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a4
+	vxor.vv v0, v0, v8
+	vle8.v v16, (a3)
+	add a2, a2, a4
+	vxor.vv v16, v0, v16
+	add a3, a3, a4
+	vse8.v v16, (a1)
+	add a1, a1, a4
+	bnez a0, xor_regs_3_
+	ret
+END(xor_regs_3_)
+EXPORT_SYMBOL(xor_regs_3_)
+
+ENTRY(xor_regs_4_)
+	vsetvli a5, a0, e8, m8
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a5
+	vxor.vv v0, v0, v8
+	vle8.v v16, (a3)
+	add a2, a2, a5
+	vxor.vv v0, v0, v16
+	vle8.v v24, (a4)
+	add a3, a3, a5
+	vxor.vv v16, v0, v24
+	add a4, a4, a5
+	vse8.v v16, (a1)
+	add a1, a1, a5
+	bnez a0, xor_regs_4_
+	ret
+END(xor_regs_4_)
+EXPORT_SYMBOL(xor_regs_4_)
+
+ENTRY(xor_regs_5_)
+	vsetvli a6, a0, e8, m8
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a6
+	vxor.vv v0, v0, v8
+	vle8.v v16, (a3)
+	add a2, a2, a6
+	vxor.vv v0, v0, v16
+	vle8.v v24, (a4)
+	add a3, a3, a6
+	vxor.vv v0, v0, v24
+	vle8.v v8, (a5)
+	add a4, a4, a6
+	vxor.vv v16, v0, v8
+	add a5, a5, a6
+	vse8.v v16, (a1)
+	add a1, a1, a6
+	bnez a0, xor_regs_5_
+	ret
+END(xor_regs_5_)
+EXPORT_SYMBOL(xor_regs_5_)