diff mbox series

[V3,08/17] riscv: compat: syscall: Add compat_sys_call_table implementation

Message ID 20220120073911.99857-9-guoren@kernel.org (mailing list archive)
State Superseded
Headers show
Series riscv: compat: Add COMPAT mode support for rv64 | expand

Commit Message

Guo Ren Jan. 20, 2022, 7:39 a.m. UTC
From: Guo Ren <guoren@linux.alibaba.com>

Implement compat_syscall_table.c with compat_sys_call_table & fixup
system call such as truncate64,pread64,fallocate which need two
regs to indicate 64bit-arg (copied from arm64).

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
---
 arch/riscv/include/asm/syscall.h         |  3 +
 arch/riscv/include/uapi/asm/unistd.h     |  2 +-
 arch/riscv/kernel/Makefile               |  1 +
 arch/riscv/kernel/compat_syscall_table.c | 72 ++++++++++++++++++++++++
 arch/riscv/kernel/sys_riscv.c            |  6 +-
 5 files changed, 81 insertions(+), 3 deletions(-)
 create mode 100644 arch/riscv/kernel/compat_syscall_table.c

Comments

Arnd Bergmann Jan. 20, 2022, 2:42 p.m. UTC | #1
On Thu, Jan 20, 2022 at 8:39 AM <guoren@kernel.org> wrote:
>
>  /* The array of function pointers for syscalls. */
>  extern void * const sys_call_table[];
> +#ifdef CONFIG_COMPAT
> +extern void * const compat_sys_call_table[];
> +#endif

No need for the #ifdef, the normal convention is to just define the
extern declaration unconditionally for symbols that may or may not be defined.

> +COMPAT_SYSCALL_DEFINE3(truncate64, const char __user *, pathname,
> +                      arg_u32p(length))
> +{
> +       return ksys_truncate(pathname, arg_u64(length));
> +}

Are you sure these are the right calling conventions? According to [1],
I think the 64-bit argument should be in an aligned pair of registers,
which means you need an extra pad argument as in the arm64 version
of these functions. Same for ftruncate64, pread64, pwrite64, and
readahead.

> +COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd, arg_u32p(length))
> +{
> +       return ksys_ftruncate(fd, arg_u64(length));
> +}
> +
> +COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode,
> +                      arg_u32p(offset), arg_u32p(len))
> +{
> +       return ksys_fallocate(fd, mode, arg_u64(offset), arg_u64(len));
> +}
> +
> +COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, fd, char __user *, buf,
> +                      size_t, count, arg_u32p(pos))
> +{
> +       return ksys_pread64(fd, buf, count, arg_u64(pos));
> +}
> +
> +COMPAT_SYSCALL_DEFINE5(pwrite64, unsigned int, fd,
> +                      const char __user *, buf, size_t, count, arg_u32p(pos))
> +{
> +       return ksys_pwrite64(fd, buf, count, arg_u64(pos));
> +}
> +
> +COMPAT_SYSCALL_DEFINE6(sync_file_range, int, fd, arg_u32p(offset),
> +                      arg_u32p(nbytes), unsigned int, flags)
> +{
> +       return ksys_sync_file_range(fd, arg_u64(offset), arg_u64(nbytes),
> +                                   flags);
> +}
> +
> +COMPAT_SYSCALL_DEFINE4(readahead, int, fd, arg_u32p(offset),
> +                      size_t, count)
> +{
> +       return ksys_readahead(fd, arg_u64(offset), count);
> +}
> +
> +COMPAT_SYSCALL_DEFINE6(fadvise64_64, int, fd, int, advice, arg_u32p(offset),
> +                      arg_u32p(len))
> +{
> +       return ksys_fadvise64_64(fd, arg_u64(offset), arg_u64(len), advice);
> +}

I still feel like these should be the common implementations next to the
native handlers inside of an #ifdef CONFIG_COMPAT.

The names clash with the custom versions defined for powerpc and sparc,
but the duplicates look compatible if you can account for the padded
argument and the lo/hi order of the pairs, so could just be removed here
(all other architectures use custom function names instead).

        Arnd

[1] https://riscv.org/wp-content/uploads/2015/01/riscv-calling.pdf
Guo Ren Jan. 21, 2022, 6:25 a.m. UTC | #2
On Thu, Jan 20, 2022 at 10:43 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Thu, Jan 20, 2022 at 8:39 AM <guoren@kernel.org> wrote:
> >
> >  /* The array of function pointers for syscalls. */
> >  extern void * const sys_call_table[];
> > +#ifdef CONFIG_COMPAT
> > +extern void * const compat_sys_call_table[];
> > +#endif
>
> No need for the #ifdef, the normal convention is to just define the
> extern declaration unconditionally for symbols that may or may not be defined.
Okay

>
> > +COMPAT_SYSCALL_DEFINE3(truncate64, const char __user *, pathname,
> > +                      arg_u32p(length))
> > +{
> > +       return ksys_truncate(pathname, arg_u64(length));
> > +}
>
> Are you sure these are the right calling conventions? According to [1],
> I think the 64-bit argument should be in an aligned pair of registers,
> which means you need an extra pad argument as in the arm64 version
> of these functions. Same for ftruncate64, pread64, pwrite64, and
> readahead.

[1] has abandoned.

See:
https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc

Ltp test results:

ftruncate01                                        PASS       0
ftruncate01_64                                     PASS       0
ftruncate03                                        PASS       0
ftruncate03_64                                     PASS       0
ftruncate04                                        CONF       32
ftruncate04_64                                     CONF       32

truncate02                                         PASS       0
truncate02_64                                      PASS       0
truncate03                                         PASS       0
truncate03_64                                      PASS       0

pread01                                            PASS       0
pread01_64                                         PASS       0
pread02                                            PASS       0
pread02_64                                         PASS       0
pread03                                            PASS       0
pread03_64                                         PASS       0

pwrite01_64                                        PASS       0
pwrite02_64                                        PASS       0
pwrite03_64                                        PASS       0
pwrite04_64                                        PASS       0

readahead01                                        PASS       0
readahead02                                        CONF       32


>
> > +COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd, arg_u32p(length))
> > +{
> > +       return ksys_ftruncate(fd, arg_u64(length));
> > +}
> > +
> > +COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode,
> > +                      arg_u32p(offset), arg_u32p(len))
> > +{
> > +       return ksys_fallocate(fd, mode, arg_u64(offset), arg_u64(len));
> > +}
> > +
> > +COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, fd, char __user *, buf,
> > +                      size_t, count, arg_u32p(pos))
> > +{
> > +       return ksys_pread64(fd, buf, count, arg_u64(pos));
> > +}
> > +
> > +COMPAT_SYSCALL_DEFINE5(pwrite64, unsigned int, fd,
> > +                      const char __user *, buf, size_t, count, arg_u32p(pos))
> > +{
> > +       return ksys_pwrite64(fd, buf, count, arg_u64(pos));
> > +}
> > +
> > +COMPAT_SYSCALL_DEFINE6(sync_file_range, int, fd, arg_u32p(offset),
> > +                      arg_u32p(nbytes), unsigned int, flags)
> > +{
> > +       return ksys_sync_file_range(fd, arg_u64(offset), arg_u64(nbytes),
> > +                                   flags);
> > +}
> > +
> > +COMPAT_SYSCALL_DEFINE4(readahead, int, fd, arg_u32p(offset),
> > +                      size_t, count)
> > +{
> > +       return ksys_readahead(fd, arg_u64(offset), count);
> > +}
> > +
> > +COMPAT_SYSCALL_DEFINE6(fadvise64_64, int, fd, int, advice, arg_u32p(offset),
> > +                      arg_u32p(len))
> > +{
> > +       return ksys_fadvise64_64(fd, arg_u64(offset), arg_u64(len), advice);
> > +}
>
> I still feel like these should be the common implementations next to the
> native handlers inside of an #ifdef CONFIG_COMPAT.
>
> The names clash with the custom versions defined for powerpc and sparc,
> but the duplicates look compatible if you can account for the padded
> argument and the lo/hi order of the pairs, so could just be removed here
> (all other architectures use custom function names instead).
I would try it later.

>
>         Arnd
>
> [1] https://riscv.org/wp-content/uploads/2015/01/riscv-calling.pdf
Arnd Bergmann Jan. 21, 2022, 8:56 a.m. UTC | #3
On Fri, Jan 21, 2022 at 7:25 AM Guo Ren <guoren@kernel.org> wrote:
> On Thu, Jan 20, 2022 at 10:43 PM Arnd Bergmann <arnd@arndb.de> wrote:
> > On Thu, Jan 20, 2022 at 8:39 AM <guoren@kernel.org> wrote:

> > Are you sure these are the right calling conventions? According to [1],
> > I think the 64-bit argument should be in an aligned pair of registers,
> > which means you need an extra pad argument as in the arm64 version
> > of these functions. Same for ftruncate64, pread64, pwrite64, and
> > readahead.
>
> [1] has abandoned.
>
> See:
> https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc

Ok, thanks for the reference, I picked the first one that came up in
a google search and didn't expect this to ever have changed.

> > I still feel like these should be the common implementations next to the
> > native handlers inside of an #ifdef CONFIG_COMPAT.
> >
> > The names clash with the custom versions defined for powerpc and sparc,
> > but the duplicates look compatible if you can account for the padded
> > argument and the lo/hi order of the pairs, so could just be removed here
> > (all other architectures use custom function names instead).
> I would try it later.

This becomes easier then, as powerpc and sparc already have the non-padded
calling conventions, so you could just generalize those without looking at
the other architectures or adding the padding. The powerpc version already
has the dual-endian version, so using that will work on big-endian sparc and
on little-endian riscv as well, though we may need to come up with a better name
for the arg_u32/arg_u64/merge_64 macros in order to put that into a global
header without namespace collisions.

         Arnd
Guo Ren Jan. 21, 2022, 9:22 a.m. UTC | #4
On Fri, Jan 21, 2022 at 4:57 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Fri, Jan 21, 2022 at 7:25 AM Guo Ren <guoren@kernel.org> wrote:
> > On Thu, Jan 20, 2022 at 10:43 PM Arnd Bergmann <arnd@arndb.de> wrote:
> > > On Thu, Jan 20, 2022 at 8:39 AM <guoren@kernel.org> wrote:
>
> > > Are you sure these are the right calling conventions? According to [1],
> > > I think the 64-bit argument should be in an aligned pair of registers,
> > > which means you need an extra pad argument as in the arm64 version
> > > of these functions. Same for ftruncate64, pread64, pwrite64, and
> > > readahead.
> >
> > [1] has abandoned.
> >
> > See:
> > https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
>
> Ok, thanks for the reference, I picked the first one that came up in
> a google search and didn't expect this to ever have changed.
>
> > > I still feel like these should be the common implementations next to the
> > > native handlers inside of an #ifdef CONFIG_COMPAT.
> > >
> > > The names clash with the custom versions defined for powerpc and sparc,
> > > but the duplicates look compatible if you can account for the padded
> > > argument and the lo/hi order of the pairs, so could just be removed here
> > > (all other architectures use custom function names instead).
> > I would try it later.
>
> This becomes easier then, as powerpc and sparc already have the non-padded
> calling conventions, so you could just generalize those without looking at
> the other architectures or adding the padding. The powerpc version already
> has the dual-endian version, so using that will work on big-endian sparc and
> on little-endian riscv as well, though we may need to come up with a better name
> for the arg_u32/arg_u64/merge_64 macros in order to put that into a global
> header without namespace collisions.
Sounds good, thanks!

>
>          Arnd
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index 7ac6a0e275f2..4ff98a22ef24 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -16,6 +16,9 @@ 
 
 /* The array of function pointers for syscalls. */
 extern void * const sys_call_table[];
+#ifdef CONFIG_COMPAT
+extern void * const compat_sys_call_table[];
+#endif
 
 /*
  * Only the low 32 bits of orig_r0 are meaningful, so we return int.
diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h
index 8062996c2dfd..c9e50eed14aa 100644
--- a/arch/riscv/include/uapi/asm/unistd.h
+++ b/arch/riscv/include/uapi/asm/unistd.h
@@ -15,7 +15,7 @@ 
  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
  */
 
-#ifdef __LP64__
+#if defined(__LP64__) && !defined(__SYSCALL_COMPAT)
 #define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_SET_GET_RLIMIT
 #endif /* __LP64__ */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 3397ddac1a30..1f2111179615 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -65,3 +65,4 @@  obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o
 
 obj-$(CONFIG_EFI)		+= efi.o
+obj-$(CONFIG_COMPAT)		+= compat_syscall_table.o
diff --git a/arch/riscv/kernel/compat_syscall_table.c b/arch/riscv/kernel/compat_syscall_table.c
new file mode 100644
index 000000000000..53905947678e
--- /dev/null
+++ b/arch/riscv/kernel/compat_syscall_table.c
@@ -0,0 +1,72 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define __SYSCALL_COMPAT
+
+#include <linux/compat.h>
+#include <linux/syscalls.h>
+#include <asm-generic/mman-common.h>
+#include <asm-generic/syscalls.h>
+#include <asm/syscall.h>
+
+#define arg_u32p(name)  u32, name##_lo, u32, name##_hi
+
+#define arg_u64(name)   (((u64)name##_hi << 32) | \
+			 ((u64)name##_lo & 0xffffffff))
+
+COMPAT_SYSCALL_DEFINE3(truncate64, const char __user *, pathname,
+		       arg_u32p(length))
+{
+	return ksys_truncate(pathname, arg_u64(length));
+}
+
+COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd, arg_u32p(length))
+{
+	return ksys_ftruncate(fd, arg_u64(length));
+}
+
+COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode,
+		       arg_u32p(offset), arg_u32p(len))
+{
+	return ksys_fallocate(fd, mode, arg_u64(offset), arg_u64(len));
+}
+
+COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, fd, char __user *, buf,
+		       size_t, count, arg_u32p(pos))
+{
+	return ksys_pread64(fd, buf, count, arg_u64(pos));
+}
+
+COMPAT_SYSCALL_DEFINE5(pwrite64, unsigned int, fd,
+		       const char __user *, buf, size_t, count, arg_u32p(pos))
+{
+	return ksys_pwrite64(fd, buf, count, arg_u64(pos));
+}
+
+COMPAT_SYSCALL_DEFINE6(sync_file_range, int, fd, arg_u32p(offset),
+		       arg_u32p(nbytes), unsigned int, flags)
+{
+	return ksys_sync_file_range(fd, arg_u64(offset), arg_u64(nbytes),
+				    flags);
+}
+
+COMPAT_SYSCALL_DEFINE4(readahead, int, fd, arg_u32p(offset),
+		       size_t, count)
+{
+	return ksys_readahead(fd, arg_u64(offset), count);
+}
+
+COMPAT_SYSCALL_DEFINE6(fadvise64_64, int, fd, int, advice, arg_u32p(offset),
+		       arg_u32p(len))
+{
+	return ksys_fadvise64_64(fd, arg_u64(offset), arg_u64(len), advice);
+}
+
+#undef __SYSCALL
+#define __SYSCALL(nr, call)      [nr] = (call),
+
+asmlinkage long compat_sys_rt_sigreturn(void);
+
+void * const compat_sys_call_table[__NR_syscalls] = {
+	[0 ... __NR_syscalls - 1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index 12f8a7fce78b..9c0194f176fc 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -33,7 +33,9 @@  SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
 {
 	return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0);
 }
-#else
+#endif
+
+#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
 SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
 	unsigned long, prot, unsigned long, flags,
 	unsigned long, fd, off_t, offset)
@@ -44,7 +46,7 @@  SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
 	 */
 	return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12);
 }
-#endif /* !CONFIG_64BIT */
+#endif
 
 /*
  * Allows the instruction cache to be flushed from userspace.  Despite RISC-V