diff mbox series

[v9,28/31] common-user: Add safe syscall handling for loongarch64 hosts

Message ID 20211214080154.196350-29-git@xen0n.name (mailing list archive)
State New, archived
Headers show
Series LoongArch64 port of QEMU TCG | expand

Commit Message

WANG Xuerui Dec. 14, 2021, 8:01 a.m. UTC
Signed-off-by: WANG Xuerui <git@xen0n.name>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 .../host/loongarch64/safe-syscall.inc.S       | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 common-user/host/loongarch64/safe-syscall.inc.S

Comments

Philippe Mathieu-Daudé Dec. 14, 2021, 1:29 p.m. UTC | #1
On 12/14/21 09:01, WANG Xuerui wrote:
> Signed-off-by: WANG Xuerui <git@xen0n.name>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  .../host/loongarch64/safe-syscall.inc.S       | 81 +++++++++++++++++++
>  1 file changed, 81 insertions(+)
>  create mode 100644 common-user/host/loongarch64/safe-syscall.inc.S
> 
> diff --git a/common-user/host/loongarch64/safe-syscall.inc.S b/common-user/host/loongarch64/safe-syscall.inc.S
> new file mode 100644
> index 0000000000..61dc6554eb
> --- /dev/null
> +++ b/common-user/host/loongarch64/safe-syscall.inc.S
> @@ -0,0 +1,81 @@
> +/*
> + * safe-syscall.inc.S : host-specific assembly fragment
> + * to handle signals occurring at the same time as system calls.
> + * This is intended to be included by common-user/safe-syscall.S
> + *
> + * Ported to LoongArch by WANG Xuerui <git@xen0n.name>
> + *
> + * Based on safe-syscall.inc.S code for every other architecture,
> + * originally written by Richard Henderson <rth@twiddle.net>
> + * Copyright (C) 2018 Linaro, Inc.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +        .global safe_syscall_base
> +        .global safe_syscall_start
> +        .global safe_syscall_end
> +        .type   safe_syscall_base, @function
> +        .type   safe_syscall_start, @function
> +        .type   safe_syscall_end, @function
> +
> +        /*
> +         * This is the entry point for making a system call. The calling
> +         * convention here is that of a C varargs function with the
> +         * first argument an 'int *' to the signal_pending flag, the
> +         * second one the system call number (as a 'long'), and all further
> +         * arguments being syscall arguments (also 'long').
> +         */
> +safe_syscall_base:
> +        .cfi_startproc
> +        /*
> +         * The syscall calling convention is nearly the same as C:
> +         * we enter with a0 == &signal_pending
> +         *               a1 == syscall number
> +         *               a2 ... a7 == syscall arguments
> +         *               and return the result in a0
> +         * and the syscall instruction needs
> +         *               a7 == syscall number
> +         *               a0 ... a5 == syscall arguments
> +         *               and returns the result in a0
> +         * Shuffle everything around appropriately.
> +         */
> +        move    $t0, $a0        /* signal_pending pointer */
> +        move    $t1, $a1        /* syscall number */
> +        move    $a0, $a2        /* syscall arguments */
> +        move    $a1, $a3
> +        move    $a2, $a4
> +        move    $a3, $a5
> +        move    $a4, $a6
> +        move    $a5, $a7
> +        move    $a7, $t1
> +
> +        /*
> +         * This next sequence of code works in conjunction with the
> +         * rewind_if_safe_syscall_function(). If a signal is taken
> +         * and the interrupted PC is anywhere between 'safe_syscall_start'
> +         * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
> +         * The code sequence must therefore be able to cope with this, and
> +         * the syscall instruction must be the final one in the sequence.
> +         */
> +safe_syscall_start:
> +        /* If signal_pending is non-zero, don't do the call */
> +        ld.w    $t1, $t0, 0
> +        bnez    $t1, 2f
> +        syscall 0
> +safe_syscall_end:
> +        /* code path for having successfully executed the syscall */
> +        li.w    $t2, -4096
> +        bgtu    $a0, $t2, 0f
> +        jr      $ra
> +
> +        /* code path setting errno */
> +0:      sub.d   $a0, $zero, $a0
> +        b       safe_syscall_set_errno_tail
> +
> +        /* code path when we didn't execute the syscall */
> +2:      li.w    $a0, QEMU_ERESTARTSYS
> +        b       safe_syscall_set_errno_tail
> +        .cfi_endproc
> +        .size   safe_syscall_base, .-safe_syscall_base
> 

Why not rename 0 -> set_errno and 2 -> syscall_not_executed
for readability? (and eventually drop the comments).

Otherwise:
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
WANG Xuerui Dec. 14, 2021, 3:16 p.m. UTC | #2
Hi Philippe,

On 12/14/21 21:29, Philippe Mathieu-Daudé wrote:
> On 12/14/21 09:01, WANG Xuerui wrote:
>> Signed-off-by: WANG Xuerui <git@xen0n.name>
>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>   .../host/loongarch64/safe-syscall.inc.S       | 81 +++++++++++++++++++
>>   1 file changed, 81 insertions(+)
>>   create mode 100644 common-user/host/loongarch64/safe-syscall.inc.S
>>
>> diff --git a/common-user/host/loongarch64/safe-syscall.inc.S b/common-user/host/loongarch64/safe-syscall.inc.S
>> new file mode 100644
>> index 0000000000..61dc6554eb
>> --- /dev/null
>> +++ b/common-user/host/loongarch64/safe-syscall.inc.S
>> @@ -0,0 +1,81 @@
>> +/*
>> + * safe-syscall.inc.S : host-specific assembly fragment
>> + * to handle signals occurring at the same time as system calls.
>> + * This is intended to be included by common-user/safe-syscall.S
>> + *
>> + * Ported to LoongArch by WANG Xuerui <git@xen0n.name>
>> + *
>> + * Based on safe-syscall.inc.S code for every other architecture,
>> + * originally written by Richard Henderson <rth@twiddle.net>
>> + * Copyright (C) 2018 Linaro, Inc.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +        .global safe_syscall_base
>> +        .global safe_syscall_start
>> +        .global safe_syscall_end
>> +        .type   safe_syscall_base, @function
>> +        .type   safe_syscall_start, @function
>> +        .type   safe_syscall_end, @function
>> +
>> +        /*
>> +         * This is the entry point for making a system call. The calling
>> +         * convention here is that of a C varargs function with the
>> +         * first argument an 'int *' to the signal_pending flag, the
>> +         * second one the system call number (as a 'long'), and all further
>> +         * arguments being syscall arguments (also 'long').
>> +         */
>> +safe_syscall_base:
>> +        .cfi_startproc
>> +        /*
>> +         * The syscall calling convention is nearly the same as C:
>> +         * we enter with a0 == &signal_pending
>> +         *               a1 == syscall number
>> +         *               a2 ... a7 == syscall arguments
>> +         *               and return the result in a0
>> +         * and the syscall instruction needs
>> +         *               a7 == syscall number
>> +         *               a0 ... a5 == syscall arguments
>> +         *               and returns the result in a0
>> +         * Shuffle everything around appropriately.
>> +         */
>> +        move    $t0, $a0        /* signal_pending pointer */
>> +        move    $t1, $a1        /* syscall number */
>> +        move    $a0, $a2        /* syscall arguments */
>> +        move    $a1, $a3
>> +        move    $a2, $a4
>> +        move    $a3, $a5
>> +        move    $a4, $a6
>> +        move    $a5, $a7
>> +        move    $a7, $t1
>> +
>> +        /*
>> +         * This next sequence of code works in conjunction with the
>> +         * rewind_if_safe_syscall_function(). If a signal is taken
>> +         * and the interrupted PC is anywhere between 'safe_syscall_start'
>> +         * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
>> +         * The code sequence must therefore be able to cope with this, and
>> +         * the syscall instruction must be the final one in the sequence.
>> +         */
>> +safe_syscall_start:
>> +        /* If signal_pending is non-zero, don't do the call */
>> +        ld.w    $t1, $t0, 0
>> +        bnez    $t1, 2f
>> +        syscall 0
>> +safe_syscall_end:
>> +        /* code path for having successfully executed the syscall */
>> +        li.w    $t2, -4096
>> +        bgtu    $a0, $t2, 0f
>> +        jr      $ra
>> +
>> +        /* code path setting errno */
>> +0:      sub.d   $a0, $zero, $a0
>> +        b       safe_syscall_set_errno_tail
>> +
>> +        /* code path when we didn't execute the syscall */
>> +2:      li.w    $a0, QEMU_ERESTARTSYS
>> +        b       safe_syscall_set_errno_tail
>> +        .cfi_endproc
>> +        .size   safe_syscall_base, .-safe_syscall_base
>>
> Why not rename 0 -> set_errno and 2 -> syscall_not_executed
> for readability? (and eventually drop the comments).
This is directly taken from the RISC-V version; aside from that, this is 
similar to all other architectures' adaptation, so maybe a future 
refactor should touch all these other files as well, if we do? I 
personally find the readability to be good, because when you look up 0 
or 2 below you can't miss the comments placed close to the labels.
>
> Otherwise:
> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Philippe Mathieu-Daudé Dec. 14, 2021, 3:38 p.m. UTC | #3
On 12/14/21 16:16, WANG Xuerui wrote:
> Hi Philippe,
> 
> On 12/14/21 21:29, Philippe Mathieu-Daudé wrote:
>> On 12/14/21 09:01, WANG Xuerui wrote:
>>> Signed-off-by: WANG Xuerui <git@xen0n.name>
>>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>>> ---
>>>   .../host/loongarch64/safe-syscall.inc.S       | 81 +++++++++++++++++++
>>>   1 file changed, 81 insertions(+)
>>>   create mode 100644 common-user/host/loongarch64/safe-syscall.inc.S

>>> +safe_syscall_start:
>>> +        /* If signal_pending is non-zero, don't do the call */
>>> +        ld.w    $t1, $t0, 0
>>> +        bnez    $t1, 2f
>>> +        syscall 0
>>> +safe_syscall_end:
>>> +        /* code path for having successfully executed the syscall */
>>> +        li.w    $t2, -4096
>>> +        bgtu    $a0, $t2, 0f
>>> +        jr      $ra
>>> +
>>> +        /* code path setting errno */
>>> +0:      sub.d   $a0, $zero, $a0
>>> +        b       safe_syscall_set_errno_tail
>>> +
>>> +        /* code path when we didn't execute the syscall */
>>> +2:      li.w    $a0, QEMU_ERESTARTSYS
>>> +        b       safe_syscall_set_errno_tail
>>> +        .cfi_endproc
>>> +        .size   safe_syscall_base, .-safe_syscall_base
>>>
>> Why not rename 0 -> set_errno and 2 -> syscall_not_executed
>> for readability? (and eventually drop the comments).
> This is directly taken from the RISC-V version; aside from that, this is
> similar to all other architectures' adaptation, so maybe a future
> refactor should touch all these other files as well, if we do? I
> personally find the readability to be good, because when you look up 0
> or 2 below you can't miss the comments placed close to the labels.

I just noticed that in Richard's "linux-user: simplify safe signal
handling​" series and was going to update here, so we are good :)

>>
>> Otherwise:
>> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
>
Richard Henderson Dec. 14, 2021, 7:29 p.m. UTC | #4
On 12/14/21 12:01 AM, WANG Xuerui wrote:
> +        move    $t0, $a0        /* signal_pending pointer */
...
> +safe_syscall_start:
> +        /* If signal_pending is non-zero, don't do the call */
> +        ld.w    $t1, $t0, 0
> +        bnez    $t1, 2f
> +        syscall 0

We need a non-syscall clobbered register for signal_pending, per the bug fixed in 
5d9f3ea0817215ad4baac5aa30414e9ebbaaf0d6.

In the case of riscv, because of the way exceptions are delivered, there are no 
syscall-clobbered registers (by the time syscall is distinguished from interrupt, all 
registers have been saved).

In the case of mips, there are no non-syscall-clobbered registers that are not also 
call-saved or syscall arguments, so I had to allocate a stack frame and save/restore s0.

For loongarch64, according to glibc,

#define __SYSCALL_CLOBBERS \
   "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", "memory"

which does suggest that a6 is unused, saved across the syscall, and also call-clobbered 
(so we don't have to allocate a stack frame).

I've had a browse through the loongarch kernel code and that seems to be all true. 
(Curiously, loongarch restores more registers than it saves on the way out of 
handle_syscall.  There may be a subtle reason for that, or room for improvement.)


r~
Peter Maydell Dec. 14, 2021, 8:49 p.m. UTC | #5
On Tue, 14 Dec 2021 at 19:29, Richard Henderson
<richard.henderson@linaro.org> wrote:
> For loongarch64, according to glibc,
>
> #define __SYSCALL_CLOBBERS \
>    "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", "memory"
>
> which does suggest that a6 is unused, saved across the syscall, and also call-clobbered
> (so we don't have to allocate a stack frame).
>
> I've had a browse through the loongarch kernel code and that seems to be all true.
> (Curiously, loongarch restores more registers than it saves on the way out of
> handle_syscall.  There may be a subtle reason for that, or room for improvement.)

Sadly most of the kernel architectures don't document the "which registers
are clobbered" part of their ABI. It would be helpful if they did. (I did
nudge a local arm kernel dev to have a look at doing that for arm...)

-- PMM
WANG Xuerui Dec. 15, 2021, 12:57 p.m. UTC | #6
Hi Richard,

On 2021/12/15 03:29, Richard Henderson wrote:
> On 12/14/21 12:01 AM, WANG Xuerui wrote:
>> +        move    $t0, $a0        /* signal_pending pointer */
> ...
>> +safe_syscall_start:
>> +        /* If signal_pending is non-zero, don't do the call */
>> +        ld.w    $t1, $t0, 0
>> +        bnez    $t1, 2f
>> +        syscall 0
>
> We need a non-syscall clobbered register for signal_pending, per the
> bug fixed in 5d9f3ea0817215ad4baac5aa30414e9ebbaaf0d6.
>
> In the case of riscv, because of the way exceptions are delivered,
> there are no syscall-clobbered registers (by the time syscall is
> distinguished from interrupt, all registers have been saved).
>
> In the case of mips, there are no non-syscall-clobbered registers that
> are not also call-saved or syscall arguments, so I had to allocate a
> stack frame and save/restore s0.
>
> For loongarch64, according to glibc,
>
> #define __SYSCALL_CLOBBERS \
>   "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", "memory"
>
> which does suggest that a6 is unused, saved across the syscall, and
> also call-clobbered (so we don't have to allocate a stack frame).
>
> I've had a browse through the loongarch kernel code and that seems to
> be all true. (Curiously, loongarch restores more registers than it
> saves on the way out of handle_syscall.  There may be a subtle reason
> for that, or room for improvement.)

Of course I completely forgot the fact that LoongArch looks more like
MIPS than RISC-V in kernel land (facepalm)

I've checked the LoongArch kernel sources too and yeah using a6 is ideal
and unlikely to break in the future (we're not allowing any more
7-argument syscalls into the kernel after all). I've just sent v10 with
some other minor changes.

>
>
> r~
diff mbox series

Patch

diff --git a/common-user/host/loongarch64/safe-syscall.inc.S b/common-user/host/loongarch64/safe-syscall.inc.S
new file mode 100644
index 0000000000..61dc6554eb
--- /dev/null
+++ b/common-user/host/loongarch64/safe-syscall.inc.S
@@ -0,0 +1,81 @@ 
+/*
+ * safe-syscall.inc.S : host-specific assembly fragment
+ * to handle signals occurring at the same time as system calls.
+ * This is intended to be included by common-user/safe-syscall.S
+ *
+ * Ported to LoongArch by WANG Xuerui <git@xen0n.name>
+ *
+ * Based on safe-syscall.inc.S code for every other architecture,
+ * originally written by Richard Henderson <rth@twiddle.net>
+ * Copyright (C) 2018 Linaro, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+        .global safe_syscall_base
+        .global safe_syscall_start
+        .global safe_syscall_end
+        .type   safe_syscall_base, @function
+        .type   safe_syscall_start, @function
+        .type   safe_syscall_end, @function
+
+        /*
+         * This is the entry point for making a system call. The calling
+         * convention here is that of a C varargs function with the
+         * first argument an 'int *' to the signal_pending flag, the
+         * second one the system call number (as a 'long'), and all further
+         * arguments being syscall arguments (also 'long').
+         */
+safe_syscall_base:
+        .cfi_startproc
+        /*
+         * The syscall calling convention is nearly the same as C:
+         * we enter with a0 == &signal_pending
+         *               a1 == syscall number
+         *               a2 ... a7 == syscall arguments
+         *               and return the result in a0
+         * and the syscall instruction needs
+         *               a7 == syscall number
+         *               a0 ... a5 == syscall arguments
+         *               and returns the result in a0
+         * Shuffle everything around appropriately.
+         */
+        move    $t0, $a0        /* signal_pending pointer */
+        move    $t1, $a1        /* syscall number */
+        move    $a0, $a2        /* syscall arguments */
+        move    $a1, $a3
+        move    $a2, $a4
+        move    $a3, $a5
+        move    $a4, $a6
+        move    $a5, $a7
+        move    $a7, $t1
+
+        /*
+         * This next sequence of code works in conjunction with the
+         * rewind_if_safe_syscall_function(). If a signal is taken
+         * and the interrupted PC is anywhere between 'safe_syscall_start'
+         * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
+         * The code sequence must therefore be able to cope with this, and
+         * the syscall instruction must be the final one in the sequence.
+         */
+safe_syscall_start:
+        /* If signal_pending is non-zero, don't do the call */
+        ld.w    $t1, $t0, 0
+        bnez    $t1, 2f
+        syscall 0
+safe_syscall_end:
+        /* code path for having successfully executed the syscall */
+        li.w    $t2, -4096
+        bgtu    $a0, $t2, 0f
+        jr      $ra
+
+        /* code path setting errno */
+0:      sub.d   $a0, $zero, $a0
+        b       safe_syscall_set_errno_tail
+
+        /* code path when we didn't execute the syscall */
+2:      li.w    $a0, QEMU_ERESTARTSYS
+        b       safe_syscall_set_errno_tail
+        .cfi_endproc
+        .size   safe_syscall_base, .-safe_syscall_base