diff mbox

[4/6] linux-user: Provide safe_syscall for aarch64

Message ID 1465854326-19160-5-git-send-email-rth@twiddle.net (mailing list archive)
State New, archived
Headers show

Commit Message

Richard Henderson June 13, 2016, 9:45 p.m. UTC
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 linux-user/host/aarch64/hostdep.h          | 34 ++++++++++++++
 linux-user/host/aarch64/safe-syscall.inc.S | 72 ++++++++++++++++++++++++++++++
 2 files changed, 106 insertions(+)
 create mode 100644 linux-user/host/aarch64/hostdep.h
 create mode 100644 linux-user/host/aarch64/safe-syscall.inc.S

Comments

Peter Maydell June 13, 2016, 10:04 p.m. UTC | #1
On 13 June 2016 at 22:45, Richard Henderson <rth@twiddle.net> wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  linux-user/host/aarch64/hostdep.h          | 34 ++++++++++++++
>  linux-user/host/aarch64/safe-syscall.inc.S | 72 ++++++++++++++++++++++++++++++
>  2 files changed, 106 insertions(+)
>  create mode 100644 linux-user/host/aarch64/hostdep.h
>  create mode 100644 linux-user/host/aarch64/safe-syscall.inc.S
>
> diff --git a/linux-user/host/aarch64/hostdep.h b/linux-user/host/aarch64/hostdep.h
> new file mode 100644
> index 0000000..0ff7985
> --- /dev/null
> +++ b/linux-user/host/aarch64/hostdep.h
> @@ -0,0 +1,34 @@
> +/*
> + * hostdep.h : things which are dependent on the host architecture
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef QEMU_HOSTDEP_H
> +#define QEMU_HOSTDEP_H
> +
> +/* We have a safe-syscall.inc.S */
> +#define HAVE_SAFE_SYSCALL
> +
> +#ifndef __ASSEMBLER__
> +
> +/* These are defined by the safe-syscall.inc.S file */
> +extern char safe_syscall_start[];
> +extern char safe_syscall_end[];
> +
> +/* Adjust the signal context to rewind out of safe-syscall if we're in it */
> +static inline void rewind_if_in_safe_syscall(void *puc)
> +{
> +    struct ucontext *uc = puc;
> +    __u64 *pcreg = &uc->uc_mcontext.pc;
> +
> +    if (*pcreg > (uintptr_t)safe_syscall_start
> +        && *pcreg < (uintptr_t)safe_syscall_end) {
> +        *pcreg = (uintptr_t)safe_syscall_start;
> +    }
> +}
> +
> +#endif /* __ASSEMBLER__ */
> +
> +#endif
> diff --git a/linux-user/host/aarch64/safe-syscall.inc.S b/linux-user/host/aarch64/safe-syscall.inc.S
> new file mode 100644
> index 0000000..5416b90
> --- /dev/null
> +++ b/linux-user/host/aarch64/safe-syscall.inc.S
> @@ -0,0 +1,72 @@
> +/*
> + * safe-syscall.inc.S : host-specific assembly fragment
> + * to handle signals occurring at the same time as system calls.
> + * This is intended to be included by linux-user/safe-syscall.S
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +        .global safe_syscall_base
> +        .global safe_syscall_start
> +        .global safe_syscall_end
> +        .type   safe_syscall_base, #function
> +        .type   safe_syscall_start, #function
> +        .type   safe_syscall_end, #function

_start and _end aren't function entry points, so is it OK
to mark them as functions?

(The 'as' manual doesn't document what setting .type does in much
detail...)

> +
> +        /* This is the entry point for making a system call. The calling
> +         * convention here is that of a C varargs function with the
> +         * first argument an 'int *' to the signal_pending flag, the
> +         * second one the system call number (as a 'long'), and all further
> +         * arguments being syscall arguments (also 'long').
> +         * We return a long which is the syscall's return value, which
> +         * may be negative-errno on failure. Conversion to the
> +         * -1-and-errno-set convention is done by the calling wrapper.
> +         */
> +safe_syscall_base:
> +        .cfi_startproc
> +        /* The syscall calling convention isn't the same as the
> +         * C one:
> +         * we enter with x0 == *signal_pending
> +         *               x1 == syscall number
> +         *               x2 ... x7, (stack) == syscall arguments
> +         *               and return the result in x0
> +         * and the syscall instruction needs
> +         *               x8 == syscall number
> +         *               x0 ... x6 == syscall arguments
> +         *               and returns the result in x0
> +         * Shuffle everything around appropriately.
> +         */

http://man7.org/linux/man-pages/man2/syscall.2.html
doesn't mention a syscall argument in x7, just x0..x5.

> +       mov     x9, x0          /* signal_pending pointer */
> +       mov     w8, w1          /* syscall number */

Seems a bit odd to use a 32-bit move for this when our input
calling convention has it as 64 bits and the kernel's calling
convention has it as 64 bits.

> +       mov     x0, x2          /* syscall arguments */
> +       mov     x1, x3
> +       mov     x2, x4
> +       mov     x3, x5
> +       mov     x4, x6
> +       mov     x6, x7
> +       ldr     x7, [sp]
> +
> +        /* This next sequence of code works in conjunction with the
> +         * rewind_if_safe_syscall_function(). If a signal is taken
> +         * and the interrupted PC is anywhere between 'safe_syscall_start'
> +         * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
> +         * The code sequence must therefore be able to cope with this, and
> +         * the syscall instruction must be the final one in the sequence.
> +         */
> +safe_syscall_start:
> +        /* if signal_pending is non-zero, don't do the call */
> +       ldr     w10, [x9]
> +       cbnz    w10, 0f
> +        svc    0x0
> +safe_syscall_end:
> +        /* code path for having successfully executed the syscall */
> +        ret
> +
> +0:
> +        /* code path when we didn't execute the syscall */
> +        mov     x0, #-TARGET_ERESTARTSYS
> +        ret
> +        .cfi_endproc
> +
> +        .size   safe_syscall_base, .-safe_syscall_base
> --
> 2.5.5
>

thanks
-- PMM
Richard Henderson June 13, 2016, 10:21 p.m. UTC | #2
On 06/13/2016 03:04 PM, Peter Maydell wrote:
>> +        .global safe_syscall_base
>> +        .global safe_syscall_start
>> +        .global safe_syscall_end
>> +        .type   safe_syscall_base, #function
>> +        .type   safe_syscall_start, #function
>> +        .type   safe_syscall_end, #function
>
> _start and _end aren't function entry points, so is it OK
> to mark them as functions?

Yes.  Indeed, if you don't, the objdump will think these are data symbols and 
fail to disassemble the instructions that follow.


> (The 'as' manual doesn't document what setting .type does in much
> detail...)

It sets the Elf_Sym.st_type field.
But what that implies is very host specific.

> http://man7.org/linux/man-pages/man2/syscall.2.html
> doesn't mention a syscall argument in x7, just x0..x5.

I took glibc as definitive, since that's code that definitely works.

https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/aarch64/syscall.S;h=98c1b42ee96fe0fb39955849773ca16d095803e0;hb=HEAD

I'll also point you at the kernel's __sys_trace, which does in fact save and 
restore all 8 registers around the tracing call-outs.

>> +       mov     x9, x0          /* signal_pending pointer */
>> +       mov     w8, w1          /* syscall number */
>
> Seems a bit odd to use a 32-bit move for this when our input
> calling convention has it as 64 bits and the kernel's calling
> convention has it as 64 bits.

I got that from glibc too.



r~
Peter Maydell June 13, 2016, 10:28 p.m. UTC | #3
On 13 June 2016 at 23:21, Richard Henderson <rth@twiddle.net> wrote:
> On 06/13/2016 03:04 PM, Peter Maydell wrote:
>>>
>>> +        .global safe_syscall_base
>>> +        .global safe_syscall_start
>>> +        .global safe_syscall_end
>>> +        .type   safe_syscall_base, #function
>>> +        .type   safe_syscall_start, #function
>>> +        .type   safe_syscall_end, #function
>>
>>
>> _start and _end aren't function entry points, so is it OK
>> to mark them as functions?
>
>
> Yes.  Indeed, if you don't, the objdump will think these are data symbols
> and fail to disassemble the instructions that follow.

I was told this was an objdump bug... it certainly
seems like an objdump bug to me, because this works
on the other architectures.
https://bugs.linaro.org/show_bug.cgi?id=815

>> (The 'as' manual doesn't document what setting .type does in much
>> detail...)
>
>
> It sets the Elf_Sym.st_type field.
> But what that implies is very host specific.
>
>> http://man7.org/linux/man-pages/man2/syscall.2.html
>> doesn't mention a syscall argument in x7, just x0..x5.
>
> I took glibc as definitive, since that's code that definitely works.

If we never use the 7th syscall argument (and we can't,
since it doesn't exist on some hosts), then it's
unnecessary work, though I guess coming back to fix
all these host functions later might be tedious.

> https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/aarch64/syscall.S;h=98c1b42ee96fe0fb39955849773ca16d095803e0;hb=HEAD
>
> I'll also point you at the kernel's __sys_trace, which does in fact save and
> restore all 8 registers around the tracing call-outs.
>
>>> +       mov     x9, x0          /* signal_pending pointer */
>>> +       mov     w8, w1          /* syscall number */
>>
>>
>> Seems a bit odd to use a 32-bit move for this when our input
>> calling convention has it as 64 bits and the kernel's calling
>> convention has it as 64 bits.
>
>
> I got that from glibc too.

glibc's syscall() takes the system parameter as an int and
does a sign-extending move into x0 with an uxtw.
safe_syscall() takes a long, so it's already 64 bits.

thanks
-- PMM
Peter Maydell June 13, 2016, 10:31 p.m. UTC | #4
On 13 June 2016 at 23:28, Peter Maydell <peter.maydell@linaro.org> wrote:
> glibc's syscall() takes the system parameter as an int and
> does a sign-extending move into x0 with an uxtw.

...zero-extending...

-- PMM
Richard Henderson June 13, 2016, 10:38 p.m. UTC | #5
On 06/13/2016 03:28 PM, Peter Maydell wrote:
> On 13 June 2016 at 23:21, Richard Henderson <rth@twiddle.net> wrote:
>> On 06/13/2016 03:04 PM, Peter Maydell wrote:
>>>>
>>>> +        .global safe_syscall_base
>>>> +        .global safe_syscall_start
>>>> +        .global safe_syscall_end
>>>> +        .type   safe_syscall_base, #function
>>>> +        .type   safe_syscall_start, #function
>>>> +        .type   safe_syscall_end, #function
>>>
>>>
>>> _start and _end aren't function entry points, so is it OK
>>> to mark them as functions?
>>
>>
>> Yes.  Indeed, if you don't, the objdump will think these are data symbols
>> and fail to disassemble the instructions that follow.
>
> I was told this was an objdump bug... it certainly
> seems like an objdump bug to me, because this works
> on the other architectures.
> https://bugs.linaro.org/show_bug.cgi?id=815

This may be a hold-over from arm32, which as you recall has a rather 
complicated scheme for annotating data vs instructions.

>> I took glibc as definitive, since that's code that definitely works.
>
> If we never use the 7th syscall argument (and we can't,
> since it doesn't exist on some hosts), then it's
> unnecessary work, though I guess coming back to fix
> all these host functions later might be tedious.

You're probably right about the 7th syscall argument on a 64-bit host.

> glibc's syscall() takes the system parameter as an int and
> does a sign-extending move into x0 with an uxtw.
> safe_syscall() takes a long, so it's already 64 bits.

Well, uxtw is a zero-extending move.  So...


r~
Peter Maydell June 13, 2016, 10:40 p.m. UTC | #6
On 13 June 2016 at 23:38, Richard Henderson <rth@twiddle.net> wrote:
> On 06/13/2016 03:28 PM, Peter Maydell wrote:
>> glibc's syscall() takes the system parameter as an int and
>> does a sign-extending move into x0 with an uxtw.
>> safe_syscall() takes a long, so it's already 64 bits.
>
>
> Well, uxtw is a zero-extending move.  So...

Yeah 'sign-extending' was a thinko. But the point is
that for syscall() the input is 32 bits and the
value it feeds to the kernel is 64 bits, hence the
extension. For safe_syscall() the input is 64 bits
and the value fed to the kernel is also 64 bits,
so the most 'natural' thing is just to move a
64 bit value (saves the reader looking up whether
mov wX, wY clears the high half or not, if nothing else).

thanks
-- PMM
diff mbox

Patch

diff --git a/linux-user/host/aarch64/hostdep.h b/linux-user/host/aarch64/hostdep.h
new file mode 100644
index 0000000..0ff7985
--- /dev/null
+++ b/linux-user/host/aarch64/hostdep.h
@@ -0,0 +1,34 @@ 
+/*
+ * hostdep.h : things which are dependent on the host architecture
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_HOSTDEP_H
+#define QEMU_HOSTDEP_H
+
+/* We have a safe-syscall.inc.S */
+#define HAVE_SAFE_SYSCALL
+
+#ifndef __ASSEMBLER__
+
+/* These are defined by the safe-syscall.inc.S file */
+extern char safe_syscall_start[];
+extern char safe_syscall_end[];
+
+/* Adjust the signal context to rewind out of safe-syscall if we're in it */
+static inline void rewind_if_in_safe_syscall(void *puc)
+{
+    struct ucontext *uc = puc;
+    __u64 *pcreg = &uc->uc_mcontext.pc;
+
+    if (*pcreg > (uintptr_t)safe_syscall_start
+        && *pcreg < (uintptr_t)safe_syscall_end) {
+        *pcreg = (uintptr_t)safe_syscall_start;
+    }
+}
+
+#endif /* __ASSEMBLER__ */
+
+#endif
diff --git a/linux-user/host/aarch64/safe-syscall.inc.S b/linux-user/host/aarch64/safe-syscall.inc.S
new file mode 100644
index 0000000..5416b90
--- /dev/null
+++ b/linux-user/host/aarch64/safe-syscall.inc.S
@@ -0,0 +1,72 @@ 
+/*
+ * safe-syscall.inc.S : host-specific assembly fragment
+ * to handle signals occurring at the same time as system calls.
+ * This is intended to be included by linux-user/safe-syscall.S
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+        .global safe_syscall_base
+        .global safe_syscall_start
+        .global safe_syscall_end
+        .type   safe_syscall_base, #function
+        .type   safe_syscall_start, #function
+        .type   safe_syscall_end, #function
+
+        /* This is the entry point for making a system call. The calling
+         * convention here is that of a C varargs function with the
+         * first argument an 'int *' to the signal_pending flag, the
+         * second one the system call number (as a 'long'), and all further
+         * arguments being syscall arguments (also 'long').
+         * We return a long which is the syscall's return value, which
+         * may be negative-errno on failure. Conversion to the
+         * -1-and-errno-set convention is done by the calling wrapper.
+         */
+safe_syscall_base:
+        .cfi_startproc
+        /* The syscall calling convention isn't the same as the
+         * C one:
+         * we enter with x0 == *signal_pending
+         *               x1 == syscall number
+         *               x2 ... x7, (stack) == syscall arguments
+         *               and return the result in x0
+         * and the syscall instruction needs
+         *               x8 == syscall number
+         *               x0 ... x6 == syscall arguments
+         *               and returns the result in x0
+         * Shuffle everything around appropriately.
+         */
+	mov	x9, x0          /* signal_pending pointer */
+	mov	w8, w1          /* syscall number */
+	mov	x0, x2          /* syscall arguments */
+	mov	x1, x3
+	mov	x2, x4
+	mov	x3, x5
+	mov	x4, x6
+	mov	x6, x7
+	ldr	x7, [sp]
+
+        /* This next sequence of code works in conjunction with the
+         * rewind_if_safe_syscall_function(). If a signal is taken
+         * and the interrupted PC is anywhere between 'safe_syscall_start'
+         * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
+         * The code sequence must therefore be able to cope with this, and
+         * the syscall instruction must be the final one in the sequence.
+         */
+safe_syscall_start:
+        /* if signal_pending is non-zero, don't do the call */
+	ldr	w10, [x9]
+	cbnz	w10, 0f 
+        svc	0x0
+safe_syscall_end:
+        /* code path for having successfully executed the syscall */
+        ret
+
+0:
+        /* code path when we didn't execute the syscall */
+        mov     x0, #-TARGET_ERESTARTSYS
+        ret
+        .cfi_endproc
+
+        .size   safe_syscall_base, .-safe_syscall_base