diff mbox

[2/6] linux-user: Provide safe_syscall for i386

Message ID 1465854326-19160-3-git-send-email-rth@twiddle.net (mailing list archive)
State New, archived
Headers show

Commit Message

Richard Henderson June 13, 2016, 9:45 p.m. UTC
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 linux-user/host/i386/hostdep.h          |  34 ++++++++++
 linux-user/host/i386/safe-syscall.inc.S | 110 ++++++++++++++++++++++++++++++++
 2 files changed, 144 insertions(+)
 create mode 100644 linux-user/host/i386/hostdep.h
 create mode 100644 linux-user/host/i386/safe-syscall.inc.S

Comments

Peter Maydell June 14, 2016, 11:58 a.m. UTC | #1
On 13 June 2016 at 22:45, Richard Henderson <rth@twiddle.net> wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  linux-user/host/i386/hostdep.h          |  34 ++++++++++
>  linux-user/host/i386/safe-syscall.inc.S | 110 ++++++++++++++++++++++++++++++++
>  2 files changed, 144 insertions(+)
>  create mode 100644 linux-user/host/i386/hostdep.h
>  create mode 100644 linux-user/host/i386/safe-syscall.inc.S
>
> diff --git a/linux-user/host/i386/hostdep.h b/linux-user/host/i386/hostdep.h
> new file mode 100644
> index 0000000..9e2b4d7
> --- /dev/null
> +++ b/linux-user/host/i386/hostdep.h
> @@ -0,0 +1,34 @@
> +/*
> + * hostdep.h : things which are dependent on the host architecture
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef QEMU_HOSTDEP_H
> +#define QEMU_HOSTDEP_H
> +
> +/* We have a safe-syscall.inc.S */
> +#define HAVE_SAFE_SYSCALL
> +
> +#ifndef __ASSEMBLER__
> +
> +/* These are defined by the safe-syscall.inc.S file */
> +extern char safe_syscall_start[];
> +extern char safe_syscall_end[];
> +
> +/* Adjust the signal context to rewind out of safe-syscall if we're in it */
> +static inline void rewind_if_in_safe_syscall(void *puc)
> +{
> +    struct ucontext *uc = puc;
> +    greg_t *pcreg = &uc->uc_mcontext.gregs[REG_EIP];

user-exec.c has
#ifndef REG_EIP
/* for glibc 2.1 */
#define REG_EIP    EIP
#endif

Do we still care about glibc 2.1 ? (Probably not, 2.2 was
released fifteen years ago now...)

> +
> +    if (*pcreg > (uintptr_t)safe_syscall_start
> +        && *pcreg < (uintptr_t)safe_syscall_end) {
> +        *pcreg = (uintptr_t)safe_syscall_start;
> +    }
> +}
> +
> +#endif /* __ASSEMBLER__ */
> +
> +#endif
> diff --git a/linux-user/host/i386/safe-syscall.inc.S b/linux-user/host/i386/safe-syscall.inc.S
> new file mode 100644
> index 0000000..f5f0c64
> --- /dev/null
> +++ b/linux-user/host/i386/safe-syscall.inc.S
> @@ -0,0 +1,110 @@
> +/*
> + * safe-syscall.inc.S : host-specific assembly fragment
> + * to handle signals occurring at the same time as system calls.
> + * This is intended to be included by linux-user/safe-syscall.S

Missing copyright/written by attribution ?

> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +        .global safe_syscall_base
> +        .global safe_syscall_start
> +        .global safe_syscall_end
> +        .type   safe_syscall_base, @function

I guess 4-space indent would match the rest of QEMU...

> +
> +        /* This is the entry point for making a system call. The calling
> +         * convention here is that of a C varargs function with the
> +         * first argument an 'int *' to the signal_pending flag, the
> +         * second one the system call number (as a 'long'), and all further
> +         * arguments being syscall arguments (also 'long').
> +         * We return a long which is the syscall's return value, which
> +         * may be negative-errno on failure. Conversion to the
> +         * -1-and-errno-set convention is done by the calling wrapper.
> +         */
> +safe_syscall_base:
> +        .cfi_startproc
> +        push    %ebp
> +        .cfi_adjust_cfa_offset 4
> +        .cfi_rel_offset ebp, 0
> +       push    %esi
> +        .cfi_adjust_cfa_offset 4
> +        .cfi_rel_offset esi, 0
> +       push    %edi

Odd indentation here.

> +        .cfi_adjust_cfa_offset 4
> +        .cfi_rel_offset edi, 0
> +       push    %ebx
> +        .cfi_adjust_cfa_offset 4
> +        .cfi_rel_offset ebx, 0
> +
> +        /* The syscall calling convention isn't the same as the C one:
> +         * we enter with 0(%esp) == return address
> +         *               4(%esp) == *signal_pending
> +         *               8(%esp) == syscall number
> +         *               12(%esp) ... 32(%esp) == syscall arguments
> +         *               and return the result in eax
> +         * and the syscall instruction needs
> +         *               eax == syscall number
> +         *               ebx, ecx, edx, esi, edi, ebp == syscall arguments
> +         *               and returns the result in eax
> +         * Shuffle everything around appropriately.
> +        * Note the 16 bytes that we pushed to save registers.
> +         */
> +        mov     12+16(%esp), %ebx       /* the syscall arguments */
> +        mov     16+16(%esp), %ecx
> +        mov     20+16(%esp), %edx
> +        mov     24+16(%esp), %esi
> +        mov     28+16(%esp), %edi
> +        mov     32+16(%esp), %ebp
> +
> +        /* This next sequence of code works in conjunction with the
> +         * rewind_if_safe_syscall_function(). If a signal is taken
> +         * and the interrupted PC is anywhere between 'safe_syscall_start'
> +         * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
> +         * The code sequence must therefore be able to cope with this, and
> +         * the syscall instruction must be the final one in the sequence.
> +         */
> +safe_syscall_start:
> +        /* if signal_pending is non-zero, don't do the call */
> +       mov     4+16(%esp), %eax        /* signal_pending */
> +        cmp    $0, (%eax)
> +        mov     8+16(%esp), %eax        /* syscall number */
> +        jnz     1f

Any particular reason for doing the jump after the mov?

> +        int    $0x80
> +safe_syscall_end:
> +        /* code path for having successfully executed the syscall */
> +        pop     %ebx
> +        .cfi_remember_state
> +        .cfi_def_cfa_offset 4

Shouldn't these all be ".cfi_adjust_cfa_offset -4" ? That's what glibc
uses AFAICT.

> +        .cfi_restore ebx
> +       pop     %edi
> +        .cfi_def_cfa_offset 4
> +        .cfi_restore edi
> +       pop     %esi
> +        .cfi_def_cfa_offset 4
> +        .cfi_restore esi
> +       pop     %ebp
> +        .cfi_def_cfa_offset 4
> +        .cfi_restore ebp
> +        ret
> +
> +1:
> +        /* code path when we didn't execute the syscall */
> +        .cfi_restore_state
> +        mov     $-TARGET_ERESTARTSYS, %eax
> +        pop     %ebx
> +        .cfi_remember_state

We don't need to remember state here I think.

> +        .cfi_def_cfa_offset 4
> +        .cfi_restore ebx
> +       pop     %edi
> +        .cfi_def_cfa_offset 4
> +        .cfi_restore edi
> +       pop     %esi
> +        .cfi_def_cfa_offset 4
> +        .cfi_restore esi
> +       pop     %ebp
> +        .cfi_def_cfa_offset 4
> +        .cfi_restore ebp
> +        ret
> +        .cfi_endproc
> +
> +        .size   safe_syscall_base, .-safe_syscall_base
> --
> 2.5.5

Other than some trivialities like order of register push/pops
this is virtually identical code to the version I had, so it
must be right :-)

thanks
-- PMM
Richard Henderson June 14, 2016, 3:47 p.m. UTC | #2
On 06/14/2016 04:58 AM, Peter Maydell wrote:
>> +    greg_t *pcreg = &uc->uc_mcontext.gregs[REG_EIP];
> 
> user-exec.c has
> #ifndef REG_EIP
> /* for glibc 2.1 */
> #define REG_EIP    EIP
> #endif
> 
> Do we still care about glibc 2.1 ? (Probably not, 2.2 was
> released fifteen years ago now...)
> 

Heh.  I would say not.  We've got other much more recent requirements.

>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +        .global safe_syscall_base
>> +        .global safe_syscall_start
>> +        .global safe_syscall_end
>> +        .type   safe_syscall_base, @function
> 
> I guess 4-space indent would match the rest of QEMU...

This is assembler not C.  My brain is tied to a 1-tab indent.

>> +        .cfi_rel_offset esi, 0
>> +       push    %edi
> 
> Odd indentation here.

Yeah, that's mixing code from your x86_64 version which uses spaces not tabs.

>> +        cmp    $0, (%eax)
>> +        mov     8+16(%esp), %eax        /* syscall number */
>> +        jnz     1f
> 
> Any particular reason for doing the jump after the mov?

No.  Indeed, recent cpus will fuse the cmp+jnz so they're better off together.

>> +        .cfi_def_cfa_offset 4
> 
> Shouldn't these all be ".cfi_adjust_cfa_offset -4" ? That's what glibc
> uses AFAICT.

Typo.  Good catch.

>> +        .cfi_remember_state
> 
> We don't need to remember state here I think.

Correct.  Cut and paste.

> Other than some trivialities like order of register push/pops
> this is virtually identical code to the version I had, so it
> must be right :-)

Heh.


r~
diff mbox

Patch

diff --git a/linux-user/host/i386/hostdep.h b/linux-user/host/i386/hostdep.h
new file mode 100644
index 0000000..9e2b4d7
--- /dev/null
+++ b/linux-user/host/i386/hostdep.h
@@ -0,0 +1,34 @@ 
+/*
+ * hostdep.h : things which are dependent on the host architecture
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_HOSTDEP_H
+#define QEMU_HOSTDEP_H
+
+/* We have a safe-syscall.inc.S */
+#define HAVE_SAFE_SYSCALL
+
+#ifndef __ASSEMBLER__
+
+/* These are defined by the safe-syscall.inc.S file */
+extern char safe_syscall_start[];
+extern char safe_syscall_end[];
+
+/* Rewind the signal context 'puc' if it was interrupted inside safe-syscall */
+static inline void rewind_if_in_safe_syscall(void *puc)
+{
+    ucontext_t *uc = puc; /* glibc 2.26 dropped the 'struct ucontext' tag */
+    greg_t *pcreg = &uc->uc_mcontext.gregs[REG_EIP]; /* interrupted PC */
+
+    if (*pcreg > (uintptr_t)safe_syscall_start /* at start: nothing to undo */
+        && *pcreg < (uintptr_t)safe_syscall_end) { /* at end: syscall done */
+        *pcreg = (uintptr_t)safe_syscall_start;
+    }
+}
+
+#endif /* __ASSEMBLER__ */
+
+#endif
diff --git a/linux-user/host/i386/safe-syscall.inc.S b/linux-user/host/i386/safe-syscall.inc.S
new file mode 100644
index 0000000..f5f0c64
--- /dev/null
+++ b/linux-user/host/i386/safe-syscall.inc.S
@@ -0,0 +1,110 @@ 
+/*
+ * safe-syscall.inc.S : host-specific assembly fragment
+ * to handle signals occurring at the same time as system calls.
+ * This is intended to be included by linux-user/safe-syscall.S
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+        .global safe_syscall_base
+        .global safe_syscall_start
+        .global safe_syscall_end
+        .type   safe_syscall_base, @function
+
+        /* This is the entry point for making a system call. The calling
+         * convention here is that of a C varargs function with the
+         * first argument an 'int *' to the signal_pending flag, the
+         * second one the system call number (as a 'long'), and all further
+         * arguments being syscall arguments (also 'long').
+         * We return a long which is the syscall's return value, which
+         * may be negative-errno on failure. Conversion to the
+         * -1-and-errno-set convention is done by the calling wrapper.
+         */
+safe_syscall_base:
+        .cfi_startproc
+        push    %ebp
+        .cfi_adjust_cfa_offset 4
+        .cfi_rel_offset ebp, 0
+        push    %esi
+        .cfi_adjust_cfa_offset 4
+        .cfi_rel_offset esi, 0
+        push    %edi
+        .cfi_adjust_cfa_offset 4
+        .cfi_rel_offset edi, 0
+        push    %ebx
+        .cfi_adjust_cfa_offset 4
+        .cfi_rel_offset ebx, 0
+
+        /* The syscall calling convention isn't the same as the C one:
+         * we enter with 0(%esp) == return address
+         *               4(%esp) == signal_pending (pointer to the flag)
+         *               8(%esp) == syscall number
+         *               12(%esp) ... 32(%esp) == syscall arguments
+         *               and return the result in eax
+         * and the syscall instruction needs
+         *               eax == syscall number
+         *               ebx, ecx, edx, esi, edi, ebp == syscall arguments
+         *               and returns the result in eax
+         * Shuffle everything around appropriately.
+         * Note the 16 bytes that we pushed to save registers.
+         */
+        mov     12+16(%esp), %ebx       /* the syscall arguments */
+        mov     16+16(%esp), %ecx
+        mov     20+16(%esp), %edx
+        mov     24+16(%esp), %esi
+        mov     28+16(%esp), %edi
+        mov     32+16(%esp), %ebp
+
+        /* This next sequence of code works in conjunction with the
+         * rewind_if_in_safe_syscall() function. If a signal is taken
+         * and the interrupted PC is anywhere between 'safe_syscall_start'
+         * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
+         * The code sequence must therefore be able to cope with this, and
+         * the syscall instruction must be the final one in the sequence.
+         */
+safe_syscall_start:
+        /* if signal_pending is non-zero, don't do the call */
+        mov     4+16(%esp), %eax        /* signal_pending */
+        cmpl    $0, (%eax)
+        jnz     1f                      /* %eax is dead on the skip path */
+        mov     8+16(%esp), %eax        /* syscall number */
+        int     $0x80
+safe_syscall_end:
+        /* code path for having successfully executed the syscall:
+         * every pop shrinks the CFA offset by 4, from 20 back down to 4 */
+        pop     %ebx
+        .cfi_remember_state
+        .cfi_adjust_cfa_offset -4
+        .cfi_restore ebx
+        pop     %edi
+        .cfi_adjust_cfa_offset -4
+        .cfi_restore edi
+        pop     %esi
+        .cfi_adjust_cfa_offset -4
+        .cfi_restore esi
+        pop     %ebp
+        .cfi_adjust_cfa_offset -4
+        .cfi_restore ebp
+        ret
+
+1:
+        /* code path when we didn't execute the syscall */
+        .cfi_restore_state
+        mov     $-TARGET_ERESTARTSYS, %eax
+        pop     %ebx
+        .cfi_adjust_cfa_offset -4
+        .cfi_restore ebx
+        pop     %edi
+        .cfi_adjust_cfa_offset -4
+        .cfi_restore edi
+        pop     %esi
+        .cfi_adjust_cfa_offset -4
+        .cfi_restore esi
+        pop     %ebp
+        .cfi_adjust_cfa_offset -4
+        .cfi_restore ebp
+        ret
+        .cfi_endproc
+
+        .size   safe_syscall_base, .-safe_syscall_base