diff mbox series

x86/ioemul: Rewrite stub generation

Message ID 20200427122041.7162-1-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Headers show
Series x86/ioemul: Rewrite stub generation | expand

Commit Message

Andrew Cooper April 27, 2020, 12:20 p.m. UTC
The logic is completely undocumented and almost impossible to follow.  It
actually uses return oriented programming.  Rewrite it to conform to more
normal call mechanics, and leave a big comment explaining thing.  As well as
the code being easier to follow, it will execute faster as it isn't fighting
the branch predictor.

Move the ioemul_handle_quirk() function pointer from traps.c to
ioport_emulate.c.  There is no reason for it to be in neither of the two
translation units which use it.  Alter the behaviour to return the number of
bytes written into the stub.

Access the addresses of the host/guest helpers with extern const char arrays.
Nothing good will come of C thinking they are regular functions.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
--
CC: Jan Beulich <JBeulich@suse.com>
CC: Wei Liu <wl@xen.org>
CC: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/ioport_emulate.c  | 11 ++---
 xen/arch/x86/pv/emul-priv-op.c | 91 +++++++++++++++++++++++++++++++-----------
 xen/arch/x86/pv/gpr_switch.S   | 37 +++++------------
 xen/arch/x86/traps.c           |  3 --
 xen/include/asm-x86/io.h       |  3 +-
 5 files changed, 85 insertions(+), 60 deletions(-)

Comments

Roger Pau Monné April 27, 2020, 3:18 p.m. UTC | #1
On Mon, Apr 27, 2020 at 01:20:41PM +0100, Andrew Cooper wrote:
> The logic is completely undocumented and almost impossible to follow.  It
> actually uses return oriented programming.  Rewrite it to conform to more
> normal call mechanics, and leave a big comment explaining thing.  As well as
> the code being easier to follow, it will execute faster as it isn't fighting
> the branch predictor.
> 
> Move the ioemul_handle_quirk() function pointer from traps.c to
> ioport_emulate.c.

Seeing as the newest quirk was added in 2008, I wonder if such quirks
are still relevant?

Maybe they are also used by newer boxes, I really have no idea.

> There is no reason for it to be in neither of the two
> translation units which use it.  Alter the behaviour to return the number of
> bytes written into the stub.
> 
> Access the addresses of the host/guest helpers with extern const char arrays.
> Nothing good will come of C thinking they are regular functions.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> --
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Wei Liu <wl@xen.org>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> ---
>  xen/arch/x86/ioport_emulate.c  | 11 ++---
>  xen/arch/x86/pv/emul-priv-op.c | 91 +++++++++++++++++++++++++++++++-----------
>  xen/arch/x86/pv/gpr_switch.S   | 37 +++++------------
>  xen/arch/x86/traps.c           |  3 --
>  xen/include/asm-x86/io.h       |  3 +-
>  5 files changed, 85 insertions(+), 60 deletions(-)
> 
> diff --git a/xen/arch/x86/ioport_emulate.c b/xen/arch/x86/ioport_emulate.c
> index 499c1f6056..f7511a9c49 100644
> --- a/xen/arch/x86/ioport_emulate.c
> +++ b/xen/arch/x86/ioport_emulate.c
> @@ -8,7 +8,10 @@
>  #include <xen/sched.h>
>  #include <xen/dmi.h>
>  
> -static bool ioemul_handle_proliant_quirk(
> +unsigned int (*ioemul_handle_quirk)(
> +    u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs);

uint8_t for opcode and also I think regs can be made const?

(at least looking at the only implementation from
ioemul_handle_proliant_quirk). I'm not familiar with this area
however.

> +
> +static unsigned int ioemul_handle_proliant_quirk(
>      u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs)
>  {
>      static const char stub[] = {
> @@ -19,18 +22,16 @@ static bool ioemul_handle_proliant_quirk(
>          0xa8, 0x80, /*    test $0x80, %al */
>          0x75, 0xfb, /*    jnz 1b          */
>          0x9d,       /*    popf            */
> -        0xc3,       /*    ret             */
>      };
>      uint16_t port = regs->dx;
>      uint8_t value = regs->al;
>  
>      if ( (opcode != 0xee) || (port != 0xcd4) || !(value & 0x80) )
> -        return false;
> +        return 0;
>  
>      memcpy(io_emul_stub, stub, sizeof(stub));
> -    BUILD_BUG_ON(IOEMUL_QUIRK_STUB_BYTES < sizeof(stub));
>  
> -    return true;
> +    return sizeof(stub);
>  }
>  
>  /* This table is the set of system-specific I/O emulation hooks. */
> diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
> index e24b84f46a..f150886711 100644
> --- a/xen/arch/x86/pv/emul-priv-op.c
> +++ b/xen/arch/x86/pv/emul-priv-op.c
> @@ -54,51 +54,96 @@ struct priv_op_ctxt {
>      unsigned int bpmatch;
>  };
>  
> -/* I/O emulation support. Helper routines for, and type of, the stack stub. */
> -void host_to_guest_gpr_switch(struct cpu_user_regs *);
> -unsigned long guest_to_host_gpr_switch(unsigned long);
> +/* I/O emulation helpers.  Use non-standard calling conventions. */
> +extern const char load_guest_gprs[], save_guest_gprs[];
>  
>  typedef void io_emul_stub_t(struct cpu_user_regs *);
>  
>  static io_emul_stub_t *io_emul_stub_setup(struct priv_op_ctxt *ctxt, u8 opcode,
>                                            unsigned int port, unsigned int bytes)
>  {
> +    /*
> +     * Construct a stub for IN/OUT emulation.
> +     *
> +     * Some platform drivers communicate with the SMM handler using GPRs as a
> +     * mailbox.  Therefore, we must perform the emulation with the hardware
> +     * domain's registers in view.
> +     *
> +     * We write a stub of the following form, using the guest load/save
> +     * helpers (abnormal calling conventions), and one of several possible
> +     * stubs performing the real I/O.
> +     */
> +    static const char prologue[] = {
> +        0x53,       /* push %rbx */
> +        0x55,       /* push %rbp */
> +        0x41, 0x54, /* push %r12 */
> +        0x41, 0x55, /* push %r13 */
> +        0x41, 0x56, /* push %r14 */
> +        0x41, 0x57, /* push %r15 */
> +        0x57,       /* push %rdi (param for save_guest_gprs) */
> +    };              /* call load_guest_gprs */
> +                    /* <I/O stub> */
> +                    /* call save_guest_gprs */
> +    static const char epilogue[] = {
> +        0x5f,       /* pop %rdi  */
> +        0x41, 0x5f, /* pop %r15  */
> +        0x41, 0x5e, /* pop %r14  */
> +        0x41, 0x5d, /* pop %r13  */
> +        0x41, 0x5c, /* pop %r12  */
> +        0x5d,       /* pop %rbp  */
> +        0x5b,       /* pop %rbx  */
> +        0xc3,       /* ret       */
> +    };
> +
>      struct stubs *this_stubs = &this_cpu(stubs);
>      unsigned long stub_va = this_stubs->addr + STUB_BUF_SIZE / 2;
> -    long disp;
> -    bool use_quirk_stub = false;
> +    unsigned int quirk_bytes = 0;
> +    char *p;
> +
> +    /* Helpers - Read outer scope but only modify p. */
> +#define APPEND_BUFF(b) ({ memcpy(p, b, sizeof(b)); p += sizeof(b); })
> +#define APPEND_CALL(f)                                                  \
> +    ({                                                                  \
> +        long disp = (long)(f) - (stub_va + p - ctxt->io_emul_stub + 5); \
> +        BUG_ON((int32_t)disp != disp);                                  \

I'm not sure I get the point of using signed integers instead of
unsigned ones, AFAICT you just want to check that the displacement is
< 4GB so that a relative call can be used?

Thanks, Roger.
Jan Beulich April 27, 2020, 3:28 p.m. UTC | #2
On 27.04.2020 14:20, Andrew Cooper wrote:
> The logic is completely undocumented and almost impossible to follow.  It
> actually uses return oriented programming.  Rewrite it to conform to more
> normal call mechanics, and leave a big comment explaining thing.  As well as
> the code being easier to follow, it will execute faster as it isn't fighting
> the branch predictor.
> 
> Move the ioemul_handle_quirk() function pointer from traps.c to
> ioport_emulate.c.  There is no reason for it to be in neither of the two
> translation units which use it.  Alter the behaviour to return the number of
> bytes written into the stub.
> 
> Access the addresses of the host/guest helpers with extern const char arrays.
> Nothing good will come of C thinking they are regular functions.

I agree with the C aspect, but imo the assembly routines should,
with the changes you make, be marked as being ordinary functions.
A reasonable linker would then warn about the C file wanting an
STT_OBJECT while the assembly file provides an STT_FUNC. I'd
therefore prefer, along with marking the functions as such, to
have them also declared as functions in C.

> --- a/xen/arch/x86/ioport_emulate.c
> +++ b/xen/arch/x86/ioport_emulate.c
> @@ -8,7 +8,10 @@
>  #include <xen/sched.h>
>  #include <xen/dmi.h>
>  
> -static bool ioemul_handle_proliant_quirk(
> +unsigned int (*ioemul_handle_quirk)(
> +    u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs);

Would you mind adding __read_mostly on this occasion?

> @@ -19,18 +22,16 @@ static bool ioemul_handle_proliant_quirk(
>          0xa8, 0x80, /*    test $0x80, %al */
>          0x75, 0xfb, /*    jnz 1b          */
>          0x9d,       /*    popf            */
> -        0xc3,       /*    ret             */
>      };
>      uint16_t port = regs->dx;
>      uint8_t value = regs->al;
>  
>      if ( (opcode != 0xee) || (port != 0xcd4) || !(value & 0x80) )
> -        return false;
> +        return 0;
>  
>      memcpy(io_emul_stub, stub, sizeof(stub));
> -    BUILD_BUG_ON(IOEMUL_QUIRK_STUB_BYTES < sizeof(stub));

So you treat a build failure for a runtime crash. I can see the
advantages of the new approach, but the original got away with
less stub space. If our L1_CACHE_SHIFT wasn't hard coded to 7
just to cover some unusual CPUs, your new approach would, if I
got the counting and calculations right, not work (with a value
resulting in a 64-byte cache line size). Introducing a Kconfig
option for this should imo not come with a need to re-work the
logic here again. Therefore I'd like us to think about a way
to make the space needed not exceed 32 bytes.

One option might be to arrange for some or all of R12-R15 to
not need spilling. And of course there then still ought to be
a BUILD_BUG_ON() identifying ahead of any testing that yet
smaller cache line sizes would indeed require re-work here.

Jan
Andrew Cooper April 27, 2020, 4:18 p.m. UTC | #3
On 27/04/2020 16:18, Roger Pau Monné wrote:
> On Mon, Apr 27, 2020 at 01:20:41PM +0100, Andrew Cooper wrote:
>> The logic is completely undocumented and almost impossible to follow.  It
>> actually uses return oriented programming.  Rewrite it to conform to more
>> normal call mechanics, and leave a big comment explaining thing.  As well as
>> the code being easier to follow, it will execute faster as it isn't fighting
>> the branch predictor.
>>
>> Move the ioemul_handle_quirk() function pointer from traps.c to
>> ioport_emulate.c.
> Seeing as the newest quirk was added in 2008, I wonder if such quirks
> are still relevant?
>
> Maybe they are also used by newer boxes, I really have no idea.

Its not something which I'd consider altering in this patch anyway.

>
>> +
>> +static unsigned int ioemul_handle_proliant_quirk(
>>      u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs)
>>  {
>>      static const char stub[] = {
>> @@ -19,18 +22,16 @@ static bool ioemul_handle_proliant_quirk(
>>          0xa8, 0x80, /*    test $0x80, %al */
>>          0x75, 0xfb, /*    jnz 1b          */
>>          0x9d,       /*    popf            */
>> -        0xc3,       /*    ret             */
>>      };
>>      uint16_t port = regs->dx;
>>      uint8_t value = regs->al;
>>  
>>      if ( (opcode != 0xee) || (port != 0xcd4) || !(value & 0x80) )
>> -        return false;
>> +        return 0;
>>  
>>      memcpy(io_emul_stub, stub, sizeof(stub));
>> -    BUILD_BUG_ON(IOEMUL_QUIRK_STUB_BYTES < sizeof(stub));
>>  
>> -    return true;
>> +    return sizeof(stub);
>>  }
>>  
>>  /* This table is the set of system-specific I/O emulation hooks. */
>> diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
>> index e24b84f46a..f150886711 100644
>> --- a/xen/arch/x86/pv/emul-priv-op.c
>> +++ b/xen/arch/x86/pv/emul-priv-op.c
>> @@ -54,51 +54,96 @@ struct priv_op_ctxt {
>>      unsigned int bpmatch;
>>  };
>>  
>> -/* I/O emulation support. Helper routines for, and type of, the stack stub. */
>> -void host_to_guest_gpr_switch(struct cpu_user_regs *);
>> -unsigned long guest_to_host_gpr_switch(unsigned long);
>> +/* I/O emulation helpers.  Use non-standard calling conventions. */
>> +extern const char load_guest_gprs[], save_guest_gprs[];
>>  
>>  typedef void io_emul_stub_t(struct cpu_user_regs *);
>>  
>>  static io_emul_stub_t *io_emul_stub_setup(struct priv_op_ctxt *ctxt, u8 opcode,
>>                                            unsigned int port, unsigned int bytes)
>>  {
>> +    /*
>> +     * Construct a stub for IN/OUT emulation.
>> +     *
>> +     * Some platform drivers communicate with the SMM handler using GPRs as a
>> +     * mailbox.  Therefore, we must perform the emulation with the hardware
>> +     * domain's registers in view.
>> +     *
>> +     * We write a stub of the following form, using the guest load/save
>> +     * helpers (abnormal calling conventions), and one of several possible
>> +     * stubs performing the real I/O.
>> +     */
>> +    static const char prologue[] = {
>> +        0x53,       /* push %rbx */
>> +        0x55,       /* push %rbp */
>> +        0x41, 0x54, /* push %r12 */
>> +        0x41, 0x55, /* push %r13 */
>> +        0x41, 0x56, /* push %r14 */
>> +        0x41, 0x57, /* push %r15 */
>> +        0x57,       /* push %rdi (param for save_guest_gprs) */
>> +    };              /* call load_guest_gprs */
>> +                    /* <I/O stub> */
>> +                    /* call save_guest_gprs */
>> +    static const char epilogue[] = {
>> +        0x5f,       /* pop %rdi  */
>> +        0x41, 0x5f, /* pop %r15  */
>> +        0x41, 0x5e, /* pop %r14  */
>> +        0x41, 0x5d, /* pop %r13  */
>> +        0x41, 0x5c, /* pop %r12  */
>> +        0x5d,       /* pop %rbp  */
>> +        0x5b,       /* pop %rbx  */
>> +        0xc3,       /* ret       */
>> +    };
>> +
>>      struct stubs *this_stubs = &this_cpu(stubs);
>>      unsigned long stub_va = this_stubs->addr + STUB_BUF_SIZE / 2;
>> -    long disp;
>> -    bool use_quirk_stub = false;
>> +    unsigned int quirk_bytes = 0;
>> +    char *p;
>> +
>> +    /* Helpers - Read outer scope but only modify p. */
>> +#define APPEND_BUFF(b) ({ memcpy(p, b, sizeof(b)); p += sizeof(b); })
>> +#define APPEND_CALL(f)                                                  \
>> +    ({                                                                  \
>> +        long disp = (long)(f) - (stub_va + p - ctxt->io_emul_stub + 5); \
>> +        BUG_ON((int32_t)disp != disp);                                  \
> I'm not sure I get the point of using signed integers instead of
> unsigned ones, AFAICT you just want to check that the displacement is
> < 4GB so that a relative call can be used?

Displacements are +/- 2G, not <4G.

Using unsigned here would be buggy.

~Andrew
Roger Pau Monné April 27, 2020, 4:23 p.m. UTC | #4
On Mon, Apr 27, 2020 at 05:18:52PM +0100, Andrew Cooper wrote:
> On 27/04/2020 16:18, Roger Pau Monné wrote:
> > On Mon, Apr 27, 2020 at 01:20:41PM +0100, Andrew Cooper wrote:
> >> +    /* Helpers - Read outer scope but only modify p. */
> >> +#define APPEND_BUFF(b) ({ memcpy(p, b, sizeof(b)); p += sizeof(b); })
> >> +#define APPEND_CALL(f)                                                  \
> >> +    ({                                                                  \
> >> +        long disp = (long)(f) - (stub_va + p - ctxt->io_emul_stub + 5); \
> >> +        BUG_ON((int32_t)disp != disp);                                  \
> > I'm not sure I get the point of using signed integers instead of
> > unsigned ones, AFAICT you just want to check that the displacement is
> > < 4GB so that a relative call can be used?
> 
> Displacements are +/- 2G, not <4G.
> 
> Using unsigned here would be buggy.

Right, sorry for the noise:

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

With the minor nits pointed above in the ioemul_handle_quirk.

Thanks, Roger.
Andrew Cooper April 27, 2020, 10:20 p.m. UTC | #5
On 27/04/2020 16:28, Jan Beulich wrote:
> On 27.04.2020 14:20, Andrew Cooper wrote:
>> The logic is completely undocumented and almost impossible to follow.  It
>> actually uses return oriented programming.  Rewrite it to conform to more
>> normal call mechanics, and leave a big comment explaining thing.  As well as
>> the code being easier to follow, it will execute faster as it isn't fighting
>> the branch predictor.
>>
>> Move the ioemul_handle_quirk() function pointer from traps.c to
>> ioport_emulate.c.  There is no reason for it to be in neither of the two
>> translation units which use it.  Alter the behaviour to return the number of
>> bytes written into the stub.
>>
>> Access the addresses of the host/guest helpers with extern const char arrays.
>> Nothing good will come of C thinking they are regular functions.
> I agree with the C aspect, but imo the assembly routines should,
> with the changes you make, be marked as being ordinary functions.

Despite the changes, they are still very much not ordinary functions,
and cannot be used by C.

I have no objection to marking them as STT_FUNCTION (as this doesn't
mean C function), but...

> A reasonable linker would then warn about the C file wanting an
> STT_OBJECT while the assembly file provides an STT_FUNC. I'd
> therefore prefer, along with marking the functions as such, to
> have them also declared as functions in C.

... there is literally nothing safe which C can do with them other than
manipulate their address.

Writing it like this is specifically prevents something from compiling
which will explode at runtime, is someone is naive enough to try using
the function from C.

>> --- a/xen/arch/x86/ioport_emulate.c
>> +++ b/xen/arch/x86/ioport_emulate.c
>> @@ -8,7 +8,10 @@
>>  #include <xen/sched.h>
>>  #include <xen/dmi.h>
>>  
>> -static bool ioemul_handle_proliant_quirk(
>> +unsigned int (*ioemul_handle_quirk)(
>> +    u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs);
> Would you mind adding __read_mostly on this occasion?
>
>> @@ -19,18 +22,16 @@ static bool ioemul_handle_proliant_quirk(
>>          0xa8, 0x80, /*    test $0x80, %al */
>>          0x75, 0xfb, /*    jnz 1b          */
>>          0x9d,       /*    popf            */
>> -        0xc3,       /*    ret             */
>>      };
>>      uint16_t port = regs->dx;
>>      uint8_t value = regs->al;
>>  
>>      if ( (opcode != 0xee) || (port != 0xcd4) || !(value & 0x80) )
>> -        return false;
>> +        return 0;
>>  
>>      memcpy(io_emul_stub, stub, sizeof(stub));
>> -    BUILD_BUG_ON(IOEMUL_QUIRK_STUB_BYTES < sizeof(stub));
> So you treat a build failure for a runtime crash.

I presume you mean s/treat/trade/ here, and the answer is no, not really.

There is nothing which actually forced a connection between the build
time checks and runtime behaviour, so it was only a facade of safety,
not real safety.

>  I can see the
> advantages of the new approach, but the original got away with
> less stub space.

Stub space doesn't matter, so long as it fits.  In particular, ...

> If our L1_CACHE_SHIFT wasn't hard coded to 7
> just to cover some unusual CPUs, your new approach would, if I
> got the counting and calculations right, not work (with a value
> resulting in a 64-byte cache line size).

... the SYSCALL stubs use 64 bytes so Xen cannot function with a shift
less than 7.

> Introducing a Kconfig
> option for this should imo not come with a need to re-work the
> logic here again. Therefore I'd like us to think about a way
> to make the space needed not exceed 32 bytes.

And why would we ever want an option like that?  (I know how you're
going to answer this, so I'm going to pre-emptively point out that there
are hundreds of kilobytes of easier-to-shrink per-cpu data structures
than this one).

Honestly, this suggestion is a total waste of time and effort.  It is an
enormous amount of complexity to micro-optimise a problem which doesn't
exist in the first place.

The stubs are already 128 bytes per CPU and cannot be made smaller. 
Attempting to turn this particular stub into <32 has no benefit (the
stubs don't actually get smaller), and major costs.

~Andrew
Jan Beulich April 28, 2020, 8:24 a.m. UTC | #6
On 28.04.2020 00:20, Andrew Cooper wrote:
> On 27/04/2020 16:28, Jan Beulich wrote:
>> On 27.04.2020 14:20, Andrew Cooper wrote:
>>> The logic is completely undocumented and almost impossible to follow.  It
>>> actually uses return oriented programming.  Rewrite it to conform to more
>>> normal call mechanics, and leave a big comment explaining thing.  As well as
>>> the code being easier to follow, it will execute faster as it isn't fighting
>>> the branch predictor.
>>>
>>> Move the ioemul_handle_quirk() function pointer from traps.c to
>>> ioport_emulate.c.  There is no reason for it to be in neither of the two
>>> translation units which use it.  Alter the behaviour to return the number of
>>> bytes written into the stub.
>>>
>>> Access the addresses of the host/guest helpers with extern const char arrays.
>>> Nothing good will come of C thinking they are regular functions.
>> I agree with the C aspect, but imo the assembly routines should,
>> with the changes you make, be marked as being ordinary functions.
> 
> Despite the changes, they are still very much not ordinary functions,
> and cannot be used by C.
> 
> I have no objection to marking them as STT_FUNCTION (as this doesn't
> mean C function), but...
> 
>> A reasonable linker would then warn about the C file wanting an
>> STT_OBJECT while the assembly file provides an STT_FUNC. I'd
>> therefore prefer, along with marking the functions as such, to
>> have them also declared as functions in C.
> 
> ... there is literally nothing safe which C can do with them other than
> manipulate their address.
> 
> Writing it like this is specifically prevents something from compiling
> which will explode at runtime, is someone is naive enough to try using
> the function from C.

Besides being certain that such an attempt, if it made it into a
submitted patch in the first place, would be caught by review, I
don't see you addressing my main counter argument. Preventing a
declared function to be called can be had by other means, e.g.
__attribute__((__warning__())).

>>> @@ -19,18 +22,16 @@ static bool ioemul_handle_proliant_quirk(
>>>          0xa8, 0x80, /*    test $0x80, %al */
>>>          0x75, 0xfb, /*    jnz 1b          */
>>>          0x9d,       /*    popf            */
>>> -        0xc3,       /*    ret             */
>>>      };
>>>      uint16_t port = regs->dx;
>>>      uint8_t value = regs->al;
>>>  
>>>      if ( (opcode != 0xee) || (port != 0xcd4) || !(value & 0x80) )
>>> -        return false;
>>> +        return 0;
>>>  
>>>      memcpy(io_emul_stub, stub, sizeof(stub));
>>> -    BUILD_BUG_ON(IOEMUL_QUIRK_STUB_BYTES < sizeof(stub));
>> So you treat a build failure for a runtime crash.
> 
> I presume you mean s/treat/trade/ here, and the answer is no, not really.
> 
> There is nothing which actually forced a connection between the build
> time checks and runtime behaviour, so it was only a facade of safety,
> not real safety.

I'm not following, I'm afraid: The above together with

    BUILD_BUG_ON(STUB_BUF_SIZE / 2 < MAX(9, /* Default emul stub */
                                         5 + IOEMUL_QUIRK_STUB_BYTES));

(where the literal numbers live next to what they describe)
did very well provide some level of guarding.

>>  I can see the
>> advantages of the new approach, but the original got away with
>> less stub space.
> 
> Stub space doesn't matter, so long as it fits.  In particular, ...
> 
>> If our L1_CACHE_SHIFT wasn't hard coded to 7
>> just to cover some unusual CPUs, your new approach would, if I
>> got the counting and calculations right, not work (with a value
>> resulting in a 64-byte cache line size).
> 
> ... the SYSCALL stubs use 64 bytes so Xen cannot function with a shift
> less than 7.

Oh, my fault - I read the STUB_BUF_SHIFT expression as min() when
it really is max(). So yes, we can rely on there being 64 bytes.
(Everything further down therefore becomes moot afaict.)

>> Introducing a Kconfig
>> option for this should imo not come with a need to re-work the
>> logic here again. Therefore I'd like us to think about a way
>> to make the space needed not exceed 32 bytes.
> 
> And why would we ever want an option like that?  (I know how you're
> going to answer this, so I'm going to pre-emptively point out that there
> are hundreds of kilobytes of easier-to-shrink per-cpu data structures
> than this one).

Not sure what per-CPU data structures you talk about. I wasn't
thinking of space savings in particular, but rather about getting
our setting in sync with actual hardware. Its value is, afaics,
used in a far more relevant way by xmalloc() and friends.

Jan

> Honestly, this suggestion is a total waste of time and effort.  It is an
> enormous amount of complexity to micro-optimise a problem which doesn't
> exist in the first place.
> 
> The stubs are already 128 bytes per CPU and cannot be made smaller. 
> Attempting to turn this particular stub into <32 has no benefit (the
> stubs don't actually get smaller), and major costs.
> 
> ~Andrew
>
diff mbox series

Patch

diff --git a/xen/arch/x86/ioport_emulate.c b/xen/arch/x86/ioport_emulate.c
index 499c1f6056..f7511a9c49 100644
--- a/xen/arch/x86/ioport_emulate.c
+++ b/xen/arch/x86/ioport_emulate.c
@@ -8,7 +8,10 @@ 
 #include <xen/sched.h>
 #include <xen/dmi.h>
 
-static bool ioemul_handle_proliant_quirk(
+unsigned int (*ioemul_handle_quirk)(
+    u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs);
+
+static unsigned int ioemul_handle_proliant_quirk(
     u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs)
 {
     static const char stub[] = {
@@ -19,18 +22,16 @@  static bool ioemul_handle_proliant_quirk(
         0xa8, 0x80, /*    test $0x80, %al */
         0x75, 0xfb, /*    jnz 1b          */
         0x9d,       /*    popf            */
-        0xc3,       /*    ret             */
     };
     uint16_t port = regs->dx;
     uint8_t value = regs->al;
 
     if ( (opcode != 0xee) || (port != 0xcd4) || !(value & 0x80) )
-        return false;
+        return 0;
 
     memcpy(io_emul_stub, stub, sizeof(stub));
-    BUILD_BUG_ON(IOEMUL_QUIRK_STUB_BYTES < sizeof(stub));
 
-    return true;
+    return sizeof(stub);
 }
 
 /* This table is the set of system-specific I/O emulation hooks. */
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index e24b84f46a..f150886711 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -54,51 +54,96 @@  struct priv_op_ctxt {
     unsigned int bpmatch;
 };
 
-/* I/O emulation support. Helper routines for, and type of, the stack stub. */
-void host_to_guest_gpr_switch(struct cpu_user_regs *);
-unsigned long guest_to_host_gpr_switch(unsigned long);
+/* I/O emulation helpers.  Use non-standard calling conventions. */
+extern const char load_guest_gprs[], save_guest_gprs[];
 
 typedef void io_emul_stub_t(struct cpu_user_regs *);
 
 static io_emul_stub_t *io_emul_stub_setup(struct priv_op_ctxt *ctxt, u8 opcode,
                                           unsigned int port, unsigned int bytes)
 {
+    /*
+     * Construct a stub for IN/OUT emulation.
+     *
+     * Some platform drivers communicate with the SMM handler using GPRs as a
+     * mailbox.  Therefore, we must perform the emulation with the hardware
+     * domain's registers in view.
+     *
+     * We write a stub of the following form, using the guest load/save
+     * helpers (abnormal calling conventions), and one of several possible
+     * stubs performing the real I/O.
+     */
+    static const char prologue[] = {
+        0x53,       /* push %rbx */
+        0x55,       /* push %rbp */
+        0x41, 0x54, /* push %r12 */
+        0x41, 0x55, /* push %r13 */
+        0x41, 0x56, /* push %r14 */
+        0x41, 0x57, /* push %r15 */
+        0x57,       /* push %rdi (param for save_guest_gprs) */
+    };              /* call load_guest_gprs */
+                    /* <I/O stub> */
+                    /* call save_guest_gprs */
+    static const char epilogue[] = {
+        0x5f,       /* pop %rdi  */
+        0x41, 0x5f, /* pop %r15  */
+        0x41, 0x5e, /* pop %r14  */
+        0x41, 0x5d, /* pop %r13  */
+        0x41, 0x5c, /* pop %r12  */
+        0x5d,       /* pop %rbp  */
+        0x5b,       /* pop %rbx  */
+        0xc3,       /* ret       */
+    };
+
     struct stubs *this_stubs = &this_cpu(stubs);
     unsigned long stub_va = this_stubs->addr + STUB_BUF_SIZE / 2;
-    long disp;
-    bool use_quirk_stub = false;
+    unsigned int quirk_bytes = 0;
+    char *p;
+
+    /* Helpers - Read outer scope but only modify p. */
+#define APPEND_BUFF(b) ({ memcpy(p, b, sizeof(b)); p += sizeof(b); })
+#define APPEND_CALL(f)                                                  \
+    ({                                                                  \
+        long disp = (long)(f) - (stub_va + p - ctxt->io_emul_stub + 5); \
+        BUG_ON((int32_t)disp != disp);                                  \
+        *p++ = 0xe8;                                                    \
+        *(int32_t *)p = disp; p += 4;                                   \
+    })
 
     if ( !ctxt->io_emul_stub )
         ctxt->io_emul_stub =
             map_domain_page(_mfn(this_stubs->mfn)) + (stub_va & ~PAGE_MASK);
 
-    /* call host_to_guest_gpr_switch */
-    ctxt->io_emul_stub[0] = 0xe8;
-    disp = (long)host_to_guest_gpr_switch - (stub_va + 5);
-    BUG_ON((int32_t)disp != disp);
-    *(int32_t *)&ctxt->io_emul_stub[1] = disp;
+    p = ctxt->io_emul_stub;
+
+    APPEND_BUFF(prologue);
+    APPEND_CALL(load_guest_gprs);
 
+    /* Some platforms might need to quirk the stub for specific inputs. */
     if ( unlikely(ioemul_handle_quirk) )
-        use_quirk_stub = ioemul_handle_quirk(opcode, &ctxt->io_emul_stub[5],
-                                             ctxt->ctxt.regs);
+    {
+        quirk_bytes = ioemul_handle_quirk(opcode, p, ctxt->ctxt.regs);
+        p += quirk_bytes;
+    }
 
-    if ( !use_quirk_stub )
+    /* Default I/O stub. */
+    if ( likely(!quirk_bytes) )
     {
-        /* data16 or nop */
-        ctxt->io_emul_stub[5] = (bytes != 2) ? 0x90 : 0x66;
-        /* <io-access opcode> */
-        ctxt->io_emul_stub[6] = opcode;
-        /* imm8 or nop */
-        ctxt->io_emul_stub[7] = !(opcode & 8) ? port : 0x90;
-        /* ret (jumps to guest_to_host_gpr_switch) */
-        ctxt->io_emul_stub[8] = 0xc3;
+        *p++ = (bytes != 2) ? 0x90 : 0x66;  /* data16 or nop */
+        *p++ = opcode;                      /* <opcode>      */
+        *p++ = !(opcode & 8) ? port : 0x90; /* imm8 or nop   */
     }
 
-    BUILD_BUG_ON(STUB_BUF_SIZE / 2 < MAX(9, /* Default emul stub */
-                                         5 + IOEMUL_QUIRK_STUB_BYTES));
+    APPEND_CALL(save_guest_gprs);
+    APPEND_BUFF(epilogue);
+
+    BUG_ON(STUB_BUF_SIZE / 2 < (p - ctxt->io_emul_stub));
 
     /* Handy function-typed pointer to the stub. */
     return (void *)stub_va;
+
+#undef APPEND_CALL
+#undef APPEND_BUFF
 }
 
 
diff --git a/xen/arch/x86/pv/gpr_switch.S b/xen/arch/x86/pv/gpr_switch.S
index 6d26192c2c..e3f8037b69 100644
--- a/xen/arch/x86/pv/gpr_switch.S
+++ b/xen/arch/x86/pv/gpr_switch.S
@@ -9,59 +9,42 @@ 
 
 #include <asm/asm_defns.h>
 
-ENTRY(host_to_guest_gpr_switch)
-        movq  (%rsp), %rcx
-        movq  %rdi, (%rsp)
+/* Load guest GPRs.  Parameter in %rdi, clobbers all registers. */
+ENTRY(load_guest_gprs)
         movq  UREGS_rdx(%rdi), %rdx
-        pushq %rbx
         movq  UREGS_rax(%rdi), %rax
         movq  UREGS_rbx(%rdi), %rbx
-        pushq %rbp
         movq  UREGS_rsi(%rdi), %rsi
         movq  UREGS_rbp(%rdi), %rbp
-        pushq %r12
-        movq  UREGS_r8(%rdi), %r8
+        movq  UREGS_r8 (%rdi), %r8
         movq  UREGS_r12(%rdi), %r12
-        pushq %r13
-        movq  UREGS_r9(%rdi), %r9
+        movq  UREGS_r9 (%rdi), %r9
         movq  UREGS_r13(%rdi), %r13
-        pushq %r14
         movq  UREGS_r10(%rdi), %r10
         movq  UREGS_r14(%rdi), %r14
-        pushq %r15
         movq  UREGS_r11(%rdi), %r11
         movq  UREGS_r15(%rdi), %r15
-        pushq %rcx /* dummy push, filled by guest_to_host_gpr_switch pointer */
-        pushq %rcx
-        leaq  guest_to_host_gpr_switch(%rip),%rcx
-        movq  %rcx,8(%rsp)
         movq  UREGS_rcx(%rdi), %rcx
         movq  UREGS_rdi(%rdi), %rdi
         ret
 
-ENTRY(guest_to_host_gpr_switch)
+/* Save guest GPRs.  Parameter on the stack above the return address. */
+ENTRY(save_guest_gprs)
         pushq %rdi
-        movq  7*8(%rsp), %rdi
+        movq  2*8(%rsp), %rdi
         movq  %rax, UREGS_rax(%rdi)
-        popq  UREGS_rdi(%rdi)
+        popq        UREGS_rdi(%rdi)
         movq  %r15, UREGS_r15(%rdi)
         movq  %r11, UREGS_r11(%rdi)
-        popq  %r15
         movq  %r14, UREGS_r14(%rdi)
         movq  %r10, UREGS_r10(%rdi)
-        popq  %r14
         movq  %r13, UREGS_r13(%rdi)
-        movq  %r9, UREGS_r9(%rdi)
-        popq  %r13
+        movq  %r9,  UREGS_r9 (%rdi)
         movq  %r12, UREGS_r12(%rdi)
-        movq  %r8, UREGS_r8(%rdi)
-        popq  %r12
+        movq  %r8,  UREGS_r8 (%rdi)
         movq  %rbp, UREGS_rbp(%rdi)
         movq  %rsi, UREGS_rsi(%rdi)
-        popq  %rbp
         movq  %rbx, UREGS_rbx(%rdi)
         movq  %rdx, UREGS_rdx(%rdi)
-        popq  %rbx
         movq  %rcx, UREGS_rcx(%rdi)
-        popq  %rcx
         ret
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index e838846c6b..9df050dcf4 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -116,9 +116,6 @@  idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
  */
 DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_page, tss_page);
 
-bool (*ioemul_handle_quirk)(
-    u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs);
-
 static int debug_stack_lines = 20;
 integer_param("debug_stack_lines", debug_stack_lines);
 
diff --git a/xen/include/asm-x86/io.h b/xen/include/asm-x86/io.h
index 8708b79b99..c4ec52cba7 100644
--- a/xen/include/asm-x86/io.h
+++ b/xen/include/asm-x86/io.h
@@ -49,8 +49,7 @@  __OUT(w,"w",short)
 __OUT(l,,int)
 
 /* Function pointer used to handle platform specific I/O port emulation. */
-#define IOEMUL_QUIRK_STUB_BYTES 10
-extern bool (*ioemul_handle_quirk)(
+extern unsigned int (*ioemul_handle_quirk)(
     u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs);
 
 #endif