diff mbox

[v2,10/12] fuzz/x86emul: update fuzzer

Message ID 20170131110809.30001-11-wei.liu2@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wei Liu Jan. 31, 2017, 11:08 a.m. UTC
Provide the fuzzer with more ops, and more sophisticated input
structure.

Based on a patch originally written by Andrew and George.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: George Dunlap <george.dunlap@eu.citrix.com>

v2:
1. Address comments from Jan.
2. Provide architecturally correct behaviour for rep stub.
---
 .../x86-insn-emulator-fuzzer.c                     | 660 +++++++++++++++++++--
 1 file changed, 598 insertions(+), 62 deletions(-)

Comments

Jan Beulich Jan. 31, 2017, 1:33 p.m. UTC | #1
>>> On 31.01.17 at 12:08, <wei.liu2@citrix.com> wrote:
> @@ -16,26 +17,79 @@
>  
>  #include "x86_emulate.h"
>  
> -static unsigned char data[4096];
> +#include "../../../xen/include/asm-x86/msr-index.h"
> +
> +#define MSR_INDEX_MAX 16
> +
> +#define SEG_NUM x86_seg_none
> +
> +struct input_struct {
> +    unsigned long cr[5];
> +    uint64_t msr[MSR_INDEX_MAX];
> +    struct cpu_user_regs regs;
> +    struct segment_register segments[SEG_NUM];
> +    unsigned long options;
> +    unsigned char data[4096];
> +} input;
> +#define DATA_OFFSET offsetof(struct input_struct, data)
>  static unsigned int data_index;
> -static unsigned int data_max;
> +static unsigned int data_num;
> +
> +/* Randomly return success or failure when processing data.  If
> + * `exception` is false, this function turns _EXCEPTION to _OKAY.
> + */

I'm not sure what coding style is to apply here, but if I consider this
an extension to the insn emulator harness, which in turn is covered
by hypervisor style, then the comment here needs /* to go on a
separate line.

> +int maybe_fail(const char *why, bool exception)
> +{
> +    int rc;
> +
> +    if ( data_index + 1 > data_num )

"data_index >= data_num" would be the more conventional way
to express this.

> +static int _fuzz_rep_read(const char *why, unsigned long *reps)
> +{
> +    int rc;
> +    unsigned long bytes_read = 0;
> +
> +    rc = data_read(why, &bytes_read, sizeof(bytes_read));
> +
> +    if ( bytes_read < *reps )
> +        *reps -= bytes_read;

I don't understand this: In the success case, *reps upon return
indicates the number of iterations done, not the number of
iterations left. So at least the variable naming deserves
improvement.

> +    /* Indicate we haven't done any work if emulation has failed */
> +    if ( rc != X86EMUL_OKAY )
> +        *reps = 0;

That'll result in some code paths never taken. I'd suggest clearing
to zero only in the UNHANDLEABLE case, and simply halving the
amount for EXCEPTION or RETRY (granted the latter can't occur
right now), or even using input data too for determining the new
value.

> +static int fuzz_rep_ins(
> +    uint16_t src_port,
> +    enum x86_segment dst_seg,
> +    unsigned long dst_offset,
> +    unsigned int bytes_per_rep,
> +    unsigned long *reps,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    return _fuzz_rep_read("rep_in", reps);

"rep_ins"

> +static int fuzz_cpuid(
> +    uint32_t leaf,
> +    uint32_t subleaf,
> +    struct cpuid_leaf *res,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    return emul_test_cpuid(leaf, subleaf, res, ctxt);
> +}

I don't think you need this wrapper anymore. For the purpose of
SET() below a simple #define will do.

> +static int fuzz_read_segment(
> +    enum x86_segment seg,
> +    struct segment_register *reg,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    int rc;
> +
> +    if ( seg > SEG_NUM )
> +        return X86EMUL_UNHANDLEABLE;

As said before - this wants to be >= (even more so with
input.segments only having SEG_NUM elements).

> +    rc = maybe_fail("read_segment", true);
> +
> +    if ( rc == X86EMUL_OKAY )
> +        memcpy(reg, input.segments+seg, sizeof(struct segment_register));

Please prefer (type safe) structure assignment.

> +static int fuzz_read_cr(
> +    unsigned int reg,
> +    unsigned long *val,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    int rc;
> +
> +    if ( reg > ARRAY_SIZE(input.cr) )
> +        return X86EMUL_UNHANDLEABLE;

>= again.

> +enum {
> +    MSRI_IA32_SYSENTER_CS,
> +    MSRI_IA32_SYSENTER_ESP,
> +    MSRI_IA32_SYSENTER_EIP,
> +    MSRI_EFER,
> +    MSRI_STAR,
> +    MSRI_LSTAR,
> +    MSRI_CSTAR,
> +    MSRI_SYSCALL_MASK
> +};
> +
> +const static unsigned int msr_index[MSR_INDEX_MAX] = {

Conventionally static comes first (as not being part of the type).

> +static int fuzz_read_msr(
> +    unsigned int reg,
> +    uint64_t *val,
> +    struct x86_emulate_ctxt *ctxt) {
> +    int rc;
> +
> +    rc = maybe_fail("read_msr", true);
> +    if ( rc != X86EMUL_OKAY )
> +        return rc;
> +    else

No need for "else".

> +static void setup_fpu_exception_handler(void)
> +{
> +    /* FIXME - just disable exceptions for now */
> +    unsigned long a;
> +
> +    asm volatile ( "fnclex");
> +    a=0x37f;                    /* FCW_DEFAULT in Xen */
> +    asm volatile ( "fldcw %0" :: "m" (a));
> +    a=0x1f80;                   /* MXCSR_DEFAULT in Xen */
> +    asm volatile ( "ldmxcsr %0" :: "m" (a) );
> +}

While I see that the FCW value has changed, the strange local
variable is still there. If you really want to keep it, please at least
add the missing spaces around the = signs. But I'd prefer

    asm volatile ( "fldcw %0" :: "m" (0x37f /* FCW_DEFAULT in Xen */));
    asm volatile ( "ldmxcsr %0" :: "m" (0x1f80 /* MXCSR_DEFAULT in Xen */) );

And then - doesn't the ABI require these settings to be in effect
upon program startup anyway?

> +/*
> + * Constrain input to architecturally-possible states where
> + * the emulator relies on these
> + *
> + * In general we want the emulator to be as absolutely robust as
> + * possible; which means that we want to minimize the number of things
> + * it assumes about the input state.  Tesing this means minimizing and
> + * removing as much of the input constraints as possible.
> + *
> + * So we only add constraints that (in general) have been proven to
> + * cause crashes in the emulator.
> + *
> + * For future reference: other constraints which might be necessary at
> + * some point:
> + *
> + * - EFER.LMA => !EFLAGS.NT
> + * - In VM86 mode (and real mode?), force segment...

As asked for before - please drop the mentioning of real mode here.

> +void sanitize_input(struct x86_emulate_ctxt *ctxt) {
> +    struct cpu_user_regs *regs = &input.regs;
> +    unsigned long bitmap = input.options;
> +
> +    /* Some hooks can't be disabled. */
> +    input.options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch));
> +
> +    /* Zero 'private' entries */
> +    regs->error_code = 0;
> +    regs->entry_vector = 0;
> +
> +    CANONICALIZE_MAYBE(rip);
> +    CANONICALIZE_MAYBE(rsp);
> +    CANONICALIZE_MAYBE(rbp);
> +
> +    /*
> +     * CR0.PG can't be set if CR0.PE isn't set.  Set is more interesting, so
> +     * set PE if PG is set.
> +     */
> +    if ( input.cr[0] & X86_CR0_PG )
> +        input.cr[0] |= X86_CR0_PE;
> +
> +    /*
> +     * EFLAGS.VM not available in long mode
> +     */
> +    if ( long_mode_active(ctxt) )
> +        regs->rflags &= ~X86_EFLAGS_VM;
> +
> +    /*
> +     * EFLAGS.VM implies 16-bit mode
> +     */

Both of these are single line comments.

>      do {
> +        /* FIXME: Until we actually implement SIGFPE handling properly */
> +        setup_fpu_exception_handler();

I don't think the comment is appropriate here - all that needs fixing is
the body of that function (which iirc already has a respective comment).

Jan
Wei Liu Jan. 31, 2017, 3:51 p.m. UTC | #2
On Tue, Jan 31, 2017 at 06:33:11AM -0700, Jan Beulich wrote:
> >>> On 31.01.17 at 12:08, <wei.liu2@citrix.com> wrote:
> > @@ -16,26 +17,79 @@
> >  
> >  #include "x86_emulate.h"
> >  
> > -static unsigned char data[4096];
> > +#include "../../../xen/include/asm-x86/msr-index.h"
> > +
> > +#define MSR_INDEX_MAX 16
> > +
> > +#define SEG_NUM x86_seg_none
> > +
> > +struct input_struct {
> > +    unsigned long cr[5];
> > +    uint64_t msr[MSR_INDEX_MAX];
> > +    struct cpu_user_regs regs;
> > +    struct segment_register segments[SEG_NUM];
> > +    unsigned long options;
> > +    unsigned char data[4096];
> > +} input;
> > +#define DATA_OFFSET offsetof(struct input_struct, data)
> >  static unsigned int data_index;
> > -static unsigned int data_max;
> > +static unsigned int data_num;
> > +
> > +/* Randomly return success or failure when processing data.  If
> > + * `exception` is false, this function turns _EXCEPTION to _OKAY.
> > + */
> 
> I'm not sure what coding style is to apply here, but if I consider this
> an extension to the insn emulator harness, which in turn is covered
> by hypervisor style, then the comment here needs /* to go on a
> separate line.
> 

Fixed.

> > +int maybe_fail(const char *why, bool exception)
> > +{
> > +    int rc;
> > +
> > +    if ( data_index + 1 > data_num )
> 
> "data_index >= data_num" would be the more conventional way
> to express this.
> 

Fixed.

> > +static int _fuzz_rep_read(const char *why, unsigned long *reps)
> > +{
> > +    int rc;
> > +    unsigned long bytes_read = 0;
> > +
> > +    rc = data_read(why, &bytes_read, sizeof(bytes_read));
> > +
> > +    if ( bytes_read < *reps )
> > +        *reps -= bytes_read;
> 
> I don't understand this: In the success case, *reps upon return
> indicates the number of iterations done, not the number of
> iterations left. So at least the variable naming deserves
> improvement.
> 

OK, so this should be *reps = bytes_read.


> > +    /* Indicate we haven't done any work if emulation has failed */
> > +    if ( rc != X86EMUL_OKAY )
> > +        *reps = 0;
> 
> That'll result in some code paths never taken. I'd suggest clearing
> to zero only in the UNHANDLEABLE case, and simply halving the
> amount for EXCEPTION or RETRY (granted the latter can't occur
> right now), or even using input data too for determining the new
> value.
> 

No problem. This function is now like:

static int _fuzz_rep_read(const char *why, unsigned long *reps)
{
    int rc;
    unsigned long bytes_read = 0;

    rc = data_read(why, &bytes_read, sizeof(bytes_read));

    if ( bytes_read < *reps )
        *reps = bytes_read;

    switch ( rc )
    {
    case X86EMUL_UNHANDLEABLE:
        *reps = 0;
        break;
    case X86EMUL_EXCEPTION:
    case X86EMUL_RETRY:
        *reps /= 2;
    }
                                                                                                         
    return rc;
}

And I will do the same to rep_write.

> > +static int fuzz_rep_ins(
> > +    uint16_t src_port,
> > +    enum x86_segment dst_seg,
> > +    unsigned long dst_offset,
> > +    unsigned int bytes_per_rep,
> > +    unsigned long *reps,
> > +    struct x86_emulate_ctxt *ctxt)
> > +{
> > +    return _fuzz_rep_read("rep_in", reps);
> 
> "rep_ins"
> 

Fixed.

> > +static int fuzz_cpuid(
> > +    uint32_t leaf,
> > +    uint32_t subleaf,
> > +    struct cpuid_leaf *res,
> > +    struct x86_emulate_ctxt *ctxt)
> > +{
> > +    return emul_test_cpuid(leaf, subleaf, res, ctxt);
> > +}
> 
> I don't think you need this wrapper anymore. For the purpose of
> SET() below a simple #define will do.
> 

Fixed.

> > +static int fuzz_read_segment(
> > +    enum x86_segment seg,
> > +    struct segment_register *reg,
> > +    struct x86_emulate_ctxt *ctxt)
> > +{
> > +    int rc;
> > +
> > +    if ( seg > SEG_NUM )
> > +        return X86EMUL_UNHANDLEABLE;
> 
> As said before - this wants to be >= (even more so with
> input.segments only having SEG_NUM elements).
> 

Done, and fixed write_segment too.

> > +    rc = maybe_fail("read_segment", true);
> > +
> > +    if ( rc == X86EMUL_OKAY )
> > +        memcpy(reg, input.segments+seg, sizeof(struct segment_register));
> 
> Please prefer (type safe) structure assignment.
> 
> > +static int fuzz_read_cr(
> > +    unsigned int reg,
> > +    unsigned long *val,
> > +    struct x86_emulate_ctxt *ctxt)
> > +{
> > +    int rc;
> > +
> > +    if ( reg > ARRAY_SIZE(input.cr) )
> > +        return X86EMUL_UNHANDLEABLE;
> 
> >= again.
> 

Done, and fixed write_cr.

> > +enum {
> > +    MSRI_IA32_SYSENTER_CS,
> > +    MSRI_IA32_SYSENTER_ESP,
> > +    MSRI_IA32_SYSENTER_EIP,
> > +    MSRI_EFER,
> > +    MSRI_STAR,
> > +    MSRI_LSTAR,
> > +    MSRI_CSTAR,
> > +    MSRI_SYSCALL_MASK
> > +};
> > +
> > +const static unsigned int msr_index[MSR_INDEX_MAX] = {
> 
> Conventionally static comes first (as not being part of the type).
> 

Done.

> > +static int fuzz_read_msr(
> > +    unsigned int reg,
> > +    uint64_t *val,
> > +    struct x86_emulate_ctxt *ctxt) {
> > +    int rc;
> > +
> > +    rc = maybe_fail("read_msr", true);
> > +    if ( rc != X86EMUL_OKAY )
> > +        return rc;
> > +    else
> 
> No need for "else".
> 

Done.

> > +static void setup_fpu_exception_handler(void)
> > +{
> > +    /* FIXME - just disable exceptions for now */
> > +    unsigned long a;
> > +
> > +    asm volatile ( "fnclex");
> > +    a=0x37f;                    /* FCW_DEFAULT in Xen */
> > +    asm volatile ( "fldcw %0" :: "m" (a));
> > +    a=0x1f80;                   /* MXCSR_DEFAULT in Xen */
> > +    asm volatile ( "ldmxcsr %0" :: "m" (a) );
> > +}
> 
> While I see that the FCW value has changed, the strange local
> variable is still there. If you really want to keep it, please at least
> add the missing spaces around the = signs. But I'd prefer
> 
>     asm volatile ( "fldcw %0" :: "m" (0x37f /* FCW_DEFAULT in Xen */));
>     asm volatile ( "ldmxcsr %0" :: "m" (0x1f80 /* MXCSR_DEFAULT in Xen */) );
> 

This doesn't work.

x86-insn-emulator-fuzzer.c:445:5: error: memory input 0 is not directly addressable

That's why I kept the local variable. But I will add spaces around =.

> And then - doesn't the ABI require these settings to be in effect
> upon program startup anyway?
> 

I'm not sure about this -- reading AMD64 ABI Draft 0.99.8 doesn't reveal
much for me. But having the code arranged like this hasn't caused any
SIGFPE so far.

What do you suggest I do here?

> > +/*
> > + * Constrain input to architecturally-possible states where
> > + * the emulator relies on these
> > + *
> > + * In general we want the emulator to be as absolutely robust as
> > + * possible; which means that we want to minimize the number of things
> > + * it assumes about the input state.  Tesing this means minimizing and
> > + * removing as much of the input constraints as possible.
> > + *
> > + * So we only add constraints that (in general) have been proven to
> > + * cause crashes in the emulator.
> > + *
> > + * For future reference: other constraints which might be necessary at
> > + * some point:
> > + *
> > + * - EFER.LMA => !EFLAGS.NT
> > + * - In VM86 mode (and real mode?), force segment...
> 
> As asked for before - please drop the mentioning of real mode here.

Done.

> 
> > +void sanitize_input(struct x86_emulate_ctxt *ctxt) {
> > +    struct cpu_user_regs *regs = &input.regs;
> > +    unsigned long bitmap = input.options;
> > +
> > +    /* Some hooks can't be disabled. */
> > +    input.options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch));
> > +
> > +    /* Zero 'private' entries */
> > +    regs->error_code = 0;
> > +    regs->entry_vector = 0;
> > +
> > +    CANONICALIZE_MAYBE(rip);
> > +    CANONICALIZE_MAYBE(rsp);
> > +    CANONICALIZE_MAYBE(rbp);
> > +
> > +    /*
> > +     * CR0.PG can't be set if CR0.PE isn't set.  Set is more interesting, so
> > +     * set PE if PG is set.
> > +     */
> > +    if ( input.cr[0] & X86_CR0_PG )
> > +        input.cr[0] |= X86_CR0_PE;
> > +
> > +    /*
> > +     * EFLAGS.VM not available in long mode
> > +     */
> > +    if ( long_mode_active(ctxt) )
> > +        regs->rflags &= ~X86_EFLAGS_VM;
> > +
> > +    /*
> > +     * EFLAGS.VM implies 16-bit mode
> > +     */
> 
> Both of these are single line comments.

Done.

> 
> >      do {
> > +        /* FIXME: Until we actually implement SIGFPE handling properly */
> > +        setup_fpu_exception_handler();
> 
> I don't think the comment is appropriate here - all that needs fixing is
> the body of that function (which iirc already has a respective comment).
> 

Deleted this comment.

> Jan
Andrew Cooper Jan. 31, 2017, 3:57 p.m. UTC | #3
On 31/01/17 15:51, Wei Liu wrote:
> On Tue, Jan 31, 2017 at 06:33:11AM -0700, Jan Beulich wrote:
>>> +static void setup_fpu_exception_handler(void)
>>> +{
>>> +    /* FIXME - just disable exceptions for now */
>>> +    unsigned long a;
>>> +
>>> +    asm volatile ( "fnclex");
>>> +    a=0x37f;                    /* FCW_DEFAULT in Xen */
>>> +    asm volatile ( "fldcw %0" :: "m" (a));
>>> +    a=0x1f80;                   /* MXCSR_DEFAULT in Xen */
>>> +    asm volatile ( "ldmxcsr %0" :: "m" (a) );
>>> +}
>> While I see that the FCW value has changed, the strange local
>> variable is still there. If you really want to keep it, please at least
>> add the missing spaces around the = signs. But I'd prefer
>>
>>     asm volatile ( "fldcw %0" :: "m" (0x37f /* FCW_DEFAULT in Xen */));
>>     asm volatile ( "ldmxcsr %0" :: "m" (0x1f80 /* MXCSR_DEFAULT in Xen */) );
>>
> This doesn't work.
>
> x86-insn-emulator-fuzzer.c:445:5: error: memory input 0 is not directly addressable

Indeed.  Both fldcw and ldmxcsr can only take memory operands, so cannot
take immediate values in the inline asm.

>> And then - doesn't the ABI require these settings to be in effect
>> upon program startup anyway?
>>
> I'm not sure about this -- reading AMD64 ABI Draft 0.99.8 doesn't reveal
> much for me. But having the code arranged like this hasn't caused any
> SIGFPE so far.
>
> What do you suggest I do here?

The issue George hit was that AFL managed to emulate alternative loads,
which then caused SIGFPE's to start happening to the test harness.

IIRC, the easy fix was to reset this state before emulating each
instruction.

~Andrew
George Dunlap Jan. 31, 2017, 4:01 p.m. UTC | #4
On 31/01/17 15:57, Andrew Cooper wrote:
> On 31/01/17 15:51, Wei Liu wrote:
>> On Tue, Jan 31, 2017 at 06:33:11AM -0700, Jan Beulich wrote:
>>>> +static void setup_fpu_exception_handler(void)
>>>> +{
>>>> +    /* FIXME - just disable exceptions for now */
>>>> +    unsigned long a;
>>>> +
>>>> +    asm volatile ( "fnclex");
>>>> +    a=0x37f;                    /* FCW_DEFAULT in Xen */
>>>> +    asm volatile ( "fldcw %0" :: "m" (a));
>>>> +    a=0x1f80;                   /* MXCSR_DEFAULT in Xen */
>>>> +    asm volatile ( "ldmxcsr %0" :: "m" (a) );
>>>> +}
>>> While I see that the FCW value has changed, the strange local
>>> variable is still there. If you really want to keep it, please at least
>>> add the missing spaces around the = signs. But I'd prefer
>>>
>>>     asm volatile ( "fldcw %0" :: "m" (0x37f /* FCW_DEFAULT in Xen */));
>>>     asm volatile ( "ldmxcsr %0" :: "m" (0x1f80 /* MXCSR_DEFAULT in Xen */) );
>>>
>> This doesn't work.
>>
>> x86-insn-emulator-fuzzer.c:445:5: error: memory input 0 is not directly addressable
> 
> Indeed.  Both fldcw and ldmxcsr can only take memory operands, so cannot
> take immediate values in the inline asm.
> 
>>> And then - doesn't the ABI require these settings to be in effect
>>> upon program startup anyway?
>>>
>> I'm not sure about this -- reading AMD64 ABI Draft 0.99.8 doesn't reveal
>> much for me. But having the code arranged like this hasn't caused any
>> SIGFPE so far.
>>
>> What do you suggest I do here?
> 
> The issue George hit was that AFL managed to emulate alternative loads,
> which then caused SIGFPE's to start happening to the test harness.
> 
> IIRC, the easy fix was to reset this state before emulating each
> instruction.

Yes, the "proper" fix is to catch SIGFPE and implement the #FP callback
functionality.  Clearing the exceptions every instruction was a
work-around so that I could get useful results without doing so.

 -George
Jan Beulich Jan. 31, 2017, 4:02 p.m. UTC | #5
>>> On 31.01.17 at 16:51, <wei.liu2@citrix.com> wrote:
> On Tue, Jan 31, 2017 at 06:33:11AM -0700, Jan Beulich wrote:
>> >>> On 31.01.17 at 12:08, <wei.liu2@citrix.com> wrote:
>> > +static int _fuzz_rep_read(const char *why, unsigned long *reps)
>> > +{
>> > +    int rc;
>> > +    unsigned long bytes_read = 0;
>> > +
>> > +    rc = data_read(why, &bytes_read, sizeof(bytes_read));
>> > +
>> > +    if ( bytes_read < *reps )
>> > +        *reps -= bytes_read;
>> 
>> I don't understand this: In the success case, *reps upon return
>> indicates the number of iterations done, not the number of
>> iterations left. So at least the variable naming deserves
>> improvement.
> 
> OK, so this should be *reps = bytes_read.

And the comparison should then presumably become <= .

>> > +static void setup_fpu_exception_handler(void)
>> > +{
>> > +    /* FIXME - just disable exceptions for now */
>> > +    unsigned long a;
>> > +
>> > +    asm volatile ( "fnclex");
>> > +    a=0x37f;                    /* FCW_DEFAULT in Xen */
>> > +    asm volatile ( "fldcw %0" :: "m" (a));
>> > +    a=0x1f80;                   /* MXCSR_DEFAULT in Xen */
>> > +    asm volatile ( "ldmxcsr %0" :: "m" (a) );
>> > +}
>> 
>> While I see that the FCW value has changed, the strange local
>> variable is still there. If you really want to keep it, please at least
>> add the missing spaces around the = signs. But I'd prefer
>> 
>>     asm volatile ( "fldcw %0" :: "m" (0x37f /* FCW_DEFAULT in Xen */));
>>     asm volatile ( "ldmxcsr %0" :: "m" (0x1f80 /* MXCSR_DEFAULT in Xen */) 
> );
>> 
> 
> This doesn't work.
> 
> x86-insn-emulator-fuzzer.c:445:5: error: memory input 0 is not directly 
> addressable

Oh. Not sure why that is.

>> And then - doesn't the ABI require these settings to be in effect
>> upon program startup anyway?
>> 
> 
> I'm not sure about this -- reading AMD64 ABI Draft 0.99.8 doesn't reveal
> much for me. But having the code arranged like this hasn't caused any
> SIGFPE so far.
> 
> What do you suggest I do here?

Well, you can keep the code as is if there's uncertainty. The
question was mainly from the perspective of whether the non-
fuzzing test would need to do something similar. With the
SSEn/AVX test blob I'm close to round up, I haven't run into
any situation where the initial settings hadn't been the intended
ones.

Jan
Jan Beulich Jan. 31, 2017, 4:05 p.m. UTC | #6
>>> On 31.01.17 at 16:57, <andrew.cooper3@citrix.com> wrote:
> On 31/01/17 15:51, Wei Liu wrote:
>> On Tue, Jan 31, 2017 at 06:33:11AM -0700, Jan Beulich wrote:
>>>> +static void setup_fpu_exception_handler(void)
>>>> +{
>>>> +    /* FIXME - just disable exceptions for now */
>>>> +    unsigned long a;
>>>> +
>>>> +    asm volatile ( "fnclex");
>>>> +    a=0x37f;                    /* FCW_DEFAULT in Xen */
>>>> +    asm volatile ( "fldcw %0" :: "m" (a));
>>>> +    a=0x1f80;                   /* MXCSR_DEFAULT in Xen */
>>>> +    asm volatile ( "ldmxcsr %0" :: "m" (a) );
>>>> +}
>>> While I see that the FCW value has changed, the strange local
>>> variable is still there. If you really want to keep it, please at least
>>> add the missing spaces around the = signs. But I'd prefer
>>>
>>>     asm volatile ( "fldcw %0" :: "m" (0x37f /* FCW_DEFAULT in Xen */));
>>>     asm volatile ( "ldmxcsr %0" :: "m" (0x1f80 /* MXCSR_DEFAULT in Xen */) );
>>>
>> This doesn't work.
>>
>> x86-insn-emulator-fuzzer.c:445:5: error: memory input 0 is not directly addressable
> 
> Indeed.  Both fldcw and ldmxcsr can only take memory operands, so cannot
> take immediate values in the inline asm.

Nevertheless I don't understand: The constraint correctly says
memory only. I'd have expected the compiler to materialize a
memory object (initialized with the given value) in order to hand
it to the asm().

>>> And then - doesn't the ABI require these settings to be in effect
>>> upon program startup anyway?
>>>
>> I'm not sure about this -- reading AMD64 ABI Draft 0.99.8 doesn't reveal
>> much for me. But having the code arranged like this hasn't caused any
>> SIGFPE so far.
>>
>> What do you suggest I do here?
> 
> The issue George hit was that AFL managed to emulate alternative loads,
> which then caused SIGFPE's to start happening to the test harness.
> 
> IIRC, the easy fix was to reset this state before emulating each
> instruction.

Oh, right, this isn't done just once at startup.

Jan
Wei Liu Jan. 31, 2017, 5:37 p.m. UTC | #7
On Tue, Jan 31, 2017 at 09:02:46AM -0700, Jan Beulich wrote:
> >>> On 31.01.17 at 16:51, <wei.liu2@citrix.com> wrote:
> > On Tue, Jan 31, 2017 at 06:33:11AM -0700, Jan Beulich wrote:
> >> >>> On 31.01.17 at 12:08, <wei.liu2@citrix.com> wrote:
> >> > +static int _fuzz_rep_read(const char *why, unsigned long *reps)
> >> > +{
> >> > +    int rc;
> >> > +    unsigned long bytes_read = 0;
> >> > +
> >> > +    rc = data_read(why, &bytes_read, sizeof(bytes_read));
> >> > +
> >> > +    if ( bytes_read < *reps )
> >> > +        *reps -= bytes_read;
> >> 
> >> I don't understand this: In the success case, *reps upon return
> >> indicates the number of iterations done, not the number of
> >> iterations left. So at least the variable naming deserves
> >> improvement.
> > 
> > OK, so this should be *reps = bytes_read.
> 
> And the comparison should then presumably become <= .
> 

Ah, yes.

> >> > +static void setup_fpu_exception_handler(void)
> >> > +{
> >> > +    /* FIXME - just disable exceptions for now */
> >> > +    unsigned long a;
> >> > +
> >> > +    asm volatile ( "fnclex");
> >> > +    a=0x37f;                    /* FCW_DEFAULT in Xen */
> >> > +    asm volatile ( "fldcw %0" :: "m" (a));
> >> > +    a=0x1f80;                   /* MXCSR_DEFAULT in Xen */
> >> > +    asm volatile ( "ldmxcsr %0" :: "m" (a) );
> >> > +}
> >> 
> >> While I see that the FCW value has changed, the strange local
> >> variable is still there. If you really want to keep it, please at least
> >> add the missing spaces around the = signs. But I'd prefer
> >> 
> >>     asm volatile ( "fldcw %0" :: "m" (0x37f /* FCW_DEFAULT in Xen */));
> >>     asm volatile ( "ldmxcsr %0" :: "m" (0x1f80 /* MXCSR_DEFAULT in Xen */) 
> > );
> >> 
> > 
> > This doesn't work.
> > 
> > x86-insn-emulator-fuzzer.c:445:5: error: memory input 0 is not directly 
> > addressable
> 
> Oh. Not sure why that is.
> 
> >> And then - doesn't the ABI require these settings to be in effect
> >> upon program startup anyway?
> >> 
> > 
> > I'm not sure about this -- reading AMD64 ABI Draft 0.99.8 doesn't reveal
> > much for me. But having the code arranged like this hasn't caused any
> > SIGFPE so far.
> > 
> > What do you suggest I do here?
> 
> Well, you can keep the code as is if there's uncertainty. The
> question was mainly from the perspective of whether the non-
> fuzzing test would need to do something similar. With the
> SSEn/AVX test blob I'm close to round up, I haven't run into
> any situation where the initial settings hadn't been the intended
> ones.
> 
> Jan

Right. I will leave the code as-is for now.

Wei.

>
diff mbox

Patch

diff --git a/tools/fuzz/x86_instruction_emulator/x86-insn-emulator-fuzzer.c b/tools/fuzz/x86_instruction_emulator/x86-insn-emulator-fuzzer.c
index 7d7f731677..ef223e856d 100644
--- a/tools/fuzz/x86_instruction_emulator/x86-insn-emulator-fuzzer.c
+++ b/tools/fuzz/x86_instruction_emulator/x86-insn-emulator-fuzzer.c
@@ -4,6 +4,7 @@ 
 #include <inttypes.h>
 #include <limits.h>
 #include <stdbool.h>
+#include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -16,26 +17,79 @@ 
 
 #include "x86_emulate.h"
 
-static unsigned char data[4096];
+#include "../../../xen/include/asm-x86/msr-index.h"
+
+#define MSR_INDEX_MAX 16
+
+#define SEG_NUM x86_seg_none
+
+struct input_struct {
+    unsigned long cr[5];
+    uint64_t msr[MSR_INDEX_MAX];
+    struct cpu_user_regs regs;
+    struct segment_register segments[SEG_NUM];
+    unsigned long options;
+    unsigned char data[4096];
+} input;
+#define DATA_OFFSET offsetof(struct input_struct, data)
 static unsigned int data_index;
-static unsigned int data_max;
+static unsigned int data_num;
+
+/* Randomly return success or failure when processing data.  If
+ * `exception` is false, this function turns _EXCEPTION to _OKAY.
+ */
+int maybe_fail(const char *why, bool exception)
+{
+    int rc;
+
+    if ( data_index + 1 > data_num )
+        rc = X86EMUL_EXCEPTION;
+    else
+    {
+        /* Randomly returns value:
+         * 50% okay
+         * 25% unhandlable
+         * 25% exception
+         */
+        if ( input.data[data_index] > 0xc0 )
+            rc = X86EMUL_EXCEPTION;
+        else if ( input.data[data_index] > 0x80 )
+            rc = X86EMUL_UNHANDLEABLE;
+        else
+            rc = X86EMUL_OKAY;
+        data_index++;
+    }
+
+    if ( rc == X86EMUL_EXCEPTION && !exception )
+        rc = X86EMUL_OKAY;
+
+    printf("maybe_fail %s: %d\n", why, rc);
+
+    return rc;
+}
 
 static int data_read(const char *why, void *dst, unsigned int bytes)
 {
     unsigned int i;
+    int rc;
 
-    if ( data_index + bytes > data_max )
-        return X86EMUL_EXCEPTION;
+    if ( data_index + bytes > data_num )
+        rc = X86EMUL_EXCEPTION;
+    else
+        rc = maybe_fail(why, true);
 
-    memcpy(dst,  data + data_index, bytes);
-    data_index += bytes;
+    if ( rc == X86EMUL_OKAY )
+    {
+        memcpy(dst,  input.data + data_index, bytes);
+        data_index += bytes;
 
-    printf("%s: ", why);
-    for ( i = 0; i < bytes; i++ )
-        printf(" %02x", *(unsigned char *)(dst + i));
-    printf("\n");
+        printf("%s: ", why);
+        for ( i = 0; i < bytes; i++ )
+            printf(" %02x", *(unsigned char *)(dst + i));
+        printf("\n");
+    }
 
-    return X86EMUL_OKAY;
+    return rc;
 }
 
 static int fuzz_read(
@@ -48,14 +102,96 @@  static int fuzz_read(
     return data_read("read", p_data, bytes);
 }
 
-static int fuzz_fetch(
+static int fuzz_read_io(
+    unsigned int port,
+    unsigned int bytes,
+    unsigned long *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return data_read("read_io", val, bytes);
+}
+
+static int fuzz_insn_fetch(
     unsigned int seg,
     unsigned long offset,
     void *p_data,
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    return data_read("fetch", p_data, bytes);
+    return data_read("insn_fetch", p_data, bytes);
+}
+
+static int _fuzz_rep_read(const char *why, unsigned long *reps)
+{
+    int rc;
+    unsigned long bytes_read = 0;
+
+    rc = data_read(why, &bytes_read, sizeof(bytes_read));
+
+    if ( bytes_read < *reps )
+        *reps -= bytes_read;
+
+    /* Indicate we haven't done any work if emulation has failed */
+    if ( rc != X86EMUL_OKAY )
+        *reps = 0;
+
+    return rc;
+}
+
+static int _fuzz_rep_write(const char *why, unsigned long *reps)
+{
+    int rc = maybe_fail(why, true);
+
+    /* Indicate we haven't done any work if emulation has failed */
+    if ( rc != X86EMUL_OKAY )
+        *reps = 0;
+
+    return rc;
+}
+
+static int fuzz_rep_ins(
+    uint16_t src_port,
+    enum x86_segment dst_seg,
+    unsigned long dst_offset,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return _fuzz_rep_read("rep_in", reps);
+}
+
+static int fuzz_rep_movs(
+    enum x86_segment src_seg,
+    unsigned long src_offset,
+    enum x86_segment dst_seg,
+    unsigned long dst_offset,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return _fuzz_rep_read("rep_movs", reps);
+}
+
+static int fuzz_rep_outs(
+    enum x86_segment src_seg,
+    unsigned long src_offset,
+    uint16_t dst_port,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return _fuzz_rep_write("rep_outs", reps);
+}
+
+static int fuzz_rep_stos(
+    void *p_data,
+    enum x86_segment seg,
+    unsigned long offset,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return _fuzz_rep_write("rep_stos", reps);
 }
 
 static int fuzz_write(
@@ -65,7 +201,7 @@  static int fuzz_write(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    return X86EMUL_OKAY;
+    return maybe_fail("write", true);
 }
 
 static int fuzz_cmpxchg(
@@ -76,18 +212,294 @@  static int fuzz_cmpxchg(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    return X86EMUL_OKAY;
+    return maybe_fail("cmpxchg", true);
+}
+
+static int fuzz_invlpg(
+    enum x86_segment seg,
+    unsigned long offset,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return maybe_fail("invlpg", false);
+}
+
+static int fuzz_wbinvd(
+    struct x86_emulate_ctxt *ctxt)
+{
+    return maybe_fail("wbinvd", true);
+}
+
+static int fuzz_write_io(
+    unsigned int port,
+    unsigned int bytes,
+    unsigned long val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return maybe_fail("write_io", true);
+}
+
+static int fuzz_cpuid(
+    uint32_t leaf,
+    uint32_t subleaf,
+    struct cpuid_leaf *res,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return emul_test_cpuid(leaf, subleaf, res, ctxt);
+}
+
+static int fuzz_read_segment(
+    enum x86_segment seg,
+    struct segment_register *reg,
+    struct x86_emulate_ctxt *ctxt)
+{
+    int rc;
+
+    if ( seg > SEG_NUM )
+        return X86EMUL_UNHANDLEABLE;
+
+    rc = maybe_fail("read_segment", true);
+
+    if ( rc == X86EMUL_OKAY )
+        memcpy(reg, input.segments+seg, sizeof(struct segment_register));
+
+    return rc;
+}
+
+static int fuzz_write_segment(
+    enum x86_segment seg,
+    const struct segment_register *reg,
+    struct x86_emulate_ctxt *ctxt)
+{
+    int rc;
+
+    if ( seg > SEG_NUM )
+        return X86EMUL_UNHANDLEABLE;
+
+    rc = maybe_fail("write_segment", true);
+
+    if ( rc == X86EMUL_OKAY )
+        memcpy(input.segments+seg, reg, sizeof(struct segment_register));
+
+    return rc;
+}
+
+static int fuzz_read_cr(
+    unsigned int reg,
+    unsigned long *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    int rc;
+
+    if ( reg > ARRAY_SIZE(input.cr) )
+        return X86EMUL_UNHANDLEABLE;
+
+    rc = maybe_fail("read_cr", true);
+
+    if ( rc == X86EMUL_OKAY )
+        *val = input.cr[reg];
+
+    return rc;
+}
+
+static int fuzz_write_cr(
+    unsigned int reg,
+    unsigned long val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    int rc;
+
+    if ( reg > ARRAY_SIZE(input.cr) )
+        return X86EMUL_UNHANDLEABLE;
+
+    rc = maybe_fail("write_cr", true);
+
+    if ( rc == X86EMUL_OKAY )
+        input.cr[reg] = val;
+
+    return rc;
+}
+
+enum {
+    MSRI_IA32_SYSENTER_CS,
+    MSRI_IA32_SYSENTER_ESP,
+    MSRI_IA32_SYSENTER_EIP,
+    MSRI_EFER,
+    MSRI_STAR,
+    MSRI_LSTAR,
+    MSRI_CSTAR,
+    MSRI_SYSCALL_MASK
+};
+
+const static unsigned int msr_index[MSR_INDEX_MAX] = {
+    [MSRI_IA32_SYSENTER_CS]  = MSR_IA32_SYSENTER_CS,
+    [MSRI_IA32_SYSENTER_ESP] = MSR_IA32_SYSENTER_ESP,
+    [MSRI_IA32_SYSENTER_EIP] = MSR_IA32_SYSENTER_EIP,
+    [MSRI_EFER]              = MSR_EFER,
+    [MSRI_STAR]              = MSR_STAR,
+    [MSRI_LSTAR]             = MSR_LSTAR,
+    [MSRI_CSTAR]             = MSR_CSTAR,
+    [MSRI_SYSCALL_MASK]      = MSR_SYSCALL_MASK
+};
+
+static int _fuzz_read_msr(
+    unsigned int reg,
+    uint64_t *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    unsigned int idx;
+
+    switch ( reg )
+    {
+    case MSR_TSC_AUX:
+    case MSR_IA32_TSC:
+        return data_read("read_msr", val, sizeof(*val));
+    case MSR_EFER:
+        *val = input.msr[MSRI_EFER];
+        *val &= ~EFER_LMA;
+        if ( (*val & EFER_LME) && (input.cr[4] & X86_CR4_PAE) &&
+             (input.cr[0] & X86_CR0_PG) )
+        {
+            printf("Setting EFER_LMA\n");
+            *val |= EFER_LMA;
+        }
+        return X86EMUL_OKAY;
+    }
+
+    for ( idx = 0; idx < MSR_INDEX_MAX; idx++ )
+    {
+        if ( msr_index[idx] == reg )
+        {
+            *val = input.msr[idx];
+            return X86EMUL_OKAY;
+        }
+    }
+
+    return X86EMUL_EXCEPTION;
+}
+
+static int fuzz_read_msr(
+    unsigned int reg,
+    uint64_t *val,
+    struct x86_emulate_ctxt *ctxt) {
+    int rc;
+
+    rc = maybe_fail("read_msr", true);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+    else
+        return _fuzz_read_msr(reg, val, ctxt);
+}
+
+static int fuzz_write_msr(
+    unsigned int reg,
+    uint64_t val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    unsigned int idx;
+    int rc;
+
+    rc = maybe_fail("write_ms", true);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    switch ( reg )
+    {
+    case MSR_TSC_AUX:
+    case MSR_IA32_TSC:
+        return X86EMUL_OKAY;
+    }
+
+    for ( idx = 0; idx < MSR_INDEX_MAX; idx++ )
+    {
+        if ( msr_index[idx] == reg )
+        {
+            input.msr[idx] = val;
+            return X86EMUL_OKAY;
+        }
+    }
+
+    return X86EMUL_EXCEPTION;
 }
 
+#define SET(h) .h = fuzz_##h
 static struct x86_emulate_ops fuzz_emulops = {
-    .read       = fuzz_read,
-    .insn_fetch = fuzz_fetch,
-    .write      = fuzz_write,
-    .cmpxchg    = fuzz_cmpxchg,
-    .cpuid      = emul_test_cpuid,
-    .read_cr    = emul_test_read_cr,
+    SET(read),
+    SET(insn_fetch),
+    SET(write),
+    SET(cmpxchg),
+    SET(rep_ins),
+    SET(rep_outs),
+    SET(rep_movs),
+    SET(rep_stos),
+    SET(read_segment),
+    SET(write_segment),
+    SET(read_io),
+    SET(write_io),
+    SET(read_cr),
+    SET(write_cr),
+    SET(read_msr),
+    SET(write_msr),
+    SET(wbinvd),
+    SET(cpuid),
+    SET(invlpg),
     .get_fpu    = emul_test_get_fpu,
 };
+#undef SET
+
+static void setup_fpu_exception_handler(void)
+{
+    /* FIXME - just disable exceptions for now */
+    unsigned long a;
+
+    asm volatile ( "fnclex");
+    a=0x37f;                    /* FCW_DEFAULT in Xen */
+    asm volatile ( "fldcw %0" :: "m" (a));
+    a=0x1f80;                   /* MXCSR_DEFAULT in Xen */
+    asm volatile ( "ldmxcsr %0" :: "m" (a) );
+}
+
+static void dump_state(struct x86_emulate_ctxt *ctxt)
+{
+    struct cpu_user_regs *regs = ctxt->regs;
+    uint64_t val;
+
+    printf(" -- State -- \n");
+    printf("addr / sp size: %d / %d\n", ctxt->addr_size, ctxt->sp_size);
+    printf(" cr0: %lx\n", input.cr[0]);
+    printf(" cr3: %lx\n", input.cr[3]);
+    printf(" cr4: %lx\n", input.cr[4]);
+
+    printf(" rip: %"PRIx64"\n", regs->rip);
+
+    _fuzz_read_msr(MSR_EFER, &val, ctxt);
+    printf("EFER: %"PRIx64"\n", val);
+}
+
+static bool long_mode_active(struct x86_emulate_ctxt *ctxt)
+{
+    uint64_t val;
+
+    if ( _fuzz_read_msr(MSR_EFER, &val, ctxt) != X86EMUL_OKAY )
+        return false;
+
+    return val & EFER_LMA;
+}
+
+static bool in_longmode(struct x86_emulate_ctxt *ctxt)
+{
+    return long_mode_active(ctxt) && input.segments[x86_seg_cs].attr.fields.l;
+}
+
+static void set_sizes(struct x86_emulate_ctxt *ctxt)
+{
+    if ( in_longmode(ctxt) )
+        ctxt->addr_size = ctxt->sp_size = 64;
+    else
+    {
+        ctxt->addr_size = input.segments[x86_seg_cs].attr.fields.db ? 32 : 16;
+        ctxt->sp_size   = input.segments[x86_seg_ss].attr.fields.db ? 32 : 16;
+    }
+}
 
 #define CANONICALIZE(x)                                   \
     do {                                                  \
@@ -100,10 +512,150 @@  static struct x86_emulate_ops fuzz_emulops = {
         (x) = _y;                                       \
     } while( 0 )
 
-#define ADDR_SIZE_SHIFT 60
-#define ADDR_SIZE_64 (2ULL << ADDR_SIZE_SHIFT)
-#define ADDR_SIZE_32 (1ULL << ADDR_SIZE_SHIFT)
-#define ADDR_SIZE_16 (0)
+/* Expects bitmap and regs to be defined */
+#define CANONICALIZE_MAYBE(reg)                       \
+    if ( !(bitmap & (1 << CANONICALIZE_##reg)) )      \
+        CANONICALIZE(regs->reg);                      \
+
+enum {
+    HOOK_read,
+    HOOK_insn_fetch,
+    HOOK_write,
+    HOOK_cmpxchg,
+    HOOK_rep_ins,
+    HOOK_rep_outs,
+    HOOK_rep_movs,
+    HOOK_rep_stos,
+    HOOK_read_segment,
+    HOOK_write_segment,
+    HOOK_read_io,
+    HOOK_write_io,
+    HOOK_read_cr,
+    HOOK_write_cr,
+    HOOK_read_dr,
+    HOOK_write_dr,
+    HOOK_read_msr,
+    HOOK_write_msr,
+    HOOK_wbinvd,
+    HOOK_cpuid,
+    HOOK_inject_hw_exception,
+    HOOK_inject_sw_interrupt,
+    HOOK_get_fpu,
+    HOOK_put_fpu,
+    HOOK_invlpg,
+    HOOK_vmfunc,
+    OPTION_swint_emulation, /* Two bits */
+    CANONICALIZE_rip = OPTION_swint_emulation + 2,
+    CANONICALIZE_rsp,
+    CANONICALIZE_rbp
+};
+
+/* Expects bitmap to be defined */
+#define MAYBE_DISABLE_HOOK(h)                          \
+    if ( bitmap & (1 << HOOK_##h) )                    \
+    {                                                  \
+        fuzz_emulops.h = NULL;                         \
+        printf("Disabling hook "#h"\n");               \
+    }
+
+static void disable_hooks(void)
+{
+    unsigned long bitmap = input.options;
+
+    /* See also sanitize_input, some hooks can't be disabled. */
+    MAYBE_DISABLE_HOOK(read);
+    MAYBE_DISABLE_HOOK(insn_fetch);
+    MAYBE_DISABLE_HOOK(write);
+    MAYBE_DISABLE_HOOK(cmpxchg);
+    MAYBE_DISABLE_HOOK(rep_ins);
+    MAYBE_DISABLE_HOOK(rep_outs);
+    MAYBE_DISABLE_HOOK(rep_movs);
+    MAYBE_DISABLE_HOOK(rep_stos);
+    MAYBE_DISABLE_HOOK(read_segment);
+    MAYBE_DISABLE_HOOK(write_segment);
+    MAYBE_DISABLE_HOOK(read_io);
+    MAYBE_DISABLE_HOOK(write_io);
+    MAYBE_DISABLE_HOOK(read_cr);
+    MAYBE_DISABLE_HOOK(write_cr);
+    MAYBE_DISABLE_HOOK(read_msr);
+    MAYBE_DISABLE_HOOK(write_msr);
+    MAYBE_DISABLE_HOOK(wbinvd);
+    MAYBE_DISABLE_HOOK(cpuid);
+    MAYBE_DISABLE_HOOK(get_fpu);
+    MAYBE_DISABLE_HOOK(invlpg);
+}
+
+static void set_swint_support(struct x86_emulate_ctxt *ctxt)
+{
+    unsigned int swint_opt = (input.options >> OPTION_swint_emulation) & 3;
+
+    static const enum x86_swint_emulation map[4] = {
+        x86_swint_emulate_none,
+        x86_swint_emulate_none,
+        x86_swint_emulate_icebp,
+        x86_swint_emulate_all };
+
+    ctxt->swint_emulate = map[swint_opt];
+}
+
+/*
+ * Constrain input to architecturally-possible states where
+ * the emulator relies on these
+ *
+ * In general we want the emulator to be as absolutely robust as
+ * possible; which means that we want to minimize the number of things
+ * it assumes about the input state.  Tesing this means minimizing and
+ * removing as much of the input constraints as possible.
+ *
+ * So we only add constraints that (in general) have been proven to
+ * cause crashes in the emulator.
+ *
+ * For future reference: other constraints which might be necessary at
+ * some point:
+ *
+ * - EFER.LMA => !EFLAGS.NT
+ * - In VM86 mode (and real mode?), force segment...
+ *  - ...access rights to 0xf3
+ *  - ...limits to 0xffff
+ *  - ...bases to below 1Mb, 16-byte aligned
+ *  - ...selectors to (base >> 4)
+ */
+void sanitize_input(struct x86_emulate_ctxt *ctxt) {
+    struct cpu_user_regs *regs = &input.regs;
+    unsigned long bitmap = input.options;
+
+    /* Some hooks can't be disabled. */
+    input.options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch));
+
+    /* Zero 'private' entries */
+    regs->error_code = 0;
+    regs->entry_vector = 0;
+
+    CANONICALIZE_MAYBE(rip);
+    CANONICALIZE_MAYBE(rsp);
+    CANONICALIZE_MAYBE(rbp);
+
+    /*
+     * CR0.PG can't be set if CR0.PE isn't set.  Set is more interesting, so
+     * set PE if PG is set.
+     */
+    if ( input.cr[0] & X86_CR0_PG )
+        input.cr[0] |= X86_CR0_PE;
+
+    /*
+     * EFLAGS.VM not available in long mode
+     */
+    if ( long_mode_active(ctxt) )
+        regs->rflags &= ~X86_EFLAGS_VM;
+
+    /*
+     * EFLAGS.VM implies 16-bit mode
+     */
+    if ( regs->rflags & X86_EFLAGS_VM ) {
+        input.segments[x86_seg_cs].attr.fields.db = 0;
+        input.segments[x86_seg_ss].attr.fields.db = 0;
+    }
+}
 
 int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
 {
@@ -114,10 +666,7 @@  int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
         .addr_size = 8 * sizeof(void *),
         .sp_size = 8 * sizeof(void *),
     };
-    unsigned int nr = 0;
     int rc;
-    unsigned int x;
-    const uint8_t *p = data_p;
 
     stack_exec = emul_test_make_stack_executable();
     if ( !stack_exec )
@@ -127,52 +676,39 @@  int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
     }
 
     /* Reset all global state variables */
-    memset(data, 0, sizeof(data));
+    memset(&input, 0, sizeof(input));
     data_index = 0;
-    data_max = 0;
-
-    nr = size < sizeof(regs) ? size : sizeof(regs);
+    data_num = 0;
 
-    memcpy(&regs, p, nr);
-    p += sizeof(regs);
+    if ( size <= DATA_OFFSET )
+    {
+        printf("Input too small\n");
+        return 1;
+    }
 
-    if ( nr < size )
+    if ( size > sizeof(input) )
     {
-        memcpy(data, p, size - nr);
-        data_max = size - nr;
+        printf("Input too large\n");
+        return 1;
     }
 
-    ctxt.force_writeback = false;
+    memcpy(&input, data_p, size);
 
-    /* Zero 'private' fields */
-    regs.error_code = 0;
-    regs.entry_vector = 0;
+    data_num = size - DATA_OFFSET;
 
-    /* Use the upper bits of regs.eip to determine addr_size */
-    x = (regs.rip >> ADDR_SIZE_SHIFT) & 0x3;
-    if ( x == 3 )
-        x = 2;
-    ctxt.addr_size = 16 << x;
-    printf("addr_size: %d\n", ctxt.addr_size);
+    sanitize_input(&ctxt);
 
-    /* Use the upper bit of regs.rsp to determine sp_size (if appropriate) */
-    if ( ctxt.addr_size == 64 )
-        ctxt.sp_size = 64;
-    else
-    {
-        /* If addr_size isn't 64-bits, sp_size can only be 16 or 32 bits */
-        x = (regs.rsp >> ADDR_SIZE_SHIFT) & 0x1;
-        ctxt.sp_size = 16 << x;
-    }
-    printf("sp_size: %d\n", ctxt.sp_size);
-    CANONICALIZE(regs.rip);
-    CANONICALIZE(regs.rsp);
-    CANONICALIZE(regs.rbp);
+    disable_hooks();
 
-    /* Zero all segments for now */
-    regs.cs = regs.ss = regs.es = regs.ds = regs.fs = regs.gs = 0;
+    set_swint_support(&ctxt);
 
     do {
+        /* FIXME: Until we actually implement SIGFPE handling properly */
+        setup_fpu_exception_handler();
+
+        set_sizes(&ctxt);
+        dump_state(&ctxt);
+
         rc = x86_emulate(&ctxt, &fuzz_emulops);
         printf("Emulation result: %d\n", rc);
     } while ( rc == X86EMUL_OKAY );