diff mbox

[v3,07/12] fuzz/x86_emulate: Move all state into fuzz_state

Message ID 20171010162011.9629-7-george.dunlap@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

George Dunlap Oct. 10, 2017, 4:20 p.m. UTC
This is in preparation for adding the option for a more "compact"
interpretation of the fuzzing data, in which we only change select
bits of the state.

Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
---
v3:
 - Move DATA_OFFSET inside the structure
 - Remove a stray blank line
v2: Port over previous changes

CC: Ian Jackson <ian.jackson@citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Jan Beulich <jbeulich@suse.com>
---
 tools/fuzz/x86_instruction_emulator/fuzz-emul.c | 89 +++++++++++++------------
 1 file changed, 45 insertions(+), 44 deletions(-)

Comments

Andrew Cooper Oct. 10, 2017, 6:20 p.m. UTC | #1
On 10/10/17 17:20, George Dunlap wrote:
> This is in preparation for adding the option for a more "compact"
> interpretation of the fuzzing data, in which we only change select
> bits of the state.
>
> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
> Acked-by: Jan Beulich <jbeulich@suse.com>
> ---
> v3:
>  - Move DATA_OFFSET inside the structure
>  - Remove a stray blank line
> v2: Port over previous changes
>
> CC: Ian Jackson <ian.jackson@citrix.com>
> CC: Wei Liu <wei.liu2@citrix.com>
> CC: Andrew Cooper <andrew.cooper3@citrix.com>
> CC: Jan Beulich <jbeulich@suse.com>
> ---
>  tools/fuzz/x86_instruction_emulator/fuzz-emul.c | 89 +++++++++++++------------
>  1 file changed, 45 insertions(+), 44 deletions(-)
>
> diff --git a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
> index 8998f21fe1..20d52b33f8 100644
> --- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
> +++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
> @@ -24,14 +24,8 @@
>  /* Layout of data expected as fuzzing input. */
>  struct fuzz_corpus
>  {
> -    unsigned long cr[5];
> -    uint64_t msr[MSR_INDEX_MAX];
> -    struct cpu_user_regs regs;
> -    struct segment_register segments[SEG_NUM];
> -    unsigned long options;
>      unsigned char data[4096];
>  } input;
> -#define DATA_OFFSET offsetof(struct fuzz_corpus, data)
>  
>  /*
>   * Internal state of the fuzzing harness.  Calculated initially from the input
> @@ -39,7 +33,14 @@ struct fuzz_corpus
>   */

You've invalidated a number of the comments describing behaviours,
including the description of the difference between fuzz_state and
fuzz_corpus.

Why do you need to move any of this in the first place?  If you insist
on keeping the compact mode, what's wrong with conditionally reading the
AFL input into either fuzz_copus entirely, or into fuzz_corpus.data[]
and then conditionally deriving this state?

That way, you don't block work to fix the root cause, which needs to end
up with architectural values in fuzz_state, derived from a bitfields in
fuzz_corpus.

~Andrew

>  struct fuzz_state
>  {
> +    unsigned long options;
> +    unsigned long cr[5];
> +    uint64_t msr[MSR_INDEX_MAX];
> +    struct segment_register segments[SEG_NUM];
> +    struct cpu_user_regs regs;
> +
>      /* Fuzzer's input data. */
> +#define DATA_OFFSET offsetof(struct fuzz_state, corpus)
>      struct fuzz_corpus *corpus;
>  
>      /* Real amount of data backing corpus->data[]. */
>
George Dunlap Oct. 11, 2017, 11:30 a.m. UTC | #2
On 10/10/2017 07:20 PM, Andrew Cooper wrote:
> On 10/10/17 17:20, George Dunlap wrote:
>> This is in preparation for adding the option for a more "compact"
>> interpretation of the fuzzing data, in which we only change select
>> bits of the state.
>>
>> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
>> Acked-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> v3:
>>  - Move DATA_OFFSET inside the structure
>>  - Remove a stray blank line
>> v2: Port over previous changes
>>
>> CC: Ian Jackson <ian.jackson@citrix.com>
>> CC: Wei Liu <wei.liu2@citrix.com>
>> CC: Andrew Cooper <andrew.cooper3@citrix.com>
>> CC: Jan Beulich <jbeulich@suse.com>
>> ---
>>  tools/fuzz/x86_instruction_emulator/fuzz-emul.c | 89 +++++++++++++------------
>>  1 file changed, 45 insertions(+), 44 deletions(-)
>>
>> diff --git a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
>> index 8998f21fe1..20d52b33f8 100644
>> --- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
>> +++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
>> @@ -24,14 +24,8 @@
>>  /* Layout of data expected as fuzzing input. */
>>  struct fuzz_corpus
>>  {
>> -    unsigned long cr[5];
>> -    uint64_t msr[MSR_INDEX_MAX];
>> -    struct cpu_user_regs regs;
>> -    struct segment_register segments[SEG_NUM];
>> -    unsigned long options;
>>      unsigned char data[4096];
>>  } input;
>> -#define DATA_OFFSET offsetof(struct fuzz_corpus, data)
>>  
>>  /*
>>   * Internal state of the fuzzing harness.  Calculated initially from the input
>> @@ -39,7 +33,14 @@ struct fuzz_corpus
>>   */
> 
> You've invalidated a number of the comments describing behaviours,
> including the description of the difference between fuzz_state and
> fuzz_corpus.

Well completely apart from the 'compact' format, I think this move makes
sense.  The state moved is actually the state of the "emulated cpu" --
the emulator actually modifies this state as instructions are executed.
I think it makes sense to keep the "current state of the virtual
processor" separate from "input we get from a file".

In fact, the comment above this says:  "Internal state of the fuzzing
harness.  Calculated initially from the input corpus, and later
[mutated] by the emulation callbacks."

This actually makes that comment *more* true, since before this patch
almost the only state modified by the emulation callbacks was actually
in fuzz_corpus, not fuzz_state.

> Why do you need to move any of this in the first place?  If you insist
> on keeping the compact mode, what's wrong with conditionally reading the
> AFL input into either fuzz_copus entirely, or into fuzz_corpus.data[]
> and then conditionally deriving this state?

I don't see any advantage of that.  In fact it would mean that the name
"input" and "fuzz_corpus" even more misleading than they are before this
patch.

> That way, you don't block work to fix the root cause, which needs to end
> up with architectural values in fuzz_state, derived from a bitfields in
> fuzz_corpus.

x86_emulate() needs a cpu_user_regs structure; so if you want the data
in fuzz_corpus not to look exactly like cpu_user_regs, then you'll need
to have another place to put cpu_user_regs, and "populate" it based on
the data (whatever that looks like).  The most obvious thing to do is to
is to do exactly what I've done -- place cpu_user_regs inside fuzz_state.

The same thing is true for the other bits of data -- the read_* and
write* callbacks need "unpacked" state which they can read and modify.
The most obvious thing to do is to have arrays in fuzz_state which they
can simply read and write, and to populate them based on whatever
structure "compact" structure you end up with.

If you want to re-introduce a more compact structure format for the
input file, there are lots of options.  We can make fuzz_corpus into a
union.  We can have 'input' be a pure char array, and cast it into
several different structures depending on how we want to interpret it.
Or, we can define a local structure on a stack and "read" from the main
input file via input_read() and friends.

This patch makes all of those options easier, not harder.

 -George
George Dunlap Oct. 11, 2017, 2:50 p.m. UTC | #3
On 10/11/2017 12:30 PM, George Dunlap wrote:
> On 10/10/2017 07:20 PM, Andrew Cooper wrote:
>> On 10/10/17 17:20, George Dunlap wrote:
>>> This is in preparation for adding the option for a more "compact"
>>> interpretation of the fuzzing data, in which we only change select
>>> bits of the state.
>>>
>>> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
>>> Acked-by: Jan Beulich <jbeulich@suse.com>
>>> ---
>>> v3:
>>>  - Move DATA_OFFSET inside the structure
>>>  - Remove a stray blank line
>>> v2: Port over previous changes
>>>
>>> CC: Ian Jackson <ian.jackson@citrix.com>
>>> CC: Wei Liu <wei.liu2@citrix.com>
>>> CC: Andrew Cooper <andrew.cooper3@citrix.com>
>>> CC: Jan Beulich <jbeulich@suse.com>
>>> ---
>>>  tools/fuzz/x86_instruction_emulator/fuzz-emul.c | 89 +++++++++++++------------
>>>  1 file changed, 45 insertions(+), 44 deletions(-)
>>>
>>> diff --git a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
>>> index 8998f21fe1..20d52b33f8 100644
>>> --- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
>>> +++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
>>> @@ -24,14 +24,8 @@
>>>  /* Layout of data expected as fuzzing input. */
>>>  struct fuzz_corpus
>>>  {
>>> -    unsigned long cr[5];
>>> -    uint64_t msr[MSR_INDEX_MAX];
>>> -    struct cpu_user_regs regs;
>>> -    struct segment_register segments[SEG_NUM];
>>> -    unsigned long options;
>>>      unsigned char data[4096];
>>>  } input;
>>> -#define DATA_OFFSET offsetof(struct fuzz_corpus, data)
>>>  
>>>  /*
>>>   * Internal state of the fuzzing harness.  Calculated initially from the input
>>> @@ -39,7 +33,14 @@ struct fuzz_corpus
>>>   */
>>
>> You've invalidated a number of the comments describing behaviours,
>> including the description of the difference between fuzz_state and
>> fuzz_corpus.
> 
> Well completely apart from the 'compact' format, I think this move makes
> sense.  The state moved is actually the state of the "emulated cpu" --
> the emulator actually modifies this state as instructions are executed.
> I think it makes sense to keep the "current state of the virtual
> processor" separate from "input we get from a file".

It's also necessary for when we add the `--rerun` parameter: We have to
make sure we leave the input data alone, and have two parallel states
that we set up and can compare.

 -George
diff mbox

Patch

diff --git a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
index 8998f21fe1..20d52b33f8 100644
--- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
+++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
@@ -24,14 +24,8 @@ 
 /* Layout of data expected as fuzzing input. */
 struct fuzz_corpus
 {
-    unsigned long cr[5];
-    uint64_t msr[MSR_INDEX_MAX];
-    struct cpu_user_regs regs;
-    struct segment_register segments[SEG_NUM];
-    unsigned long options;
     unsigned char data[4096];
 } input;
-#define DATA_OFFSET offsetof(struct fuzz_corpus, data)
 
 /*
  * Internal state of the fuzzing harness.  Calculated initially from the input
@@ -39,7 +33,14 @@  struct fuzz_corpus
  */
 struct fuzz_state
 {
+    unsigned long options;
+    unsigned long cr[5];
+    uint64_t msr[MSR_INDEX_MAX];
+    struct segment_register segments[SEG_NUM];
+    struct cpu_user_regs regs;
+
     /* Fuzzer's input data. */
+#define DATA_OFFSET offsetof(struct fuzz_state, corpus)
     struct fuzz_corpus *corpus;
 
     /* Real amount of data backing corpus->data[]. */
@@ -392,11 +393,10 @@  static int fuzz_read_segment(
     struct x86_emulate_ctxt *ctxt)
 {
     const struct fuzz_state *s = ctxt->data;
-    const struct fuzz_corpus *c = s->corpus;
 
     assert(is_x86_user_segment(seg) || is_x86_system_segment(seg));
 
-    *reg = c->segments[seg];
+    *reg = s->segments[seg];
 
     return X86EMUL_OKAY;
 }
@@ -407,7 +407,6 @@  static int fuzz_write_segment(
     struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
-    struct fuzz_corpus *c = s->corpus;
     int rc;
 
     assert(is_x86_user_segment(seg) || is_x86_system_segment(seg));
@@ -415,7 +414,7 @@  static int fuzz_write_segment(
     rc = maybe_fail(ctxt, "write_segment", true);
 
     if ( rc == X86EMUL_OKAY )
-        c->segments[seg] = *reg;
+        s->segments[seg] = *reg;
 
     return rc;
 }
@@ -426,12 +425,11 @@  static int fuzz_read_cr(
     struct x86_emulate_ctxt *ctxt)
 {
     const struct fuzz_state *s = ctxt->data;
-    const struct fuzz_corpus *c = s->corpus;
 
-    if ( reg >= ARRAY_SIZE(c->cr) )
+    if ( reg >= ARRAY_SIZE(s->cr) )
         return X86EMUL_UNHANDLEABLE;
 
-    *val = c->cr[reg];
+    *val = s->cr[reg];
 
     return X86EMUL_OKAY;
 }
@@ -442,17 +440,16 @@  static int fuzz_write_cr(
     struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
-    struct fuzz_corpus *c = s->corpus;
     int rc;
 
-    if ( reg >= ARRAY_SIZE(c->cr) )
+    if ( reg >= ARRAY_SIZE(s->cr) )
         return X86EMUL_UNHANDLEABLE;
 
     rc = maybe_fail(ctxt, "write_cr", true);
     if ( rc != X86EMUL_OKAY )
         return rc;
 
-    c->cr[reg] = val;
+    s->cr[reg] = val;
 
     return X86EMUL_OKAY;
 }
@@ -487,7 +484,6 @@  static int fuzz_read_msr(
     struct x86_emulate_ctxt *ctxt)
 {
     const struct fuzz_state *s = ctxt->data;
-    const struct fuzz_corpus *c = s->corpus;
     unsigned int idx;
 
     switch ( reg )
@@ -501,10 +497,10 @@  static int fuzz_read_msr(
          */
         return data_read(ctxt, x86_seg_none, "read_msr", val, sizeof(*val));
     case MSR_EFER:
-        *val = c->msr[MSRI_EFER];
+        *val = s->msr[MSRI_EFER];
         *val &= ~EFER_LMA;
-        if ( (*val & EFER_LME) && (c->cr[4] & X86_CR4_PAE) &&
-             (c->cr[0] & X86_CR0_PG) )
+        if ( (*val & EFER_LME) && (s->cr[4] & X86_CR4_PAE) &&
+             (s->cr[0] & X86_CR0_PG) )
         {
             printf("Setting EFER_LMA\n");
             *val |= EFER_LMA;
@@ -516,7 +512,7 @@  static int fuzz_read_msr(
     {
         if ( msr_index[idx] == reg )
         {
-            *val = c->msr[idx];
+            *val = s->msr[idx];
             return X86EMUL_OKAY;
         }
     }
@@ -531,7 +527,6 @@  static int fuzz_write_msr(
     struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
-    struct fuzz_corpus *c = s->corpus;
     unsigned int idx;
     int rc;
 
@@ -550,7 +545,7 @@  static int fuzz_write_msr(
     {
         if ( msr_index[idx] == reg )
         {
-            c->msr[idx] = val;
+            s->msr[idx] = val;
             return X86EMUL_OKAY;
         }
     }
@@ -600,15 +595,14 @@  static void setup_fpu_exception_handler(void)
 static void dump_state(struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
-    const struct fuzz_corpus *c = s->corpus;
     struct cpu_user_regs *regs = ctxt->regs;
     uint64_t val = 0;
 
     printf(" -- State -- \n");
     printf("addr / sp size: %d / %d\n", ctxt->addr_size, ctxt->sp_size);
-    printf(" cr0: %lx\n", c->cr[0]);
-    printf(" cr3: %lx\n", c->cr[3]);
-    printf(" cr4: %lx\n", c->cr[4]);
+    printf(" cr0: %lx\n", s->cr[0]);
+    printf(" cr3: %lx\n", s->cr[3]);
+    printf(" cr4: %lx\n", s->cr[4]);
 
     printf(" rip: %"PRIx64"\n", regs->rip);
 
@@ -629,15 +623,13 @@  static bool long_mode_active(struct x86_emulate_ctxt *ctxt)
 static bool in_longmode(struct x86_emulate_ctxt *ctxt)
 {
     const struct fuzz_state *s = ctxt->data;
-    const struct fuzz_corpus *c = s->corpus;
 
-    return long_mode_active(ctxt) && c->segments[x86_seg_cs].l;
+    return long_mode_active(ctxt) && s->segments[x86_seg_cs].l;
 }
 
 static void set_sizes(struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
-    const struct fuzz_corpus *c = s->corpus;
 
     ctxt->lma = long_mode_active(ctxt);
 
@@ -645,11 +637,20 @@  static void set_sizes(struct x86_emulate_ctxt *ctxt)
         ctxt->addr_size = ctxt->sp_size = 64;
     else
     {
-        ctxt->addr_size = c->segments[x86_seg_cs].db ? 32 : 16;
-        ctxt->sp_size   = c->segments[x86_seg_ss].db ? 32 : 16;
+        ctxt->addr_size = s->segments[x86_seg_cs].db ? 32 : 16;
+        ctxt->sp_size   = s->segments[x86_seg_ss].db ? 32 : 16;
     }
 }
 
+static void setup_state(struct x86_emulate_ctxt *ctxt)
+{
+    struct fuzz_state *s = ctxt->data;
+
+    /* Fuzz all of the state in one go */
+    if (!input_read(s, s, DATA_OFFSET))
+        exit(-1);
+}
+
 #define CANONICALIZE(x)                                   \
     do {                                                  \
         uint64_t _y = (x);                                \
@@ -709,8 +710,7 @@  enum {
 static void disable_hooks(struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
-    const struct fuzz_corpus *c = s->corpus;
-    unsigned long bitmap = c->options;
+    unsigned long bitmap = s->options;
 
     /* See also sanitize_input, some hooks can't be disabled. */
     MAYBE_DISABLE_HOOK(read);
@@ -760,12 +760,11 @@  static void disable_hooks(struct x86_emulate_ctxt *ctxt)
 static void sanitize_input(struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
-    struct fuzz_corpus *c = s->corpus;
-    struct cpu_user_regs *regs = &c->regs;
-    unsigned long bitmap = c->options;
+    struct cpu_user_regs *regs = ctxt->regs;
+    unsigned long bitmap = s->options;
 
     /* Some hooks can't be disabled. */
-    c->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch));
+    s->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch));
 
     /* Zero 'private' entries */
     regs->error_code = 0;
@@ -779,8 +778,8 @@  static void sanitize_input(struct x86_emulate_ctxt *ctxt)
      * CR0.PG can't be set if CR0.PE isn't set.  Set is more interesting, so
      * set PE if PG is set.
      */
-    if ( c->cr[0] & X86_CR0_PG )
-        c->cr[0] |= X86_CR0_PE;
+    if ( s->cr[0] & X86_CR0_PG )
+        s->cr[0] |= X86_CR0_PE;
 
     /* EFLAGS.VM not available in long mode */
     if ( long_mode_active(ctxt) )
@@ -789,8 +788,8 @@  static void sanitize_input(struct x86_emulate_ctxt *ctxt)
     /* EFLAGS.VM implies 16-bit mode */
     if ( regs->rflags & X86_EFLAGS_VM )
     {
-        c->segments[x86_seg_cs].db = 0;
-        c->segments[x86_seg_ss].db = 0;
+        s->segments[x86_seg_cs].db = 0;
+        s->segments[x86_seg_ss].db = 0;
     }
 }
 
@@ -812,7 +811,7 @@  int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
     };
     struct x86_emulate_ctxt ctxt = {
         .data = &state,
-        .regs = &input.regs,
+        .regs = &state.regs,
         .addr_size = 8 * sizeof(void *),
         .sp_size = 8 * sizeof(void *),
     };
@@ -836,7 +835,9 @@  int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
     memcpy(&input, data_p, size);
 
     state.corpus = &input;
-    state.data_num = size - DATA_OFFSET;
+    state.data_num = size;
+
+    setup_state(&ctxt);
 
     sanitize_input(&ctxt);