[v4,08/12] fuzz/x86_emulate: Move all state into fuzz_state
diff mbox

Message ID 20171011175243.19871-8-george.dunlap@citrix.com
State New, archived
Headers show

Commit Message

George Dunlap Oct. 11, 2017, 5:52 p.m. UTC
At the moment we copy data from the input into a struct named
'corpus', then read and write this state (so that it no longer
resembles the corpus that we read from).

Instead, move all "emulated cpu" state into fuzz_state, and explicitly
state that we are expecting to change it.  Get rid of 'input', and
always read data directly from the pointer passed into the fuzzer.

Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
v4:
 - Reword commit message to make it clear it's not just about the compact state
 - Get rid of fuzz_corpus entirely, and avoid the unnecessary copy
v3:
 - Move DATA_OFFSET inside the structure
 - Remove a stray blank line
v2: Port over previous changes

CC: Ian Jackson <ian.jackson@citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Jan Beulich <jbeulich@suse.com>
---
 tools/fuzz/x86_instruction_emulator/fuzz-emul.c | 114 +++++++++++-------------
 1 file changed, 53 insertions(+), 61 deletions(-)

Comments

Jan Beulich Oct. 12, 2017, 3:16 p.m. UTC | #1
>>> On 11.10.17 at 19:52, <george.dunlap@citrix.com> wrote:
> --- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
> +++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
> @@ -22,34 +22,31 @@
>  
>  #define SEG_NUM x86_seg_none
>  
> -/* Layout of data expected as fuzzing input. */
> -struct fuzz_corpus
> +/*
> + * State of the fuzzing harness and emulated cpu.  Calculated
> + * initially from the input corpus, and later mutated by the emulation
> + * callbacks (and the emulator itself, in the case of regs).
> + */
> +struct fuzz_state
>  {
> +    /* Emulated CPU state */
> +    unsigned long options;
>      unsigned long cr[5];
>      uint64_t msr[MSR_INDEX_MAX];
> -    struct cpu_user_regs regs;
>      struct segment_register segments[SEG_NUM];
> -    unsigned long options;
> -    unsigned char data[INPUT_SIZE];
> -} input;
> -#define DATA_OFFSET offsetof(struct fuzz_corpus, data)
> +    struct cpu_user_regs regs;
>  
> -/*
> - * Internal state of the fuzzing harness.  Calculated initially from the input
> - * corpus, and later mutates by the emulation callbacks.
> - */
> -struct fuzz_state
> -{
>      /* Fuzzer's input data. */
> -    struct fuzz_corpus *corpus;
> +#define DATA_OFFSET offsetof(struct fuzz_state, corpus)
> +    const unsigned char * corpus;

Stray blank after *. Also any reason this can't be uint8_t,
matching LLVMFuzzerTestOneInput()'s parameter and making
it possible to avoid the cast you currently use on that
assignment?

> @@ -646,11 +634,20 @@ static void set_sizes(struct x86_emulate_ctxt *ctxt)
>          ctxt->addr_size = ctxt->sp_size = 64;
>      else
>      {
> -        ctxt->addr_size = c->segments[x86_seg_cs].db ? 32 : 16;
> -        ctxt->sp_size   = c->segments[x86_seg_ss].db ? 32 : 16;
> +        ctxt->addr_size = s->segments[x86_seg_cs].db ? 32 : 16;
> +        ctxt->sp_size   = s->segments[x86_seg_ss].db ? 32 : 16;
>      }
>  }
>  
> +static void setup_state(struct x86_emulate_ctxt *ctxt)
> +{
> +    struct fuzz_state *s = ctxt->data;
> +
> +    /* Fuzz all of the emulated state in one go */
> +    if (!input_read(s, s, DATA_OFFSET))

Missing blanks.

> @@ -761,12 +757,11 @@ static void disable_hooks(struct x86_emulate_ctxt *ctxt)
>  static void sanitize_input(struct x86_emulate_ctxt *ctxt)
>  {
>      struct fuzz_state *s = ctxt->data;
> -    struct fuzz_corpus *c = s->corpus;
> -    struct cpu_user_regs *regs = &c->regs;
> -    unsigned long bitmap = c->options;
> +    struct cpu_user_regs *regs = ctxt->regs;
> +    unsigned long bitmap = s->options;
>  
>      /* Some hooks can't be disabled. */
> -    c->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch));
> +    s->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch));

Mind adding the missing blanks here while you touch this?

> @@ -834,10 +826,10 @@ int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
>          return 1;
>      }
>  
> -    memcpy(&input, data_p, size);
> +    state.corpus = (void*)data_p;

If for any reason the suggested type change can't or shouldn't be
done (and hence the cast needs to stay), then please add a blank
before * and don't cast away const-ness.

Jan
George Dunlap Oct. 13, 2017, 9:22 a.m. UTC | #2
On 10/12/2017 04:16 PM, Jan Beulich wrote:
>>>> On 11.10.17 at 19:52, <george.dunlap@citrix.com> wrote:
>> --- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
>> +++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
>> @@ -22,34 +22,31 @@
>>  
>>  #define SEG_NUM x86_seg_none
>>  
>> -/* Layout of data expected as fuzzing input. */
>> -struct fuzz_corpus
>> +/*
>> + * State of the fuzzing harness and emulated cpu.  Calculated
>> + * initially from the input corpus, and later mutated by the emulation
>> + * callbacks (and the emulator itself, in the case of regs).
>> + */
>> +struct fuzz_state
>>  {
>> +    /* Emulated CPU state */
>> +    unsigned long options;
>>      unsigned long cr[5];
>>      uint64_t msr[MSR_INDEX_MAX];
>> -    struct cpu_user_regs regs;
>>      struct segment_register segments[SEG_NUM];
>> -    unsigned long options;
>> -    unsigned char data[INPUT_SIZE];
>> -} input;
>> -#define DATA_OFFSET offsetof(struct fuzz_corpus, data)
>> +    struct cpu_user_regs regs;
>>  
>> -/*
>> - * Internal state of the fuzzing harness.  Calculated initially from the input
>> - * corpus, and later mutates by the emulation callbacks.
>> - */
>> -struct fuzz_state
>> -{
>>      /* Fuzzer's input data. */
>> -    struct fuzz_corpus *corpus;
>> +#define DATA_OFFSET offsetof(struct fuzz_state, corpus)
>> +    const unsigned char * corpus;
> 
> Stray blank after *. Also any reason this can't be uint8_t,
> matching LLVMFuzzerTestOneInput()'s parameter and making
> it possible to avoid the cast you currently use on that
> assignment?

For some reason I thought this would make things uglier; but it actually
works pretty well.

>> @@ -646,11 +634,20 @@ static void set_sizes(struct x86_emulate_ctxt *ctxt)
>>          ctxt->addr_size = ctxt->sp_size = 64;
>>      else
>>      {
>> -        ctxt->addr_size = c->segments[x86_seg_cs].db ? 32 : 16;
>> -        ctxt->sp_size   = c->segments[x86_seg_ss].db ? 32 : 16;
>> +        ctxt->addr_size = s->segments[x86_seg_cs].db ? 32 : 16;
>> +        ctxt->sp_size   = s->segments[x86_seg_ss].db ? 32 : 16;
>>      }
>>  }
>>  
>> +static void setup_state(struct x86_emulate_ctxt *ctxt)
>> +{
>> +    struct fuzz_state *s = ctxt->data;
>> +
>> +    /* Fuzz all of the emulated state in one go */
>> +    if (!input_read(s, s, DATA_OFFSET))
> 
> Missing blanks.

Ack

> 
>> @@ -761,12 +757,11 @@ static void disable_hooks(struct x86_emulate_ctxt *ctxt)
>>  static void sanitize_input(struct x86_emulate_ctxt *ctxt)
>>  {
>>      struct fuzz_state *s = ctxt->data;
>> -    struct fuzz_corpus *c = s->corpus;
>> -    struct cpu_user_regs *regs = &c->regs;
>> -    unsigned long bitmap = c->options;
>> +    struct cpu_user_regs *regs = ctxt->regs;
>> +    unsigned long bitmap = s->options;
>>  
>>      /* Some hooks can't be disabled. */
>> -    c->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch));
>> +    s->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch));
> 
> Mind adding the missing blanks here while you touch this?

Like this?

    s->options &= ~((1<<HOOK_read) | (1<<HOOK_insn_fetch));

Thanks,
 -George
Jan Beulich Oct. 13, 2017, 9:54 a.m. UTC | #3
>>> On 13.10.17 at 11:22, <george.dunlap@citrix.com> wrote:
> On 10/12/2017 04:16 PM, Jan Beulich wrote:
>>>>> On 11.10.17 at 19:52, <george.dunlap@citrix.com> wrote:
>>> @@ -761,12 +757,11 @@ static void disable_hooks(struct x86_emulate_ctxt *ctxt)
>>>  static void sanitize_input(struct x86_emulate_ctxt *ctxt)
>>>  {
>>>      struct fuzz_state *s = ctxt->data;
>>> -    struct fuzz_corpus *c = s->corpus;
>>> -    struct cpu_user_regs *regs = &c->regs;
>>> -    unsigned long bitmap = c->options;
>>> +    struct cpu_user_regs *regs = ctxt->regs;
>>> +    unsigned long bitmap = s->options;
>>>  
>>>      /* Some hooks can't be disabled. */
>>> -    c->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch));
>>> +    s->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch));
>> 
>> Mind adding the missing blanks here while you touch this?
> 
> Like this?
> 
>     s->options &= ~((1<<HOOK_read) | (1<<HOOK_insn_fetch));

Even farther (at the same time adding the missing number suffixes):

    s->options &= ~((1UL << HOOK_read) | (1UL << HOOK_insn_fetch));

Jan
George Dunlap Oct. 13, 2017, 9:55 a.m. UTC | #4
On 10/13/2017 10:54 AM, Jan Beulich wrote:
>>>> On 13.10.17 at 11:22, <george.dunlap@citrix.com> wrote:
>> On 10/12/2017 04:16 PM, Jan Beulich wrote:
>>>>>> On 11.10.17 at 19:52, <george.dunlap@citrix.com> wrote:
>>>> @@ -761,12 +757,11 @@ static void disable_hooks(struct x86_emulate_ctxt *ctxt)
>>>>  static void sanitize_input(struct x86_emulate_ctxt *ctxt)
>>>>  {
>>>>      struct fuzz_state *s = ctxt->data;
>>>> -    struct fuzz_corpus *c = s->corpus;
>>>> -    struct cpu_user_regs *regs = &c->regs;
>>>> -    unsigned long bitmap = c->options;
>>>> +    struct cpu_user_regs *regs = ctxt->regs;
>>>> +    unsigned long bitmap = s->options;
>>>>  
>>>>      /* Some hooks can't be disabled. */
>>>> -    c->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch));
>>>> +    s->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch));
>>>
>>> Mind adding the missing blanks here while you touch this?
>>
>> Like this?
>>
>>     s->options &= ~((1<<HOOK_read) | (1<<HOOK_insn_fetch));
> 
> Even farther (at the same time adding the missing number suffixes):
> 
>     s->options &= ~((1UL << HOOK_read) | (1UL << HOOK_insn_fetch));

Got it.  (I was actually trying to verify the 'snuggly' outer braces,
but missed spaces around the '<<'s).

 -George

Patch
diff mbox

diff --git a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
index 964682aa1a..4e3751ce50 100644
--- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
+++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
@@ -22,34 +22,31 @@ 
 
 #define SEG_NUM x86_seg_none
 
-/* Layout of data expected as fuzzing input. */
-struct fuzz_corpus
+/*
+ * State of the fuzzing harness and emulated cpu.  Calculated
+ * initially from the input corpus, and later mutated by the emulation
+ * callbacks (and the emulator itself, in the case of regs).
+ */
+struct fuzz_state
 {
+    /* Emulated CPU state */
+    unsigned long options;
     unsigned long cr[5];
     uint64_t msr[MSR_INDEX_MAX];
-    struct cpu_user_regs regs;
     struct segment_register segments[SEG_NUM];
-    unsigned long options;
-    unsigned char data[INPUT_SIZE];
-} input;
-#define DATA_OFFSET offsetof(struct fuzz_corpus, data)
+    struct cpu_user_regs regs;
 
-/*
- * Internal state of the fuzzing harness.  Calculated initially from the input
- * corpus, and later mutates by the emulation callbacks.
- */
-struct fuzz_state
-{
     /* Fuzzer's input data. */
-    struct fuzz_corpus *corpus;
+#define DATA_OFFSET offsetof(struct fuzz_state, corpus)
+    const unsigned char * corpus;
 
-    /* Real amount of data backing corpus->data[]. */
+    /* Real amount of data backing corpus[]. */
     size_t data_num;
 
-    /* Amount of corpus->data[] consumed thus far. */
+    /* Amount of corpus[] data consumed thus far. */
     size_t data_index;
 
-    /* Emulation ops, some of which are disabled based on corpus->options. */
+    /* Emulation ops, some of which are disabled based on options. */
     struct x86_emulate_ops ops;
 };
 
@@ -63,7 +60,7 @@  static inline bool input_read(struct fuzz_state *s, void *dst, size_t size)
     if ( !input_avail(s, size) )
         return false;
 
-    memcpy(dst, &s->corpus->data[s->data_index], size);
+    memcpy(dst, &s->corpus[s->data_index], size);
     s->data_index += size;
 
     return true;
@@ -393,11 +390,10 @@  static int fuzz_read_segment(
     struct x86_emulate_ctxt *ctxt)
 {
     const struct fuzz_state *s = ctxt->data;
-    const struct fuzz_corpus *c = s->corpus;
 
     assert(is_x86_user_segment(seg) || is_x86_system_segment(seg));
 
-    *reg = c->segments[seg];
+    *reg = s->segments[seg];
 
     return X86EMUL_OKAY;
 }
@@ -408,7 +404,6 @@  static int fuzz_write_segment(
     struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
-    struct fuzz_corpus *c = s->corpus;
     int rc;
 
     assert(is_x86_user_segment(seg) || is_x86_system_segment(seg));
@@ -416,7 +411,7 @@  static int fuzz_write_segment(
     rc = maybe_fail(ctxt, "write_segment", true);
 
     if ( rc == X86EMUL_OKAY )
-        c->segments[seg] = *reg;
+        s->segments[seg] = *reg;
 
     return rc;
 }
@@ -427,12 +422,11 @@  static int fuzz_read_cr(
     struct x86_emulate_ctxt *ctxt)
 {
     const struct fuzz_state *s = ctxt->data;
-    const struct fuzz_corpus *c = s->corpus;
 
-    if ( reg >= ARRAY_SIZE(c->cr) )
+    if ( reg >= ARRAY_SIZE(s->cr) )
         return X86EMUL_UNHANDLEABLE;
 
-    *val = c->cr[reg];
+    *val = s->cr[reg];
 
     return X86EMUL_OKAY;
 }
@@ -443,17 +437,16 @@  static int fuzz_write_cr(
     struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
-    struct fuzz_corpus *c = s->corpus;
     int rc;
 
-    if ( reg >= ARRAY_SIZE(c->cr) )
+    if ( reg >= ARRAY_SIZE(s->cr) )
         return X86EMUL_UNHANDLEABLE;
 
     rc = maybe_fail(ctxt, "write_cr", true);
     if ( rc != X86EMUL_OKAY )
         return rc;
 
-    c->cr[reg] = val;
+    s->cr[reg] = val;
 
     return X86EMUL_OKAY;
 }
@@ -488,7 +481,6 @@  static int fuzz_read_msr(
     struct x86_emulate_ctxt *ctxt)
 {
     const struct fuzz_state *s = ctxt->data;
-    const struct fuzz_corpus *c = s->corpus;
     unsigned int idx;
 
     switch ( reg )
@@ -502,10 +494,10 @@  static int fuzz_read_msr(
          */
         return data_read(ctxt, x86_seg_none, "read_msr", val, sizeof(*val));
     case MSR_EFER:
-        *val = c->msr[MSRI_EFER];
+        *val = s->msr[MSRI_EFER];
         *val &= ~EFER_LMA;
-        if ( (*val & EFER_LME) && (c->cr[4] & X86_CR4_PAE) &&
-             (c->cr[0] & X86_CR0_PG) )
+        if ( (*val & EFER_LME) && (s->cr[4] & X86_CR4_PAE) &&
+             (s->cr[0] & X86_CR0_PG) )
         {
             printf("Setting EFER_LMA\n");
             *val |= EFER_LMA;
@@ -517,7 +509,7 @@  static int fuzz_read_msr(
     {
         if ( msr_index[idx] == reg )
         {
-            *val = c->msr[idx];
+            *val = s->msr[idx];
             return X86EMUL_OKAY;
         }
     }
@@ -532,7 +524,6 @@  static int fuzz_write_msr(
     struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
-    struct fuzz_corpus *c = s->corpus;
     unsigned int idx;
     int rc;
 
@@ -551,7 +542,7 @@  static int fuzz_write_msr(
     {
         if ( msr_index[idx] == reg )
         {
-            c->msr[idx] = val;
+            s->msr[idx] = val;
             return X86EMUL_OKAY;
         }
     }
@@ -601,15 +592,14 @@  static void setup_fpu_exception_handler(void)
 static void dump_state(struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
-    const struct fuzz_corpus *c = s->corpus;
     struct cpu_user_regs *regs = ctxt->regs;
     uint64_t val = 0;
 
     printf(" -- State -- \n");
     printf("addr / sp size: %d / %d\n", ctxt->addr_size, ctxt->sp_size);
-    printf(" cr0: %lx\n", c->cr[0]);
-    printf(" cr3: %lx\n", c->cr[3]);
-    printf(" cr4: %lx\n", c->cr[4]);
+    printf(" cr0: %lx\n", s->cr[0]);
+    printf(" cr3: %lx\n", s->cr[3]);
+    printf(" cr4: %lx\n", s->cr[4]);
 
     printf(" rip: %"PRIx64"\n", regs->rip);
 
@@ -630,15 +620,13 @@  static bool long_mode_active(struct x86_emulate_ctxt *ctxt)
 static bool in_longmode(struct x86_emulate_ctxt *ctxt)
 {
     const struct fuzz_state *s = ctxt->data;
-    const struct fuzz_corpus *c = s->corpus;
 
-    return long_mode_active(ctxt) && c->segments[x86_seg_cs].l;
+    return long_mode_active(ctxt) && s->segments[x86_seg_cs].l;
 }
 
 static void set_sizes(struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
-    const struct fuzz_corpus *c = s->corpus;
 
     ctxt->lma = long_mode_active(ctxt);
 
@@ -646,11 +634,20 @@  static void set_sizes(struct x86_emulate_ctxt *ctxt)
         ctxt->addr_size = ctxt->sp_size = 64;
     else
     {
-        ctxt->addr_size = c->segments[x86_seg_cs].db ? 32 : 16;
-        ctxt->sp_size   = c->segments[x86_seg_ss].db ? 32 : 16;
+        ctxt->addr_size = s->segments[x86_seg_cs].db ? 32 : 16;
+        ctxt->sp_size   = s->segments[x86_seg_ss].db ? 32 : 16;
     }
 }
 
+static void setup_state(struct x86_emulate_ctxt *ctxt)
+{
+    struct fuzz_state *s = ctxt->data;
+
+    /* Fuzz all of the emulated state in one go */
+    if (!input_read(s, s, DATA_OFFSET))
+        exit(-1);
+}
+
 #define CANONICALIZE(x)                                   \
     do {                                                  \
         uint64_t _y = (x);                                \
@@ -710,8 +707,7 @@  enum {
 static void disable_hooks(struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
-    const struct fuzz_corpus *c = s->corpus;
-    unsigned long bitmap = c->options;
+    unsigned long bitmap = s->options;
 
     /* See also sanitize_input, some hooks can't be disabled. */
     MAYBE_DISABLE_HOOK(read);
@@ -761,12 +757,11 @@  static void disable_hooks(struct x86_emulate_ctxt *ctxt)
 static void sanitize_input(struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
-    struct fuzz_corpus *c = s->corpus;
-    struct cpu_user_regs *regs = &c->regs;
-    unsigned long bitmap = c->options;
+    struct cpu_user_regs *regs = ctxt->regs;
+    unsigned long bitmap = s->options;
 
     /* Some hooks can't be disabled. */
-    c->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch));
+    s->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch));
 
     /* Zero 'private' entries */
     regs->error_code = 0;
@@ -780,8 +775,8 @@  static void sanitize_input(struct x86_emulate_ctxt *ctxt)
      * CR0.PG can't be set if CR0.PE isn't set.  Set is more interesting, so
      * set PE if PG is set.
      */
-    if ( c->cr[0] & X86_CR0_PG )
-        c->cr[0] |= X86_CR0_PE;
+    if ( s->cr[0] & X86_CR0_PG )
+        s->cr[0] |= X86_CR0_PE;
 
     /* EFLAGS.VM not available in long mode */
     if ( long_mode_active(ctxt) )
@@ -790,8 +785,8 @@  static void sanitize_input(struct x86_emulate_ctxt *ctxt)
     /* EFLAGS.VM implies 16-bit mode */
     if ( regs->rflags & X86_EFLAGS_VM )
     {
-        c->segments[x86_seg_cs].db = 0;
-        c->segments[x86_seg_ss].db = 0;
+        s->segments[x86_seg_cs].db = 0;
+        s->segments[x86_seg_ss].db = 0;
     }
 }
 
@@ -813,15 +808,12 @@  int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
     };
     struct x86_emulate_ctxt ctxt = {
         .data = &state,
-        .regs = &input.regs,
+        .regs = &state.regs,
         .addr_size = 8 * sizeof(void *),
         .sp_size = 8 * sizeof(void *),
     };
     int rc;
 
-    /* Reset all global state variables */
-    memset(&input, 0, sizeof(input));
-
     if ( size <= DATA_OFFSET )
     {
         printf("Input too small\n");
@@ -834,10 +826,10 @@  int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
         return 1;
     }
 
-    memcpy(&input, data_p, size);
+    state.corpus = (void*)data_p;
+    state.data_num = size;
 
-    state.corpus = &input;
-    state.data_num = size - DATA_OFFSET;
+    setup_state(&ctxt);
 
     sanitize_input(&ctxt);