[v4,09/12] fuzz/x86_emulate: Make input more compact
diff mbox

Message ID 20171011175243.19871-9-george.dunlap@citrix.com
State New, archived
Headers show

Commit Message

George Dunlap Oct. 11, 2017, 5:52 p.m. UTC
At the moment, AFL reckons that for any given input, 87% of it is
completely irrelevant: that is, it can change it as much as it wants
but have no impact on the result of the test; and yet it can't remove
it.

This is largely because we interpret the blob handed to us as a large
struct, including CR values, MSR values, segment registers, and a full
cpu_user_regs.

Instead, modify our interpretation to have a "set state" stanza at the
front.  Begin by reading a 16-bit value; if it is lower than a certain
threshold, set some state according to what byte it is, and repeat.
Continue until the byte is above a certain threshold.

This allows AFL to compact any given test case much smaller; to the
point where now it reckons there is not a single byte of the test file
which becomes irrelevant.  Testing have shown that this option both
allows AFL to reach coverage much faster, and to have a total coverage
higher than with the old format.

Make this an option (rather than a unilateral change) to enable
side-by-side performance comparison of the old and new formats.

Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
---
v4:
 - Rebase over previous changes
v3:
 - Set default for opt_compact statically in fuzz-emul.c rather than afl-harness
 - Make input size / unconditional input reading more consistent
 - Require only minimum input read, not first instruction byte
 - Use ARRAY_SIZE() for hardcoded values
 - Use `for ( ; ; )` rather than `while(1)`
 - Some style issues
 - Move opt_compact declaration into fuzz-emul.h
v2: Port over previous changes

CC: Ian Jackson <ian.jackson@citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Jan Beulich <jbeulich@suse.com>
---
 tools/fuzz/x86_instruction_emulator/afl-harness.c |   9 +-
 tools/fuzz/x86_instruction_emulator/fuzz-emul.c   | 103 +++++++++++++++++++---
 tools/fuzz/x86_instruction_emulator/fuzz-emul.h   |   2 +
 3 files changed, 99 insertions(+), 15 deletions(-)

Patch
diff mbox

diff --git a/tools/fuzz/x86_instruction_emulator/afl-harness.c b/tools/fuzz/x86_instruction_emulator/afl-harness.c
index d514468dd2..23a77a73c0 100644
--- a/tools/fuzz/x86_instruction_emulator/afl-harness.c
+++ b/tools/fuzz/x86_instruction_emulator/afl-harness.c
@@ -4,6 +4,7 @@ 
 #include <stdlib.h>
 #include <string.h>
 #include <getopt.h>
+#include <stdbool.h>
 #include "fuzz-emul.h"
 
 static uint8_t input[INPUT_SIZE];
@@ -21,9 +22,11 @@  int main(int argc, char **argv)
     {
         enum {
             OPT_MIN_SIZE,
+            OPT_COMPACT,
         };
         static const struct option lopts[] = {
             { "min-input-size", no_argument, NULL, OPT_MIN_SIZE },
+            { "compact", required_argument, NULL, OPT_COMPACT },
             { 0, 0, 0, 0 }
         };
         int c = getopt_long_only(argc, argv, "", lopts, NULL);
@@ -38,8 +41,12 @@  int main(int argc, char **argv)
             exit(0);
             break;
 
+        case OPT_COMPACT:
+            opt_compact = atoi(optarg);
+            break;
+            
         case '?':
-            printf("Usage: %s $FILE [$FILE...] | [--min-input-size]\n", argv[0]);
+            printf("Usage: %s [--compact=0|1] $FILE [$FILE...] | [--min-input-size]\n", argv[0]);
             exit(-1);
             break;
 
diff --git a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
index 4e3751ce50..3ea8a8b726 100644
--- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
+++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
@@ -31,14 +31,15 @@  struct fuzz_state
 {
     /* Emulated CPU state */
     unsigned long options;
+#define DATA_SIZE_COMPACT offsetof(struct fuzz_state, cr)
     unsigned long cr[5];
     uint64_t msr[MSR_INDEX_MAX];
     struct segment_register segments[SEG_NUM];
     struct cpu_user_regs regs;
 
     /* Fuzzer's input data. */
-#define DATA_OFFSET offsetof(struct fuzz_state, corpus)
-    const unsigned char * corpus;
+#define DATA_SIZE_FULL offsetof(struct fuzz_state, corpus)
+    const unsigned char *corpus;
 
     /* Real amount of data backing corpus[]. */
     size_t data_num;
@@ -50,6 +51,15 @@  struct fuzz_state
     struct x86_emulate_ops ops;
 };
 
+bool opt_compact = true;
+
+unsigned int fuzz_minimal_input_size(void)
+{
+    BUILD_BUG_ON(DATA_SIZE_FULL > INPUT_SIZE);
+
+    return opt_compact ? DATA_SIZE_COMPACT : DATA_SIZE_FULL;
+}
+
 static inline bool input_avail(const struct fuzz_state *s, size_t size)
 {
     return s->data_index + size <= s->data_num;
@@ -643,9 +653,82 @@  static void setup_state(struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
 
-    /* Fuzz all of the emulated state in one go */
-    if (!input_read(s, s, DATA_OFFSET))
-        exit(-1);
+    if ( !opt_compact )
+    {
+        /* Fuzz all of the emulated state in one go */
+        if ( !input_read(s, s, DATA_SIZE_FULL) )
+            exit(-1);
+        return;
+    }
+
+    /* Modify only select bits of state */
+
+    /* Always read 'options' */
+    if ( !input_read(s, s, DATA_SIZE_COMPACT) )
+        return;
+    
+    for ( ; ; )
+    {
+        uint16_t offset;
+
+        /* Read 16 bits to decide what bit of state to modify */
+        if ( !input_read(s, &offset, sizeof(offset)) )
+            return;
+
+        /* 
+         * Then decide if it's "pointing to" different bits of the
+         * state 
+         */
+
+        /* cr[]? */
+        if ( offset < ARRAY_SIZE(s->cr) )
+        {
+            if ( !input_read(s, s->cr + offset, sizeof(*s->cr)) )
+                return;
+            printf("Setting CR %d to %lx\n", offset, s->cr[offset]);
+            continue;
+        }
+        
+        offset -= ARRAY_SIZE(s->cr);
+
+        /* msr[]? */
+        if ( offset < ARRAY_SIZE(s->msr) )
+        {
+            if ( !input_read(s, s->msr + offset, sizeof(*s->msr)) )
+                return;
+            printf("Setting MSR i%d (%x) to %lx\n", offset,
+                   msr_index[offset], s->msr[offset]);
+            continue;
+        }
+
+        offset -= ARRAY_SIZE(s->msr);
+
+        /* segments[]? */
+        if ( offset < ARRAY_SIZE(s->segments) )
+        {
+            if ( !input_read(s, s->segments + offset, sizeof(*s->segments)) )
+                return;
+            printf("Setting Segment %d\n", offset);
+            continue;
+            
+        }
+
+        offset -= ARRAY_SIZE(s->segments);
+
+        /* regs? */
+        if ( offset < sizeof(struct cpu_user_regs)
+             && offset + sizeof(uint64_t) <= sizeof(struct cpu_user_regs) )
+        {
+            if ( !input_read(s, ((char *)ctxt->regs) + offset, sizeof(uint64_t)) )
+                return;
+            printf("Setting cpu_user_regs offset %x\n", offset);
+            continue;
+        }
+
+        /* None of the above -- take that as "start emulating" */
+        
+        return;
+    }
 }
 
 #define CANONICALIZE(x)                                   \
@@ -814,7 +897,7 @@  int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
     };
     int rc;
 
-    if ( size <= DATA_OFFSET )
+    if ( size <= fuzz_minimal_input_size() )
     {
         printf("Input too small\n");
         return 1;
@@ -848,14 +931,6 @@  int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
 
     return 0;
 }
-
-unsigned int fuzz_minimal_input_size(void)
-{
-    BUILD_BUG_ON(DATA_OFFSET > INPUT_SIZE);
-
-    return DATA_OFFSET + 1;
-}
-
 /*
  * Local variables:
  * mode: C
diff --git a/tools/fuzz/x86_instruction_emulator/fuzz-emul.h b/tools/fuzz/x86_instruction_emulator/fuzz-emul.h
index 30dd8de21e..85d21cbf0f 100644
--- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.h
+++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.h
@@ -5,6 +5,8 @@  extern int LLVMFuzzerInitialize(int *argc, char ***argv);
 extern int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size);
 extern unsigned int fuzz_minimal_input_size(void);
 
+extern bool opt_compact;
+
 #define INPUT_SIZE  4096
 
 #endif /* ifdef FUZZ_EMUL_H */