Message ID | 20171011175243.19871-8-george.dunlap@citrix.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
>>> On 11.10.17 at 19:52, <george.dunlap@citrix.com> wrote: > --- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c > +++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c > @@ -22,34 +22,31 @@ > > #define SEG_NUM x86_seg_none > > -/* Layout of data expected as fuzzing input. */ > -struct fuzz_corpus > +/* > + * State of the fuzzing harness and emulated cpu. Calculated > + * initially from the input corpus, and later mutated by the emulation > + * callbacks (and the emulator itself, in the case of regs). > + */ > +struct fuzz_state > { > + /* Emulated CPU state */ > + unsigned long options; > unsigned long cr[5]; > uint64_t msr[MSR_INDEX_MAX]; > - struct cpu_user_regs regs; > struct segment_register segments[SEG_NUM]; > - unsigned long options; > - unsigned char data[INPUT_SIZE]; > -} input; > -#define DATA_OFFSET offsetof(struct fuzz_corpus, data) > + struct cpu_user_regs regs; > > -/* > - * Internal state of the fuzzing harness. Calculated initially from the input > - * corpus, and later mutates by the emulation callbacks. > - */ > -struct fuzz_state > -{ > /* Fuzzer's input data. */ > - struct fuzz_corpus *corpus; > +#define DATA_OFFSET offsetof(struct fuzz_state, corpus) > + const unsigned char * corpus; Stray blank after *. Also any reason this can't be uint8_t, matching LLVMFuzzerTestOneInput()'s parameter and making it possible to avoid the cast you currently use on that assignment? > @@ -646,11 +634,20 @@ static void set_sizes(struct x86_emulate_ctxt *ctxt) > ctxt->addr_size = ctxt->sp_size = 64; > else > { > - ctxt->addr_size = c->segments[x86_seg_cs].db ? 32 : 16; > - ctxt->sp_size = c->segments[x86_seg_ss].db ? 32 : 16; > + ctxt->addr_size = s->segments[x86_seg_cs].db ? 32 : 16; > + ctxt->sp_size = s->segments[x86_seg_ss].db ? 32 : 16; > } > } > > +static void setup_state(struct x86_emulate_ctxt *ctxt) > +{ > + struct fuzz_state *s = ctxt->data; > + > + /* Fuzz all of the emulated state in one go */ > + if (!input_read(s, s, DATA_OFFSET)) Missing blanks. > @@ -761,12 +757,11 @@ static void disable_hooks(struct x86_emulate_ctxt *ctxt) > static void sanitize_input(struct x86_emulate_ctxt *ctxt) > { > struct fuzz_state *s = ctxt->data; > - struct fuzz_corpus *c = s->corpus; > - struct cpu_user_regs *regs = &c->regs; > - unsigned long bitmap = c->options; > + struct cpu_user_regs *regs = ctxt->regs; > + unsigned long bitmap = s->options; > > /* Some hooks can't be disabled. */ > - c->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch)); > + s->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch)); Mind adding the missing blanks here while you touch this? > @@ -834,10 +826,10 @@ int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size) > return 1; > } > > - memcpy(&input, data_p, size); > + state.corpus = (void*)data_p; If for any reason the suggested type change can't or shouldn't be done (and hence the cast needs to stay), then please add a blank before * and don't cast away const-ness. Jan
On 10/12/2017 04:16 PM, Jan Beulich wrote: >>>> On 11.10.17 at 19:52, <george.dunlap@citrix.com> wrote: >> --- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c >> +++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c >> @@ -22,34 +22,31 @@ >> >> #define SEG_NUM x86_seg_none >> >> -/* Layout of data expected as fuzzing input. */ >> -struct fuzz_corpus >> +/* >> + * State of the fuzzing harness and emulated cpu. Calculated >> + * initially from the input corpus, and later mutated by the emulation >> + * callbacks (and the emulator itself, in the case of regs). >> + */ >> +struct fuzz_state >> { >> + /* Emulated CPU state */ >> + unsigned long options; >> unsigned long cr[5]; >> uint64_t msr[MSR_INDEX_MAX]; >> - struct cpu_user_regs regs; >> struct segment_register segments[SEG_NUM]; >> - unsigned long options; >> - unsigned char data[INPUT_SIZE]; >> -} input; >> -#define DATA_OFFSET offsetof(struct fuzz_corpus, data) >> + struct cpu_user_regs regs; >> >> -/* >> - * Internal state of the fuzzing harness. Calculated initially from the input >> - * corpus, and later mutates by the emulation callbacks. >> - */ >> -struct fuzz_state >> -{ >> /* Fuzzer's input data. */ >> - struct fuzz_corpus *corpus; >> +#define DATA_OFFSET offsetof(struct fuzz_state, corpus) >> + const unsigned char * corpus; > > Stray blank after *. Also any reason this can't be uint8_t, > matching LLVMFuzzerTestOneInput()'s parameter and making > it possible to avoid the cast you currently use on that > assignment? For some reason I thought this would make things uglier; but it actually works pretty well. >> @@ -646,11 +634,20 @@ static void set_sizes(struct x86_emulate_ctxt *ctxt) >> ctxt->addr_size = ctxt->sp_size = 64; >> else >> { >> - ctxt->addr_size = c->segments[x86_seg_cs].db ? 32 : 16; >> - ctxt->sp_size = c->segments[x86_seg_ss].db ? 32 : 16; >> + ctxt->addr_size = s->segments[x86_seg_cs].db ? 32 : 16; >> + ctxt->sp_size = s->segments[x86_seg_ss].db ? 32 : 16; >> } >> } >> >> +static void setup_state(struct x86_emulate_ctxt *ctxt) >> +{ >> + struct fuzz_state *s = ctxt->data; >> + >> + /* Fuzz all of the emulated state in one go */ >> + if (!input_read(s, s, DATA_OFFSET)) > > Missing blanks. Ack > >> @@ -761,12 +757,11 @@ static void disable_hooks(struct x86_emulate_ctxt *ctxt) >> static void sanitize_input(struct x86_emulate_ctxt *ctxt) >> { >> struct fuzz_state *s = ctxt->data; >> - struct fuzz_corpus *c = s->corpus; >> - struct cpu_user_regs *regs = &c->regs; >> - unsigned long bitmap = c->options; >> + struct cpu_user_regs *regs = ctxt->regs; >> + unsigned long bitmap = s->options; >> >> /* Some hooks can't be disabled. */ >> - c->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch)); >> + s->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch)); > > Mind adding the missing blanks here while you touch this? Like this? s->options &= ~((1<<HOOK_read) | (1<<HOOK_insn_fetch)); Thanks, -George
>>> On 13.10.17 at 11:22, <george.dunlap@citrix.com> wrote: > On 10/12/2017 04:16 PM, Jan Beulich wrote: >>>>> On 11.10.17 at 19:52, <george.dunlap@citrix.com> wrote: >>> @@ -761,12 +757,11 @@ static void disable_hooks(struct x86_emulate_ctxt *ctxt) >>> static void sanitize_input(struct x86_emulate_ctxt *ctxt) >>> { >>> struct fuzz_state *s = ctxt->data; >>> - struct fuzz_corpus *c = s->corpus; >>> - struct cpu_user_regs *regs = &c->regs; >>> - unsigned long bitmap = c->options; >>> + struct cpu_user_regs *regs = ctxt->regs; >>> + unsigned long bitmap = s->options; >>> >>> /* Some hooks can't be disabled. */ >>> - c->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch)); >>> + s->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch)); >> >> Mind adding the missing blanks here while you touch this? > > Like this? > > s->options &= ~((1<<HOOK_read) | (1<<HOOK_insn_fetch)); Even farther (at the same time adding the missing number suffixes): s->options &= ~((1UL << HOOK_read) | (1UL << HOOK_insn_fetch)); Jan
On 10/13/2017 10:54 AM, Jan Beulich wrote: >>>> On 13.10.17 at 11:22, <george.dunlap@citrix.com> wrote: >> On 10/12/2017 04:16 PM, Jan Beulich wrote: >>>>>> On 11.10.17 at 19:52, <george.dunlap@citrix.com> wrote: >>>> @@ -761,12 +757,11 @@ static void disable_hooks(struct x86_emulate_ctxt *ctxt) >>>> static void sanitize_input(struct x86_emulate_ctxt *ctxt) >>>> { >>>> struct fuzz_state *s = ctxt->data; >>>> - struct fuzz_corpus *c = s->corpus; >>>> - struct cpu_user_regs *regs = &c->regs; >>>> - unsigned long bitmap = c->options; >>>> + struct cpu_user_regs *regs = ctxt->regs; >>>> + unsigned long bitmap = s->options; >>>> >>>> /* Some hooks can't be disabled. */ >>>> - c->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch)); >>>> + s->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch)); >>> >>> Mind adding the missing blanks here while you touch this? >> >> Like this? >> >> s->options &= ~((1<<HOOK_read) | (1<<HOOK_insn_fetch)); > > Even farther (at the same time adding the missing number suffixes): > > s->options &= ~((1UL << HOOK_read) | (1UL << HOOK_insn_fetch)); Got it. (I was actually trying to verify the 'snuggly' outer braces, but missed spaces around the '<<'s). -George
diff --git a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c index 964682aa1a..4e3751ce50 100644 --- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c +++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c @@ -22,34 +22,31 @@ #define SEG_NUM x86_seg_none -/* Layout of data expected as fuzzing input. */ -struct fuzz_corpus +/* + * State of the fuzzing harness and emulated cpu. Calculated + * initially from the input corpus, and later mutated by the emulation + * callbacks (and the emulator itself, in the case of regs). + */ +struct fuzz_state { + /* Emulated CPU state */ + unsigned long options; unsigned long cr[5]; uint64_t msr[MSR_INDEX_MAX]; - struct cpu_user_regs regs; struct segment_register segments[SEG_NUM]; - unsigned long options; - unsigned char data[INPUT_SIZE]; -} input; -#define DATA_OFFSET offsetof(struct fuzz_corpus, data) + struct cpu_user_regs regs; -/* - * Internal state of the fuzzing harness. Calculated initially from the input - * corpus, and later mutates by the emulation callbacks. - */ -struct fuzz_state -{ /* Fuzzer's input data. */ - struct fuzz_corpus *corpus; +#define DATA_OFFSET offsetof(struct fuzz_state, corpus) + const unsigned char * corpus; - /* Real amount of data backing corpus->data[]. */ + /* Real amount of data backing corpus[]. */ size_t data_num; - /* Amount of corpus->data[] consumed thus far. */ + /* Amount of corpus[] data consumed thus far. */ size_t data_index; - /* Emulation ops, some of which are disabled based on corpus->options. */ + /* Emulation ops, some of which are disabled based on options. */ struct x86_emulate_ops ops; }; @@ -63,7 +60,7 @@ static inline bool input_read(struct fuzz_state *s, void *dst, size_t size) if ( !input_avail(s, size) ) return false; - memcpy(dst, &s->corpus->data[s->data_index], size); + memcpy(dst, &s->corpus[s->data_index], size); s->data_index += size; return true; @@ -393,11 +390,10 @@ static int fuzz_read_segment( struct x86_emulate_ctxt *ctxt) { const struct fuzz_state *s = ctxt->data; - const struct fuzz_corpus *c = s->corpus; assert(is_x86_user_segment(seg) || is_x86_system_segment(seg)); - *reg = c->segments[seg]; + *reg = s->segments[seg]; return X86EMUL_OKAY; } @@ -408,7 +404,6 @@ static int fuzz_write_segment( struct x86_emulate_ctxt *ctxt) { struct fuzz_state *s = ctxt->data; - struct fuzz_corpus *c = s->corpus; int rc; assert(is_x86_user_segment(seg) || is_x86_system_segment(seg)); @@ -416,7 +411,7 @@ static int fuzz_write_segment( rc = maybe_fail(ctxt, "write_segment", true); if ( rc == X86EMUL_OKAY ) - c->segments[seg] = *reg; + s->segments[seg] = *reg; return rc; } @@ -427,12 +422,11 @@ static int fuzz_read_cr( struct x86_emulate_ctxt *ctxt) { const struct fuzz_state *s = ctxt->data; - const struct fuzz_corpus *c = s->corpus; - if ( reg >= ARRAY_SIZE(c->cr) ) + if ( reg >= ARRAY_SIZE(s->cr) ) return X86EMUL_UNHANDLEABLE; - *val = c->cr[reg]; + *val = s->cr[reg]; return X86EMUL_OKAY; } @@ -443,17 +437,16 @@ static int fuzz_write_cr( struct x86_emulate_ctxt *ctxt) { struct fuzz_state *s = ctxt->data; - struct fuzz_corpus *c = s->corpus; int rc; - if ( reg >= ARRAY_SIZE(c->cr) ) + if ( reg >= ARRAY_SIZE(s->cr) ) return X86EMUL_UNHANDLEABLE; rc = maybe_fail(ctxt, "write_cr", true); if ( rc != X86EMUL_OKAY ) return rc; - c->cr[reg] = val; + s->cr[reg] = val; return X86EMUL_OKAY; } @@ -488,7 +481,6 @@ static int fuzz_read_msr( struct x86_emulate_ctxt *ctxt) { const struct fuzz_state *s = ctxt->data; - const struct fuzz_corpus *c = s->corpus; unsigned int idx; switch ( reg ) @@ -502,10 +494,10 @@ static int fuzz_read_msr( */ return data_read(ctxt, x86_seg_none, "read_msr", val, sizeof(*val)); case MSR_EFER: - *val = c->msr[MSRI_EFER]; + *val = s->msr[MSRI_EFER]; *val &= ~EFER_LMA; - if ( (*val & EFER_LME) && (c->cr[4] & X86_CR4_PAE) && - (c->cr[0] & X86_CR0_PG) ) + if ( (*val & EFER_LME) && (s->cr[4] & X86_CR4_PAE) && + (s->cr[0] & X86_CR0_PG) ) { printf("Setting EFER_LMA\n"); *val |= EFER_LMA; @@ -517,7 +509,7 @@ static int fuzz_read_msr( { if ( msr_index[idx] == reg ) { - *val = c->msr[idx]; + *val = s->msr[idx]; return X86EMUL_OKAY; } } @@ -532,7 +524,6 @@ static int fuzz_write_msr( struct x86_emulate_ctxt *ctxt) { struct fuzz_state *s = ctxt->data; - struct fuzz_corpus *c = s->corpus; unsigned int idx; int rc; @@ -551,7 +542,7 @@ static int fuzz_write_msr( { if ( msr_index[idx] == reg ) { - c->msr[idx] = val; + s->msr[idx] = val; return X86EMUL_OKAY; } } @@ -601,15 +592,14 @@ static void setup_fpu_exception_handler(void) static void dump_state(struct x86_emulate_ctxt *ctxt) { struct fuzz_state *s = ctxt->data; - const struct fuzz_corpus *c = s->corpus; struct cpu_user_regs *regs = ctxt->regs; uint64_t val = 0; printf(" -- State -- \n"); printf("addr / sp size: %d / %d\n", ctxt->addr_size, ctxt->sp_size); - printf(" cr0: %lx\n", c->cr[0]); - printf(" cr3: %lx\n", c->cr[3]); - printf(" cr4: %lx\n", c->cr[4]); + printf(" cr0: %lx\n", s->cr[0]); + printf(" cr3: %lx\n", s->cr[3]); + printf(" cr4: %lx\n", s->cr[4]); printf(" rip: %"PRIx64"\n", regs->rip); @@ -630,15 +620,13 @@ static bool long_mode_active(struct x86_emulate_ctxt *ctxt) static bool in_longmode(struct x86_emulate_ctxt *ctxt) { const struct fuzz_state *s = ctxt->data; - const struct fuzz_corpus *c = s->corpus; - return long_mode_active(ctxt) && c->segments[x86_seg_cs].l; + return long_mode_active(ctxt) && s->segments[x86_seg_cs].l; } static void set_sizes(struct x86_emulate_ctxt *ctxt) { struct fuzz_state *s = ctxt->data; - const struct fuzz_corpus *c = s->corpus; ctxt->lma = long_mode_active(ctxt); @@ -646,11 +634,20 @@ static void set_sizes(struct x86_emulate_ctxt *ctxt) ctxt->addr_size = ctxt->sp_size = 64; else { - ctxt->addr_size = c->segments[x86_seg_cs].db ? 32 : 16; - ctxt->sp_size = c->segments[x86_seg_ss].db ? 32 : 16; + ctxt->addr_size = s->segments[x86_seg_cs].db ? 32 : 16; + ctxt->sp_size = s->segments[x86_seg_ss].db ? 32 : 16; } } +static void setup_state(struct x86_emulate_ctxt *ctxt) +{ + struct fuzz_state *s = ctxt->data; + + /* Fuzz all of the emulated state in one go */ + if (!input_read(s, s, DATA_OFFSET)) + exit(-1); +} + #define CANONICALIZE(x) \ do { \ uint64_t _y = (x); \ @@ -710,8 +707,7 @@ enum { static void disable_hooks(struct x86_emulate_ctxt *ctxt) { struct fuzz_state *s = ctxt->data; - const struct fuzz_corpus *c = s->corpus; - unsigned long bitmap = c->options; + unsigned long bitmap = s->options; /* See also sanitize_input, some hooks can't be disabled. */ MAYBE_DISABLE_HOOK(read); @@ -761,12 +757,11 @@ static void disable_hooks(struct x86_emulate_ctxt *ctxt) static void sanitize_input(struct x86_emulate_ctxt *ctxt) { struct fuzz_state *s = ctxt->data; - struct fuzz_corpus *c = s->corpus; - struct cpu_user_regs *regs = &c->regs; - unsigned long bitmap = c->options; + struct cpu_user_regs *regs = ctxt->regs; + unsigned long bitmap = s->options; /* Some hooks can't be disabled. */ - c->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch)); + s->options &= ~((1<<HOOK_read)|(1<<HOOK_insn_fetch)); /* Zero 'private' entries */ regs->error_code = 0; @@ -780,8 +775,8 @@ static void sanitize_input(struct x86_emulate_ctxt *ctxt) * CR0.PG can't be set if CR0.PE isn't set. Set is more interesting, so * set PE if PG is set. */ - if ( c->cr[0] & X86_CR0_PG ) - c->cr[0] |= X86_CR0_PE; + if ( s->cr[0] & X86_CR0_PG ) + s->cr[0] |= X86_CR0_PE; /* EFLAGS.VM not available in long mode */ if ( long_mode_active(ctxt) ) @@ -790,8 +785,8 @@ static void sanitize_input(struct x86_emulate_ctxt *ctxt) /* EFLAGS.VM implies 16-bit mode */ if ( regs->rflags & X86_EFLAGS_VM ) { - c->segments[x86_seg_cs].db = 0; - c->segments[x86_seg_ss].db = 0; + s->segments[x86_seg_cs].db = 0; + s->segments[x86_seg_ss].db = 0; } } @@ -813,15 +808,12 @@ int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size) }; struct x86_emulate_ctxt ctxt = { .data = &state, - .regs = &input.regs, + .regs = &state.regs, .addr_size = 8 * sizeof(void *), .sp_size = 8 * sizeof(void *), }; int rc; - /* Reset all global state variables */ - memset(&input, 0, sizeof(input)); - if ( size <= DATA_OFFSET ) { printf("Input too small\n"); @@ -834,10 +826,10 @@ int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size) return 1; } - memcpy(&input, data_p, size); + state.corpus = (void*)data_p; + state.data_num = size; - state.corpus = &input; - state.data_num = size - DATA_OFFSET; + setup_state(&ctxt); sanitize_input(&ctxt);
At the moment we copy data from the input into a struct named 'corpus', then read and write this state (so that it no longer resembles the corpus that we read from). Instead, move all "emulated cpu" state into fuzz_state, and explicitly state that we are expecting to change it. Get rid of 'input', and always read data directly from the pointer passed into the fuzzer. Signed-off-by: George Dunlap <george.dunlap@citrix.com> --- v4: - Reword commit message to make it clear it's not just about the compact state - Get rid of fuzz_corpus entirely, and avoid the unnecessary copy v3: - Move DATA_OFFSET inside the structure - Remove a stray blank line v2: Port over previous changes CC: Ian Jackson <ian.jackson@citrix.com> CC: Wei Liu <wei.liu2@citrix.com> CC: Andrew Cooper <andrew.cooper3@citrix.com> CC: Jan Beulich <jbeulich@suse.com> --- tools/fuzz/x86_instruction_emulator/fuzz-emul.c | 114 +++++++++++------------- 1 file changed, 53 insertions(+), 61 deletions(-)