Message ID | 20181026152632.30318-4-ben.dooks@codethink.co.uk (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | [1/3] initial parsing of __attribute__((format)) | expand |
On 26/10/18 16:26, Ben Dooks wrote: > +static int decompose_format_printf(const char *string, struct symbol **result) > +{ > + int count = 0; > + > + for (; string[0] != '\0'; string++) { > + if (string[0] == '%') { > + int len = 0; > + struct symbol *sym = NULL; > + if (string[1] == '%') { > + string++; > + continue; > + } > + > + /* get rid of any formatting width bits */ > + while (isdigit(string[1]) || string[1] == '+' || string[1] == '-') > + string++; > + > + switch (string[1]) { > + case 'C': > + /* TODO - same as lc */ > + break; > + case 'c': > + /* TODO - can take l modifier */ > + sym = &char_ctype; > + break; > + case 'f': > + case 'g': > + sym = &double_ctype; > + break; > + case 'h': > + /* TODO hh */ > + len = -1; > + break; > + case 'j': /* ignore intmax/uintmax for the moment */ > + break; > + case 'L': > + sym = &ldouble_ctype; > + break; > + case 'l': > + len++; > + break; > + case 'p': > + /* TODO - deal with void * not being de-referenced in some cases*/ > + sym = &ptr_ctype; > + break; > + case 'q': For the case of %p, is MOD_NODEREF sufficient, or should we also have a flag (either MOD_NOADDRSPACE) or some other way of specifying the type is safe for any address space? I added a ptr_ctype_noderef, but still get: test.c:27:37: warning: incorrect type in argument 3 (different address spaces) test.c:27:37: expected string test.c:27:37: got void [noderef] <asn:1>*b
On 26/10/18 18:28, Ben Dooks wrote: > On 26/10/18 16:26, Ben Dooks wrote: > >> +static int decompose_format_printf(const char *string, struct symbol >> **result) >> +{ >> + int count = 0; >> + >> + for (; string[0] != '\0'; string++) { >> + if (string[0] == '%') { >> + int len = 0; >> + struct symbol *sym = NULL; >> + if (string[1] == '%') { >> + string++; >> + continue; >> + } >> + >> + /* get rid of any formatting width bits */ >> + while (isdigit(string[1]) || string[1] == '+' || >> string[1] == '-') >> + string++; >> + >> + switch (string[1]) { >> + case 'C': >> + /* TODO - same as lc */ >> + break; >> + case 'c': >> + /* TODO - can take l modifier */ >> + sym = &char_ctype; >> + break; >> + case 'f': >> + case 'g': >> + sym = &double_ctype; >> + break; >> + case 'h': >> + /* TODO hh */ >> + len = -1; >> + break; >> + case 'j': /* ignore intmax/uintmax for the moment */ >> + break; >> + case 'L': >> + sym = &ldouble_ctype; >> + break; >> + case 'l': >> + len++; >> + break; >> + case 'p': >> + /* TODO - deal with void * not being de-referenced in >> some cases*/ >> + sym = &ptr_ctype; >> + break; >> + case 'q': > > For the case of %p, is MOD_NODEREF sufficient, or should we also have > a flag (either MOD_NOADDRSPACE) or some other way of specifying the > type is safe for any address space? > > I added a ptr_ctype_noderef, but still get: > > test.c:27:37: warning: incorrect type in argument 3 (different address > spaces) > test.c:27:37: expected string > test.c:27:37: got void [noderef] <asn:1>*b I meant: test.c:28:37: warning: incorrect type in argument 3 (different address spaces) test.c:28:37: expected void [noderef] * test.c:28:37: got void [noderef] <asn:1>*b
On Fri, Oct 26, 2018 at 04:26:32PM +0100, Ben Dooks wrote: > --- > evaluate.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 157 insertions(+), 1 deletion(-) > > diff --git a/evaluate.c b/evaluate.c > index b96696d..82ddf9f 100644 > --- a/evaluate.c > +++ b/evaluate.c > @@ -2243,11 +2243,154 @@ static struct symbol *evaluate_alignof(struct expression *expr) > return size_t_ctype; > } > > +static int decompose_format_printf(const char *string, struct symbol **result) > +{ > + int count = 0; > + > + for (; string[0] != '\0'; string++) { > + if (string[0] == '%') { I would prefer to write this like: struct symbol *sym = NULL; int len = 0; if (string[0] == '%') continue; ... to spare one indentation but it's not really important. > +static int evaluate_format_printf(struct symbol *fn, struct expression *expr, struct symbol ***result) > +{ > + const char *fmt_string = NULL; > + > + if (!expr) > + return -1; > + if (expr->string && expr->string->length) > + fmt_string = expr->string->data; > + if (!fmt_string) { > + struct symbol *sym = evaluate_expression(expr); > + > + /* attempt to find initialiser for this */ > + if (sym && sym->initializer && sym->initializer->string) > + fmt_string = sym->initializer->string->data; > + } > + > + if (fmt_string) { > + struct symbol **syms = NULL; > + int count; > + > + count = decompose_format_printf(fmt_string, NULL); > + if (count <= 0) > + return count; > + > + syms = calloc(sizeof(struct symbol *), count); Sparse make heavy use of pointer lists and I think it better/easier to use them here too instead of parsing the string twice and using calloc/free. -- Luc
On Fri, Oct 26, 2018 at 06:32:05PM +0100, Ben Dooks wrote: > On 26/10/18 18:28, Ben Dooks wrote: > > On 26/10/18 16:26, Ben Dooks wrote: > > > > > + case 'p': > > > + /* TODO - deal with void * not being > > > de-referenced in some cases*/ > > > + sym = &ptr_ctype; > > > + break; > > > + case 'q': > > > > For the case of %p, is MOD_NODEREF sufficient, or should we also have > > a flag (either MOD_NOADDRSPACE) or some other way of specifying the > > type is safe for any address space? > > > > I added a ptr_ctype_noderef, but still get: > > > > test.c:27:37: warning: incorrect type in argument 3 (different > > address spaces) > > test.c:27:37: expected string > > test.c:27:37: got void [noderef] <asn:1>*b > > I meant: > > test.c:28:37: warning: incorrect type in argument 3 (different > address spaces) > test.c:28:37: expected void [noderef] * > test.c:28:37: got void [noderef] <asn:1>*b For the type checking, there is no such thing as a type safe for any address space. You will probably need to special-case it, For example defining an abstract/generic pointer type (gen_ptr_type) and then doing something like: if (i >= fn->printf_va_start && i <= variadic_limit) target = variadic[i - fn->printf_va_start]; else target = argtype; if (!target) { ... + } else if (target == &gen_ptr_type) { + ... } else if (!target->forced_arg){ -- Luc
On Fri, Oct 26, 2018 at 04:26:32PM +0100, Ben Dooks wrote: > --- > evaluate.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 157 insertions(+), 1 deletion(-) > > diff --git a/evaluate.c b/evaluate.c > index b96696d..82ddf9f 100644 > --- a/evaluate.c > +++ b/evaluate.c > @@ -2243,11 +2243,154 @@ static struct symbol *evaluate_alignof(struct expression *expr) > return size_t_ctype; > } > > +static int decompose_format_printf(const char *string, struct symbol **result) > +{ > + int count = 0; > + > + for (; string[0] != '\0'; string++) { > + if (string[0] == '%') { > + int len = 0; > + struct symbol *sym = NULL; > + if (string[1] == '%') { > + string++; > + continue; > + } > + > + /* get rid of any formatting width bits */ > + while (isdigit(string[1]) || string[1] == '+' || string[1] == '-') > + string++; > + > + switch (string[1]) { > + case 'C': > + /* TODO - same as lc */ > + break; > + case 'c': > + /* TODO - can take l modifier */ > + sym = &char_ctype; > + break; > + case 'f': > + case 'g': > + sym = &double_ctype; > + break; > + case 'h': > + /* TODO hh */ > + len = -1; > + break; > + case 'j': /* ignore intmax/uintmax for the moment */ > + break; > + case 'L': > + sym = &ldouble_ctype; > + break; > + case 'l': > + len++; > + break; > + case 'p': > + /* TODO - deal with void * not being de-referenced in some cases*/ > + sym = &ptr_ctype; > + break; > + case 'q': > + len = 2; > + break; > + case 's': > + sym = &string_ctype; > + break; > + case 'n': > + /* TODO - actually pointer to integer */ > + sym = &ptr_ctype; > + break; > + /* note, d is out of alpha order */ > + case 'd': > + switch (len) { > + case -1: sym = &short_ctype; break; > + case 0: sym = &int_ctype; break; > + case 1: sym = &long_ctype; break; > + case 2: sym = &llong_ctype; break; > + case 3: sym = &lllong_ctype; break; > + } > + break; > + case 'u': > + switch (len) { > + case -1: sym = &ushort_ctype; break; > + case 0: sym = &uint_ctype; break; > + case 1: sym = &ulong_ctype; break; > + case 2: sym = &ullong_ctype; break; > + case 3: sym = &ulllong_ctype; break; > + } > + break; > + case 'x': > + case 'X': > + switch (len) { > + case 0: sym = &uint_ctype; break; > + case 1: sym = &ulong_ctype; break; > + case 2: sym = &ullong_ctype; break; > + case 3: sym = &ulllong_ctype; break; > + } > + break; This can be simplified into: + case 'd': + case 'u': + case 'x': + case 'X': + switch (len) { + case -1: sym = &short_ctype; break; + case 0: sym = &int_ctype; break; + case 1: sym = &long_ctype; break; + case 2: sym = &llong_ctype; break; + case 3: sym = &lllong_ctype; break; + } + break; because it's irrelevant if the argument is unsigned or not. -- Luc
On Fri, Oct 26, 2018 at 04:26:32PM +0100, Ben Dooks wrote: > @@ -2259,7 +2402,18 @@ static int evaluate_arguments(struct symbol *fn, struct expression_list *head) > if (!ctype) > return 0; > > - target = argtype; > + if (i == fn->printf_msg) { > + int ret = evaluate_format_printf(fn, *p, &variadic); > + if (ret < 0) > + warning((*p)->pos, "cannot parse format"); > + else if (ret > 0) > + variadic_limit = fn->printf_va_start + ret; > + } > + > + if (i >= fn->printf_va_start && i <= variadic_limit) There is an off-by-one error here. The test should be: + if (i >= fn->printf_va_start && i < variadic_limit) But even better to replace this with a struct symbol_list. -- Luc
diff --git a/evaluate.c b/evaluate.c index b96696d..82ddf9f 100644 --- a/evaluate.c +++ b/evaluate.c @@ -2243,11 +2243,154 @@ static struct symbol *evaluate_alignof(struct expression *expr) return size_t_ctype; } +static int decompose_format_printf(const char *string, struct symbol **result) +{ + int count = 0; + + for (; string[0] != '\0'; string++) { + if (string[0] == '%') { + int len = 0; + struct symbol *sym = NULL; + if (string[1] == '%') { + string++; + continue; + } + + /* get rid of any formatting width bits */ + while (isdigit(string[1]) || string[1] == '+' || string[1] == '-') + string++; + + switch (string[1]) { + case 'C': + /* TODO - same as lc */ + break; + case 'c': + /* TODO - can take l modifier */ + sym = &char_ctype; + break; + case 'f': + case 'g': + sym = &double_ctype; + break; + case 'h': + /* TODO hh */ + len = -1; + break; + case 'j': /* ignore intmax/uintmax for the moment */ + break; + case 'L': + sym = &ldouble_ctype; + break; + case 'l': + len++; + break; + case 'p': + /* TODO - deal with void * not being de-referenced in some cases*/ + sym = &ptr_ctype; + break; + case 'q': + len = 2; + break; + case 's': + sym = &string_ctype; + break; + case 'n': + /* TODO - actually pointer to integer */ + sym = &ptr_ctype; + break; + /* note, d is out of alpha order */ + case 'd': + switch (len) { + case -1: sym = &short_ctype; break; + case 0: sym = &int_ctype; break; + case 1: sym = &long_ctype; break; + case 2: sym = &llong_ctype; break; + case 3: sym = &lllong_ctype; break; + } + break; + case 'u': + switch (len) { + case -1: sym = &ushort_ctype; break; + case 0: sym = &uint_ctype; break; + case 1: sym = &ulong_ctype; break; + case 2: sym = &ullong_ctype; break; + case 3: sym = &ulllong_ctype; break; + } + break; + case 'x': + case 'X': + switch (len) { + case 0: sym = &uint_ctype; break; + case 1: sym = &ulong_ctype; break; + case 2: sym = &ullong_ctype; break; + case 3: sym = &ulllong_ctype; break; + } + break; + case 'z': + case 'Z': + sym = &uint_ctype; /* TODO */ + break; + } + + if (result && sym) + *result++ = sym; + if (sym) + count++; + + while (string[0] != ' ' && string[0] != '\0') + string++; + + string--; + } + } + + return count; +} + + +static int evaluate_format_printf(struct symbol *fn, struct expression *expr, struct symbol ***result) +{ + const char *fmt_string = NULL; + + if (!expr) + return -1; + if (expr->string && expr->string->length) + fmt_string = expr->string->data; + if (!fmt_string) { + struct symbol *sym = evaluate_expression(expr); + + /* attempt to find initialiser for this */ + if (sym && sym->initializer && sym->initializer->string) + fmt_string = sym->initializer->string->data; + } + + if (fmt_string) { + struct symbol **syms = NULL; + int count; + + count = decompose_format_printf(fmt_string, NULL); + if (count <= 0) + return count; + + syms = calloc(sizeof(struct symbol *), count); + if (!syms) + return -1; + + count = decompose_format_printf(fmt_string, syms); + *result = syms; + return count; + } + + return -1; +} + static int evaluate_arguments(struct symbol *fn, struct expression_list *head) { struct expression *expr; struct symbol_list *argument_types = fn->arguments; + struct symbol **variadic = NULL; struct symbol *argtype; + int variadic_limit = 0; int i = 1; PREPARE_PTR_LIST(argument_types, argtype); @@ -2259,7 +2402,18 @@ static int evaluate_arguments(struct symbol *fn, struct expression_list *head) if (!ctype) return 0; - target = argtype; + if (i == fn->printf_msg) { + int ret = evaluate_format_printf(fn, *p, &variadic); + if (ret < 0) + warning((*p)->pos, "cannot parse format"); + else if (ret > 0) + variadic_limit = fn->printf_va_start + ret; + } + + if (i >= fn->printf_va_start && i <= variadic_limit) + target = variadic[i - fn->printf_va_start]; + else + target = argtype; if (!target) { struct symbol *type; int class = classify_type(ctype, &type); @@ -2286,6 +2440,8 @@ static int evaluate_arguments(struct symbol *fn, struct expression_list *head) NEXT_PTR_LIST(argtype); } END_FOR_EACH_PTR(expr); FINISH_PTR_LIST(argtype); + + free(variadic); return 1; }