Message ID | 20190925100015.31510-3-ben.dooks@codethink.co.uk (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | [1/5] validation: ignore temporary ~ files | expand |
On Wed, Sep 25, 2019 at 11:00:12AM +0100, Ben Dooks wrote: > diff --git a/parse.c b/parse.c > index f291e24..583a82c 100644 > --- a/parse.c > +++ b/parse.c > @@ -87,7 +87,7 @@ static attr_t > attribute_address_space, attribute_context, > attribute_designated_init, > attribute_transparent_union, ignore_attribute, > - attribute_mode, attribute_force; > + attribute_mode, attribute_force, attribute_format; I prefer that you simply insert for the attribute without touvhing the others one: + invalid_printf_format_args, > typedef struct symbol *to_mode_t(struct symbol *); > > @@ -136,6 +136,11 @@ static void asm_modifier_inline(struct token *token, unsigned long *mods) > asm_modifier(token, mods, MOD_INLINE); > } > > +/* the types of printf style formatting from __attribute__((format)) */ > +enum { > + FmtPrintf = 0, FmtScanf, > +}; Please change this to: FORMAT_PRINTF, FORMAT_SCANF, > @@ -1172,6 +1195,59 @@ static struct token *attribute_address_space(struct token *token, struct symbol > return token; > } > > +static int invalid_printf_format_args(long long start, long long at) > +{ > + return start < 0 || at < 0 || (start == at && start > 0) || > + (start == 0 && at == 0); > +} The name suggest it is only used for printf format but the code below uses it for all formats, please rename it. I would prefer to have the reverse logics, check if the format is valid and to have a simpler check, something like: static bool validate_attribute_format(...) { return (start > 1) && ((at > start) || at == 0); } but since more validations are done after, I think it's best to simply not use this helper and directly doing the checks and emitting the approriate warning messages when needed ("index smaller than 1", ...). > +static struct token *attribute_format(struct token *token, struct symbol *attr, struct decl_state *ctx) > +{ ... > + ctx->ctype.printf_va_start = start; > + ctx->ctype.printf_msg = at; GCC's manpage call them 'string-index' & 'first-to-check'. Best to keep things coherent and use the same names everywhere, for example 'index' & first' ? > diff --git a/symbol.h b/symbol.h > index ac43b31..7bb6f29 100644 > --- a/symbol.h > +++ b/symbol.h > @@ -103,6 +104,7 @@ struct ctype { > struct context_list *contexts; > struct ident *as; > struct symbol *base_type; > + unsigned short printf_va_start, printf_msg; What about something like: + struct { + unsigned short index; + unsigned short first; + } format; Also the validation should check that these are not bigger than USHORT_MAX. -- Luc
On 20/10/2019 15:57, Luc Van Oostenryck wrote: > On Wed, Sep 25, 2019 at 11:00:12AM +0100, Ben Dooks wrote: >> diff --git a/parse.c b/parse.c >> index f291e24..583a82c 100644 >> --- a/parse.c >> +++ b/parse.c >> @@ -87,7 +87,7 @@ static attr_t >> attribute_address_space, attribute_context, >> attribute_designated_init, >> attribute_transparent_union, ignore_attribute, >> - attribute_mode, attribute_force; >> + attribute_mode, attribute_force, attribute_format; > > I prefer that you simply insert for the attribute without touvhing the > others one: > + invalid_printf_format_args, > >> typedef struct symbol *to_mode_t(struct symbol *); >> >> @@ -136,6 +136,11 @@ static void asm_modifier_inline(struct token *token, unsigned long *mods) >> asm_modifier(token, mods, MOD_INLINE); >> } >> >> +/* the types of printf style formatting from __attribute__((format)) */ >> +enum { >> + FmtPrintf = 0, FmtScanf, >> +}; > > Please change this to: > FORMAT_PRINTF, > FORMAT_SCANF, > >> @@ -1172,6 +1195,59 @@ static struct token *attribute_address_space(struct token *token, struct symbol >> return token; >> } >> >> +static int invalid_printf_format_args(long long start, long long at) >> +{ >> + return start < 0 || at < 0 || (start == at && start > 0) || >> + (start == 0 && at == 0); >> +} > > The name suggest it is only used for printf format but the code below > uses it for all formats, please rename it. > I would prefer to have the reverse logics, check if the format is valid > and to have a simpler check, something like: > static bool validate_attribute_format(...) > { > return (start > 1) && ((at > start) || at == 0); > } > but since more validations are done after, I think it's best to simply > not use this helper and directly doing the checks and emitting the > approriate warning messages when needed ("index smaller than 1", ...). > >> +static struct token *attribute_format(struct token *token, struct symbol *attr, struct decl_state *ctx) >> +{ > ... > >> + ctx->ctype.printf_va_start = start; >> + ctx->ctype.printf_msg = at; > > GCC's manpage call them 'string-index' & 'first-to-check'. Best to keep things > coherent and use the same names everywhere, for example 'index' & first' ? ok. >> diff --git a/symbol.h b/symbol.h >> index ac43b31..7bb6f29 100644 >> --- a/symbol.h >> +++ b/symbol.h >> @@ -103,6 +104,7 @@ struct ctype { >> struct context_list *contexts; >> struct ident *as; >> struct symbol *base_type; >> + unsigned short printf_va_start, printf_msg; > > What about something like: > + struct { > + unsigned short index; > + unsigned short first; > + } format; ok, done. > Also the validation should check that these are not bigger than > USHORT_MAX. ok, will do.
diff --git a/parse.c b/parse.c index f291e24..583a82c 100644 --- a/parse.c +++ b/parse.c @@ -87,7 +87,7 @@ static attr_t attribute_address_space, attribute_context, attribute_designated_init, attribute_transparent_union, ignore_attribute, - attribute_mode, attribute_force; + attribute_mode, attribute_force, attribute_format; typedef struct symbol *to_mode_t(struct symbol *); @@ -136,6 +136,11 @@ static void asm_modifier_inline(struct token *token, unsigned long *mods) asm_modifier(token, mods, MOD_INLINE); } +/* the types of printf style formatting from __attribute__((format)) */ +enum { + FmtPrintf = 0, FmtScanf, +}; + static struct symbol_op typedef_op = { .type = KW_MODIFIER, .declarator = typedef_specifier, @@ -386,6 +391,10 @@ static struct symbol_op attr_force_op = { .attribute = attribute_force, }; +static struct symbol_op attr_format = { + .attribute = attribute_format, +}; + static struct symbol_op address_space_op = { .attribute = attribute_address_space, }; @@ -445,6 +454,16 @@ static struct symbol_op mode_word_op = { .to_mode = to_word_mode }; +static struct symbol_op attr_printf_op = { + .type = KW_FORMAT, + .class = FmtPrintf, +}; + +static struct symbol_op attr_scanf_op = { + .type = KW_FORMAT, + .class = FmtScanf, +}; + /* Using NS_TYPEDEF will also make the keyword a reserved one */ static struct init_keyword { const char *name; @@ -570,6 +589,10 @@ static struct init_keyword { {"externally_visible", NS_KEYWORD, .op = &ext_visible_op }, {"__externally_visible__", NS_KEYWORD, .op = &ext_visible_op }, + { "format", NS_KEYWORD, .op = &attr_format }, + { "printf", NS_KEYWORD, .op = &attr_printf_op }, + { "scanf", NS_KEYWORD, .op = &attr_scanf_op }, + { "mode", NS_KEYWORD, .op = &mode_op }, { "__mode__", NS_KEYWORD, .op = &mode_op }, { "QI", NS_KEYWORD, .op = &mode_QI_op }, @@ -1172,6 +1195,59 @@ static struct token *attribute_address_space(struct token *token, struct symbol return token; } +static int invalid_printf_format_args(long long start, long long at) +{ + return start < 0 || at < 0 || (start == at && start > 0) || + (start == 0 && at == 0); +} + +static struct token *attribute_format(struct token *token, struct symbol *attr, struct decl_state *ctx) +{ + struct expression *args[3]; + struct symbol *fmt_sym = NULL; + + /* expecting format ( type, start, va_args at) */ + + token = expect(token, '(', "after format attribute"); + if (token_type(token) == TOKEN_IDENT) + fmt_sym = lookup_keyword(token->ident, NS_KEYWORD); + if (fmt_sym) + if (!fmt_sym->op || fmt_sym->op->type != KW_FORMAT) + fmt_sym = NULL; + + token = conditional_expression(token, &args[0]); + token = expect(token, ',', "format attribute type"); + token = conditional_expression(token, &args[1]); + token = expect(token, ',', "format attribute type position"); + token = conditional_expression(token, &args[2]); + token = expect(token, ')', "format attribute arg position"); + + if (!fmt_sym || !args[0] || !args[1] || !args[2]) { + warning(token->pos, "incorrect format attribute"); + } else if (fmt_sym->op->class != FmtPrintf) { + /* skip anything that isn't printf for the moment */ + warning(token->pos, "only printf format attribute supported"); + } else { + long long start, at; + + start = get_expression_value(args[2]); + at = get_expression_value(args[1]); + + if (invalid_printf_format_args(start, at)) { + warning(token->pos, "bad format positions"); + } else if (start == 0) { + /* nothing to do here, is va_list function */ + } else if (start < at) { + warning(token->pos, "format cannot be after va_args"); + } else { + ctx->ctype.printf_va_start = start; + ctx->ctype.printf_msg = at; + } + } + + return token; +} + static struct symbol *to_QI_mode(struct symbol *ctype) { if (ctype->ctype.base_type != &int_type) @@ -2981,6 +3057,9 @@ struct token *external_declaration(struct token *token, struct symbol_list **lis if (!(decl->ctype.modifiers & MOD_STATIC)) decl->ctype.modifiers |= MOD_EXTERN; + + base_type->ctype.printf_msg = decl->ctype.printf_msg; + base_type->ctype.printf_va_start = decl->ctype.printf_va_start; } else if (base_type == &void_ctype && !(decl->ctype.modifiers & MOD_EXTERN)) { sparse_error(token->pos, "void declaration"); } diff --git a/symbol.h b/symbol.h index ac43b31..7bb6f29 100644 --- a/symbol.h +++ b/symbol.h @@ -86,6 +86,7 @@ enum keyword { KW_SHORT = 1 << 7, KW_LONG = 1 << 8, KW_EXACT = 1 << 9, + KW_FORMAT = 1 << 10, }; struct context { @@ -103,6 +104,7 @@ struct ctype { struct context_list *contexts; struct ident *as; struct symbol *base_type; + unsigned short printf_va_start, printf_msg; }; struct decl_state {
Add code to parse the __attribute__((format)) used to indicate that a variadic function takes a printf-style format string and where those are. Save the data in ctype ready for checking when such an function is encoutered. Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk> --- Fixes since v1: - moved to using ctype in base_type to store infromation - fixed formatting issues - updated check for bad format arguments - reduced the line count to unsigned short to save space Fixes since v2: - correctly use the decl->ctype to store printf information - fixed issues with checking format positions for va_list code - parse but ignore scanf formatting for now Fixes since v4: - deal with function pointers losing format info Fixes since v5: - remove function pointer attribute support --- parse.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- symbol.h | 2 ++ 2 files changed, 82 insertions(+), 1 deletion(-)