diff mbox series

[2/5] parse: initial parsing of __attribute__((format))

Message ID 20190925100015.31510-3-ben.dooks@codethink.co.uk (mailing list archive)
State Superseded, archived
Headers show
Series [1/5] validation: ignore temporary ~ files | expand

Commit Message

Ben Dooks Sept. 25, 2019, 10 a.m. UTC
Add code to parse the __attribute__((format)) used to indicate that
a variadic function takes a printf-style format string and where
those are. Save the data in ctype ready for checking when such an
function is encoutered.

Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
---
Fixes since v1:
- moved to using ctype in base_type to store infromation
- fixed formatting issues
- updated check for bad format arguments
- reduced the line count to unsigned short to save space

Fixes since v2:
- correctly use the decl->ctype to store printf information
- fixed issues with checking format positions for va_list code
- parse but ignore scanf formatting for now

Fixes since v4:
- deal with function pointers losing format info

Fixes since v5:
- remove function pointer attribute support
---
 parse.c  | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 symbol.h |  2 ++
 2 files changed, 82 insertions(+), 1 deletion(-)

Comments

Luc Van Oostenryck Oct. 20, 2019, 2:57 p.m. UTC | #1
On Wed, Sep 25, 2019 at 11:00:12AM +0100, Ben Dooks wrote:
> diff --git a/parse.c b/parse.c
> index f291e24..583a82c 100644
> --- a/parse.c
> +++ b/parse.c
> @@ -87,7 +87,7 @@ static attr_t
>  	attribute_address_space, attribute_context,
>  	attribute_designated_init,
>  	attribute_transparent_union, ignore_attribute,
> -	attribute_mode, attribute_force;
> +	attribute_mode, attribute_force, attribute_format;

I prefer that you simply insert for the attribute without touvhing the
others one:
	+ invalid_printf_format_args,

>  typedef struct symbol *to_mode_t(struct symbol *);
>  
> @@ -136,6 +136,11 @@ static void asm_modifier_inline(struct token *token, unsigned long *mods)
>  	asm_modifier(token, mods, MOD_INLINE);
>  }
>  
> +/* the types of printf style formatting from __attribute__((format)) */
> +enum {
> +	FmtPrintf = 0, FmtScanf,
> +};

Please change this to:
	FORMAT_PRINTF,
	FORMAT_SCANF,

> @@ -1172,6 +1195,59 @@ static struct token *attribute_address_space(struct token *token, struct symbol
>  	return token;
>  }
>  
> +static int invalid_printf_format_args(long long start, long long at)
> +{
> +	return start < 0 || at < 0 || (start == at && start > 0) ||
> +		(start == 0 && at == 0);
> +}

The name suggest it is only used for printf format but the code below
uses it for all formats, please rename it.
I would prefer to have the reverse logics, check if the format is valid
and to have a simpler check, something like:
	static bool validate_attribute_format(...)
	{
		return (start > 1) && ((at > start) || at == 0);
	}
but since more validations are done after, I think it's best to simply
not use this helper and directly doing the checks and emitting the
approriate warning messages when needed ("index smaller than 1", ...).

> +static struct token *attribute_format(struct token *token, struct symbol *attr, struct decl_state *ctx)
> +{
...

> +			ctx->ctype.printf_va_start = start;
> +			ctx->ctype.printf_msg = at;

GCC's manpage call them 'string-index' & 'first-to-check'. Best to keep things
coherent and use the same names everywhere, for example 'index' & first' ?

> diff --git a/symbol.h b/symbol.h
> index ac43b31..7bb6f29 100644
> --- a/symbol.h
> +++ b/symbol.h
> @@ -103,6 +104,7 @@ struct ctype {
>  	struct context_list *contexts;
>  	struct ident *as;
>  	struct symbol *base_type;
> +	unsigned short printf_va_start, printf_msg;

What about something like:
+	struct {
+		unsigned short index;
+		unsigned short first;
+	} format;

Also the validation should check that these are not bigger than
USHORT_MAX.

-- Luc
Ben Dooks Oct. 23, 2019, 4:28 p.m. UTC | #2
On 20/10/2019 15:57, Luc Van Oostenryck wrote:
> On Wed, Sep 25, 2019 at 11:00:12AM +0100, Ben Dooks wrote:
>> diff --git a/parse.c b/parse.c
>> index f291e24..583a82c 100644
>> --- a/parse.c
>> +++ b/parse.c
>> @@ -87,7 +87,7 @@ static attr_t
>>   	attribute_address_space, attribute_context,
>>   	attribute_designated_init,
>>   	attribute_transparent_union, ignore_attribute,
>> -	attribute_mode, attribute_force;
>> +	attribute_mode, attribute_force, attribute_format;
> 
> I prefer that you simply insert for the attribute without touvhing the
> others one:
> 	+ invalid_printf_format_args,
> 
>>   typedef struct symbol *to_mode_t(struct symbol *);
>>   
>> @@ -136,6 +136,11 @@ static void asm_modifier_inline(struct token *token, unsigned long *mods)
>>   	asm_modifier(token, mods, MOD_INLINE);
>>   }
>>   
>> +/* the types of printf style formatting from __attribute__((format)) */
>> +enum {
>> +	FmtPrintf = 0, FmtScanf,
>> +};
> 
> Please change this to:
> 	FORMAT_PRINTF,
> 	FORMAT_SCANF,
> 
>> @@ -1172,6 +1195,59 @@ static struct token *attribute_address_space(struct token *token, struct symbol
>>   	return token;
>>   }
>>   
>> +static int invalid_printf_format_args(long long start, long long at)
>> +{
>> +	return start < 0 || at < 0 || (start == at && start > 0) ||
>> +		(start == 0 && at == 0);
>> +}
> 
> The name suggest it is only used for printf format but the code below
> uses it for all formats, please rename it.
> I would prefer to have the reverse logics, check if the format is valid
> and to have a simpler check, something like:
> 	static bool validate_attribute_format(...)
> 	{
> 		return (start > 1) && ((at > start) || at == 0);
> 	}
> but since more validations are done after, I think it's best to simply
> not use this helper and directly doing the checks and emitting the
> approriate warning messages when needed ("index smaller than 1", ...).
> 
>> +static struct token *attribute_format(struct token *token, struct symbol *attr, struct decl_state *ctx)
>> +{
> ...
> 
>> +			ctx->ctype.printf_va_start = start;
>> +			ctx->ctype.printf_msg = at;
> 
> GCC's manpage call them 'string-index' & 'first-to-check'. Best to keep things
> coherent and use the same names everywhere, for example 'index' & first' ?

ok.

>> diff --git a/symbol.h b/symbol.h
>> index ac43b31..7bb6f29 100644
>> --- a/symbol.h
>> +++ b/symbol.h
>> @@ -103,6 +104,7 @@ struct ctype {
>>   	struct context_list *contexts;
>>   	struct ident *as;
>>   	struct symbol *base_type;
>> +	unsigned short printf_va_start, printf_msg;
> 
> What about something like:
> +	struct {
> +		unsigned short index;
> +		unsigned short first;
> +	} format;

ok, done.

> Also the validation should check that these are not bigger than
> USHORT_MAX.

ok, will do.
diff mbox series

Patch

diff --git a/parse.c b/parse.c
index f291e24..583a82c 100644
--- a/parse.c
+++ b/parse.c
@@ -87,7 +87,7 @@  static attr_t
 	attribute_address_space, attribute_context,
 	attribute_designated_init,
 	attribute_transparent_union, ignore_attribute,
-	attribute_mode, attribute_force;
+	attribute_mode, attribute_force, attribute_format;
 
 typedef struct symbol *to_mode_t(struct symbol *);
 
@@ -136,6 +136,11 @@  static void asm_modifier_inline(struct token *token, unsigned long *mods)
 	asm_modifier(token, mods, MOD_INLINE);
 }
 
+/* the types of printf style formatting from __attribute__((format)) */
+enum {
+	FmtPrintf = 0, FmtScanf,
+};
+
 static struct symbol_op typedef_op = {
 	.type = KW_MODIFIER,
 	.declarator = typedef_specifier,
@@ -386,6 +391,10 @@  static struct symbol_op attr_force_op = {
 	.attribute = attribute_force,
 };
 
+static struct symbol_op attr_format = {
+	.attribute = attribute_format,
+};
+
 static struct symbol_op address_space_op = {
 	.attribute = attribute_address_space,
 };
@@ -445,6 +454,16 @@  static struct symbol_op mode_word_op = {
 	.to_mode = to_word_mode
 };
 
+static struct symbol_op attr_printf_op = {
+	.type	= KW_FORMAT,
+	.class	= FmtPrintf,
+};
+
+static struct symbol_op attr_scanf_op = {
+	.type	= KW_FORMAT,
+	.class	= FmtScanf,
+};
+
 /* Using NS_TYPEDEF will also make the keyword a reserved one */
 static struct init_keyword {
 	const char *name;
@@ -570,6 +589,10 @@  static struct init_keyword {
 	{"externally_visible",	NS_KEYWORD,	.op = &ext_visible_op },
 	{"__externally_visible__",	NS_KEYWORD,	.op = &ext_visible_op },
 
+	{ "format",	NS_KEYWORD,	.op = &attr_format },
+	{ "printf",	NS_KEYWORD,	.op = &attr_printf_op },
+	{ "scanf",	NS_KEYWORD,	.op = &attr_scanf_op },
+
 	{ "mode",	NS_KEYWORD,	.op = &mode_op },
 	{ "__mode__",	NS_KEYWORD,	.op = &mode_op },
 	{ "QI",		NS_KEYWORD,	.op = &mode_QI_op },
@@ -1172,6 +1195,59 @@  static struct token *attribute_address_space(struct token *token, struct symbol
 	return token;
 }
 
+static int invalid_printf_format_args(long long start, long long at)
+{
+	return start < 0 || at < 0 || (start == at && start > 0) ||
+		(start == 0 && at == 0);
+}
+
+static struct token *attribute_format(struct token *token, struct symbol *attr, struct decl_state *ctx)
+{
+	struct expression *args[3];
+	struct symbol *fmt_sym = NULL;
+
+	/* expecting format ( type, start, va_args at) */
+
+	token = expect(token, '(', "after format attribute");
+	if (token_type(token) == TOKEN_IDENT)
+		fmt_sym = lookup_keyword(token->ident, NS_KEYWORD);
+	if (fmt_sym)
+		if (!fmt_sym->op || fmt_sym->op->type != KW_FORMAT)
+			fmt_sym = NULL;
+
+	token = conditional_expression(token, &args[0]);
+	token = expect(token, ',', "format attribute type");
+	token = conditional_expression(token, &args[1]);
+	token = expect(token, ',', "format attribute type position");
+	token = conditional_expression(token, &args[2]);
+	token = expect(token, ')', "format attribute arg position");
+
+	if (!fmt_sym || !args[0] || !args[1] || !args[2]) {
+		warning(token->pos, "incorrect format attribute");
+	} else if (fmt_sym->op->class != FmtPrintf) {
+		/* skip anything that isn't printf for the moment */
+		warning(token->pos, "only printf format attribute supported");
+	} else {
+		long long start, at;
+
+		start = get_expression_value(args[2]);
+		at = get_expression_value(args[1]);
+
+		if (invalid_printf_format_args(start, at)) {
+			warning(token->pos, "bad format positions");
+		} else if (start == 0) {
+			/* nothing to do here, is va_list function */
+		} else if (start < at) {
+			warning(token->pos, "format cannot be after va_args");
+		} else {
+			ctx->ctype.printf_va_start = start;
+			ctx->ctype.printf_msg = at;
+		}
+	}
+
+	return token;
+}
+
 static struct symbol *to_QI_mode(struct symbol *ctype)
 {
 	if (ctype->ctype.base_type != &int_type)
@@ -2981,6 +3057,9 @@  struct token *external_declaration(struct token *token, struct symbol_list **lis
 
 		if (!(decl->ctype.modifiers & MOD_STATIC))
 			decl->ctype.modifiers |= MOD_EXTERN;
+
+		base_type->ctype.printf_msg = decl->ctype.printf_msg;
+		base_type->ctype.printf_va_start = decl->ctype.printf_va_start;
 	} else if (base_type == &void_ctype && !(decl->ctype.modifiers & MOD_EXTERN)) {
 		sparse_error(token->pos, "void declaration");
 	}
diff --git a/symbol.h b/symbol.h
index ac43b31..7bb6f29 100644
--- a/symbol.h
+++ b/symbol.h
@@ -86,6 +86,7 @@  enum keyword {
 	KW_SHORT	= 1 << 7,
 	KW_LONG		= 1 << 8,
 	KW_EXACT	= 1 << 9,
+	KW_FORMAT	= 1 << 10,
 };
 
 struct context {
@@ -103,6 +104,7 @@  struct ctype {
 	struct context_list *contexts;
 	struct ident *as;
 	struct symbol *base_type;
+	unsigned short printf_va_start, printf_msg;
 };
 
 struct decl_state {