diff mbox series

[3/5] initial parsing of __attribute__((format))

Message ID 20181029153952.13927-4-ben.dooks@codethink.co.uk (mailing list archive)
State Superseded, archived
Headers show
Series [1/5] tokenize: check show_string() for NULL pointer | expand

Commit Message

Ben Dooks Oct. 29, 2018, 3:39 p.m. UTC
Add code to parse the __attribute__((format)) used to indicate that
a variadic function takes a printf-style format string and where
those are. Save the data in ctype ready for checking when such an
function is encoutered.

Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
---
Fixes since v1:
- moved to using ctype in base_type to store infromation
- fixed formatting issues
- updated check for bad format arguments
- reduced the line count to unsigned short to save space

Notes:
- What to do when base_type is not set... current code doesn't work
---
 parse.c  | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 symbol.h |  2 ++
 2 files changed, 74 insertions(+), 2 deletions(-)

Comments

Luc Van Oostenryck Oct. 29, 2018, 8:41 p.m. UTC | #1
On Mon, Oct 29, 2018 at 03:39:50PM +0000, Ben Dooks wrote:
> Add code to parse the __attribute__((format)) used to indicate that
> a variadic function takes a printf-style format string and where
> those are. Save the data in ctype ready for checking when such an
> function is encoutered.
> 
> Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
> ---
> Fixes since v1:
> - moved to using ctype in base_type to store infromation
> - fixed formatting issues
> - updated check for bad format arguments
> - reduced the line count to unsigned short to save space
> 
> Notes:
> - What to do when base_type is not set... current code doesn't work

Yes, I saw this. See here below.

> ---
>  parse.c  | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  symbol.h |  2 ++
>  2 files changed, 74 insertions(+), 2 deletions(-)
> 
> diff --git a/parse.c b/parse.c
> index 02a55a7..9b0d40e 100644
> --- a/parse.c
> +++ b/parse.c
> @@ -1051,6 +1061,67 @@ static struct token *attribute_address_space(struct token *token, struct symbol
>  	return token;
>  }
>  
> +static struct token *attribute_format(struct token *token, struct symbol *attr, struct decl_state *ctx)
> +{
> +	struct expression *args[3];
> +	struct symbol *fmt_sym = NULL;
> +	int argc = 0;
> +
> +	/* expecting format ( type, start, va_args at) */
> +
> +	token = expect(token, '(', "after format attribute");
> +	while (!match_op(token, ')')) {
> +		struct expression *expr = NULL;
> +
> +		if (argc == 0) {
> +			if (token_type(token) == TOKEN_IDENT)
> +				fmt_sym = lookup_keyword(token->ident, NS_KEYWORD);
> +
> +			if (!fmt_sym || !fmt_sym->op ||
> +			    fmt_sym->op != &attr_printf_op) {
> +				sparse_error(token->pos,
> +					     "unknown format type '%s'\n",
> +					     show_ident(token->ident));
> +				fmt_sym = NULL;
> +			}
> +		}
> +
> +		token = conditional_expression(token, &expr);
> +		if (!expr)
> +			break;
> +		if (argc < 3)
> +			args[argc++] = expr;
> +		if (!match_op(token, ','))
> +			break;
> +		token = token->next;
> +	}
> +
> +	if (argc != 3 || !fmt_sym) {
> +		warning(token->pos, "incorrect format attribute");
> +	} else {
> +		long long start, at;
> +
> +		start = get_expression_value(args[2]);
> +		at = get_expression_value(args[1]);
> +
> +		if (start <= 0 || at <= 0 || (start == at && start > 0)) {
> +			warning(token->pos, "bad format positions");
> +		} else if (start < at) {
> +			warning(token->pos, "format cannot be after va_args");
> +		} else if (!ctx->ctype.base_type) {
> +			warning(token->pos, "no base type to set");
> +			ctx->ctype.printf_va_start = start;
> +			ctx->ctype.printf_msg = at;
> +		} else {
> +			ctx->ctype.base_type->ctype.printf_va_start = start;
> +			ctx->ctype.base_type->ctype.printf_msg = at;
> +		}

This makes me realize that only a few attributes are handled and can be
handled. The last lines should simply be written as:
+		} else if (start < at) {
+			warning(token->pos, "format cannot be after va_args");
+		} else {
+			ctx->ctype.printf_va_start = start;
+			ctx->ctype.printf_msg = at;
+		}

In other words, this parsing should not poke into the base type. But as-is
the two args will not be propagated into the function because ctx is for the
whole declaration. You will need to add something like:
	diff --git a/parse.c b/parse.c
	index 83bca24b3..4e273b743 100644
	--- a/parse.c
	+++ b/parse.c
	@@ -2979,6 +2979,10 @@ struct token *external_declaration(struct token *token, struct symbol_list **lis
	 
	 		if (!(decl->ctype.modifiers & MOD_STATIC))
	 			decl->ctype.modifiers |= MOD_EXTERN;
	+
	+		// apply non-modifier function attributes
	+		base_type->ctype.printf_msg = decl->ctype.printf_msg;
	+		base_type->ctype.printf_va_start = decl->ctype.printf_va_start;
	 	} else if (base_type == &void_ctype && !(decl->ctype.modifiers & MOD_EXTERN)) {
	 		sparse_error(token->pos, "void declaration");
	 	}

and at some later stage, I'll make it more generic.


Kind regards,
-- Luc
Ben Dooks Oct. 30, 2018, 9:05 a.m. UTC | #2
On 29/10/18 20:41, Luc Van Oostenryck wrote:
> On Mon, Oct 29, 2018 at 03:39:50PM +0000, Ben Dooks wrote:
>> Add code to parse the __attribute__((format)) used to indicate that
>> a variadic function takes a printf-style format string and where
>> those are. Save the data in ctype ready for checking when such an
>> function is encoutered.
>>

> In other words, this parsing should not poke into the base type. But as-is
> the two args will not be propagated into the function because ctx is for the
> whole declaration. You will need to add something like:
> 	diff --git a/parse.c b/parse.c
> 	index 83bca24b3..4e273b743 100644
> 	--- a/parse.c
> 	+++ b/parse.c
> 	@@ -2979,6 +2979,10 @@ struct token *external_declaration(struct token *token, struct symbol_list **lis
> 	
> 	 		if (!(decl->ctype.modifiers & MOD_STATIC))
> 	 			decl->ctype.modifiers |= MOD_EXTERN;
> 	+
> 	+		// apply non-modifier function attributes
> 	+		base_type->ctype.printf_msg = decl->ctype.printf_msg;
> 	+		base_type->ctype.printf_va_start = decl->ctype.printf_va_start;
> 	 	} else if (base_type == &void_ctype && !(decl->ctype.modifiers & MOD_EXTERN)) {
> 	 		sparse_error(token->pos, "void declaration");
> 	 	}
> 
> and at some later stage, I'll make it more generic.

Ok, thanks. That seems to have worked.
diff mbox series

Patch

diff --git a/parse.c b/parse.c
index 02a55a7..9b0d40e 100644
--- a/parse.c
+++ b/parse.c
@@ -84,7 +84,7 @@  static attr_t
 	attribute_address_space, attribute_context,
 	attribute_designated_init,
 	attribute_transparent_union, ignore_attribute,
-	attribute_mode, attribute_force;
+	attribute_mode, attribute_force, attribute_format;
 
 typedef struct symbol *to_mode_t(struct symbol *);
 
@@ -353,6 +353,10 @@  static struct symbol_op attr_force_op = {
 	.attribute = attribute_force,
 };
 
+static struct symbol_op attr_format = {
+	.attribute = attribute_format,
+};
+
 static struct symbol_op address_space_op = {
 	.attribute = attribute_address_space,
 };
@@ -407,6 +411,10 @@  static struct symbol_op mode_word_op = {
 	.to_mode = to_word_mode
 };
 
+static struct symbol_op attr_printf_op = {
+	.type	= KW_FORMAT,
+};
+
 /* Using NS_TYPEDEF will also make the keyword a reserved one */
 static struct init_keyword {
 	const char *name;
@@ -513,6 +521,8 @@  static struct init_keyword {
 	{ "bitwise",	NS_KEYWORD,	MOD_BITWISE,	.op = &attr_bitwise_op },
 	{ "__bitwise__",NS_KEYWORD,	MOD_BITWISE,	.op = &attr_bitwise_op },
 	{ "address_space",NS_KEYWORD,	.op = &address_space_op },
+	{ "format",	NS_KEYWORD,	.op = &attr_format },
+	{ "printf",	NS_KEYWORD,	.op = &attr_printf_op },
 	{ "mode",	NS_KEYWORD,	.op = &mode_op },
 	{ "context",	NS_KEYWORD,	.op = &context_op },
 	{ "designated_init",	NS_KEYWORD,	.op = &designated_init_op },
@@ -1051,6 +1061,67 @@  static struct token *attribute_address_space(struct token *token, struct symbol
 	return token;
 }
 
+static struct token *attribute_format(struct token *token, struct symbol *attr, struct decl_state *ctx)
+{
+	struct expression *args[3];
+	struct symbol *fmt_sym = NULL;
+	int argc = 0;
+
+	/* expecting format ( type, start, va_args at) */
+
+	token = expect(token, '(', "after format attribute");
+	while (!match_op(token, ')')) {
+		struct expression *expr = NULL;
+
+		if (argc == 0) {
+			if (token_type(token) == TOKEN_IDENT)
+				fmt_sym = lookup_keyword(token->ident, NS_KEYWORD);
+
+			if (!fmt_sym || !fmt_sym->op ||
+			    fmt_sym->op != &attr_printf_op) {
+				sparse_error(token->pos,
+					     "unknown format type '%s'\n",
+					     show_ident(token->ident));
+				fmt_sym = NULL;
+			}
+		}
+
+		token = conditional_expression(token, &expr);
+		if (!expr)
+			break;
+		if (argc < 3)
+			args[argc++] = expr;
+		if (!match_op(token, ','))
+			break;
+		token = token->next;
+	}
+
+	if (argc != 3 || !fmt_sym) {
+		warning(token->pos, "incorrect format attribute");
+	} else {
+		long long start, at;
+
+		start = get_expression_value(args[2]);
+		at = get_expression_value(args[1]);
+
+		if (start <= 0 || at <= 0 || (start == at && start > 0)) {
+			warning(token->pos, "bad format positions");
+		} else if (start < at) {
+			warning(token->pos, "format cannot be after va_args");
+		} else if (!ctx->ctype.base_type) {
+			warning(token->pos, "no base type to set");
+			ctx->ctype.printf_va_start = start;
+			ctx->ctype.printf_msg = at;
+		} else {
+			ctx->ctype.base_type->ctype.printf_va_start = start;
+			ctx->ctype.base_type->ctype.printf_msg = at;
+		}
+	}
+
+	token = expect(token, ')', "after format attribute");
+	return token;
+}
+
 static struct symbol *to_QI_mode(struct symbol *ctype)
 {
 	if (ctype->ctype.base_type != &int_type)
@@ -2102,7 +2173,6 @@  static struct statement *start_function(struct symbol *sym)
 
 	// Currently parsed symbol for __func__/__FUNCTION__/__PRETTY_FUNCTION__
 	current_fn = sym;
-
 	return stmt;
 }
 
diff --git a/symbol.h b/symbol.h
index 1f338f5..4cd8d61 100644
--- a/symbol.h
+++ b/symbol.h
@@ -86,6 +86,7 @@  enum keyword {
 	KW_SHORT	= 1 << 7,
 	KW_LONG		= 1 << 8,
 	KW_EXACT	= 1 << 9,
+	KW_FORMAT	= 1 << 10,
 };
 
 struct context {
@@ -103,6 +104,7 @@  struct ctype {
 	struct context_list *contexts;
 	unsigned int as;
 	struct symbol *base_type;
+	unsigned short printf_va_start, printf_msg;
 };
 
 struct decl_state {