diff mbox series

[1/3] initial parsing of __attribute__((format))

Message ID 20181026152632.30318-2-ben.dooks@codethink.co.uk (mailing list archive)
State Superseded, archived
Headers show
Series [1/3] initial parsing of __attribute__((format)) | expand

Commit Message

Ben Dooks Oct. 26, 2018, 3:26 p.m. UTC
---
 parse.c  | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 symbol.h |  2 ++
 2 files changed, 76 insertions(+), 1 deletion(-)

Comments

Luc Van Oostenryck Oct. 26, 2018, 9:31 p.m. UTC | #1
On Fri, Oct 26, 2018 at 04:26:30PM +0100, Ben Dooks wrote:
> ---
>  parse.c  | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  symbol.h |  2 ++
>  2 files changed, 76 insertions(+), 1 deletion(-)
> 
> diff --git a/parse.c b/parse.c
> index 02a55a7..bb0545c 100644
> --- a/parse.c
> +++ b/parse.c
> @@ -1051,6 +1061,69 @@ static struct token *attribute_address_space(struct token *token, struct symbol
>  	return token;
>  }
>  
> +static struct token *attribute_format(struct token *token, struct symbol *attr, struct decl_state *ctx)
> +{
> +	struct expression *args[3];
> +	struct symbol *fmt_sym = NULL;
> +	int argc = 0;
> +
> +	/* expecting format ( type, start, va_args at) */
> +
> +	token = expect(token, '(', "after format attribute");
> +	while (!match_op(token, ')')) {
> +		struct expression *expr = NULL;
> +
> +		if (argc == 0) {
> +			if (token_type(token) == TOKEN_IDENT)
> +				fmt_sym = lookup_keyword(token->ident, NS_KEYWORD);
> +
> +			if (!fmt_sym || !fmt_sym->op ||
> +			    fmt_sym->op != &attr_printf_op) {

Better to check for op->type == KW_FORMAT so it can later be easily extended.

> +				sparse_error(token->pos,
> +					     "unknown format type '%s'\n",
> +					     show_ident(token->ident));
> +				fmt_sym = NULL;
> +			}
> +		}
> +
> +		token = conditional_expression(token, &expr);
> +		if (!expr)
> +			break;
> +		if (argc < 3)
> +			args[argc++] = expr;
> +		if (!match_op(token, ','))
> +			break;
> +		token = token->next;
> +	}
> +
> +	if (argc != 3 || !fmt_sym)
> +		warning(token->pos,
> +			"expected format type and start/position values");
> +	else {
> +		struct symbol *base_type = ctx->ctype.base_type;
> +		long long start, at;
> +
> +		start = get_expression_value(args[2]);
> +		at = get_expression_value(args[1]);
> +
> +		if (start <= 0 || at <= 0)
> +			warning(token->pos, "bad format positions");
> +		else if (!base_type)
> +			sparse_error(token->pos, "no base context for format");
> +		else if (base_type->type != SYM_FN)
> +			warning(token->pos, "attribute format can only be used on functions");
> +		else if (!base_type->variadic)
> +			warning(token->pos, "attribute format used on non variadic function");
> +		else {
> +			base_type->printf_va_start = start;
> +			base_type->printf_msg = at;
> +		}

Just a stylistic nit and as such not really imprtant but I would prefer
in the next version that chained ifs either:
* all of them take only a single line and then doesn't take braces
* otherwise that they all take the braces
This is the same as done in the kernel.

> +	}
> +
> +	token = expect(token, ')', "after format attribute");
> +	return token;
> +}
> +
>  static struct symbol *to_QI_mode(struct symbol *ctype)
>  {
>  	if (ctype->ctype.base_type != &int_type)
> diff --git a/symbol.h b/symbol.h
> index 3274496..5f1b85d 100644
> --- a/symbol.h
> +++ b/symbol.h
> @@ -86,6 +86,7 @@ enum keyword {
>  	KW_SHORT	= 1 << 7,
>  	KW_LONG		= 1 << 8,
>  	KW_EXACT	= 1 << 9,
> +	KW_FORMAT	= 1 << 10,
>  };

If not using KW_FORMAT in the test here above, it's not really used and needed..

>  struct context {
> @@ -185,6 +186,7 @@ struct symbol {
>  			struct entrypoint *ep;
>  			long long value;		/* Initial value */
>  			struct symbol *definition;
> +			long long	printf_va_start, printf_msg;

This is annoying. This struct is the most used one and is already
quite large and thus uses a significant amount of the allocated memory
which represent a non-negligeable amount of the processing time.
There is several options:
* a long long is certainly not needed for these, 32 bits 16 or even 8
  should be enough
* they could be 'unionized' with some of the other fields
* it could be moved elsewhere ...
More fundamentally these two fields should not belong to struct symbol
but to struct ctype (for example next to 'as' make thme each 8 bit witdh).

Thanks,
-- Luc
Luc Van Oostenryck Oct. 26, 2018, 11:13 p.m. UTC | #2
On Fri, Oct 26, 2018 at 04:26:30PM +0100, Ben Dooks wrote:
> ---
>  parse.c  | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  symbol.h |  2 ++
>  2 files changed, 76 insertions(+), 1 deletion(-)
> 
> @@ -1051,6 +1061,69 @@ static struct token *attribute_address_space(struct token *token, struct symbol
>  	return token;
>  }
>  
> +static struct token *attribute_format(struct token *token, struct symbol *attr, struct decl_state *ctx)
> +{
> +	struct expression *args[3];
> +	struct symbol *fmt_sym = NULL;
> +	int argc = 0;
> +
> +	/* expecting format ( type, start, va_args at) */
> +
> +	token = expect(token, '(', "after format attribute");
> +	while (!match_op(token, ')')) {
> +		struct expression *expr = NULL;
> +
> +		if (argc == 0) {
> +			if (token_type(token) == TOKEN_IDENT)
> +				fmt_sym = lookup_keyword(token->ident, NS_KEYWORD);
> +
> +			if (!fmt_sym || !fmt_sym->op ||
> +			    fmt_sym->op != &attr_printf_op) {
> +				sparse_error(token->pos,
> +					     "unknown format type '%s'\n",
> +					     show_ident(token->ident));
> +				fmt_sym = NULL;
> +			}
> +		}
> +
> +		token = conditional_expression(token, &expr);
> +		if (!expr)
> +			break;
> +		if (argc < 3)
> +			args[argc++] = expr;
> +		if (!match_op(token, ','))
> +			break;
> +		token = token->next;
> +	}
> +
> +	if (argc != 3 || !fmt_sym)
> +		warning(token->pos,
> +			"expected format type and start/position values");
> +	else {
> +		struct symbol *base_type = ctx->ctype.base_type;
> +		long long start, at;
> +
> +		start = get_expression_value(args[2]);
> +		at = get_expression_value(args[1]);
> +
> +		if (start <= 0 || at <= 0)
> +			warning(token->pos, "bad format positions");
> +		else if (!base_type)
> +			sparse_error(token->pos, "no base context for format");
> +		else if (base_type->type != SYM_FN)
> +			warning(token->pos, "attribute format can only be used on functions");
> +		else if (!base_type->variadic)
> +			warning(token->pos, "attribute format used on non variadic function");
> +		else {
> +			base_type->printf_va_start = start;
> +			base_type->printf_msg = at;
> +		}

Another reason why printf_va_start & printf_msg need to move (to ctx->ctype) is
that the attribute can be placed before the function. For example:
	extern __attribute__((format(printf, 1, 2))) void printk(const char *, ...);
is legit but gives here the error "no base context for format".

-- Luc
Ben Dooks Oct. 29, 2018, 9:57 a.m. UTC | #3
On 27/10/18 00:13, Luc Van Oostenryck wrote:
> On Fri, Oct 26, 2018 at 04:26:30PM +0100, Ben Dooks wrote:
>> ---
>>   parse.c  | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>   symbol.h |  2 ++
>>   2 files changed, 76 insertions(+), 1 deletion(-)
>>

> 
> Another reason why printf_va_start & printf_msg need to move (to ctx->ctype) is
> that the attribute can be placed before the function. For example:
> 	extern __attribute__((format(printf, 1, 2))) void printk(const char *, ...);
> is legit but gives here the error "no base context for format".
> 
> -- Luc

Ok, thanks for the review.
I will try and get a new series out this week.
diff mbox series

Patch

diff --git a/parse.c b/parse.c
index 02a55a7..bb0545c 100644
--- a/parse.c
+++ b/parse.c
@@ -84,7 +84,7 @@  static attr_t
 	attribute_address_space, attribute_context,
 	attribute_designated_init,
 	attribute_transparent_union, ignore_attribute,
-	attribute_mode, attribute_force;
+	attribute_mode, attribute_force, attribute_format;
 
 typedef struct symbol *to_mode_t(struct symbol *);
 
@@ -353,6 +353,10 @@  static struct symbol_op attr_force_op = {
 	.attribute = attribute_force,
 };
 
+static struct symbol_op attr_format = {
+	.attribute = attribute_format,
+};
+
 static struct symbol_op address_space_op = {
 	.attribute = attribute_address_space,
 };
@@ -407,6 +411,10 @@  static struct symbol_op mode_word_op = {
 	.to_mode = to_word_mode
 };
 
+static struct symbol_op attr_printf_op = {
+	.type	= KW_FORMAT,
+};
+
 /* Using NS_TYPEDEF will also make the keyword a reserved one */
 static struct init_keyword {
 	const char *name;
@@ -513,6 +521,8 @@  static struct init_keyword {
 	{ "bitwise",	NS_KEYWORD,	MOD_BITWISE,	.op = &attr_bitwise_op },
 	{ "__bitwise__",NS_KEYWORD,	MOD_BITWISE,	.op = &attr_bitwise_op },
 	{ "address_space",NS_KEYWORD,	.op = &address_space_op },
+	{ "format",	NS_KEYWORD,	.op = &attr_format },
+	{ "printf",	NS_KEYWORD,	.op = &attr_printf_op },
 	{ "mode",	NS_KEYWORD,	.op = &mode_op },
 	{ "context",	NS_KEYWORD,	.op = &context_op },
 	{ "designated_init",	NS_KEYWORD,	.op = &designated_init_op },
@@ -1051,6 +1061,69 @@  static struct token *attribute_address_space(struct token *token, struct symbol
 	return token;
 }
 
+static struct token *attribute_format(struct token *token, struct symbol *attr, struct decl_state *ctx)
+{
+	struct expression *args[3];
+	struct symbol *fmt_sym = NULL;
+	int argc = 0;
+
+	/* expecting format ( type, start, va_args at) */
+
+	token = expect(token, '(', "after format attribute");
+	while (!match_op(token, ')')) {
+		struct expression *expr = NULL;
+
+		if (argc == 0) {
+			if (token_type(token) == TOKEN_IDENT)
+				fmt_sym = lookup_keyword(token->ident, NS_KEYWORD);
+
+			if (!fmt_sym || !fmt_sym->op ||
+			    fmt_sym->op != &attr_printf_op) {
+				sparse_error(token->pos,
+					     "unknown format type '%s'\n",
+					     show_ident(token->ident));
+				fmt_sym = NULL;
+			}
+		}
+
+		token = conditional_expression(token, &expr);
+		if (!expr)
+			break;
+		if (argc < 3)
+			args[argc++] = expr;
+		if (!match_op(token, ','))
+			break;
+		token = token->next;
+	}
+
+	if (argc != 3 || !fmt_sym)
+		warning(token->pos,
+			"expected format type and start/position values");
+	else {
+		struct symbol *base_type = ctx->ctype.base_type;
+		long long start, at;
+
+		start = get_expression_value(args[2]);
+		at = get_expression_value(args[1]);
+
+		if (start <= 0 || at <= 0)
+			warning(token->pos, "bad format positions");
+		else if (!base_type)
+			sparse_error(token->pos, "no base context for format");
+		else if (base_type->type != SYM_FN)
+			warning(token->pos, "attribute format can only be used on functions");
+		else if (!base_type->variadic)
+			warning(token->pos, "attribute format used on non variadic function");
+		else {
+			base_type->printf_va_start = start;
+			base_type->printf_msg = at;
+		}
+	}
+
+	token = expect(token, ')', "after format attribute");
+	return token;
+}
+
 static struct symbol *to_QI_mode(struct symbol *ctype)
 {
 	if (ctype->ctype.base_type != &int_type)
diff --git a/symbol.h b/symbol.h
index 3274496..5f1b85d 100644
--- a/symbol.h
+++ b/symbol.h
@@ -86,6 +86,7 @@  enum keyword {
 	KW_SHORT	= 1 << 7,
 	KW_LONG		= 1 << 8,
 	KW_EXACT	= 1 << 9,
+	KW_FORMAT	= 1 << 10,
 };
 
 struct context {
@@ -185,6 +186,7 @@  struct symbol {
 			struct entrypoint *ep;
 			long long value;		/* Initial value */
 			struct symbol *definition;
+			long long	printf_va_start, printf_msg;
 		};
 	};
 	union /* backend */ {