diff mbox series

[3/3] initial variadic argument code

Message ID 20181026152632.30318-4-ben.dooks@codethink.co.uk (mailing list archive)
State Superseded, archived
Headers show
Series [1/3] initial parsing of __attribute__((format)) | expand

Commit Message

Ben Dooks Oct. 26, 2018, 3:26 p.m. UTC
---
 evaluate.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 157 insertions(+), 1 deletion(-)

Comments

Ben Dooks Oct. 26, 2018, 5:28 p.m. UTC | #1
On 26/10/18 16:26, Ben Dooks wrote:

> +static int decompose_format_printf(const char *string, struct symbol **result)
> +{
> +	int count = 0;
> +
> +	for (; string[0] != '\0'; string++) {
> +		if (string[0] == '%') {
> +			int len = 0;
> +			struct symbol *sym = NULL;
> +			if (string[1] == '%') {
> +				string++;
> +				continue;
> +			}
> +
> +			/* get rid of any formatting width bits */
> +			while (isdigit(string[1]) || string[1] == '+' || string[1] == '-')
> +			       string++;
> +
> +			switch (string[1]) {
> +			case 'C':
> +				/* TODO - same as lc */
> +				break;
> +			case 'c':
> +				/* TODO - can take l modifier */
> +				sym = &char_ctype;
> +				break;
> +			case 'f':
> +			case 'g':
> +				sym = &double_ctype;
> +				break;
> +			case 'h':
> +				/* TODO hh */
> +				len = -1;
> +				break;
> +			case 'j':	/* ignore intmax/uintmax for the moment */
> +				break;
> +			case 'L':
> +				sym = &ldouble_ctype;
> +				break;
> +			case 'l':
> +				len++;
> +				break;
> +			case 'p':
> +				/* TODO - deal with void * not being de-referenced in some cases*/
> +				sym = &ptr_ctype;
> +				break;
> +			case 'q':

For the case of %p, is MOD_NODEREF sufficient, or should we also have
a flag (either MOD_NOADDRSPACE) or some other way of specifying the
type is safe for any address space?

I added a ptr_ctype_noderef, but still get:

test.c:27:37: warning: incorrect type in argument 3 (different address 
spaces)
test.c:27:37:    expected string
test.c:27:37:    got void [noderef] <asn:1>*b
Ben Dooks Oct. 26, 2018, 5:32 p.m. UTC | #2
On 26/10/18 18:28, Ben Dooks wrote:
> On 26/10/18 16:26, Ben Dooks wrote:
> 
>> +static int decompose_format_printf(const char *string, struct symbol 
>> **result)
>> +{
>> +    int count = 0;
>> +
>> +    for (; string[0] != '\0'; string++) {
>> +        if (string[0] == '%') {
>> +            int len = 0;
>> +            struct symbol *sym = NULL;
>> +            if (string[1] == '%') {
>> +                string++;
>> +                continue;
>> +            }
>> +
>> +            /* get rid of any formatting width bits */
>> +            while (isdigit(string[1]) || string[1] == '+' || 
>> string[1] == '-')
>> +                   string++;
>> +
>> +            switch (string[1]) {
>> +            case 'C':
>> +                /* TODO - same as lc */
>> +                break;
>> +            case 'c':
>> +                /* TODO - can take l modifier */
>> +                sym = &char_ctype;
>> +                break;
>> +            case 'f':
>> +            case 'g':
>> +                sym = &double_ctype;
>> +                break;
>> +            case 'h':
>> +                /* TODO hh */
>> +                len = -1;
>> +                break;
>> +            case 'j':    /* ignore intmax/uintmax for the moment */
>> +                break;
>> +            case 'L':
>> +                sym = &ldouble_ctype;
>> +                break;
>> +            case 'l':
>> +                len++;
>> +                break;
>> +            case 'p':
>> +                /* TODO - deal with void * not being de-referenced in 
>> some cases*/
>> +                sym = &ptr_ctype;
>> +                break;
>> +            case 'q':
> 
> For the case of %p, is MOD_NODEREF sufficient, or should we also have
> a flag (either MOD_NOADDRSPACE) or some other way of specifying the
> type is safe for any address space?
> 
> I added a ptr_ctype_noderef, but still get:
> 
> test.c:27:37: warning: incorrect type in argument 3 (different address 
> spaces)
> test.c:27:37:    expected string
> test.c:27:37:    got void [noderef] <asn:1>*b

I meant:

test.c:28:37: warning: incorrect type in argument 3 (different address 
spaces)
test.c:28:37:    expected void [noderef] *
test.c:28:37:    got void [noderef] <asn:1>*b
Luc Van Oostenryck Oct. 26, 2018, 9:42 p.m. UTC | #3
On Fri, Oct 26, 2018 at 04:26:32PM +0100, Ben Dooks wrote:
> ---
>  evaluate.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 157 insertions(+), 1 deletion(-)
> 
> diff --git a/evaluate.c b/evaluate.c
> index b96696d..82ddf9f 100644
> --- a/evaluate.c
> +++ b/evaluate.c
> @@ -2243,11 +2243,154 @@ static struct symbol *evaluate_alignof(struct expression *expr)
>  	return size_t_ctype;
>  }
>  
> +static int decompose_format_printf(const char *string, struct symbol **result)
> +{
> +	int count = 0;
> +
> +	for (; string[0] != '\0'; string++) {
> +		if (string[0] == '%') {

I would prefer to write this like:
		struct symbol *sym = NULL;
		int len = 0;

		if (string[0] == '%')
			continue;
		...

to spare one indentation but it's not really important.

> +static int evaluate_format_printf(struct symbol *fn, struct expression *expr, struct symbol ***result)
> +{
> +	const char *fmt_string = NULL;
> +
> +	if (!expr)
> +		return -1;
> +	if (expr->string && expr->string->length)
> +		fmt_string = expr->string->data;
> +	if (!fmt_string) {
> +		struct symbol *sym = evaluate_expression(expr);
> +
> +		/* attempt to find initialiser for this */
> +		if (sym && sym->initializer && sym->initializer->string)
> +			fmt_string = sym->initializer->string->data;
> +	}
> +
> +	if (fmt_string) {
> +		struct symbol **syms = NULL;
> +		int count;
> +
> +		count = decompose_format_printf(fmt_string, NULL);
> +		if (count <= 0)
> +			return count;
> +
> +		syms = calloc(sizeof(struct symbol *), count);

Sparse make heavy use of pointer lists and I think it better/easier to use
them here too instead of parsing the string twice  and using calloc/free.

-- Luc
Luc Van Oostenryck Oct. 26, 2018, 10:03 p.m. UTC | #4
On Fri, Oct 26, 2018 at 06:32:05PM +0100, Ben Dooks wrote:
> On 26/10/18 18:28, Ben Dooks wrote:
> > On 26/10/18 16:26, Ben Dooks wrote:
> > 
> > > +            case 'p':
> > > +                /* TODO - deal with void * not being
> > > de-referenced in some cases*/
> > > +                sym = &ptr_ctype;
> > > +                break;
> > > +            case 'q':
> > 
> > For the case of %p, is MOD_NODEREF sufficient, or should we also have
> > a flag (either MOD_NOADDRSPACE) or some other way of specifying the
> > type is safe for any address space?
> > 
> > I added a ptr_ctype_noderef, but still get:
> > 
> > test.c:27:37: warning: incorrect type in argument 3 (different
> > address spaces)
> > test.c:27:37:    expected string
> > test.c:27:37:    got void [noderef] <asn:1>*b
> 
> I meant:
> 
> test.c:28:37: warning: incorrect type in argument 3 (different
> address spaces)
> test.c:28:37:    expected void [noderef] *
> test.c:28:37:    got void [noderef] <asn:1>*b

For the type checking, there is no such thing as a type safe for
any address space. You will probably need to special-case it,
For example defining an abstract/generic pointer type (gen_ptr_type)
and then doing something like:
		if (i >= fn->printf_va_start && i <= variadic_limit)
			target = variadic[i - fn->printf_va_start];
		else
			target = argtype;
		if (!target) {
			...
+		} else if (target == &gen_ptr_type) {
+				...
		} else if (!target->forced_arg){

-- Luc
Luc Van Oostenryck Oct. 26, 2018, 11:45 p.m. UTC | #5
On Fri, Oct 26, 2018 at 04:26:32PM +0100, Ben Dooks wrote:
> ---
>  evaluate.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 157 insertions(+), 1 deletion(-)
> 
> diff --git a/evaluate.c b/evaluate.c
> index b96696d..82ddf9f 100644
> --- a/evaluate.c
> +++ b/evaluate.c
> @@ -2243,11 +2243,154 @@ static struct symbol *evaluate_alignof(struct expression *expr)
>  	return size_t_ctype;
>  }
>  
> +static int decompose_format_printf(const char *string, struct symbol **result)
> +{
> +	int count = 0;
> +
> +	for (; string[0] != '\0'; string++) {
> +		if (string[0] == '%') {
> +			int len = 0;
> +			struct symbol *sym = NULL;
> +			if (string[1] == '%') {
> +				string++;
> +				continue;
> +			}
> +
> +			/* get rid of any formatting width bits */
> +			while (isdigit(string[1]) || string[1] == '+' || string[1] == '-')
> +			       string++;
> +
> +			switch (string[1]) {
> +			case 'C':
> +				/* TODO - same as lc */
> +				break;
> +			case 'c':
> +				/* TODO - can take l modifier */
> +				sym = &char_ctype;
> +				break;
> +			case 'f':
> +			case 'g':
> +				sym = &double_ctype;
> +				break;
> +			case 'h':
> +				/* TODO hh */
> +				len = -1;
> +				break;
> +			case 'j':	/* ignore intmax/uintmax for the moment */
> +				break;
> +			case 'L':
> +				sym = &ldouble_ctype;
> +				break;
> +			case 'l':
> +				len++;
> +				break;
> +			case 'p':
> +				/* TODO - deal with void * not being de-referenced in some cases*/
> +				sym = &ptr_ctype;
> +				break;
> +			case 'q':
> +				len = 2;
> +				break;
> +			case 's':
> +				sym = &string_ctype;
> +				break;
> +			case 'n':
> +				/* TODO - actually pointer to integer */
> +				sym = &ptr_ctype;
> +				break;
> +				/* note, d is out of alpha order */
> +			case 'd':
> +				switch (len) {
> +				case -1: sym = &short_ctype; break;
> +				case 0: sym = &int_ctype; break;
> +				case 1: sym = &long_ctype; break;
> +				case 2: sym = &llong_ctype; break;
> +				case 3: sym = &lllong_ctype; break;
> +				}
> +				break;
> +			case 'u':
> +				switch (len) {
> +				case -1: sym = &ushort_ctype; break;
> +				case 0: sym = &uint_ctype; break;
> +				case 1: sym = &ulong_ctype; break;
> +				case 2: sym = &ullong_ctype; break;
> +				case 3: sym = &ulllong_ctype; break;
> +				}
> +				break;
> +			case 'x':
> +			case 'X':
> +				switch (len) {
> +				case 0: sym = &uint_ctype; break;
> +				case 1: sym = &ulong_ctype; break;
> +				case 2: sym = &ullong_ctype; break;
> +				case 3: sym = &ulllong_ctype; break;
> +				}
> +				break;


This can be simplified into:
+			case 'd':
+			case 'u':
+			case 'x':
+			case 'X':
+				switch (len) {
+				case -1: sym = &short_ctype; break;
+				case 0: sym = &int_ctype; break;
+				case 1: sym = &long_ctype; break;
+				case 2: sym = &llong_ctype; break;
+				case 3: sym = &lllong_ctype; break;
+				}
+				break;

because it's irrelevant if the argument is unsigned or not.

-- Luc
Luc Van Oostenryck Oct. 26, 2018, 11:58 p.m. UTC | #6
On Fri, Oct 26, 2018 at 04:26:32PM +0100, Ben Dooks wrote:
> @@ -2259,7 +2402,18 @@ static int evaluate_arguments(struct symbol *fn, struct expression_list *head)
>  		if (!ctype)
>  			return 0;
>  
> -		target = argtype;
> +		if (i == fn->printf_msg) {
> +			int ret = evaluate_format_printf(fn, *p, &variadic);
> +			if (ret < 0)
> +				warning((*p)->pos, "cannot parse format");
> +			else if (ret > 0)
> +				variadic_limit = fn->printf_va_start + ret;
> +		}
> +
> +		if (i >= fn->printf_va_start && i <= variadic_limit)

There is an off-by-one error here. The test should be:
+		if (i >= fn->printf_va_start && i < variadic_limit)

But even better to replace this with a struct symbol_list.

-- Luc
diff mbox series

Patch

diff --git a/evaluate.c b/evaluate.c
index b96696d..82ddf9f 100644
--- a/evaluate.c
+++ b/evaluate.c
@@ -2243,11 +2243,154 @@  static struct symbol *evaluate_alignof(struct expression *expr)
 	return size_t_ctype;
 }
 
+static int decompose_format_printf(const char *string, struct symbol **result)
+{
+	int count = 0;
+
+	for (; string[0] != '\0'; string++) {
+		if (string[0] == '%') {
+			int len = 0;
+			struct symbol *sym = NULL;
+			if (string[1] == '%') {
+				string++;
+				continue;
+			}
+
+			/* get rid of any formatting width bits */
+			while (isdigit(string[1]) || string[1] == '+' || string[1] == '-')
+			       string++;
+
+			switch (string[1]) {
+			case 'C':
+				/* TODO - same as lc */
+				break;
+			case 'c':
+				/* TODO - can take l modifier */
+				sym = &char_ctype;
+				break;
+			case 'f':
+			case 'g':
+				sym = &double_ctype;
+				break;
+			case 'h':
+				/* TODO hh */
+				len = -1;
+				break;
+			case 'j':	/* ignore intmax/uintmax for the moment */
+				break;
+			case 'L':
+				sym = &ldouble_ctype;
+				break;
+			case 'l':
+				len++;
+				break;
+			case 'p':
+				/* TODO - deal with void * not being de-referenced in some cases*/
+				sym = &ptr_ctype;
+				break;
+			case 'q':
+				len = 2;
+				break;
+			case 's':
+				sym = &string_ctype;
+				break;
+			case 'n':
+				/* TODO - actually pointer to integer */
+				sym = &ptr_ctype;
+				break;
+				/* note, d is out of alpha order */
+			case 'd':
+				switch (len) {
+				case -1: sym = &short_ctype; break;
+				case 0: sym = &int_ctype; break;
+				case 1: sym = &long_ctype; break;
+				case 2: sym = &llong_ctype; break;
+				case 3: sym = &lllong_ctype; break;
+				}
+				break;
+			case 'u':
+				switch (len) {
+				case -1: sym = &ushort_ctype; break;
+				case 0: sym = &uint_ctype; break;
+				case 1: sym = &ulong_ctype; break;
+				case 2: sym = &ullong_ctype; break;
+				case 3: sym = &ulllong_ctype; break;
+				}
+				break;
+			case 'x':
+			case 'X':
+				switch (len) {
+				case 0: sym = &uint_ctype; break;
+				case 1: sym = &ulong_ctype; break;
+				case 2: sym = &ullong_ctype; break;
+				case 3: sym = &ulllong_ctype; break;
+				}
+				break;
+			case 'z':
+			case 'Z':
+				sym = &uint_ctype;	/* TODO */
+				break;
+			}
+
+			if (result && sym)
+				*result++ = sym;
+			if (sym)
+				count++;
+
+			while (string[0] != ' ' && string[0] != '\0')
+				string++;
+
+			string--;
+		}
+	}
+
+	return count;
+}
+
+
+static int evaluate_format_printf(struct symbol *fn, struct expression *expr, struct symbol ***result)
+{
+	const char *fmt_string = NULL;
+
+	if (!expr)
+		return -1;
+	if (expr->string && expr->string->length)
+		fmt_string = expr->string->data;
+	if (!fmt_string) {
+		struct symbol *sym = evaluate_expression(expr);
+
+		/* attempt to find initialiser for this */
+		if (sym && sym->initializer && sym->initializer->string)
+			fmt_string = sym->initializer->string->data;
+	}
+
+	if (fmt_string) {
+		struct symbol **syms = NULL;
+		int count;
+
+		count = decompose_format_printf(fmt_string, NULL);
+		if (count <= 0)
+			return count;
+
+		syms = calloc(sizeof(struct symbol *), count);
+		if (!syms)
+			return -1;
+
+		count = decompose_format_printf(fmt_string, syms);
+		*result = syms;
+		return count;
+	}
+
+	return -1;
+}
+
 static int evaluate_arguments(struct symbol *fn, struct expression_list *head)
 {
 	struct expression *expr;
 	struct symbol_list *argument_types = fn->arguments;
+	struct symbol **variadic = NULL;
 	struct symbol *argtype;
+	int variadic_limit = 0;
 	int i = 1;
 
 	PREPARE_PTR_LIST(argument_types, argtype);
@@ -2259,7 +2402,18 @@  static int evaluate_arguments(struct symbol *fn, struct expression_list *head)
 		if (!ctype)
 			return 0;
 
-		target = argtype;
+		if (i == fn->printf_msg) {
+			int ret = evaluate_format_printf(fn, *p, &variadic);
+			if (ret < 0)
+				warning((*p)->pos, "cannot parse format");
+			else if (ret > 0)
+				variadic_limit = fn->printf_va_start + ret;
+		}
+
+		if (i >= fn->printf_va_start && i <= variadic_limit)
+			target = variadic[i - fn->printf_va_start];
+		else
+			target = argtype;
 		if (!target) {
 			struct symbol *type;
 			int class = classify_type(ctype, &type);
@@ -2286,6 +2440,8 @@  static int evaluate_arguments(struct symbol *fn, struct expression_list *head)
 		NEXT_PTR_LIST(argtype);
 	} END_FOR_EACH_PTR(expr);
 	FINISH_PTR_LIST(argtype);
+
+	free(variadic);
 	return 1;
 }