From patchwork Mon Oct 29 15:39:51 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ben Dooks X-Patchwork-Id: 10659447 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E18DF17DF for ; Mon, 29 Oct 2018 15:39:58 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id DC76729AAE for ; Mon, 29 Oct 2018 15:39:58 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id D0B9B29AC8; Mon, 29 Oct 2018 15:39:58 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 4BCE029AC7 for ; Mon, 29 Oct 2018 15:39:58 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727503AbeJ3A3F (ORCPT ); Mon, 29 Oct 2018 20:29:05 -0400 Received: from imap1.codethink.co.uk ([176.9.8.82]:59237 "EHLO imap1.codethink.co.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727491AbeJ3A3F (ORCPT ); Mon, 29 Oct 2018 20:29:05 -0400 Received: from [148.252.241.226] (helo=rainbowdash) by imap1.codethink.co.uk with esmtpsa (Exim 4.84_2 #1 (Debian)) id 1gH9dy-0005vQ-0r; Mon, 29 Oct 2018 15:39:54 +0000 Received: from ben by rainbowdash with local (Exim 4.91) (envelope-from ) id 1gH9dx-0003eb-MR; Mon, 29 Oct 2018 15:39:53 +0000 From: Ben Dooks To: linux-sparse@vger.kernel.org Cc: Ben Dooks Subject: [PATCH 4/5] evaluate: check variadic argument types against formatting info Date: Mon, 29 Oct 2018 15:39:51 +0000 Message-Id: <20181029153952.13927-5-ben.dooks@codethink.co.uk> 
X-Mailer: git-send-email 2.19.1 In-Reply-To: <20181029153952.13927-1-ben.dooks@codethink.co.uk> References: <20181029153952.13927-1-ben.dooks@codethink.co.uk> MIME-Version: 1.0 Sender: linux-sparse-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-sparse@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP The variadic argument code did not check any of the variadic arguments as it did not previously know the possible type. Now we have the possible formatting information stored in the ctype, we can do some checks on the printf formatting types. Signed-off-by: Ben Dooks --- Fixes since v1: - Split out the format-string -> symbol code - Use symbol_list for the symbols from format parsing - Changed to follow the new parsing code and ctype use - Merged the unsigned-int/long types together Notes: - Is there a way of better doing the vararg list? - %p still generates an address-space mismatch - how do we deal with the kernel's attempt to make printk format all types? --- evaluate.c | 165 +++++++++++++++++++++++++++++++++++++++++++++++++++-- parse.c | 2 +- 2 files changed, 162 insertions(+), 5 deletions(-) diff --git a/evaluate.c b/evaluate.c index b96696d..2a98a9b 100644 --- a/evaluate.c +++ b/evaluate.c @@ -2243,23 +2243,180 @@ static struct symbol *evaluate_alignof(struct expression *expr) return size_t_ctype; } +struct printf_state { + int len; /* size of the argument (ie, %d, %ld, %lld, etc.) 
*/ +}; + +static struct symbol *evaluate_printf_symbol(const char *string, struct printf_state *state) +{ + struct symbol *sym = NULL; + + switch (string[0]) { + case 'C': + /* TODO - same as lc */ + break; + case 'c': + /* TODO - can take l modifier */ + sym = &char_ctype; + break; + case 'f': + case 'g': + sym = &double_ctype; + break; + case 'h': + /* TODO hh */ + state->len = -1; + break; + case 'j': /* ignore intmax/uintmax for the moment */ + break; + case 'L': + sym = &ldouble_ctype; + break; + case 'l': + state->len++; + break; + case 'p': + /* TODO - deal with void * not being de-referenced in some cases*/ + sym = &ptr_ctype_noderef; + break; + case 'q': + state->len = 2; + break; + case 's': + sym = &string_ctype; + break; + case 'n': + /* TODO - actually pointer to integer */ + sym = &ptr_ctype; + break; + /* note, d is out of alpha order */ + case 'd': + switch (state->len) { + case -1: sym = &short_ctype; break; + case 0: sym = &int_ctype; break; + case 1: sym = &long_ctype; break; + case 2: sym = &llong_ctype; break; + case 3: sym = &lllong_ctype; break; + } + break; + case 'u': + case 'x': + case 'X': + switch (state->len) { + case -1: sym = &ushort_ctype; break; + case 0: sym = &uint_ctype; break; + case 1: sym = &ulong_ctype; break; + case 2: sym = &ullong_ctype; break; + case 3: sym = &ulllong_ctype; break; + } + break; + case 'z': + case 'Z': + sym = &uint_ctype; /* TODO */ + break; + } + + return sym; +} + +static int decompose_format_printf(const char *string, struct symbol_list **result) +{ + struct printf_state state; + int count = 0; + + /* TODO - deal with explitic position arguments */ + for (; string[0] != '\0'; string++) { + struct symbol *sym; + + if (string[0] != '%') + continue; + if (string[1] == '%') { + string++; + continue; + } + + state.len = 0; + + /* get rid of any formatting width bits */ + while (isdigit(string[1]) || string[1] == '+' || string[1] == '-') + string++; + + sym = evaluate_printf_symbol(string+1, &state); + if 
(sym) { + add_symbol(result, sym); + count++; + } + + while (string[0] != ' ' && string[0] != '\0') + string++; + + string--; + } + + return count; +} + + +static int evaluate_format_printf(struct symbol *fn, struct expression *expr, struct symbol_list **result) +{ + const char *fmt_string = NULL; + int count = -1; + + if (!expr) + return -1; + if (expr->string && expr->string->length) + fmt_string = expr->string->data; + if (!fmt_string) { + struct symbol *sym = evaluate_expression(expr); + + /* attempt to find initialiser for this */ + if (sym && sym->initializer && sym->initializer->string) + fmt_string = sym->initializer->string->data; + } + + if (fmt_string) + count = decompose_format_printf(fmt_string, result); + return count; +} + static int evaluate_arguments(struct symbol *fn, struct expression_list *head) { struct expression *expr; struct symbol_list *argument_types = fn->arguments; + struct symbol_list *variadic_types = NULL; struct symbol *argtype; int i = 1; + /* if we have variadic type info, copy the original arguments + * first so that the format parsing can modify this local set */ + PREPARE_PTR_LIST(argument_types, argtype); FOR_EACH_PTR (head, expr) { struct expression **p = THIS_ADDRESS(expr); - struct symbol *ctype, *target; + struct symbol *ctype, *target = NULL; ctype = evaluate_expression(expr); if (!ctype) return 0; - target = argtype; + if (i == fn->ctype.printf_msg) { + int ret = evaluate_format_printf(fn, *p, &variadic_types); + if (ret < 0) + warning((*p)->pos, "cannot parse format"); + } + + if (i >= fn->ctype.printf_va_start) { + struct symbol *sym; + int arg = i - fn->ctype.printf_va_start; + + FOR_EACH_PTR(variadic_types, sym) { + if (arg == 0) + target = sym; + arg--; + } END_FOR_EACH_PTR(sym); + } else { + target = argtype; + } if (!target) { struct symbol *type; int class = classify_type(ctype, &type); @@ -2281,11 +2438,11 @@ static int evaluate_arguments(struct symbol *fn, struct expression_list *head) sprintf(where, "argument 
%d", i); compatible_argument_type(expr, target, p, where); } - - i++; NEXT_PTR_LIST(argtype); + i++; } END_FOR_EACH_PTR(expr); FINISH_PTR_LIST(argtype); + return 1; } diff --git a/parse.c b/parse.c index 9b0d40e..6b0a20b 100644 --- a/parse.c +++ b/parse.c @@ -1078,7 +1078,7 @@ static struct token *attribute_format(struct token *token, struct symbol *attr, fmt_sym = lookup_keyword(token->ident, NS_KEYWORD); if (!fmt_sym || !fmt_sym->op || - fmt_sym->op != &attr_printf_op) { + fmt_sym->op->type != KW_FORMAT) { sparse_error(token->pos, "unknown format type '%s'\n", show_ident(token->ident));