@@ -517,6 +517,24 @@ config GCC_PLUGIN_RANDSTRUCT_PERFORMANCE
in structures. This reduces the performance hit of RANDSTRUCT
at the cost of weakened randomization.
+config GCC_PLUGIN_FORMAT_TEMPLATE
+ bool "Enable format_template attribute"
+ depends on GCC_PLUGINS
+ help
+ This plugin implements a format_template attribute which can
+ be attached to struct members which are supposed to hold a
+ (printf) format string. This allows the compiler to check
+ that (a) any string statically assigned to such a struct
+ member has format specifiers compatible with those in the
+ template and (b) when such a struct member is used as the
+ format argument to a printf function, use the template in
+ lieu of a string literal to do type checking of the variadic
+ arguments.
+
+ Even without using the plugin, attaching the format_template
+ attribute can be beneficial, since it serves as
+ documentation.
+
config HAVE_CC_STACKPROTECTOR
bool
help
@@ -625,4 +625,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
(_________p1); \
})
+#ifdef HAVE_ATTRIBUTE_FORMAT_TEMPLATE
+#define __format_template(x) __attribute__((__format_template__(x)))
+#else
+#define __format_template(x)
+#endif
+
#endif /* __LINUX_COMPILER_H */
@@ -35,6 +35,8 @@ ifdef CONFIG_GCC_PLUGINS
gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT) += -DRANDSTRUCT_PLUGIN
gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT_PERFORMANCE) += -fplugin-arg-randomize_layout_plugin-performance-mode
+ gcc-plugin-$(CONFIG_GCC_PLUGIN_FORMAT_TEMPLATE) += format_template_plugin.so
+
GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y))
export PLUGINCC GCC_PLUGINS_CFLAGS GCC_PLUGIN GCC_PLUGIN_SUBDIR
new file mode 100644
@@ -0,0 +1,331 @@
+#include <string.h>
+#include <assert.h>
+
+#include "gcc-common.h"
+#include "c-family/c-pragma.h"
+
+int plugin_is_GPL_compatible;
+
+static struct plugin_info format_template_plugin_info = {
+ .version = "20151209",
+ .help = "format_template_plugin\n",
+};
+
+static tree handle_format_template_attribute(tree *node, tree name, tree args, int __unused flags, bool *no_add_attrs)
+{
+ tree tmpl, orig_attr;
+
+ *no_add_attrs = true;
+ switch (TREE_CODE(*node)) {
+ case FIELD_DECL:
+ break;
+ default:
+ error("%qE attribute only applies to struct members", name);
+ return NULL_TREE;
+ }
+
+ tmpl = TREE_VALUE(args);
+ if (TREE_CODE(tmpl) != STRING_CST) {
+ error("%qE parameter of the %qE attribute is not a string constant", args, name);
+ return NULL_TREE;
+ }
+
+ orig_attr = lookup_attribute("format_template", DECL_ATTRIBUTES(*node));
+ if (orig_attr) {
+ error("%qE attribute applied twice", name);
+ return NULL_TREE;
+ }
+ else
+ *no_add_attrs = false;
+
+ return NULL_TREE;
+}
+
+static struct attribute_spec format_template_attr = {
+ .name = "format_template",
+ .min_length = 1,
+ .max_length = 1,
+ .decl_required = true,
+ .type_required = false,
+ .function_type_required = false,
+ .handler = handle_format_template_attribute,
+#if BUILDING_GCC_VERSION >= 4007
+ .affects_type_identity = false
+#endif
+};
+
+static void register_attributes(void __unused *event_data, void __unused *data)
+{
+ register_attribute(&format_template_attr);
+}
+
+static void define_feature_macro(void __unused *event_data, void __unused *data)
+{
+ cpp_define(parse_in, "HAVE_ATTRIBUTE_FORMAT_TEMPLATE");
+}
+
+enum {
+ QUAL_NONE,
+ QUAL_SHORT, /* h */
+ QUAL_BYTE, /* hh, == QUAL_SHORT+1*/
+ QUAL_LONG, /* l */
+ QUAL_LONGLONG, /* ll, == QUAL_LONG+1 */
+ QUAL_MAX, /* j */
+ QUAL_SIZE, /* z */
+ QUAL_PTRDIFF, /* t */
+};
+#define FW_P_NONE (-1)
+#define FW_P_ARG (-2)
+
+struct spec_iter {
+ const char *spec;
+ int len;
+
+ int field_width;
+ int precision;
+ int qual;
+ char type;
+
+};
+
+static inline void
+spec_iter_init(struct spec_iter *spec, const char *s)
+{
+ spec->spec = s;
+ spec->len = 0;
+}
+
+static void get_fw_p(const char **c, int *dst, int prec)
+{
+ *dst = FW_P_NONE;
+ if (prec) {
+ if (**c != '.')
+ return;
+ ++(*c);
+ *dst = 0;
+ }
+ if (**c == '*') {
+ ++(*c);
+ *dst = FW_P_ARG;
+ return;
+ }
+ if (!ISDIGIT(**c))
+ return;
+ *dst = **c - '0';
+ ++(*c);
+ while (ISDIGIT(**c)) {
+ /* should do if (*dst > 10000) warn("insane explicit field width/precision"); */
+ *dst *= 10;
+ *dst += **c - '0';
+ ++(*c);
+ }
+}
+
+static int spec_next(struct spec_iter *spec)
+{
+ const char *c;
+ int slen = 0;
+
+ spec->spec += spec->len;
+again:
+ c = strchrnul(spec->spec, '%');
+ slen += c - spec->spec;
+ if (!c[0]) {
+ spec->spec = NULL;
+ return slen;
+ }
+ assert(c[0] == '%');
+ if (c[1] == '%') {
+ slen++;
+ spec->spec = c+2;
+ goto again;
+ }
+
+ spec->spec = c;
+ ++c;
+ /* skip flags */
+ while (strchr("#0- +", *c))
+ ++c;
+
+ get_fw_p(&c, &spec->field_width, 0);
+ get_fw_p(&c, &spec->precision, 1);
+
+ spec->qual = QUAL_NONE;
+ switch (*c) {
+ case 'h': spec->qual = QUAL_SHORT; break;
+ case 'l': spec->qual = QUAL_LONG; break;
+#if 0 /* The kernel doesn't grok the j qualifier */
+ case 'j': spec->qual = QUAL_MAX; break;
+#endif
+ case 'z': spec->qual = QUAL_SIZE; break;
+ case 't': spec->qual = QUAL_PTRDIFF; break;
+ }
+ if (spec->qual) {
+ ++c;
+ if (*c == c[-1] && (*c == 'h' || *c == 'l')) {
+ spec->qual++;
+ ++c;
+ }
+ }
+
+ spec->type = *c++;
+ spec->len = c - spec->spec;
+
+ switch (spec->type) {
+ case 'd':
+ case 'u':
+ case 'x':
+ case 'X':
+ case 'o':
+ case 'c':
+ case 's':
+ break;
+ /*
+ * Disallowing %p is the safe and sane thing to do, given the
+ * different interpretations based on following alphanumerics.
+ */
+ case 'p':
+ /*
+ * Why are there two with the same meaning? %i is the lesser
+ * used one and should just die.
+ */
+ case 'i':
+ error("unsupported specifier '%.*s' in template or initializer", spec->len, spec->spec);
+ default:
+ error("invalid specifier '%.*s' in template or initializer", spec->len, spec->spec);
+ }
+
+ return slen;
+}
+
+static bool specs_compatible(const struct spec_iter *a, const struct spec_iter *b)
+{
+ if (a->qual != b->qual)
+ return false;
+ if (a->field_width != b->field_width)
+ return false;
+ if (a->precision != b->precision)
+ return false;
+ if (a->type != b->type)
+ return false;
+ return true;
+}
+
+static void check_literal(tree attr, const char *str)
+{
+ const char *pattern = TREE_STRING_POINTER(TREE_VALUE(TREE_VALUE(attr)));
+ struct spec_iter sp, ss;
+ int i;
+
+ spec_iter_init(&sp, pattern);
+ spec_iter_init(&ss, str);
+
+ /*
+ * Walk over the pattern and string in lockstep.
+ */
+ for (i = 1; ; ++i) {
+ spec_next(&sp);
+ spec_next(&ss);
+ /*
+ * It's ok for the template to have more specifiers
+ * than the actual string. But issue warning(s)
+ * anyway, conditional on -Wformat-extra-args.
+ */
+ if (ss.spec == NULL) {
+ while (sp.spec != NULL) {
+ warning(OPT_Wformat_extra_args,
+ "format template '%s' contains extra specifier '%.*s' compared to initializer string '%s'",
+ pattern, sp.len, sp.spec, str);
+ spec_next(&sp);
+ }
+ return;
+ }
+ /*
+ * It's absolutely not ok for the actual string to
+ * have more specifiers than the template.
+ */
+ if (!sp.spec) {
+ do {
+ error("initializer string '%s' contains extra format specifier '%.*s' compared to format template '%s'",
+ str, ss.len, ss.spec, pattern);
+ spec_next(&ss);
+ } while (ss.spec != NULL);
+ return;
+ }
+ if (!specs_compatible(&ss, &sp)) {
+ error("specifier %d in '%s' ('%.*s') incompatible with format template '%s'",
+ i, str, ss.len, ss.spec, pattern);
+ }
+ }
+}
+
+static void check_declaration(void *event_data, void *data __unused)
+{
+ tree decl = (tree)event_data;
+ tree ini, type;
+ unsigned idx;
+ tree field, value;
+
+ switch (TREE_CODE(decl)) {
+ case VAR_DECL:
+ break;
+ default:
+ return;
+ }
+
+ ini = DECL_INITIAL(decl);
+ if (!ini)
+ return;
+
+ type = TREE_TYPE(decl);
+ if (TREE_CODE(type) != RECORD_TYPE)
+ return;
+
+ if (TREE_CODE(ini) != CONSTRUCTOR) {
+ // warning(0, "weird, initializer is not a CONSTRUCTOR, tree_code=%d", TREE_CODE(ini));
+ return;
+ }
+
+ FOR_EACH_CONSTRUCTOR_ELT(CONSTRUCTOR_ELTS(ini), idx, field, value) {
+ tree attr;
+
+ /* if (TREE_CODE(value) != STRING_CST) */
+ /* continue; */
+
+ attr = lookup_attribute("format_template", DECL_ATTRIBUTES(field));
+ if (!attr)
+ continue;
+
+ /*
+ * Hm, apparently the string literal is hidden behind
+ * a NOP_EXPR and a ADDR_EXPR.
+ */
+ STRIP_NOPS(value);
+ if (TREE_CODE(value) == ADDR_EXPR)
+ value = TREE_OPERAND(value, 0);
+
+ if (TREE_CODE(value) != STRING_CST)
+ continue;
+
+ check_literal(attr, TREE_STRING_POINTER(value));
+
+ }
+
+}
+
+int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version)
+{
+ const char *const plugin_name = plugin_info->base_name;
+
+ if (!plugin_default_version_check(version, &gcc_version)) {
+ error(G_("incompatible gcc/plugin versions"));
+ return 1;
+ }
+
+ register_callback(plugin_name, PLUGIN_INFO, NULL, &format_template_plugin_info);
+ register_callback(plugin_name, PLUGIN_START_UNIT, &define_feature_macro, NULL);
+ register_callback(plugin_name, PLUGIN_FINISH_DECL, &check_declaration, NULL);
+ register_callback(plugin_name, PLUGIN_ATTRIBUTES, ®ister_attributes, NULL);
+
+ return 0;
+}
Most format strings in the kernel are string literals, so the compiler and other static analyzers can do type checking. This plugin covers a few of the remaining cases, by introducing a format_template attribute. Consider struct usb_class_driver. Its member 'name' is used as a format string in usb_register_dev(), and that use obviously expects that the format string contains a single "%d" (or maybe %u). So the idea is that we simply attach __format_template("%d") to the declaration of the name member of struct usb_class_driver. We can then check that any static initialization of that member is with a string literal with the same set of specifiers. This is what the plugin currently does. There are a number of things it also could/should do: - Check run-time assignments of literals to a __format_template annotated struct member. - Use this at call sites of *printf functions, where the format string argument is a __format_template annotated struct member - that is, use the template in lieu of a string literal. For now, this is not implemented - mostly because I'm lazy and don't want to write my own format checking code (again), and I suppose there should be an internal gcc function I could (ab)use to say "check this variadic function call, but use _this_ as format string". - It should also be possible to attach it to function parameters - e.g. the namefmt parameter to kthread_create_on_cpu should have __format_template("%u"); its only caller is __smpboot_create_thread which passes struct smp_hotplug_thread->thread_comm, which in turn should also have that attribute. Combined with the above, this would check the entire chain from initializer to sprintf(). While strictly speaking "%*s" and "%d %s" both expect (int, const char*), they're morally distinct, so I don't want to treat them as equivalent. If this is ever a problem, I think one should let the attribute take an optional flag argument, which could then control how strict or lax the checking should be. I'm not sure how much this affects compilation time, but there's not really any point in building with this all the time - it should suffice that the various build bots do it once in a while. Even without the plugin, the __format_template(...) in headers serve as concise documentation. Applying this attribute to smp_hotplug_thread::thread_comm and modifying kernel/watchdog.c slightly, an example of the error message produced for violations is: kernel/watchdog.c:528:1: error: initializer string 'watchdog/%u %d' contains extra format specifier '%d' compared to format template 'foobar/%u' Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk> --- arch/Kconfig | 18 ++ include/linux/compiler.h | 6 + scripts/Makefile.gcc-plugins | 2 + scripts/gcc-plugins/format_template_plugin.c | 331 +++++++++++++++++++++++++++ 4 files changed, 357 insertions(+) create mode 100644 scripts/gcc-plugins/format_template_plugin.c