diff mbox series

[RFC,16/21] objtool: Add support for CONFIG_CFI_CLANG

Message ID 20220429203644.2868448-17-samitolvanen@google.com (mailing list archive)
State New, archived
Headers show
Series KCFI support | expand

Commit Message

Sami Tolvanen April 29, 2022, 8:36 p.m. UTC
With -fsanitize=kcfi, the compiler injects a type identifier before
each function. Teach objtool to recognize the identifier.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 scripts/Makefile.build                    |   3 +-
 scripts/link-vmlinux.sh                   |   3 +
 tools/objtool/arch/x86/include/arch/elf.h |   2 +
 tools/objtool/builtin-check.c             |   3 +-
 tools/objtool/check.c                     | 128 ++++++++++++++++++++--
 tools/objtool/elf.c                       |  13 +++
 tools/objtool/include/objtool/arch.h      |   1 +
 tools/objtool/include/objtool/builtin.h   |   2 +-
 tools/objtool/include/objtool/elf.h       |   2 +
 9 files changed, 145 insertions(+), 12 deletions(-)

Comments

Peter Zijlstra April 29, 2022, 11:30 p.m. UTC | #1
On Fri, Apr 29, 2022 at 01:36:39PM -0700, Sami Tolvanen wrote:

> +static void *kcfi_alloc_hash(unsigned long size)
> +{
> +	kcfi_bits = max(10, ilog2(size));
> +	kcfi_hash = mmap(NULL, sizeof(struct hlist_head) << kcfi_bits,
> +			PROT_READ|PROT_WRITE,
> +			MAP_PRIVATE|MAP_ANON, -1, 0);
> +	if (kcfi_hash == (void *)-1L) {
> +		WARN("mmap fail kcfi_hash");
> +		kcfi_hash = NULL;
> +	}  else if (stats) {
> +		printf("kcfi_bits: %d\n", kcfi_bits);
> +	}
> +
> +	return kcfi_hash;
> +}
> +
> +static void add_kcfi_type(struct kcfi_type *type)
> +{
> +	hlist_add_head(&type->hash,
> +		&kcfi_hash[hash_min(
> +			sec_offset_hash(type->sec, type->offset),
> +			kcfi_bits)]);

:se cino=(0:0

Also, I'm thinking you can unwrap some lines at least.

> +}
> +
> +static bool is_kcfi_typeid(struct elf *elf, struct instruction *insn)
> +{
> +	struct hlist_head *head;
> +	struct kcfi_type *type;
> +	struct reloc *reloc;
> +
> +	if (!kcfi)
> +		return false;
> +
> +	/* Compiler-generated annotation in .kcfi_types. */
> +	head = &kcfi_hash[hash_min(sec_offset_hash(insn->sec, insn->offset), kcfi_bits)];
> +
> +	hlist_for_each_entry(type, head, hash)
> +		if (type->sec == insn->sec && type->offset == insn->offset)
> +			return true;

missing { }

> +
> +	/* Manual annotation (in assembly code). */
> +	reloc = find_reloc_by_dest(elf, insn->sec, insn->offset);
> +
> +	if (reloc && !strncmp(reloc->sym->name, "__kcfi_typeid_", 14))
> +		return true;
> +
> +	return false;
> +}
> +
>  /*
>   * This checks to see if the given function is a "noreturn" function.
>   *
> @@ -388,13 +487,18 @@ static int decode_instructions(struct objtool_file *file)
>  			insn->sec = sec;
>  			insn->offset = offset;
>  
> -			ret = arch_decode_instruction(file, sec, offset,
> -						      sec->sh.sh_size - offset,
> -						      &insn->len, &insn->type,
> -						      &insn->immediate,
> -						      &insn->stack_ops);
> -			if (ret)
> -				goto err;
> +			if (is_kcfi_typeid(file->elf, insn)) {
> +				insn->type = INSN_KCFI_TYPEID;
> +				insn->len = KCFI_TYPEID_LEN;

Urgh, what does this do for decode speed? This is a hash-lookup for
every single instruction.

Is that kcfi location array sorted by the compiler? Because then you can
keep a running iterator and replace the whole lookup with a simple
equality comparison.

> +			} else {
> +				ret = arch_decode_instruction(file, sec, offset,
> +							      sec->sh.sh_size - offset,
> +							      &insn->len, &insn->type,
> +							      &insn->immediate,
> +							      &insn->stack_ops);
> +				if (ret)
> +					goto err;
> +			}
>  
>  			/*
>  			 * By default, "ud2" is a dead end unless otherwise
Sami Tolvanen April 30, 2022, 1 a.m. UTC | #2
On Fri, Apr 29, 2022 at 4:30 PM Peter Zijlstra <peterz@infradead.org> wrote:
> Urgh, what does this do for decode speed? This is a hash-lookup for
> every single instruction.

Two actually, since .kcfi_traps only contains compiler-emitted
locations and we also have to check for manual type annotations. I
haven't measured performance yet, but I also didn't notice a
significant impact here.

> Is that kcfi location array sorted by the compiler? Because then you can
> keep a running iterator and replace the whole lookup with a simple
> equality comparison.

The compiler generates a separate .kcfi_types section for each text
section and the entries are emitted in order, so this should be
doable.

Sami
diff mbox series

Patch

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 9717e6f6fb31..c850ac420b60 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -235,7 +235,8 @@  objtool_args =								\
 	$(if $(CONFIG_RETPOLINE), --retpoline)				\
 	$(if $(CONFIG_X86_SMAP), --uaccess)				\
 	$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount)		\
-	$(if $(CONFIG_SLS), --sls)
+	$(if $(CONFIG_SLS), --sls)					\
+	$(if $(CONFIG_CFI_CLANG), --kcfi)
 
 cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool_args) $@)
 cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd)
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 20f44504a644..d171f8507db2 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -152,6 +152,9 @@  objtool_link()
 		if is_enabled CONFIG_SLS; then
 			objtoolopt="${objtoolopt} --sls"
 		fi
+		if is_enabled CONFIG_CFI_CLANG; then
+			objtoolopt="${objtoolopt} --kcfi"
+		fi
 		info OBJTOOL ${1}
 		tools/objtool/objtool ${objtoolcmd} ${objtoolopt} ${1}
 	fi
diff --git a/tools/objtool/arch/x86/include/arch/elf.h b/tools/objtool/arch/x86/include/arch/elf.h
index 69cc4264b28a..8833d989eec7 100644
--- a/tools/objtool/arch/x86/include/arch/elf.h
+++ b/tools/objtool/arch/x86/include/arch/elf.h
@@ -3,4 +3,6 @@ 
 
 #define R_NONE R_X86_64_NONE
 
+#define KCFI_TYPEID_LEN	6
+
 #endif /* _OBJTOOL_ARCH_ELF */
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index fc6975ab8b06..8a662dcc21be 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -21,7 +21,7 @@ 
 
 bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
      lto, vmlinux, mcount, noinstr, backup, sls, dryrun,
-     ibt;
+     ibt, kcfi;
 
 static const char * const check_usage[] = {
 	"objtool check [<options>] file.o",
@@ -49,6 +49,7 @@  const struct option check_options[] = {
 	OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"),
 	OPT_BOOLEAN(0, "dry-run", &dryrun, "don't write the modifications"),
 	OPT_BOOLEAN(0, "ibt", &ibt, "validate ENDBR placement"),
+	OPT_BOOLEAN('k', "kcfi", &kcfi, "detect control-flow integrity type identifiers"),
 	OPT_END(),
 };
 
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index bd0c2c828940..e6bee2f2996a 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -27,6 +27,12 @@  struct alternative {
 	bool skip_orig;
 };
 
+struct kcfi_type {
+	struct section *sec;
+	unsigned long offset;
+	struct hlist_node hash;
+};
+
 static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache;
 
 static struct cfi_init_state initial_func_cfi;
@@ -143,6 +149,99 @@  static bool is_sibling_call(struct instruction *insn)
 	return (is_static_jump(insn) && insn->call_dest);
 }
 
+static int kcfi_bits;
+static struct hlist_head *kcfi_hash;
+
+static void *kcfi_alloc_hash(unsigned long size)
+{
+	kcfi_bits = max(10, ilog2(size));
+	kcfi_hash = mmap(NULL, sizeof(struct hlist_head) << kcfi_bits,
+			PROT_READ|PROT_WRITE,
+			MAP_PRIVATE|MAP_ANON, -1, 0);
+	if (kcfi_hash == (void *)-1L) {
+		WARN("mmap fail kcfi_hash");
+		kcfi_hash = NULL;
+	}  else if (stats) {
+		printf("kcfi_bits: %d\n", kcfi_bits);
+	}
+
+	return kcfi_hash;
+}
+
+static void add_kcfi_type(struct kcfi_type *type)
+{
+	hlist_add_head(&type->hash,
+		&kcfi_hash[hash_min(
+			sec_offset_hash(type->sec, type->offset),
+			kcfi_bits)]);
+}
+
+static bool add_kcfi_types(struct section *sec)
+{
+	struct reloc *reloc;
+
+	list_for_each_entry(reloc, &sec->reloc_list, list) {
+		struct kcfi_type *type;
+
+		if (reloc->sym->type != STT_SECTION) {
+			WARN("unexpected relocation symbol type in %s", sec->name);
+			return false;
+		}
+
+		type = malloc(sizeof(*type));
+		if (!type) {
+			perror("malloc");
+			return false;
+		}
+
+		type->sec = reloc->sym->sec;
+		type->offset = reloc->addend;
+
+		add_kcfi_type(type);
+	}
+
+	return true;
+}
+
+static int read_kcfi_types(struct objtool_file *file)
+{
+	if (!kcfi)
+		return 0;
+
+	if (!kcfi_alloc_hash(file->elf->text_size / 16))
+		return -1;
+
+	if (!for_each_section_by_name(file->elf, ".rela.kcfi_types", add_kcfi_types))
+		return -1;
+
+	return 0;
+}
+
+static bool is_kcfi_typeid(struct elf *elf, struct instruction *insn)
+{
+	struct hlist_head *head;
+	struct kcfi_type *type;
+	struct reloc *reloc;
+
+	if (!kcfi)
+		return false;
+
+	/* Compiler-generated annotation in .kcfi_types. */
+	head = &kcfi_hash[hash_min(sec_offset_hash(insn->sec, insn->offset), kcfi_bits)];
+
+	hlist_for_each_entry(type, head, hash)
+		if (type->sec == insn->sec && type->offset == insn->offset)
+			return true;
+
+	/* Manual annotation (in assembly code). */
+	reloc = find_reloc_by_dest(elf, insn->sec, insn->offset);
+
+	if (reloc && !strncmp(reloc->sym->name, "__kcfi_typeid_", 14))
+		return true;
+
+	return false;
+}
+
 /*
  * This checks to see if the given function is a "noreturn" function.
  *
@@ -388,13 +487,18 @@  static int decode_instructions(struct objtool_file *file)
 			insn->sec = sec;
 			insn->offset = offset;
 
-			ret = arch_decode_instruction(file, sec, offset,
-						      sec->sh.sh_size - offset,
-						      &insn->len, &insn->type,
-						      &insn->immediate,
-						      &insn->stack_ops);
-			if (ret)
-				goto err;
+			if (is_kcfi_typeid(file->elf, insn)) {
+				insn->type = INSN_KCFI_TYPEID;
+				insn->len = KCFI_TYPEID_LEN;
+			} else {
+				ret = arch_decode_instruction(file, sec, offset,
+							      sec->sh.sh_size - offset,
+							      &insn->len, &insn->type,
+							      &insn->immediate,
+							      &insn->stack_ops);
+				if (ret)
+					goto err;
+			}
 
 			/*
 			 * By default, "ud2" is a dead end unless otherwise
@@ -420,7 +524,8 @@  static int decode_instructions(struct objtool_file *file)
 			}
 
 			sym_for_each_insn(file, func, insn) {
-				insn->func = func;
+				if (insn->type != INSN_KCFI_TYPEID)
+					insn->func = func;
 				if (insn->type == INSN_ENDBR && list_empty(&insn->call_node)) {
 					if (insn->offset == insn->func->offset) {
 						list_add_tail(&insn->call_node, &file->endbr_list);
@@ -2219,6 +2324,10 @@  static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
 
+	ret = read_kcfi_types(file);
+	if (ret)
+		return ret;
+
 	ret = decode_instructions(file);
 	if (ret)
 		return ret;
@@ -3595,7 +3704,8 @@  static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 	int i;
 	struct instruction *prev_insn;
 
-	if (insn->ignore || insn->type == INSN_NOP || insn->type == INSN_TRAP)
+	if (insn->ignore || insn->type == INSN_NOP || insn->type == INSN_TRAP ||
+			insn->type == INSN_KCFI_TYPEID)
 		return true;
 
 	/*
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index d7b99a737496..c4e277d41fd2 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -120,6 +120,19 @@  struct section *find_section_by_name(const struct elf *elf, const char *name)
 	return NULL;
 }
 
+bool for_each_section_by_name(const struct elf *elf, const char *name,
+			      bool (*callback)(struct section *))
+{
+	struct section *sec;
+
+	elf_hash_for_each_possible(section_name, sec, name_hash, str_hash(name)) {
+		if (!strcmp(sec->name, name) && !callback(sec))
+			return false;
+	}
+
+	return true;
+}
+
 static struct section *find_section_by_index(struct elf *elf,
 					     unsigned int idx)
 {
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 9b19cc304195..3db5951e7aa9 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -28,6 +28,7 @@  enum insn_type {
 	INSN_CLD,
 	INSN_TRAP,
 	INSN_ENDBR,
+	INSN_KCFI_TYPEID,
 	INSN_OTHER,
 };
 
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index c39dbfaef6dc..68409070bca5 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -10,7 +10,7 @@ 
 extern const struct option check_options[];
 extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
 	    lto, vmlinux, mcount, noinstr, backup, sls, dryrun,
-	    ibt;
+	    ibt, kcfi;
 
 extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
 
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 22ba7e2b816e..7fd3462ce32a 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -148,6 +148,8 @@  int elf_write(struct elf *elf);
 void elf_close(struct elf *elf);
 
 struct section *find_section_by_name(const struct elf *elf, const char *name);
+bool for_each_section_by_name(const struct elf *elf, const char *name,
+			      bool (*callback)(struct section *));
 struct symbol *find_func_by_offset(struct section *sec, unsigned long offset);
 struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
 struct symbol *find_symbol_by_name(const struct elf *elf, const char *name);