diff mbox

[-tip,v5,2/7] kprobes: checks probe address is instruction boudary on x86

Message ID 20090509004847.5505.37957.stgit@localhost.localdomain (mailing list archive)
State New, archived
Headers show

Commit Message

Masami Hiramatsu May 9, 2009, 12:48 a.m. UTC
Ensure safeness of inserting kprobes by checking whether the specified
address is at the first byte of a instruction on x86.
This is done by decoding probed function from its head to the probe point.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Jim Keniston <jkenisto@us.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
---

 arch/x86/kernel/kprobes.c |   54 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 54 insertions(+), 0 deletions(-)

Comments

Steven Rostedt May 11, 2009, 2:20 p.m. UTC | #1
On Fri, 8 May 2009, Masami Hiramatsu wrote:

> Ensure safeness of inserting kprobes by checking whether the specified
> address is at the first byte of a instruction on x86.
> This is done by decoding probed function from its head to the probe point.
> 
> Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
> Cc: Jim Keniston <jkenisto@us.ibm.com>
> Cc: Ingo Molnar <mingo@elte.hu>
> ---
> 
>  arch/x86/kernel/kprobes.c |   54 +++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 54 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
> index 7b5169d..3d5e85f 100644
> --- a/arch/x86/kernel/kprobes.c
> +++ b/arch/x86/kernel/kprobes.c
> @@ -48,12 +48,14 @@
>  #include <linux/preempt.h>
>  #include <linux/module.h>
>  #include <linux/kdebug.h>
> +#include <linux/kallsyms.h>
>  
>  #include <asm/cacheflush.h>
>  #include <asm/desc.h>
>  #include <asm/pgtable.h>
>  #include <asm/uaccess.h>
>  #include <asm/alternative.h>
> +#include <asm/insn.h>
>  
>  void jprobe_return_end(void);
>  
> @@ -244,6 +246,56 @@ retry:
>  	}
>  }
>  
> +/* Recover the probed instruction at addr for further analysis. */
> +static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
> +{
> +	struct kprobe *kp;
> +	kp = get_kprobe((void *)addr);
> +	if (!kp)
> +		return -EINVAL;
> +

I'm just doing a casual scan of the patch set.

> +	/*
> +	 * Don't use p->ainsn.insn, which could be modified -- e.g.,

This comment talks about "p", what's that? It's not used in this function.

> +	 * by fix_riprel().
> +	 */
> +	memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
> +	buf[0] = kp->opcode;

Why is it OK to copy addr to "buf" and then rewrite the first character of 
buf?  Does it have something to do with the above "p"?

I don't mean to be critical here, but I've been doing "Mother Day" 
activities all weekend and for some reason that was also the best time for 
everyone to Cc me on patches. I'm way behind in my email, and it would be 
nice if the comments described why things that "look" wrong are not.


> +	return 0;
> +}
> +
> +/* Dummy buffers for kallsyms_lookup */
> +static char __dummy_buf[KSYM_NAME_LEN];
> +
> +/* Check if paddr is at an instruction boundary */
> +static int __kprobes can_probe(unsigned long paddr)
> +{
> +	int ret;
> +	unsigned long addr, offset = 0;
> +	struct insn insn;
> +	kprobe_opcode_t buf[MAX_INSN_SIZE];
> +
> +	/* Lookup symbol including addr */

The above comment is very close to a "add one to i" for i++ type of 
comment.

> +	if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
> +		return 0;
> +
> +	/* Decode instructions */
> +	addr = paddr - offset;
> +	while (addr < paddr) {
> +		insn_init_kernel(&insn, (void *)addr);
> +		insn_get_opcode(&insn);
> +		if (OPCODE1(&insn) == BREAKPOINT_INSTRUCTION) {
> +			ret = recover_probed_instruction(buf, addr);

Oh, the above puts back the original op code. That is why it is OK?

I'd comment that a little bit more. Just so that reviewers have an easier 
idea of what is happening.

> +			if (ret)
> +				return 0;
> +			insn_init_kernel(&insn, buf);

insn_init_kernel? Is that like a text poke or something?

> +		}
> +		insn_get_length(&insn);
> +		addr += insn.length;
> +	}
> +
> +	return (addr == paddr);
> +}
> +
>  /*
>   * Returns non-zero if opcode modifies the interrupt flag.
>   */
> @@ -359,6 +411,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
>  
>  int __kprobes arch_prepare_kprobe(struct kprobe *p)
>  {
> +	if (!can_probe((unsigned long)p->addr))
> +		return -EILSEQ;
>  	/* insn: must be on special executable page on x86. */
>  	p->ainsn.insn = get_insn_slot();

Oh look, I found the phantom "p"!

-- Steve

>  	if (!p->ainsn.insn)
> 
> 
> -- 
> Masami Hiramatsu
> 
> Software Engineer
> Hitachi Computer Products (America) Inc.
> Software Solutions Division
> 
> e-mail: mhiramat@redhat.com
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Masami Hiramatsu May 11, 2009, 3:01 p.m. UTC | #2
Steven Rostedt wrote:
> On Fri, 8 May 2009, Masami Hiramatsu wrote:
> 
>> Ensure safeness of inserting kprobes by checking whether the specified
>> address is at the first byte of a instruction on x86.
>> This is done by decoding probed function from its head to the probe point.
>>
>> Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
>> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
>> Cc: Jim Keniston <jkenisto@us.ibm.com>
>> Cc: Ingo Molnar <mingo@elte.hu>
>> ---
>>
>>  arch/x86/kernel/kprobes.c |   54 +++++++++++++++++++++++++++++++++++++++++++++
>>  1 files changed, 54 insertions(+), 0 deletions(-)
>>
>> diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
>> index 7b5169d..3d5e85f 100644
>> --- a/arch/x86/kernel/kprobes.c
>> +++ b/arch/x86/kernel/kprobes.c
>> @@ -48,12 +48,14 @@
>>  #include <linux/preempt.h>
>>  #include <linux/module.h>
>>  #include <linux/kdebug.h>
>> +#include <linux/kallsyms.h>
>>  
>>  #include <asm/cacheflush.h>
>>  #include <asm/desc.h>
>>  #include <asm/pgtable.h>
>>  #include <asm/uaccess.h>
>>  #include <asm/alternative.h>
>> +#include <asm/insn.h>
>>  
>>  void jprobe_return_end(void);
>>  
>> @@ -244,6 +246,56 @@ retry:
>>  	}
>>  }
>>  
>> +/* Recover the probed instruction at addr for further analysis. */
>> +static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
>> +{
>> +	struct kprobe *kp;
>> +	kp = get_kprobe((void *)addr);
>> +	if (!kp)
>> +		return -EINVAL;
>> +
> 
> I'm just doing a casual scan of the patch set.

Thank you!

> 
>> +	/*
>> +	 * Don't use p->ainsn.insn, which could be modified -- e.g.,
> 
> This comment talks about "p", what's that? It's not used in this function.

oops, this should be kp.

> 
>> +	 * by fix_riprel().
>> +	 */
>> +	memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
>> +	buf[0] = kp->opcode;
> 
> Why is it OK to copy addr to "buf" and then rewrite the first character of 
> buf?  Does it have something to do with the above "p"?

Yes, each kprobe copied probed instruction to kp->ainsn.insn,
which is an executable buffer for single stepping.
So, basically, kp->ainsn.insn has an original instruction.
However, RIP-relative instruction can not do single-stepping
at different place, fix_riprel() tweaks the displacement of
that instruction. In that case, we can't recover the instruction
from the kp->ainsn.insn.

On the other hand, kp->opcode has a copy of the first byte of
the probed instruction, which is overwritten by int3. And
the instruction at kp->addr is not modified by kprobes except
for the first byte, we can recover the original instruction
from it and kp->opcode.

> I don't mean to be critical here, but I've been doing "Mother Day" 
> activities all weekend and for some reason that was also the best time for 
> everyone to Cc me on patches. I'm way behind in my email, and it would be 
> nice if the comments described why things that "look" wrong are not.
> 
> 
>> +	return 0;
>> +}
>> +
>> +/* Dummy buffers for kallsyms_lookup */
>> +static char __dummy_buf[KSYM_NAME_LEN];
>> +
>> +/* Check if paddr is at an instruction boundary */
>> +static int __kprobes can_probe(unsigned long paddr)
>> +{
>> +	int ret;
>> +	unsigned long addr, offset = 0;
>> +	struct insn insn;
>> +	kprobe_opcode_t buf[MAX_INSN_SIZE];
>> +
>> +	/* Lookup symbol including addr */
> 
> The above comment is very close to a "add one to i" for i++ type of 
> comment.

Agreed.

> 
>> +	if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
>> +		return 0;
>> +
>> +	/* Decode instructions */
>> +	addr = paddr - offset;
>> +	while (addr < paddr) {
>> +		insn_init_kernel(&insn, (void *)addr);
>> +		insn_get_opcode(&insn);
>> +		if (OPCODE1(&insn) == BREAKPOINT_INSTRUCTION) {
>> +			ret = recover_probed_instruction(buf, addr);
> 
> Oh, the above puts back the original op code. That is why it is OK?

Oops, no. I have to use get_kprobe() instead. Thanks!

> 
> I'd comment that a little bit more. Just so that reviewers have an easier 
> idea of what is happening.
> 
>> +			if (ret)
>> +				return 0;
>> +			insn_init_kernel(&insn, buf);
> 
> insn_init_kernel? Is that like a text poke or something?

it's a wrapper of insn_init() which initialize struct insn.

Thank you,

>> +		}
>> +		insn_get_length(&insn);
>> +		addr += insn.length;
>> +	}
>> +
>> +	return (addr == paddr);
>> +}
>> +
>>  /*
>>   * Returns non-zero if opcode modifies the interrupt flag.
>>   */
>> @@ -359,6 +411,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
>>  
>>  int __kprobes arch_prepare_kprobe(struct kprobe *p)
>>  {
>> +	if (!can_probe((unsigned long)p->addr))
>> +		return -EILSEQ;
>>  	/* insn: must be on special executable page on x86. */
>>  	p->ainsn.insn = get_insn_slot();
> 
> Oh look, I found the phantom "p"!
> 
> -- Steve
> 
>>  	if (!p->ainsn.insn)
>>
>>
>> -- 
>> Masami Hiramatsu
>>
>> Software Engineer
>> Hitachi Computer Products (America) Inc.
>> Software Solutions Division
>>
>> e-mail: mhiramat@redhat.com
>>
Masami Hiramatsu May 11, 2009, 3:14 p.m. UTC | #3
Masami Hiramatsu wrote:
>>> +	if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
>>> +		return 0;
>>> +
>>> +	/* Decode instructions */
>>> +	addr = paddr - offset;
>>> +	while (addr < paddr) {
>>> +		insn_init_kernel(&insn, (void *)addr);
>>> +		insn_get_opcode(&insn);
>>> +		if (OPCODE1(&insn) == BREAKPOINT_INSTRUCTION) {
>>> +			ret = recover_probed_instruction(buf, addr);
>> Oh, the above puts back the original op code. That is why it is OK?
> 
> Oops, no. I have to use get_kprobe() instead. Thanks!

Ah, I forgot another possibility. There might be another subsystem,
like kgdb, will put their break point on the kernel.
In that case, decoder will decode the instruction is a break point
instruction and the first opcode is int3. So, this part is correct.
In the future, we need to add a generic recover_instruction() code
for those text modification subsystems.

Thank you,
Steven Rostedt May 11, 2009, 3:22 p.m. UTC | #4
On Mon, 11 May 2009, Masami Hiramatsu wrote:
> 
> > 
> >> +	 * by fix_riprel().
> >> +	 */
> >> +	memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
> >> +	buf[0] = kp->opcode;
> > 
> > Why is it OK to copy addr to "buf" and then rewrite the first character of 
> > buf?  Does it have something to do with the above "p"?
> 
> Yes, each kprobe copied probed instruction to kp->ainsn.insn,
> which is an executable buffer for single stepping.
> So, basically, kp->ainsn.insn has an original instruction.
> However, RIP-relative instruction can not do single-stepping
> at different place, fix_riprel() tweaks the displacement of
> that instruction. In that case, we can't recover the instruction
> from the kp->ainsn.insn.
> 
> On the other hand, kp->opcode has a copy of the first byte of
> the probed instruction, which is overwritten by int3. And
> the instruction at kp->addr is not modified by kprobes except
> for the first byte, we can recover the original instruction
> from it and kp->opcode.

For code that is awkward, complex or non-trivial, don't be afraid to put 
in a paragraph explaining the code. The above explanation should be a 
comment in the code. Otherwise people like me would just look at it and 
say "huh?".

Note, I'm a bit cranky this morning, so I hope I don't offend anyone.

-- Steve
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Masami Hiramatsu May 11, 2009, 6:21 p.m. UTC | #5
Steven Rostedt wrote:
> On Mon, 11 May 2009, Masami Hiramatsu wrote:
>>>> +	 * by fix_riprel().
>>>> +	 */
>>>> +	memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
>>>> +	buf[0] = kp->opcode;
>>> Why is it OK to copy addr to "buf" and then rewrite the first character of 
>>> buf?  Does it have something to do with the above "p"?
>> Yes, each kprobe copied probed instruction to kp->ainsn.insn,
>> which is an executable buffer for single stepping.
>> So, basically, kp->ainsn.insn has an original instruction.
>> However, RIP-relative instruction can not do single-stepping
>> at different place, fix_riprel() tweaks the displacement of
>> that instruction. In that case, we can't recover the instruction
>> from the kp->ainsn.insn.
>>
>> On the other hand, kp->opcode has a copy of the first byte of
>> the probed instruction, which is overwritten by int3. And
>> the instruction at kp->addr is not modified by kprobes except
>> for the first byte, we can recover the original instruction
>> from it and kp->opcode.
> 
> For code that is awkward, complex or non-trivial, don't be afraid to put 
> in a paragraph explaining the code. The above explanation should be a 
> comment in the code. Otherwise people like me would just look at it and 
> say "huh?".
> 
> Note, I'm a bit cranky this morning, so I hope I don't offend anyone.

No, that's very helpful review for me. Thanks :-)
diff mbox

Patch

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7b5169d..3d5e85f 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -48,12 +48,14 @@ 
 #include <linux/preempt.h>
 #include <linux/module.h>
 #include <linux/kdebug.h>
+#include <linux/kallsyms.h>
 
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 #include <asm/alternative.h>
+#include <asm/insn.h>
 
 void jprobe_return_end(void);
 
@@ -244,6 +246,56 @@  retry:
 	}
 }
 
+/* Recover the probed instruction at addr for further analysis. */
+static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+{
+	struct kprobe *kp;
+	kp = get_kprobe((void *)addr);
+	if (!kp)
+		return -EINVAL;
+
+	/*
+	 * Don't use p->ainsn.insn, which could be modified -- e.g.,
+	 * by fix_riprel().
+	 */
+	memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	buf[0] = kp->opcode;
+	return 0;
+}
+
+/* Dummy buffers for kallsyms_lookup */
+static char __dummy_buf[KSYM_NAME_LEN];
+
+/* Check if paddr is at an instruction boundary */
+static int __kprobes can_probe(unsigned long paddr)
+{
+	int ret;
+	unsigned long addr, offset = 0;
+	struct insn insn;
+	kprobe_opcode_t buf[MAX_INSN_SIZE];
+
+	/* Lookup symbol including addr */
+	if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
+		return 0;
+
+	/* Decode instructions */
+	addr = paddr - offset;
+	while (addr < paddr) {
+		insn_init_kernel(&insn, (void *)addr);
+		insn_get_opcode(&insn);
+		if (OPCODE1(&insn) == BREAKPOINT_INSTRUCTION) {
+			ret = recover_probed_instruction(buf, addr);
+			if (ret)
+				return 0;
+			insn_init_kernel(&insn, buf);
+		}
+		insn_get_length(&insn);
+		addr += insn.length;
+	}
+
+	return (addr == paddr);
+}
+
 /*
  * Returns non-zero if opcode modifies the interrupt flag.
  */
@@ -359,6 +411,8 @@  static void __kprobes arch_copy_kprobe(struct kprobe *p)
 
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
+	if (!can_probe((unsigned long)p->addr))
+		return -EILSEQ;
 	/* insn: must be on special executable page on x86. */
 	p->ainsn.insn = get_insn_slot();
 	if (!p->ainsn.insn)