diff mbox

[-tip,v14,03/12] kprobes: checks probe address is instruction boudary on x86

Message ID 20090813203428.31965.21939.stgit@localhost.localdomain (mailing list archive)
State New, archived
Headers show

Commit Message

Masami Hiramatsu Aug. 13, 2009, 8:34 p.m. UTC
Ensure safeness of inserting kprobes by checking whether the specified
address is at the first byte of a instruction on x86.
This is done by decoding probed function from its head to the probe point.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Jim Keniston <jkenisto@us.ibm.com>
Cc: K.Prasad <prasad@linux.vnet.ibm.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Przemysław Pawełczyk <przemyslaw@pawelczyk.it>
Cc: Roland McGrath <roland@redhat.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
---

 arch/x86/kernel/kprobes.c |   69 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 69 insertions(+), 0 deletions(-)

Comments

Frederic Weisbecker Aug. 18, 2009, 11:03 p.m. UTC | #1
On Thu, Aug 13, 2009 at 04:34:28PM -0400, Masami Hiramatsu wrote:
> Ensure safeness of inserting kprobes by checking whether the specified
> address is at the first byte of a instruction on x86.
> This is done by decoding probed function from its head to the probe point.
> 
> Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
> Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
> Cc: Avi Kivity <avi@redhat.com>
> Cc: Andi Kleen <ak@linux.intel.com>
> Cc: Christoph Hellwig <hch@infradead.org>
> Cc: Frank Ch. Eigler <fche@redhat.com>
> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: H. Peter Anvin <hpa@zytor.com>
> Cc: Ingo Molnar <mingo@elte.hu>
> Cc: Jason Baron <jbaron@redhat.com>
> Cc: Jim Keniston <jkenisto@us.ibm.com>
> Cc: K.Prasad <prasad@linux.vnet.ibm.com>
> Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
> Cc: Li Zefan <lizf@cn.fujitsu.com>
> Cc: Przemysław Pawełczyk <przemyslaw@pawelczyk.it>
> Cc: Roland McGrath <roland@redhat.com>
> Cc: Sam Ravnborg <sam@ravnborg.org>
> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Tom Zanussi <tzanussi@gmail.com>
> Cc: Vegard Nossum <vegard.nossum@gmail.com>
> ---
> 
>  arch/x86/kernel/kprobes.c |   69 +++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 69 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
> index b5b1848..80d493f 100644
> --- a/arch/x86/kernel/kprobes.c
> +++ b/arch/x86/kernel/kprobes.c
> @@ -48,6 +48,7 @@
>  #include <linux/preempt.h>
>  #include <linux/module.h>
>  #include <linux/kdebug.h>
> +#include <linux/kallsyms.h>
>  
>  #include <asm/cacheflush.h>
>  #include <asm/desc.h>
> @@ -55,6 +56,7 @@
>  #include <asm/uaccess.h>
>  #include <asm/alternative.h>
>  #include <asm/debugreg.h>
> +#include <asm/insn.h>
>  
>  void jprobe_return_end(void);
>  
> @@ -245,6 +247,71 @@ retry:
>  	}
>  }
>  
> +/* Recover the probed instruction at addr for further analysis. */
> +static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
> +{
> +	struct kprobe *kp;
> +	kp = get_kprobe((void *)addr);
> +	if (!kp)
> +		return -EINVAL;
> +
> +	/*
> +	 *  Basically, kp->ainsn.insn has an original instruction.
> +	 *  However, RIP-relative instruction can not do single-stepping
> +	 *  at different place, fix_riprel() tweaks the displacement of
> +	 *  that instruction. In that case, we can't recover the instruction
> +	 *  from the kp->ainsn.insn.
> +	 *
> +	 *  On the other hand, kp->opcode has a copy of the first byte of
> +	 *  the probed instruction, which is overwritten by int3. And
> +	 *  the instruction at kp->addr is not modified by kprobes except
> +	 *  for the first byte, we can recover the original instruction
> +	 *  from it and kp->opcode.
> +	 */
> +	memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
> +	buf[0] = kp->opcode;
> +	return 0;
> +}
> +
> +/* Dummy buffers for kallsyms_lookup */
> +static char __dummy_buf[KSYM_NAME_LEN];
> +
> +/* Check if paddr is at an instruction boundary */
> +static int __kprobes can_probe(unsigned long paddr)
> +{
> +	int ret;
> +	unsigned long addr, offset = 0;
> +	struct insn insn;
> +	kprobe_opcode_t buf[MAX_INSN_SIZE];
> +
> +	if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
> +		return 0;
> +
> +	/* Decode instructions */
> +	addr = paddr - offset;
> +	while (addr < paddr) {
> +		kernel_insn_init(&insn, (void *)addr);
> +		insn_get_opcode(&insn);
> +
> +		/* Check if the instruction has been modified. */
> +		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
> +			ret = recover_probed_instruction(buf, addr);



I'm confused about the reason of this recovering. Is it to remove
kprobes behind the current setting one in the current function?

If such cleanup is needed for whatever reason, I wonder what happens
to the corresponding kprobe structure, why isn't it using the arch_disarm_
helper to patch back?

(Questions that may prove my solid misunderstanding of the kprobes code ;-)

Frederic.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Masami Hiramatsu Aug. 18, 2009, 11:17 p.m. UTC | #2
Frederic Weisbecker wrote:
> On Thu, Aug 13, 2009 at 04:34:28PM -0400, Masami Hiramatsu wrote:
>> Ensure safeness of inserting kprobes by checking whether the specified
>> address is at the first byte of a instruction on x86.
>> This is done by decoding probed function from its head to the probe point.
>>
>> Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
>> Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
>> Cc: Avi Kivity <avi@redhat.com>
>> Cc: Andi Kleen <ak@linux.intel.com>
>> Cc: Christoph Hellwig <hch@infradead.org>
>> Cc: Frank Ch. Eigler <fche@redhat.com>
>> Cc: Frederic Weisbecker <fweisbec@gmail.com>
>> Cc: H. Peter Anvin <hpa@zytor.com>
>> Cc: Ingo Molnar <mingo@elte.hu>
>> Cc: Jason Baron <jbaron@redhat.com>
>> Cc: Jim Keniston <jkenisto@us.ibm.com>
>> Cc: K.Prasad <prasad@linux.vnet.ibm.com>
>> Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
>> Cc: Li Zefan <lizf@cn.fujitsu.com>
>> Cc: Przemysław Pawełczyk <przemyslaw@pawelczyk.it>
>> Cc: Roland McGrath <roland@redhat.com>
>> Cc: Sam Ravnborg <sam@ravnborg.org>
>> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
>> Cc: Steven Rostedt <rostedt@goodmis.org>
>> Cc: Tom Zanussi <tzanussi@gmail.com>
>> Cc: Vegard Nossum <vegard.nossum@gmail.com>
>> ---
>>
>>  arch/x86/kernel/kprobes.c |   69 +++++++++++++++++++++++++++++++++++++++++++++
>>  1 files changed, 69 insertions(+), 0 deletions(-)
>>
>> diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
>> index b5b1848..80d493f 100644
>> --- a/arch/x86/kernel/kprobes.c
>> +++ b/arch/x86/kernel/kprobes.c
>> @@ -48,6 +48,7 @@
>>  #include <linux/preempt.h>
>>  #include <linux/module.h>
>>  #include <linux/kdebug.h>
>> +#include <linux/kallsyms.h>
>>  
>>  #include <asm/cacheflush.h>
>>  #include <asm/desc.h>
>> @@ -55,6 +56,7 @@
>>  #include <asm/uaccess.h>
>>  #include <asm/alternative.h>
>>  #include <asm/debugreg.h>
>> +#include <asm/insn.h>
>>  
>>  void jprobe_return_end(void);
>>  
>> @@ -245,6 +247,71 @@ retry:
>>  	}
>>  }
>>  
>> +/* Recover the probed instruction at addr for further analysis. */
>> +static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
>> +{
>> +	struct kprobe *kp;
>> +	kp = get_kprobe((void *)addr);
>> +	if (!kp)
>> +		return -EINVAL;
>> +
>> +	/*
>> +	 *  Basically, kp->ainsn.insn has an original instruction.
>> +	 *  However, RIP-relative instruction can not do single-stepping
>> +	 *  at different place, fix_riprel() tweaks the displacement of
>> +	 *  that instruction. In that case, we can't recover the instruction
>> +	 *  from the kp->ainsn.insn.
>> +	 *
>> +	 *  On the other hand, kp->opcode has a copy of the first byte of
>> +	 *  the probed instruction, which is overwritten by int3. And
>> +	 *  the instruction at kp->addr is not modified by kprobes except
>> +	 *  for the first byte, we can recover the original instruction
>> +	 *  from it and kp->opcode.
>> +	 */
>> +	memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
>> +	buf[0] = kp->opcode;
>> +	return 0;
>> +}
>> +
>> +/* Dummy buffers for kallsyms_lookup */
>> +static char __dummy_buf[KSYM_NAME_LEN];
>> +
>> +/* Check if paddr is at an instruction boundary */
>> +static int __kprobes can_probe(unsigned long paddr)
>> +{
>> +	int ret;
>> +	unsigned long addr, offset = 0;
>> +	struct insn insn;
>> +	kprobe_opcode_t buf[MAX_INSN_SIZE];
>> +
>> +	if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
>> +		return 0;
>> +
>> +	/* Decode instructions */
>> +	addr = paddr - offset;
>> +	while (addr < paddr) {
>> +		kernel_insn_init(&insn, (void *)addr);
>> +		insn_get_opcode(&insn);
>> +
>> +		/* Check if the instruction has been modified. */
>> +		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
>> +			ret = recover_probed_instruction(buf, addr);
> 
> 
> 
> I'm confused about the reason of this recovering. Is it to remove
> kprobes behind the current setting one in the current function?

No, it recovers just an instruction which is probed by a kprobe,
because we need to know the first byte of this instruction for
decoding it.

Perhaps we'd better to have more generic interface (text_peek?)
for it because another subsystem (e.g. kgdb) may want to insert int3...

Thank you,
Frederic Weisbecker Aug. 18, 2009, 11:43 p.m. UTC | #3
On Tue, Aug 18, 2009 at 07:17:39PM -0400, Masami Hiramatsu wrote:
> Frederic Weisbecker wrote:
> >> +	while (addr < paddr) {
> >> +		kernel_insn_init(&insn, (void *)addr);
> >> +		insn_get_opcode(&insn);
> >> +
> >> +		/* Check if the instruction has been modified. */
> >> +		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
> >> +			ret = recover_probed_instruction(buf, addr);
> > 
> > 
> > 
> > I'm confused about the reason of this recovering. Is it to remove
> > kprobes behind the current setting one in the current function?
> 
> No, it recovers just an instruction which is probed by a kprobe,
> because we need to know the first byte of this instruction for
> decoding it.
> 
> Perhaps we'd better to have more generic interface (text_peek?)
> for it because another subsystem (e.g. kgdb) may want to insert int3...
> 
> Thank you,


Aah, I see now, it's to keep a sane check of the instructions
boundaries without int 3 artifacts in the middle.

But in that case, you should re-arm the breakpoint after your
check, right?

Or may be you could do the check without repatching?
May be by doing a copy of insn.opcode.bytes and replacing bytes[0]
with what a random kprobe has stolen?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Masami Hiramatsu Aug. 19, 2009, 12:19 a.m. UTC | #4
Frederic Weisbecker wrote:
> On Tue, Aug 18, 2009 at 07:17:39PM -0400, Masami Hiramatsu wrote:
>> Frederic Weisbecker wrote:
>>>> +	while (addr < paddr) {
>>>> +		kernel_insn_init(&insn, (void *)addr);
>>>> +		insn_get_opcode(&insn);
>>>> +
>>>> +		/* Check if the instruction has been modified. */
>>>> +		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
>>>> +			ret = recover_probed_instruction(buf, addr);
>>>
>>>
>>>
>>> I'm confused about the reason of this recovering. Is it to remove
>>> kprobes behind the current setting one in the current function?
>>
>> No, it recovers just an instruction which is probed by a kprobe,
>> because we need to know the first byte of this instruction for
>> decoding it.

Ah, sorry, it was not accurate. the function recovers an instruction
on the buffer(buf), not on the real kernel text. :)

>>
>> Perhaps we'd better to have more generic interface (text_peek?)
>> for it because another subsystem (e.g. kgdb) may want to insert int3...
>>
>> Thank you,
> 
> 
> Aah, I see now, it's to keep a sane check of the instructions
> boundaries without int 3 artifacts in the middle.
> 
> But in that case, you should re-arm the breakpoint after your
> check, right?
> 
> Or may be you could do the check without repatching?

Yes, it doesn't modify kernel text, just recover an original
instruction from kernel text and backup byte on a buffer.

> May be by doing a copy of insn.opcode.bytes and replacing bytes[0]
> with what a random kprobe has stolen?

Hm, no, this function is protected from other kprobes by kprobe_mutex.

Thank you,
Frederic Weisbecker Aug. 19, 2009, 12:46 a.m. UTC | #5
On Tue, Aug 18, 2009 at 08:19:33PM -0400, Masami Hiramatsu wrote:
> Frederic Weisbecker wrote:
> > On Tue, Aug 18, 2009 at 07:17:39PM -0400, Masami Hiramatsu wrote:
> >> Frederic Weisbecker wrote:
> >>>> +	while (addr < paddr) {
> >>>> +		kernel_insn_init(&insn, (void *)addr);
> >>>> +		insn_get_opcode(&insn);
> >>>> +
> >>>> +		/* Check if the instruction has been modified. */
> >>>> +		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
> >>>> +			ret = recover_probed_instruction(buf, addr);
> >>>
> >>>
> >>>
> >>> I'm confused about the reason of this recovering. Is it to remove
> >>> kprobes behind the current setting one in the current function?
> >>
> >> No, it recovers just an instruction which is probed by a kprobe,
> >> because we need to know the first byte of this instruction for
> >> decoding it.
> 
> Ah, sorry, it was not accurate. the function recovers an instruction
> on the buffer(buf), not on the real kernel text. :)



Ah ok. I'll just add a small comment about that then, and apply
it.


 
> >>
> >> Perhaps we'd better to have more generic interface (text_peek?)
> >> for it because another subsystem (e.g. kgdb) may want to insert int3...
> >>
> >> Thank you,
> > 
> > 
> > Aah, I see now, it's to keep a sane check of the instructions
> > boundaries without int 3 artifacts in the middle.
> > 
> > But in that case, you should re-arm the breakpoint after your
> > check, right?
> > 
> > Or may be you could do the check without repatching?
> 
> Yes, it doesn't modify kernel text, just recover an original
> instruction from kernel text and backup byte on a buffer.


Ok.


> > May be by doing a copy of insn.opcode.bytes and replacing bytes[0]
> > with what a random kprobe has stolen?
> 
> Hm, no, this function is protected from other kprobes by kprobe_mutex.
> 
> Thank you,



Right, thanks!



> -- 
> Masami Hiramatsu
> 
> Software Engineer
> Hitachi Computer Products (America), Inc.
> Software Solutions Division
> 
> e-mail: mhiramat@redhat.com
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index b5b1848..80d493f 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -48,6 +48,7 @@ 
 #include <linux/preempt.h>
 #include <linux/module.h>
 #include <linux/kdebug.h>
+#include <linux/kallsyms.h>
 
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
@@ -55,6 +56,7 @@ 
 #include <asm/uaccess.h>
 #include <asm/alternative.h>
 #include <asm/debugreg.h>
+#include <asm/insn.h>
 
 void jprobe_return_end(void);
 
@@ -245,6 +247,71 @@  retry:
 	}
 }
 
+/* Recover the probed instruction at addr for further analysis. */
+static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+{
+	struct kprobe *kp;
+	kp = get_kprobe((void *)addr);
+	if (!kp)
+		return -EINVAL;
+
+	/*
+	 *  Basically, kp->ainsn.insn has an original instruction.
+	 *  However, RIP-relative instruction can not do single-stepping
+	 *  at different place, fix_riprel() tweaks the displacement of
+	 *  that instruction. In that case, we can't recover the instruction
+	 *  from the kp->ainsn.insn.
+	 *
+	 *  On the other hand, kp->opcode has a copy of the first byte of
+	 *  the probed instruction, which is overwritten by int3. And
+	 *  the instruction at kp->addr is not modified by kprobes except
+	 *  for the first byte, we can recover the original instruction
+	 *  from it and kp->opcode.
+	 */
+	memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	buf[0] = kp->opcode;
+	return 0;
+}
+
+/* Dummy buffers for kallsyms_lookup */
+static char __dummy_buf[KSYM_NAME_LEN];
+
+/* Check if paddr is at an instruction boundary */
+static int __kprobes can_probe(unsigned long paddr)
+{
+	int ret;
+	unsigned long addr, offset = 0;
+	struct insn insn;
+	kprobe_opcode_t buf[MAX_INSN_SIZE];
+
+	if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
+		return 0;
+
+	/* Decode instructions */
+	addr = paddr - offset;
+	while (addr < paddr) {
+		kernel_insn_init(&insn, (void *)addr);
+		insn_get_opcode(&insn);
+
+		/* Check if the instruction has been modified. */
+		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
+			ret = recover_probed_instruction(buf, addr);
+			if (ret)
+				/*
+				 * Another debugging subsystem might insert
+				 * this breakpoint. In that case, we can't
+				 * recover it.
+				 */
+				return 0;
+			kernel_insn_init(&insn, buf);
+		}
+		insn_get_length(&insn);
+		addr += insn.length;
+	}
+
+	return (addr == paddr);
+}
+
 /*
  * Returns non-zero if opcode modifies the interrupt flag.
  */
@@ -360,6 +427,8 @@  static void __kprobes arch_copy_kprobe(struct kprobe *p)
 
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
+	if (!can_probe((unsigned long)p->addr))
+		return -EILSEQ;
 	/* insn: must be on special executable page on x86. */
 	p->ainsn.insn = get_insn_slot();
 	if (!p->ainsn.insn)