
[01/22] ARM: add mechanism for late code patching

Message ID 1343775898-28345-2-git-send-email-cyril@ti.com (mailing list archive)
State New, archived

Commit Message

Cyril Chemparathy July 31, 2012, 11:04 p.m. UTC
The original phys_to_virt/virt_to_phys patching implementation relied on early
patching prior to MMU initialization.  On PAE systems running out of >4G
address space, this would have entailed an additional round of patching after
switching over to the high address space.

The approach implemented here conceptually extends the original PHYS_OFFSET
patching implementation with the introduction of "early" patch stubs.  Early
patch code is required to be functional out of the box, even before the patch
is applied.  This is implemented by inserting functional (but inefficient)
load code into the .patch.code init section.  Having functional code out of
the box then allows us to defer the init time patch application until later
in the init sequence.

In addition to fitting better with our need for physical address-space
switch-over, this implementation should be somewhat more extensible by virtue
of its more readable (and hackable) C implementation.  This should prove
useful for other similar init time specialization needs, especially in light
of our multi-platform kernel initiative.

This code has been boot tested in both ARM and Thumb-2 modes on an ARMv7
(Cortex-A8) device.

Note: the obtuse use of stringified symbols in patch_stub() and
early_patch_stub() is intentional.  Theoretically this should have been
accomplished with formal operands passed into the asm block, but this requires
the use of the 'c' modifier for instantiating the long (e.g. .long %c0).
However, the 'c' modifier has been found to ICE certain versions of GCC, and
therefore we resort to stringified symbols here.

Signed-off-by: Cyril Chemparathy <cyril@ti.com>
---
 arch/arm/include/asm/patch.h  |  123 +++++++++++++++++++++++++++++
 arch/arm/kernel/module.c      |    4 +
 arch/arm/kernel/setup.c       |  175 +++++++++++++++++++++++++++++++++++++++++
 arch/arm/kernel/vmlinux.lds.S |   10 +++
 4 files changed, 312 insertions(+)
 create mode 100644 arch/arm/include/asm/patch.h
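
As an illustration of how these stubs are meant to be used, a call site
built on the early_patch_imm8() helper below might look like this (a
sketch only; "example_offset" is a made-up variable, not part of this
series):

	static unsigned long example_offset;

	static inline unsigned long example_phys_to_virt(unsigned long phys)
	{
		unsigned long virt;

		/* Before patch_kernel() runs, this executes out-of-line
		 * load code emitted into .patch.code; afterwards, the
		 * branch at the call site has been rewritten into a
		 * single "sub virt, phys, #imm8" instruction. */
		early_patch_imm8(phys, virt, "sub", example_offset);
		return virt;
	}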

Comments

Nicolas Pitre Aug. 4, 2012, 5:38 a.m. UTC | #1
On Tue, 31 Jul 2012, Cyril Chemparathy wrote:

> The original phys_to_virt/virt_to_phys patching implementation relied on early
> patching prior to MMU initialization.  On PAE systems running out of >4G
> address space, this would have entailed an additional round of patching after
> switching over to the high address space.
> 
> The approach implemented here conceptually extends the original PHYS_OFFSET
> patching implementation with the introduction of "early" patch stubs.  Early
> patch code is required to be functional out of the box, even before the patch
> is applied.  This is implemented by inserting functional (but inefficient)
> load code into the .patch.code init section.  Having functional code out of
> the box then allows us to defer the init time patch application until later
> in the init sequence.
> 
> In addition to fitting better with our need for physical address-space
> switch-over, this implementation should be somewhat more extensible by virtue
> of its more readable (and hackable) C implementation.  This should prove
> useful for other similar init time specialization needs, especially in light
> of our multi-platform kernel initiative.
> 
> This code has been boot tested in both ARM and Thumb-2 modes on an ARMv7
> (Cortex-A8) device.
> 
> Note: the obtuse use of stringified symbols in patch_stub() and
> early_patch_stub() is intentional.  Theoretically this should have been
> accomplished with formal operands passed into the asm block, but this requires
> the use of the 'c' modifier for instantiating the long (e.g. .long %c0).
> However, the 'c' modifier has been found to ICE certain versions of GCC, and
> therefore we resort to stringified symbols here.
> 
> Signed-off-by: Cyril Chemparathy <cyril@ti.com>

This looks very nice.  Comments below.

> ---
>  arch/arm/include/asm/patch.h  |  123 +++++++++++++++++++++++++++++

Please find a better name for this file. "patch" is way too generic and 
commonly referring to something different. "runtime-patching" or similar 
would be more descriptive.

>  arch/arm/kernel/module.c      |    4 +
>  arch/arm/kernel/setup.c       |  175 +++++++++++++++++++++++++++++++++++++++++

This is complex enough to warrant a separate source file.  Please move 
those additions out of setup.c.  Given a good name for the header file 
above, the C file could share the same name.

> new file mode 100644
> index 0000000..a89749f
> --- /dev/null
> +++ b/arch/arm/include/asm/patch.h
> @@ -0,0 +1,123 @@
> +/*
> + *  arch/arm/include/asm/patch.h
> + *
> + *  Copyright (C) 2012, Texas Instruments
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + *  Note: this file should not be included by non-asm/.h files
> + */
> +#ifndef __ASM_ARM_PATCH_H
> +#define __ASM_ARM_PATCH_H
> +
> +#include <linux/stringify.h>
> +
> +#ifndef __ASSEMBLY__
> +
> extern unsigned __patch_table_begin, __patch_table_end;

You could use "extern void __patch_table_begin" so those symbols don't 
get any type that could be misused by mistake, while you still can take 
their addresses.

> +
> +struct patch_info {
> +	u32	 type;
> +	u32	 size;

Given the possibly large number of table entries, some effort at making 
those entries as compact as possible should be considered. For instance, 
the type and size fields could be u8's and the insn_end pointer replaced 
with another size expressed as a u8.  By placing all the u8's together 
they would occupy a single word by themselves.  The assembly stub would 
only need a .align statement to reflect the C structure's padding.
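
A sketch of the packed layout being described (field names illustrative):

	struct patch_info {
		u8	 type;
		u8	 size;		/* size of the whole entry */
		u8	 insn_size;	/* replaces the insn_end pointer */
		u8	 __pad;		/* the u8's share a single word */
		void	*insn_start;
		u32	 patch_data[0];
	};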

[...]

Did you verify with some test program that your patching routines do 
produce the same opcodes as the assembled equivalent for all possible 
shift values?  Especially for Thumb2 code which isn't as trivial to get 
right as the ARM one.


Nicolas
Cyril Chemparathy Aug. 5, 2012, 1:56 p.m. UTC | #2
Hi Nicolas,

On 8/4/2012 1:38 AM, Nicolas Pitre wrote:
> On Tue, 31 Jul 2012, Cyril Chemparathy wrote:
>
>> The original phys_to_virt/virt_to_phys patching implementation relied on early
>> patching prior to MMU initialization.  On PAE systems running out of >4G
>> address space, this would have entailed an additional round of patching after
>> switching over to the high address space.
>>
>> The approach implemented here conceptually extends the original PHYS_OFFSET
>> patching implementation with the introduction of "early" patch stubs.  Early
>> patch code is required to be functional out of the box, even before the patch
>> is applied.  This is implemented by inserting functional (but inefficient)
>> load code into the .patch.code init section.  Having functional code out of
>> the box then allows us to defer the init time patch application until later
>> in the init sequence.
>>
>> In addition to fitting better with our need for physical address-space
>> switch-over, this implementation should be somewhat more extensible by virtue
>> of its more readable (and hackable) C implementation.  This should prove
>> useful for other similar init time specialization needs, especially in light
>> of our multi-platform kernel initiative.
>>
>> This code has been boot tested in both ARM and Thumb-2 modes on an ARMv7
>> (Cortex-A8) device.
>>
>> Note: the obtuse use of stringified symbols in patch_stub() and
>> early_patch_stub() is intentional.  Theoretically this should have been
>> accomplished with formal operands passed into the asm block, but this requires
>> the use of the 'c' modifier for instantiating the long (e.g. .long %c0).
>> However, the 'c' modifier has been found to ICE certain versions of GCC, and
>> therefore we resort to stringified symbols here.
>>
>> Signed-off-by: Cyril Chemparathy <cyril@ti.com>
>
> This looks very nice.  Comments below.
>
>> ---
>>   arch/arm/include/asm/patch.h  |  123 +++++++++++++++++++++++++++++
>
> Please find a better name for this file. "patch" is way too generic and
> commonly referring to something different. "runtime-patching" or similar
> would be more descriptive.
>

Sure.  Does init-patch sound about right?  We need to reflect the fact 
that this is intended for init-time patching only.

>>   arch/arm/kernel/module.c      |    4 +
>>   arch/arm/kernel/setup.c       |  175 +++++++++++++++++++++++++++++++++++++++++
>
> This is complex enough to warrant a separate source file.  Please move
> those additions out of setup.c.  Given a good name for the header file
> above, the C file could share the same name.
>

Sure.

>> new file mode 100644
>> index 0000000..a89749f
>> --- /dev/null
>> +++ b/arch/arm/include/asm/patch.h
>> @@ -0,0 +1,123 @@
>> +/*
>> + *  arch/arm/include/asm/patch.h
>> + *
>> + *  Copyright (C) 2012, Texas Instruments
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + *
>> + *  Note: this file should not be included by non-asm/.h files
>> + */
>> +#ifndef __ASM_ARM_PATCH_H
>> +#define __ASM_ARM_PATCH_H
>> +
>> +#include <linux/stringify.h>
>> +
>> +#ifndef __ASSEMBLY__
>> +
>> extern unsigned __patch_table_begin, __patch_table_end;
>
> You could use "extern void __patch_table_begin" so those symbols don't
> get any type that could be misused by mistake, while you still can take
> their addresses.
>

Sure.

>> +
>> +struct patch_info {
>> +	u32	 type;
>> +	u32	 size;
>
> Given the possibly large number of table entries, some effort at making
> those entries as compact as possible should be considered. For instance,
> the type and size fields could be u8's and the insn_end pointer replaced
> with another size expressed as a u8.  By placing all the u8's together
> they would occupy a single word by themselves.  The assembly stub would
> only need a .align statement to reflect the C structure's padding.
>

Thanks, will try and pack this struct up.

> [...]
>
> Did you verify with some test program that your patching routines do
> produce the same opcodes as the assembled equivalent for all possible
> shift values?  Especially for Thumb2 code which isn't as trivial to get
> right as the ARM one.
>

Not quite all, but I'm sure I can conjure up an off-line test harness to 
do so.


Much appreciated feedback.  Thanks for taking a look.
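
(As a sketch of what such an off-line harness can check: the ARM
"modified immediate" form encodes an 8-bit value rotated right by an
even amount, so e.g. 0x41 << 10 must come out as val 0x41 with rotation
field 11, i.e. rotate right by 22.  A self-contained userspace check,
with illustrative values:)

	#include <stdint.h>
	#include <stdio.h>

	/* Rotate a 32-bit value right by n bits, n in 0..31. */
	static uint32_t ror32(uint32_t v, unsigned n)
	{
		return n ? (v >> n) | (v << (32 - n)) : v;
	}

	int main(void)
	{
		uint32_t imm = 0x41u << 10;	/* value to encode */
		uint32_t val = 0x41;		/* 8-bit payload */
		uint32_t rot = 11;		/* rotation field: ror 2*11 */

		printf("%s\n", ror32(val, 2 * rot) == imm ? "ok" : "mismatch");
		return 0;
	}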
Russell King - ARM Linux Aug. 6, 2012, 11:12 a.m. UTC | #3
On Tue, Jul 31, 2012 at 07:04:37PM -0400, Cyril Chemparathy wrote:
> +static void __init init_patch_kernel(void)
> +{
> +	const void *start = &__patch_table_begin;
> +	const void *end   = &__patch_table_end;
> +
> +	BUG_ON(patch_kernel(start, end - start));
> +	flush_icache_range(init_mm.start_code, init_mm.end_code);

Err.  You are asking the kernel to flush every single cache line
manually throughout the kernel code.  That's a flush every 32-bytes
over maybe a few megabytes of address space.

This is one of the reasons we do the patching in assembly code before
the caches are enabled - so we don't have to worry about the interaction
with the CPU caches, which for this kind of application would be very
expensive.
Cyril Chemparathy Aug. 6, 2012, 1:19 p.m. UTC | #4
On 8/6/2012 7:12 AM, Russell King - ARM Linux wrote:
> On Tue, Jul 31, 2012 at 07:04:37PM -0400, Cyril Chemparathy wrote:
>> +static void __init init_patch_kernel(void)
>> +{
>> +	const void *start = &__patch_table_begin;
>> +	const void *end   = &__patch_table_end;
>> +
>> +	BUG_ON(patch_kernel(start, end - start));
>> +	flush_icache_range(init_mm.start_code, init_mm.end_code);
>
> Err.  You are asking the kernel to flush every single cache line
> manually throughout the kernel code.  That's a flush every 32-bytes
> over maybe a few megabytes of address space.
>

With a flush_cache_all(), we could avoid having to operate a cacheline 
at a time, but that clobbers way more than necessary.

Maybe the better answer is to flush only the patched cachelines.

> This is one of the reasons we do the patching in assembly code before
> the caches are enabled - so we don't have to worry about the interaction
> with the CPU caches, which for this kind of application would be very
> expensive.
>

Sure, flushing caches is expensive.  But then, so is running the 
patching code with caches disabled.  I guess memory access latencies 
drive the performance trade-off here.
Russell King - ARM Linux Aug. 6, 2012, 1:26 p.m. UTC | #5
On Mon, Aug 06, 2012 at 09:19:10AM -0400, Cyril Chemparathy wrote:
> With a flush_cache_all(), we could avoid having to operate a cacheline  
> at a time, but that clobbers way more than necessary.

You can't do that, because flush_cache_all() on some CPUs requires the
proper MMU mappings to be in place, and you can't get those mappings
in place because you don't have the V:P offsets fixed up in the kernel.
Welcome to the chicken and egg problem.

> Sure, flushing caches is expensive.  But then, so is running the  
> patching code with caches disabled.  I guess memory access latencies  
> drive the performance trade-off here.

There we disagree on a few orders of magnitude.  There are relatively
few places that need updating.  According to the kernel I have here:

   text    data     bss     dec     hex filename
7644346  454320  212984 8311650  7ed362 vmlinux

Idx Name          Size      VMA       LMA       File off  Algn
  1 .text         004cd170  c00081c0  c00081c0  000081c0  2**5
 16 .init.pv_table 00000300  c0753a24  c0753a24  00753a24  2**0

That's about 7MB of text, and only 192 points in that code which need
patching.  Even if we did this with caches on, that's still 192 places,
and only 192 places we'd need to flush a cache line.

Alternatively, with your approach and 7MB of text, you need to flush
238885 cache lines to cover the entire kernel.

It would be far _cheaper_ with your approach to flush the individual
cache lines as you go.
Cyril Chemparathy Aug. 6, 2012, 1:38 p.m. UTC | #6
On 8/6/2012 9:26 AM, Russell King - ARM Linux wrote:
> On Mon, Aug 06, 2012 at 09:19:10AM -0400, Cyril Chemparathy wrote:
>> With a flush_cache_all(), we could avoid having to operate a cacheline
>> at a time, but that clobbers way more than necessary.
>
> You can't do that, because flush_cache_all() on some CPUs requires the
> proper MMU mappings to be in place, and you can't get those mappings
> in place because you don't have the V:P offsets fixed up in the kernel.
> Welcome to the chicken and egg problem.
>
>> Sure, flushing caches is expensive.  But then, so is running the
>> patching code with caches disabled.  I guess memory access latencies
>> drive the performance trade-off here.
>
> There we disagree on a few orders of magnitude.  There are relatively
> few places that need updating.  According to the kernel I have here:
>
>     text    data     bss     dec     hex filename
> 7644346  454320  212984 8311650  7ed362 vmlinux
>
> Idx Name          Size      VMA       LMA       File off  Algn
>    1 .text         004cd170  c00081c0  c00081c0  000081c0  2**5
>   16 .init.pv_table 00000300  c0753a24  c0753a24  00753a24  2**0
>
> That's about 7MB of text, and only 192 points in that code which need
> patching.  Even if we did this with caches on, that's still 192 places,
> and only 192 places we'd need to flush a cache line.
>
> Alternatively, with your approach and 7MB of text, you need to flush
> 238885 cache lines to cover the entire kernel.
>
> It would be far _cheaper_ with your approach to flush the individual
> cache lines as you go.
>

Agreed.  Thanks.
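
(The approach being agreed on here, sketched as a variant of this
patch's patch_kernel() loop; error handling elided:)

	for (p = table; p < end; p = patch_next(p)) {
		/* ... apply the patch at p ... */

		/* Flush only the cachelines covering the patched
		 * instruction, not the whole kernel text. */
		flush_icache_range((unsigned long)p->insn_start,
				   (unsigned long)p->insn_end);
	}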
Nicolas Pitre Aug. 6, 2012, 6:02 p.m. UTC | #7
On Mon, 6 Aug 2012, Russell King - ARM Linux wrote:

> On Mon, Aug 06, 2012 at 09:19:10AM -0400, Cyril Chemparathy wrote:
> > With a flush_cache_all(), we could avoid having to operate a cacheline  
> > at a time, but that clobbers way more than necessary.
> 
> You can't do that, because flush_cache_all() on some CPUs requires the
> proper MMU mappings to be in place, and you can't get those mappings
> in place because you don't have the V:P offsets fixed up in the kernel.
> Welcome to the chicken and egg problem.

This problem is fixed in this case by having the p2v and v2p code sites 
use an out-of-line, non-optimized computation until those sites are 
runtime patched with the inlined, optimized computation we have today.


Nicolas
Cyril Chemparathy Aug. 7, 2012, 10:52 p.m. UTC | #8
Hi Nicolas,

On 8/4/2012 1:38 AM, Nicolas Pitre wrote:
[...]
>> extern unsigned __patch_table_begin, __patch_table_end;
>
> You could use "extern void __patch_table_begin" so those symbols don't
> get any type that could be misused by mistake, while you still can take
> their addresses.
>

Looks like we'll have to stick with a non-void type here.  The compiler 
throws a warning when we try to take the address of a void.

[...]
> Did you verify with some test program that your patching routines do
> produce the same opcodes as the assembled equivalent for all possible
> shift values?  Especially for Thumb2 code which isn't as trivial to get
> right as the ARM one.
>

We've refactored the patching code into separate functions as:

static int do_patch_imm8_arm(u32 insn, u32 imm, u32 *ninsn);
static int do_patch_imm8_thumb(u32 insn, u32 imm, u32 *ninsn);


With this, the following test code has been used to verify the generated 
instruction encoding:

u32 arm_check[] = {
	0xe2810041, 0xe2810082, 0xe2810f41, 0xe2810f82, 0xe2810e41,
	0xe2810e82, 0xe2810d41, 0xe2810d82, 0xe2810c41, 0xe2810c82,
	0xe2810b41, 0xe2810b82, 0xe2810a41, 0xe2810a82, 0xe2810941,
	0xe2810982, 0xe2810841, 0xe2810882, 0xe2810741, 0xe2810782,
	0xe2810641, 0xe2810682, 0xe2810541, 0xe2810582, 0xe2810441,
};

u32 thumb_check[] = {
	0xf1010081, 0xf5017081, 0xf5017001, 0xf5016081, 0xf5016001,
	0xf5015081, 0xf5015001, 0xf5014081, 0xf5014001, 0xf5013081,
	0xf5013001, 0xf5012081, 0xf5012001, 0xf5011081, 0xf5011001,
	0xf5010081, 0xf5010001, 0xf1017081, 0xf1017001, 0xf1016081,
	0xf1016001, 0xf1015081, 0xf1015001, 0xf1014081, 0xf1014001,
};

int do_test(void)
{
	int i, ret;
	u32 ninsn, insn;
	
	insn = arm_check[0];
	for (i = 0; i < ARRAY_SIZE(arm_check); i++) {
		ret = do_patch_imm8_arm(insn, 0x41 << i, &ninsn);
		if (ret < 0)
			pr_err("patch failed at shift %d\n", i);
		if (ninsn != arm_check[i])
			pr_err("mismatch at %d, expect %x, got %x\n",
			       i, arm_check[i], ninsn);
	}

	insn = thumb_check[0];
	for (i = 0; i < ARRAY_SIZE(thumb_check); i++) {
		ret = do_patch_imm8_thumb(insn, 0x81 << i, &ninsn);
		if (ret < 0)
			pr_err("patch failed at shift %d\n", i);
		if (ninsn != thumb_check[i])
			pr_err("mismatch at %d, expect %x, got %x\n",
			       i, thumb_check[i], ninsn);
	}

	return 0;
}

Any ideas on improving these tests?
Nicolas Pitre Aug. 8, 2012, 5:56 a.m. UTC | #9
On Tue, 7 Aug 2012, Cyril Chemparathy wrote:

> Hi Nicolas,
> 
> On 8/4/2012 1:38 AM, Nicolas Pitre wrote:
> [...]
> > > extern unsigned __patch_table_begin, __patch_table_end;
> > 
> > You could use "extern void __patch_table_begin" so those symbols don't
> > get any type that could be misused by mistake, while you still can take
> > their addresses.
> > 
> 
> Looks like we'll have to stick with a non-void type here.  The compiler throws
> a warning when we try to take the address of a void.

Ah, I see. Bummer.  This used not to be the case with older gcc 
versions.

> [...]
> > Did you verify with some test program that your patching routines do
> > produce the same opcodes as the assembled equivalent for all possible
> > shift values?  Especially for Thumb2 code which isn't as trivial to get
> > right as the ARM one.
> > 
> 
> We've refactored the patching code into separate functions as:
> 
> static int do_patch_imm8_arm(u32 insn, u32 imm, u32 *ninsn);
> static int do_patch_imm8_thumb(u32 insn, u32 imm, u32 *ninsn);
> 
> 
> With this, the following test code has been used to verify the generated
> instruction encoding:
> 
> u32 arm_check[] = {
> 	0xe2810041, 0xe2810082, 0xe2810f41, 0xe2810f82, 0xe2810e41,
> 	0xe2810e82, 0xe2810d41, 0xe2810d82, 0xe2810c41, 0xe2810c82,
> 	0xe2810b41, 0xe2810b82, 0xe2810a41, 0xe2810a82, 0xe2810941,
> 	0xe2810982, 0xe2810841, 0xe2810882, 0xe2810741, 0xe2810782,
> 	0xe2810641, 0xe2810682, 0xe2810541, 0xe2810582, 0xe2810441,
> };

Instead of using this array you could let the assembler do it for you 
like this:

asm (" \n\
	.arm \n\
arm_check: \n\
        .set shft, 0 \n\
        .rep 12 \n\
        add     r1, r2, #0x81 << shft \n\
        .set shft, shft + 2 \n\
        .endr \n\
");

> u32 thumb_check[] = {
> 	0xf1010081, 0xf5017081, 0xf5017001, 0xf5016081, 0xf5016001,
> 	0xf5015081, 0xf5015001, 0xf5014081, 0xf5014001, 0xf5013081,
> 	0xf5013001, 0xf5012081, 0xf5012001, 0xf5011081, 0xf5011001,
> 	0xf5010081, 0xf5010001, 0xf1017081, 0xf1017001, 0xf1016081,
> 	0xf1016001, 0xf1015081, 0xf1015001, 0xf1014081, 0xf1014001,

Same idea here.


Nicolas
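
(Tying the two together: a sketch of comparing the patcher's output
against the assembler-emitted words, assuming the asm block above sits
at file scope so its label is visible from C; illustrative only:)

	extern u32 arm_check[];	/* label emitted by the asm block above */

	static void check_arm_encodings(void)
	{
		u32 ninsn;
		int i;

		for (i = 0; i < 12; i++) {
			if (do_patch_imm8_arm(arm_check[0], 0x81 << (2 * i),
					      &ninsn) < 0 ||
			    ninsn != arm_check[i])
				pr_err("ARM encoding mismatch at shift %d\n",
				       2 * i);
		}
	}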
Cyril Chemparathy Aug. 8, 2012, 1:18 p.m. UTC | #10
On 08/08/12 01:56, Nicolas Pitre wrote:
> On Tue, 7 Aug 2012, Cyril Chemparathy wrote:
[...]
>> u32 arm_check[] = {
>> 	0xe2810041, 0xe2810082, 0xe2810f41, 0xe2810f82, 0xe2810e41,
>> 	0xe2810e82, 0xe2810d41, 0xe2810d82, 0xe2810c41, 0xe2810c82,
>> 	0xe2810b41, 0xe2810b82, 0xe2810a41, 0xe2810a82, 0xe2810941,
>> 	0xe2810982, 0xe2810841, 0xe2810882, 0xe2810741, 0xe2810782,
>> 	0xe2810641, 0xe2810682, 0xe2810541, 0xe2810582, 0xe2810441,
>> };
>
> Instead of using this array you could let the assembler do it for you
> like this:
>
> asm (" \n\
> 	.arm \n\
> arm_check: \n\
>          .set shft, 0 \n\
>          .rep 12 \n\
>          add     r1, r2, #0x81 << shft \n\
>          .set shft, shft + 2 \n\
>          .endr \n\
> ");
>

Neat macro magic.  Are you thinking that we build this in as a self test 
in the code?

Thanks
-- Cyril.
Nicolas Pitre Aug. 8, 2012, 1:55 p.m. UTC | #11
On Wed, 8 Aug 2012, Cyril Chemparathy wrote:

> On 08/08/12 01:56, Nicolas Pitre wrote:
> > On Tue, 7 Aug 2012, Cyril Chemparathy wrote:
> [...]
> > > u32 arm_check[] = {
> > > 	0xe2810041, 0xe2810082, 0xe2810f41, 0xe2810f82, 0xe2810e41,
> > > 	0xe2810e82, 0xe2810d41, 0xe2810d82, 0xe2810c41, 0xe2810c82,
> > > 	0xe2810b41, 0xe2810b82, 0xe2810a41, 0xe2810a82, 0xe2810941,
> > > 	0xe2810982, 0xe2810841, 0xe2810882, 0xe2810741, 0xe2810782,
> > > 	0xe2810641, 0xe2810682, 0xe2810541, 0xe2810582, 0xe2810441,
> > > };
> > 
> > Instead of using this array you could let the assembler do it for you
> > like this:
> > 
> > asm (" \n\
> > 	.arm \n\
> > arm_check: \n\
> >          .set shft, 0 \n\
> >          .rep 12 \n\
> >          add     r1, r2, #0x81 << shft \n\
> >          .set shft, shft + 2 \n\
> >          .endr \n\
> > ");
> > 
> 
> Neat macro magic.  Are you thinking that we build this in as a self test in
> the code?

For such things, it is never a bad idea to have some test code alongside 
the main code, especially if this is extended to more cases in the 
future.  It is too easy to break it in subtle ways.

See arch/arm/kernel/kprobes-test*.c for a precedent.


Nicolas
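
(One way such a self-test could be wired in behind a config option,
following that precedent; CONFIG_ARM_RUNTIME_PATCH_TEST is a made-up
name and do_test() is the harness quoted earlier in the thread:)

	#ifdef CONFIG_ARM_RUNTIME_PATCH_TEST
	/* Run the encoding self-test once at boot, after the patching
	 * machinery itself has been exercised. */
	static int __init run_patch_self_test(void)
	{
		return do_test();
	}
	late_initcall(run_patch_self_test);
	#endif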
Russell King - ARM Linux Aug. 8, 2012, 4:05 p.m. UTC | #12
On Wed, Aug 08, 2012 at 09:55:12AM -0400, Nicolas Pitre wrote:
> On Wed, 8 Aug 2012, Cyril Chemparathy wrote:
> > Neat macro magic.  Are you thinking that we build this in as a self test in
> > the code?
> 
> For such things, this is never a bad idea to have some test alongside 
> with the main code, especially if this is extended to more cases in the 
> future.  It is too easy to break it in subtle ways.
> 
> See arch/arm/kernel/kprobes-test*.c for a precedent.

Done correctly, it shouldn't be a problem, but I wouldn't say that
arch/arm/kernel/kprobes-test*.c is done correctly.  It's seen quite
a number of patching attempts since it was introduced for various
problems, and I've seen quite a number of builds fail for various
reasons in this file (none of which I could be bothered to investigate).

When the test code ends up causing more problems than the code it's
testing, something is definitely wrong.
Nicolas Pitre Aug. 8, 2012, 4:56 p.m. UTC | #13
On Wed, 8 Aug 2012, Russell King - ARM Linux wrote:

> On Wed, Aug 08, 2012 at 09:55:12AM -0400, Nicolas Pitre wrote:
> > On Wed, 8 Aug 2012, Cyril Chemparathy wrote:
> > > Neat macro magic.  Are you thinking that we build this in as a self test in
> > > the code?
> > 
> > For such things, this is never a bad idea to have some test alongside 
> > with the main code, especially if this is extended to more cases in the 
> > future.  It is too easy to break it in subtle ways.
> > 
> > See arch/arm/kernel/kprobes-test*.c for a precedent.
> 
> Done correctly, it shouldn't be a problem, but I wouldn't say that
> arch/arm/kernel/kprobes-test*.c is done correctly.  It's seen quite
> a number of patching attempts since it was introduced for various
> problems, and I've seen quite a number of builds fail for various
> reasons in this file (none of which I could be bothered to investigate).
> 
> When the test code ends up causing more problems than the code it's
> testing, something is definitely wrong.

I think we shouldn't compare the complexity of test code for kprobes and 
test code for runtime patching code.  The former, while more difficult 
to keep compiling, has found loads of issues in the kprobes code itself.  
So it certainly paid back many times its cost in maintenance.

My mention of it wasn't about the actual test code implementation, but 
rather about the fact that we do have test code in the tree which can be 
enabled with a config option.

As for build failures with that test code, I'd suggest you simply drop a 
note to Tixy, who is normally very responsive.  I randomly enable it 
myself and haven't run into any issues yet.


Nicolas
Tixy Aug. 9, 2012, 6:59 a.m. UTC | #14
On Wed, 2012-08-08 at 12:56 -0400, Nicolas Pitre wrote:
> On Wed, 8 Aug 2012, Russell King - ARM Linux wrote:
> > Done correctly, it shouldn't be a problem, but I wouldn't say that
> > arch/arm/kernel/kprobes-test*.c is done correctly.  It's seen quite
> > a number of patching attempts since it was introduced for various
> > problems, and I've seen quite a number of builds fail for various
> > reasons in this file (none which I could be bothered to investigate.)
<snip>
> >
> As for build failures with that test code, I'd suggest you simply drop a 
> note to Tixy, who is normally very responsive.

Indeed. If there are build failures, I'm happy to investigate and fix.

Patch

diff --git a/arch/arm/include/asm/patch.h b/arch/arm/include/asm/patch.h
new file mode 100644
index 0000000..a89749f
--- /dev/null
+++ b/arch/arm/include/asm/patch.h
@@ -0,0 +1,123 @@ 
+/*
+ *  arch/arm/include/asm/patch.h
+ *
+ *  Copyright (C) 2012, Texas Instruments
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Note: this file should not be included by non-asm/.h files
+ */
+#ifndef __ASM_ARM_PATCH_H
+#define __ASM_ARM_PATCH_H
+
+#include <linux/stringify.h>
+
+#ifndef __ASSEMBLY__
+
+extern unsigned __patch_table_begin, __patch_table_end;
+
+struct patch_info {
+	u32	 type;
+	u32	 size;
+	void	*insn_start;
+	void	*insn_end;
+	u32	 patch_data[0];
+};
+
+#define patch_next(p)		((void *)(p) + (p)->size)
+
+#define PATCH_TYPE_MASK		0xffff
+#define PATCH_IMM8		0x0001
+
+#define PATCH_EARLY		0x10000
+
+#define patch_stub(type, code, patch_data, ...)			\
+	__asm__("@ patch stub\n"				\
+		"1:\n"						\
+		code						\
+		"2:\n"						\
+		"	.pushsection .patch.table, \"a\"\n"	\
+		"3:\n"						\
+		"	.long (" __stringify(type) ")\n"	\
+		"	.long (4f-3b)\n"			\
+		"	.long 1b\n"				\
+		"	.long 2b\n"				\
+		patch_data					\
+		"4:\n"						\
+		"	.popsection\n"				\
+		__VA_ARGS__)
+
+#define early_patch_stub(type, code, patch_data, ...)		\
+	__asm__("@ patch stub\n"				\
+		"1:\n"						\
+		"	b	5f\n"				\
+		"2:\n"						\
+		"	.pushsection .patch.table, \"a\"\n"	\
+		"3:\n"						\
+		"	.long (" __stringify(type | PATCH_EARLY) ")\n" \
+		"	.long (4f-3b)\n"			\
+		"	.long 1b\n"				\
+		"	.long 2b\n"				\
+		patch_data					\
+		"4:\n"						\
+		"	.popsection\n"				\
+		"	.pushsection .patch.code, \"ax\"\n"	\
+		"5:\n"						\
+		code						\
+		"	b 2b\n"					\
+		"	.popsection\n"				\
+		__VA_ARGS__)
+
+/* constant used to force encoding */
+#define __IMM8		(0x81 << 24)
+
+/*
+ * patch_imm8() - init-time specialized binary operation (imm8 operand)
+ *		  This effectively does: to = from "insn" sym,
+ *		  where the value of sym is fixed at init-time, and is patched
+ *		  in as an immediate operand.  This value must be
+ *		  representable as an 8-bit quantity with an optional
+ *		  rotation.
+ *
+ *		  The stub code produced by this variant is non-functional
+ *		  prior to patching.  Use early_patch_imm8() if you need the
+ *		  code to be functional early on in the init sequence.
+ */
+#define patch_imm8(from, to, insn, sym)				\
+	patch_stub(PATCH_IMM8,					\
+		   /* code */					\
+		   insn " %0, %1, %2\n",			\
+		   /* patch_data */				\
+		   ".long " __stringify(sym)		  "\n"	\
+		   insn " %0, %1, %2\n",			\
+		   : "=r" (to)					\
+		   : "r" (from), "I" (__IMM8), "m" (sym)	\
+		   : "cc")
+
+/*
+ * early_patch_imm8() - early functional variant of patch_imm8() above.  The
+ *			same restrictions on the constant apply here.  This
+ *			version emits workable (albeit inefficient) code at
+ *			compile-time, and therefore functions even prior to
+ *			patch application.
+ */
+#define early_patch_imm8(from, to, insn, sym)			\
+	early_patch_stub(PATCH_IMM8,				\
+			 /* code */				\
+			 "ldr	%0, =" __stringify(sym) "\n"	\
+			 "ldr	%0, [%0]\n"			\
+			 insn " %0, %1, %0\n",			\
+			 /* patch_data */			\
+			 ".long " __stringify(sym) "\n"		\
+			 insn " %0, %1, %2\n",			\
+			 : "=&r" (to)				\
+			 : "r" (from), "I" (__IMM8), "m" (sym)	\
+			 : "cc")
+
+int patch_kernel(const void *table, unsigned size);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_ARM_PATCH_H */
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 1e9be5d..df5e897 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -24,6 +24,7 @@ 
 #include <asm/sections.h>
 #include <asm/smp_plat.h>
 #include <asm/unwind.h>
+#include <asm/patch.h>
 
 #ifdef CONFIG_XIP_KERNEL
 /*
@@ -321,6 +322,9 @@  int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
 	if (s)
 		fixup_pv_table((void *)s->sh_addr, s->sh_size);
 #endif
+	s = find_mod_section(hdr, sechdrs, ".patch.table");
+	if (s)
+		patch_kernel((void *)s->sh_addr, s->sh_size);
 	s = find_mod_section(hdr, sechdrs, ".alt.smp.init");
 	if (s && !is_smp())
 #ifdef CONFIG_SMP_ON_UP
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index e15d83b..15a7699 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -55,6 +55,7 @@ 
 #include <asm/traps.h>
 #include <asm/unwind.h>
 #include <asm/memblock.h>
+#include <asm/opcodes.h>
 
 #if defined(CONFIG_DEPRECATED_PARAM_STRUCT)
 #include "compat.h"
@@ -937,6 +938,178 @@  static int __init meminfo_cmp(const void *_a, const void *_b)
 	return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
 }
 
+static int apply_patch_imm8_arm(const struct patch_info *p)
+{
+	u32 insn, ninsn, op, *insn_ptr = p->insn_start;
+	u32 imm, rot, val;
+	int size = p->insn_end - p->insn_start;
+
+	if (size != 4) {
+		pr_err("patch: bad template size %d\n", size);
+		return -EINVAL;
+	}
+
+	insn = __mem_to_opcode_arm(p->patch_data[1]);
+
+	/* disallow special unconditional instructions
+	 * 1111 xxxx xxxx xxxx xxxx xxxx xxxx xxxx */
+	if ((insn >> 24) == 0xf) {
+		pr_err("patch: unconditional insn %08x\n", insn);
+		return -EINVAL;
+	}
+
+	/* allow only data processing (immediate)
+	 * xxxx 001x xxxx xxxx xxxx xxxx xxxx xxxx */
+	if (((insn >> 25) & 0x3) != 1) {
+		pr_err("patch: unknown insn %08x\n", insn);
+		return -EINVAL;
+	}
+
+	/* extract op code */
+	op = (insn >> 20) & 0x1f;
+
+	/* disallow unsupported 10xxx op codes */
+	if (((op >> 3) & 0x3) == 2) {
+		pr_err("patch: unsupported opcode %08x\n", insn);
+		return -EINVAL;
+	}
+
+	/* disallow Rn == PC and Rd == PC */
+	if (((insn >> 16) & 0xf) == 0xf || ((insn >> 12) & 0xf) == 0xf) {
+		pr_err("patch: unsupported register %08x\n", insn);
+		return -EINVAL;
+	}
+
+	imm = *(u32 *)p->patch_data[0];
+
+	rot = imm ? __ffs(imm) / 2 : 0;
+	val = imm >> (rot * 2);
+	rot = (-rot) & 0xf;
+
+	/* does this fit in 8-bit? */
+	if (val > 0xff) {
+		pr_err("patch: constant overflow %08x\n", imm);
+		return -EINVAL;
+	}
+
+	/* patch in new immediate and rotation */
+	ninsn = (insn & ~0xfff) | (rot << 8) | val;
+
+	*insn_ptr = __opcode_to_mem_arm(ninsn);
+
+	return 0;
+}
+
+static int apply_patch_imm8_thumb(const struct patch_info *p)
+{
+	u32 insn, ninsn, op, *insn_ptr = p->insn_start;
+	u32 imm, rot, val;
+	int size = p->insn_end - p->insn_start;
+	const u32 supported_ops = (BIT(0)  | /* and */
+				   BIT(1)  | /* bic */
+				   BIT(2)  | /* orr/mov */
+				   BIT(3)  | /* orn/mvn */
+				   BIT(4)  | /* eor */
+				   BIT(8)  | /* add */
+				   BIT(10) | /* adc */
+				   BIT(11) | /* sbc */
+				   BIT(12) | /* sub */
+				   BIT(13)); /* rsb */
+
+	if (size != 4) {
+		pr_err("patch: bad template size %d\n", size);
+		return -EINVAL;
+	}
+
+	insn = __mem_to_opcode_thumb32(p->patch_data[1]);
+	if (!__opcode_is_thumb32(insn)) {
+		pr_err("patch: invalid thumb2 insn %08x\n", insn);
+		return -EINVAL;
+	}
+
+	/* allow only data processing (immediate)
+	 * 1111 0x0x xxx0 xxxx 0xxx xxxx xxxx xxxx */
+	if ((insn & 0xfa008000) != 0xf0000000) {
+		pr_err("patch: unknown insn %08x\n", insn);
+		return -EINVAL;
+	}
+
+	/* disallow Rn == PC and Rd == PC */
+	if (((insn >> 8) & 0xf) == 0xf || ((insn >> 16) & 0xf) == 0xf) {
+		pr_err("patch: unsupported register %08x\n", insn);
+		return -EINVAL;
+	}
+
+	/* extract op code */
+	op = (insn >> 21) & 0xf;
+
+	/* disallow unsupported opcodes */
+	if ((supported_ops & BIT(op)) == 0) {
+		pr_err("patch: unsupported opcode %x\n", op);
+		return -EINVAL;
+	}
+
+	imm = *(u32 *)p->patch_data[0];
+
+	if (imm <= 0xff) {
+		rot = 0;
+		val = imm;
+	} else {
+		rot = 32 - fls(imm); /* clz */
+		if (imm & ~(0xff000000 >> rot)) {
+			pr_err("patch: constant overflow %08x\n", imm);
+			return -EINVAL;
+		}
+		val  = (imm >> (24 - rot)) & 0x7f;
+		rot += 8; /* encoded i:imm3:a */
+
+		/* pack least-sig rot bit into most-sig val bit */
+		val |= (rot & 1) << 7;
+		rot >>= 1;
+	}
+
+	ninsn  = insn & ~(BIT(26) | 0x7 << 12 | 0xff);
+	ninsn |= (rot >> 3) << 26;	/* field "i" */
+	ninsn |= (rot & 0x7) << 12;	/* field "imm3" */
+	ninsn |= val;
+
+	*insn_ptr = __opcode_to_mem_thumb32(ninsn);
+
+	return 0;
+}
+
+int patch_kernel(const void *table, unsigned size)
+{
+	const struct patch_info *p = table, *end = (table + size);
+	bool thumb2 = IS_ENABLED(CONFIG_THUMB2_KERNEL);
+
+	for (p = table; p < end; p = patch_next(p)) {
+		int type = p->type & PATCH_TYPE_MASK;
+		int ret;
+
+		if (type == PATCH_IMM8) {
+			ret = (thumb2 ? apply_patch_imm8_thumb(p) :
+					apply_patch_imm8_arm(p));
+		} else {
+			pr_err("invalid patch type %d\n", type);
+			ret = -EINVAL;
+		}
+
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+static void __init init_patch_kernel(void)
+{
+	const void *start = &__patch_table_begin;
+	const void *end   = &__patch_table_end;
+
+	BUG_ON(patch_kernel(start, end - start));
+	flush_icache_range(init_mm.start_code, init_mm.end_code);
+}
+
 void __init setup_arch(char **cmdline_p)
 {
 	struct machine_desc *mdesc;
@@ -998,6 +1171,8 @@  void __init setup_arch(char **cmdline_p)
 
 	if (mdesc->init_early)
 		mdesc->init_early();
+
+	init_patch_kernel();
 }
 
 
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 36ff15b..bacb275 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -167,6 +167,16 @@  SECTIONS
 		*(.pv_table)
 		__pv_table_end = .;
 	}
+	.init.patch_table : {
+		__patch_table_begin = .;
+		*(.patch.table)
+		__patch_table_end = .;
+	}
+	.init.patch_code : {
+		__patch_code_begin = .;
+		*(.patch.code)
+		__patch_code_end = .;
+	}
 	.init.data : {
 #ifndef CONFIG_XIP_KERNEL
 		INIT_DATA