[v6,1/4] x86: Clean up extable entry format (and free up a bit)

Message ID 968b4c079271431292fddfa49ceacff576be6849.1451869360.git.tony.luck@intel.com (mailing list archive)
State Superseded

Commit Message

Andy Lutomirski Dec. 30, 2015, 5:59 p.m. UTC
This adds two bits of fixup class information to a fixup entry,
generalizing the uaccess_err hack currently in place.

Forward-ported-from-3.9-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/include/asm/asm.h | 70 ++++++++++++++++++++++++++++++----------------
 arch/x86/mm/extable.c      | 21 ++++++++------
 2 files changed, 59 insertions(+), 32 deletions(-)

Comments

Tony Luck Jan. 4, 2016, 1:37 a.m. UTC | #1
On Wed, Dec 30, 2015 at 9:59 AM, Andy Lutomirski <luto@amacapital.net> wrote:
> This adds two bits of fixup class information to a fixup entry,
> generalizing the uaccess_err hack currently in place.
>
> Forward-ported-from-3.9-by: Tony Luck <tony.luck@intel.com>
> Signed-off-by: Andy Lutomirski <luto@amacapital.net>

Crivens!  I messed up when "git cherry-pick"ing this and "git
format-patch"ing it.

I didn't mean to forge Andy's From line when sending this out (just to have a
From: line to give him credit for the patch).

Big OOPs ... this is "From:" me ... not Andy!

-Tony
Ingo Molnar Jan. 4, 2016, 7:49 a.m. UTC | #2
* Tony Luck <tony.luck@gmail.com> wrote:

> On Wed, Dec 30, 2015 at 9:59 AM, Andy Lutomirski <luto@amacapital.net> wrote:
> > This adds two bits of fixup class information to a fixup entry,
> > generalizing the uaccess_err hack currently in place.
> >
> > Forward-ported-from-3.9-by: Tony Luck <tony.luck@intel.com>
> > Signed-off-by: Andy Lutomirski <luto@amacapital.net>
> 
> Crivens!  I messed up when "git cherry-pick"ing this and "git
> format-patch"ing it.
> 
> I didn't mean to forge Andy's From line when sending this out (just to have a
> From: line to give him credit for the patch).
> 
> Big OOPs ... this is "From:" me ... not Andy!

But in any case it's missing your SOB line.

If Andy is still the primary author (much of his original patch survived, and 
you only resolved conflicts or made minor changes), then you can send this as:

 From: Tony Luck <tony.luck@intel.com>
 Subject: [PATCH v6 1/4] x86: Clean up extable entry format (and free up a bit)

 From: Andy Lutomirski <luto@amacapital.net>

 ... changelog ...

 Signed-off-by: Andy Lutomirski <luto@amacapital.net>
 [ Forward ported from a v3.9 version. ]
 Signed-off-by: Tony Luck <tony.luck@intel.com>

This carries all the information, has a proper SOB chain, and preserves 
authorship. Also, it's clear from the tags that you made material changes, so any 
resulting breakage is yours (and mine), not Andy's! ;-)

If the changes to the patch are major, so that your new work is larger than Andy's 
original work, you can still credit him via a non-standard tag, like:

 From: Tony Luck <tony.luck@intel.com>
 Subject: [PATCH v6 1/4] x86: Clean up extable entry format (and free up a bit)

 This patch is based on Andy Lutomirski's patch sent against v3.9:

 ... changelog ...

 Originally-from: Andy Lutomirski <luto@amacapital.net>
 Signed-off-by: Tony Luck <tony.luck@intel.com>

Thanks,

	Ingo
Borislav Petkov Jan. 4, 2016, 12:07 p.m. UTC | #3
On Wed, Dec 30, 2015 at 09:59:29AM -0800, Andy Lutomirski wrote:
> This adds two bits of fixup class information to a fixup entry,
> generalizing the uaccess_err hack currently in place.
> 
> Forward-ported-from-3.9-by: Tony Luck <tony.luck@intel.com>
> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
> ---
>  arch/x86/include/asm/asm.h | 70 ++++++++++++++++++++++++++++++----------------
>  arch/x86/mm/extable.c      | 21 ++++++++------
>  2 files changed, 59 insertions(+), 32 deletions(-)
> 
> diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
> index 189679aba703..b64121ffb2da 100644
> --- a/arch/x86/include/asm/asm.h
> +++ b/arch/x86/include/asm/asm.h
> @@ -43,19 +43,47 @@
>  #define _ASM_DI		__ASM_REG(di)
>  
>  /* Exception table entry */
> -#ifdef __ASSEMBLY__
> -# define _ASM_EXTABLE(from,to)					\
> -	.pushsection "__ex_table","a" ;				\
> -	.balign 8 ;						\
> -	.long (from) - . ;					\
> -	.long (to) - . ;					\
> -	.popsection
>  
> -# define _ASM_EXTABLE_EX(from,to)				\
> -	.pushsection "__ex_table","a" ;				\
> -	.balign 8 ;						\
> -	.long (from) - . ;					\
> -	.long (to) - . + 0x7ffffff0 ;				\
> +/*
> + * An exception table entry is 64 bits.  The first 32 bits are the offset

Two 32-bit ints, to be exact.

Also, there's text in arch/x86/include/asm/uaccess.h where the exception
table entry is defined, so you should probably sync with it so that the
nomenclature is the same.

> + * from that entry to the potentially faulting instruction.  sortextable

								sortextable.c ?

> + * relies on that exact encoding.  The second 32 bits encode the fault
> + * handler address.
> + *
> + * We want to stick two extra bits of handler class into the fault handler
> + * address.  All of these are generated by relocations, so we can only
> + * rely on addition.  We therefore emit:
> + *
> + * (target - here) + (class) + 0x20000000

I still don't understand that bit 29 thing.

Because the offset is negative?

The exception table currently looks like this here:

insn offset: 0xff91a7c4, fixup offset: 0xffffd57a
insn offset: 0xff91bac3, fixup offset: 0xffffd57e
insn offset: 0xff91bac0, fixup offset: 0xffffd57d
insn offset: 0xff91baba, fixup offset: 0xffffd57c
insn offset: 0xff91bfca, fixup offset: 0xffffd57c
insn offset: 0xff91bfff, fixup offset: 0xffffd57e
insn offset: 0xff91c049, fixup offset: 0xffffd580
insn offset: 0xff91c141, fixup offset: 0xffffd57f
insn offset: 0xff91c24e, fixup offset: 0xffffd581
insn offset: 0xff91c262, fixup offset: 0xffffd580
insn offset: 0xff91c261, fixup offset: 0xffffd57f
...

It probably will dawn on me when I look at the rest of the patch...

> + * This has the property that the two high bits are the class and the
> + * rest is easy to decode.
> + */
> +
> +/* There are two bits of extable entry class, added to a signed offset. */
> +#define _EXTABLE_CLASS_DEFAULT	0		/* standard uaccess fixup */
> +#define _EXTABLE_CLASS_EX	0x80000000	/* uaccess + set uaccess_err */

				BIT(31) is more readable.

> +
> +/*
> + * The biases are the class constants + 0x20000000, as signed integers.
> + * This can't use ordinary arithmetic -- the assembler isn't that smart.
> + */
> +#define _EXTABLE_BIAS_DEFAULT	0x20000000
> +#define _EXTABLE_BIAS_EX	0x20000000 - 0x80000000

Ditto.

> +
> +#define _ASM_EXTABLE(from,to)						\
> +	_ASM_EXTABLE_CLASS(from, to, _EXTABLE_BIAS_DEFAULT)
> +
> +#define _ASM_EXTABLE_EX(from,to)					\
> +	_ASM_EXTABLE_CLASS(from, to, _EXTABLE_BIAS_EX)
> +
> +#ifdef __ASSEMBLY__
> +# define _EXPAND_EXTABLE_BIAS(x) x
> +# define _ASM_EXTABLE_CLASS(from,to,bias)				\
> +	.pushsection "__ex_table","a" ;					\
> +	.balign 8 ;							\
> +	.long (from) - . ;						\
> +	.long (to) - . + _EXPAND_EXTABLE_BIAS(bias) ;			\

Why not simply:

	.long (to) - . + (bias) ;

and

	" .long (" #to ") - . + "(" #bias ") "\n"

below and get rid of that _EXPAND_EXTABLE_BIAS()?

>  	.popsection
>  
>  # define _ASM_NOKPROBE(entry)					\
> @@ -89,18 +117,12 @@
>  	.endm
>  
>  #else
> -# define _ASM_EXTABLE(from,to)					\
> -	" .pushsection \"__ex_table\",\"a\"\n"			\
> -	" .balign 8\n"						\
> -	" .long (" #from ") - .\n"				\
> -	" .long (" #to ") - .\n"				\
> -	" .popsection\n"
> -
> -# define _ASM_EXTABLE_EX(from,to)				\
> -	" .pushsection \"__ex_table\",\"a\"\n"			\
> -	" .balign 8\n"						\
> -	" .long (" #from ") - .\n"				\
> -	" .long (" #to ") - . + 0x7ffffff0\n"			\
> +# define _EXPAND_EXTABLE_BIAS(x) #x
> +# define _ASM_EXTABLE_CLASS(from,to,bias)				\
> +	" .pushsection \"__ex_table\",\"a\"\n"				\
> +	" .balign 8\n"							\
> +	" .long (" #from ") - .\n"					\
> +	" .long (" #to ") - . + " _EXPAND_EXTABLE_BIAS(bias) "\n"	\
>  	" .popsection\n"
>  /* For C file, we already have NOKPROBE_SYMBOL macro */
>  #endif
> diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
> index 903ec1e9c326..95e2ede71206 100644
> --- a/arch/x86/mm/extable.c
> +++ b/arch/x86/mm/extable.c
> @@ -8,16 +8,24 @@ ex_insn_addr(const struct exception_table_entry *x)
>  {
>  	return (unsigned long)&x->insn + x->insn;
>  }
> +static inline unsigned int
> +ex_class(const struct exception_table_entry *x)
> +{
> +	return (unsigned int)x->fixup & 0xC0000000;
> +}
> +
>  static inline unsigned long
>  ex_fixup_addr(const struct exception_table_entry *x)
>  {
> -	return (unsigned long)&x->fixup + x->fixup;
> +	long offset = (long)((u32)x->fixup & 0x3fffffff) - (long)0x20000000;

So basically:

	x->fixup & 0x1fffffff

Why the explicit subtraction of bit 29?

IOW, I was expecting something simpler for the whole scheme like:

ex_class:

	return x->fixup & 0xC0000000;

ex_fixup_addr:

	return x->fixup | 0xC0000000;

Why can't it be done this way?
Tony Luck Jan. 4, 2016, 5:26 p.m. UTC | #4
On Mon, Jan 4, 2016 at 4:07 AM, Borislav Petkov <bp@alien8.de> wrote:
>> + * (target - here) + (class) + 0x20000000
>
> I still don't understand that bit 29 thing.
>
> Because the offset is negative?

I think so.  The .fixup section is placed at the end of .text, and the
ex_table itself is pretty much right after.  So all the "fixup" offsets will
be small negative numbers (the "insn" ones are also negative, but larger in
magnitude, since they potentially need to reach all the way back to the start
of .text).

Adding 0x20000000 makes everything positive (so our legacy exception table
entries have bit31==bit30==0) and perhaps makes it fractionally clearer how
we manipulate the top bits for the other classes ... but only slightly.
(I got very confused by it too.)

It is all made more complex because these values need to be something
that "ld" can relocate when vmlinux is put together from all the ".o" files.
So we can't just use "x | BIT(30)" etc.
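
For illustration only (not part of the patch; the -0x2a86 offset is borrowed
from the 0xffffd57a entries in Borislav's table dump above), the encode/decode
round trip looks roughly like this in C:

	s32 raw    = -0x2a86;                       /* (target - here), i.e. 0xffffd57a as a u32 */
	u32 stored = raw + 0x20000000;              /* default class -> 0x1fffd57a, top bits 00  */
	u32 ex     = raw + 0x20000000 - 0x80000000; /* EX class      -> 0x9fffd57a, top bits 10  */
	s32 back   = (s32)(stored & 0x3fffffff) - 0x20000000;      /* recovers -0x2a86          */

The same mask-and-subtract recovers the offset from "ex" as well; the class is
just the top two bits.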


>> +#define _EXTABLE_CLASS_EX    0x80000000      /* uaccess + set uaccess_err */
>
>                                 BIT(31) is more readable.

Not to the assembler :-(

> Why not simply:
>
>         .long (to) - . + (bias) ;
>
> and
>
>         " .long (" #to ") - . + "(" #bias ") "\n"
>
> below and get rid of that _EXPAND_EXTABLE_BIAS()?

Andy - this part is your code and I'm not sure what the trick is here.

>>  ex_fixup_addr(const struct exception_table_entry *x)
>>  {
>> -     return (unsigned long)&x->fixup + x->fixup;
>> +     long offset = (long)((u32)x->fixup & 0x3fffffff) - (long)0x20000000;
>
> So basically:
>
>         x->fixup & 0x1fffffff
>
> Why the explicit subtraction of bit 29?

We added it to begin with ... need to subtract to get back to the
original offset.

> IOW, I was expecting something simpler for the whole scheme like:
>
> ex_class:
>
>         return x->fixup & 0xC0000000;

ex_class (after part2) is just "(u32)x->fixup >> 30" (because I wanted
a result in [0..3])

> ex_fixup_addr:
>
>         return x->fixup | 0xC0000000;
>
> Why can't it be done this way?

Because relocations ... the linker can only add/subtract values when
making vmlinux ... it can't OR bits in.

-Tony
Andy Lutomirski Jan. 4, 2016, 6:08 p.m. UTC | #5
On Mon, Jan 4, 2016 at 9:26 AM, Tony Luck <tony.luck@gmail.com> wrote:
> On Mon, Jan 4, 2016 at 4:07 AM, Borislav Petkov <bp@alien8.de> wrote:
>>> + * (target - here) + (class) + 0x20000000
>>
>> I still don't understand that bit 29 thing.
>>
>> Because the offset is negative?
>
> I think so.  The .fixup section is placed at the end of .text, and the
> ex_table itself is pretty much right after.  So all the "fixup" offsets will
> be small negative numbers (the "insn" ones are also negative, but larger in
> magnitude, since they potentially need to reach all the way back to the start
> of .text).
>
> Adding 0x20000000 makes everything positive (so our legacy exception table
> entries have bit31==bit30==0) and perhaps makes it fractionally clearer how
> we manipulate the top bits for the other classes ... but only slightly.
> (I got very confused by it too.)
>
> It is all made more complex because these values need to be something
> that "ld" can relocate when vmlinux is put together from all the ".o" files.
> So we can't just use "x | BIT(30)" etc.

All of that's correct, including the part where it's confusing.  The
comments aren't the best.

How about adding a comment like:

----- begin comment -----

The offset to the fixup is signed, and we're trying to use the high
bits for a different purpose.  In C, we could just do:

u32 class_and_offset = ((target - here) & 0x3fffffff) | class;

Then, to decode it, we'd mask off the class and sign-extend to recover
the offset.

In asm, we can't do that, because this all gets laundered through the
linker, and there's no relocation type that supports this chicanery.
Instead we cheat a bit.  We first add a large number to the offset
(0x20000000).  The result is still nominally signed, but now it's
always positive, and the two high bits are always clear.  We can then
set high bits by ordinary addition or subtraction instead of using
bitwise operations.  As far as the linker is concerned, all we're
doing is adding a large constant to the difference between here (".")
and the target, and that's a valid relocation type.

In the C code, we just mask off the class bits and subtract 0x20000000
to get the offset.

----- end comment -----

>
>
>>> +#define _EXTABLE_CLASS_EX    0x80000000      /* uaccess + set uaccess_err */
>>
>>                                 BIT(31) is more readable.
>
> Not to the assembler :-(
>
>> Why not simply:
>>
>>         .long (to) - . + (bias) ;
>>
>> and
>>
>>         " .long (" #to ") - . + "(" #bias ") "\n"
>>
>> below and get rid of that _EXPAND_EXTABLE_BIAS()?
>
> Andy - this part is your code and I'm not sure what the trick is here.

I don't remember.  I think it was just some preprocessor crud to force
all the macros to expand fully before the assembler sees it.  If it
builds without it, feel free to delete it.

>
>>>  ex_fixup_addr(const struct exception_table_entry *x)
>>>  {
>>> -     return (unsigned long)&x->fixup + x->fixup;
>>> +     long offset = (long)((u32)x->fixup & 0x3fffffff) - (long)0x20000000;
>>
>> So basically:
>>
>>         x->fixup & 0x1fffffff
>>
>> Why the explicit subtraction of bit 29?
>
> We added it to begin with ... need to subtract to get back to the
> original offset.

Hopefully it's clearer with the comment above.

>
>> IOW, I was expecting something simpler for the whole scheme like:
>>
>> ex_class:
>>
>>         return x->fixup & 0xC0000000;
>
> ex_class (after part2) is just "(u32)x->fixup >> 30" (because I wanted
> a result in [0..3])
>
>> ex_fixup_addr:
>>
>>         return x->fixup | 0xC0000000;
>>
>> Why can't it be done this way?
>
> Because relocations ... the linker can only add/subtract values when
> making vmlinux ... it can't OR bits in.

--Andy
Tony Luck Jan. 4, 2016, 6:59 p.m. UTC | #6
> ----- begin comment -----
>
> The offset to the fixup is signed, and we're trying to use the high
> bits for a different purpose.  In C, we could just do:
>
> u32 class_and_offset = ((target - here) & 0x3fffffff) | class;
>
> Then, to decode it, we'd mask off the class and sign-extend to recover
> the offset.
>
> In asm, we can't do that, because this all gets laundered through the
> linker, and there's no relocation type that supports this chicanery.
> Instead we cheat a bit.  We first add a large number to the offset
> (0x20000000).  The result is still nominally signed, but now it's
> always positive, and the two high bits are always clear.  We can then
> set high bits by ordinary addition or subtraction instead of using
> bitwise operations.  As far as the linker is concerned, all we're
> doing is adding a large constant to the difference between here (".")
> and the target, and that's a valid relocation type.
>
> In the C code, we just mask off the class bits and subtract 0x20000000
> to get the offset.
>
> ----- end comment -----

But presumably those constants get folded together, so the linker
is dealing with only one offset.  It doesn't (I assume) know that our
source code added 0x20000000 and then added/subtracted some
more.

It looks like we could just use:
class0: +0x40000000
class1: +0x80000000 (or subtract ... whatever doesn't make the linker cranky)
class2: -0x40000000
class3: don't add/subtract anything

ex_class() stays the same (just looks at bit31/bit30)
ex_fixup_addr() has to use ex_class() to decide what to add/subtract
(if anything).

Would that work?  Would it be more or less confusing?
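
Roughly, as a sketch only (class assignments as in the list above; names and
details made up), ex_fixup_addr() would then look something like:

	static inline unsigned long
	ex_fixup_addr(const struct exception_table_entry *x)
	{
		/* hypothetical per-class biases: class0..class3 from the list above */
		static const u32 bias[4] = {
			0x40000000, 0x80000000, (u32)-0x40000000, 0
		};
		u32 raw = (u32)x->fixup;
		s32 offset = (s32)(raw - bias[raw >> 30]);

		return (unsigned long)&x->fixup + offset;
	}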

-Tony
Andy Lutomirski Jan. 4, 2016, 7:05 p.m. UTC | #7
On Mon, Jan 4, 2016 at 10:59 AM, Tony Luck <tony.luck@gmail.com> wrote:
>> ----- begin comment -----
>>
>> The offset to the fixup is signed, and we're trying to use the high
>> bits for a different purpose.  In C, we could just do:
>>
>> u32 class_and_offset = ((target - here) & 0x3fffffff) | class;
>>
>> Then, to decode it, we'd mask off the class and sign-extend to recover
>> the offset.
>>
>> In asm, we can't do that, because this all gets laundered through the
>> linker, and there's no relocation type that supports this chicanery.
>> Instead we cheat a bit.  We first add a large number to the offset
>> (0x20000000).  The result is still nominally signed, but now it's
>> always positive, and the two high bits are always clear.  We can then
>> set high bits by ordinary addition or subtraction instead of using
>> bitwise operations.  As far as the linker is concerned, all we're
>> doing is adding a large constant to the difference between here (".")
>> and the target, and that's a valid relocation type.
>>
>> In the C code, we just mask off the class bits and subtract 0x20000000
>> to get the offset.
>>
>> ----- end comment -----
>
> But presumably those constants get folded together, so the linker
> is dealing with only one offset.  It doesn't (I assume) know that our
> source code added 0x20000000 and then added/subtracted some
> more.

Yes, indeed.

>
> It looks like we could just use:
> class0: +0x40000000
> class1: +0x80000000 (or subtract ... whatever doesn't make the linker cranky)
> class2: -0x40000000
> class3: don't add/subtract anything
>
> ex_class() stays the same (just looks at bit31/bit30)
> ex_fixup_addr() has to use ex_class() to decide what to add/subtract
> (if anything).
>
> Would that work?  Would it be more or less confusing?

That probably works, but to me, at least, it's a bit more confusing.
It also means that you need a table or some branches to compute the
offset, whereas the "mask top two bits and add a constant" approach is
straightforward, short, and fast.

Also, I'm not 100% convinced that the 0x80000000 case can ever work
reliably.  I don't know exactly what the condition that triggers the
warning is, but the logical one would be to warn if the actual offset
plus or minus the addend, as appropriate, overflows in a signed sense.
Whether it overflows depends on the sign of the offset, and *that*
depends on the actual layout of all the sections.

Mine avoids this issue by being shifted by 0x20000000, so nothing ends
up right on the edge.

--Andy
Borislav Petkov Jan. 4, 2016, 9:02 p.m. UTC | #8
On Mon, Jan 04, 2016 at 10:08:43AM -0800, Andy Lutomirski wrote:
> All of that's correct, including the part where it's confusing.  The
> comments aren't the best.
> 
> How about adding a comment like:
> 
> ----- begin comment -----
> 
> The offset to the fixup is signed, and we're trying to use the high
> bits for a different purpose.  In C, we could just do:
> 
> u32 class_and_offset = ((target - here) & 0x3fffffff) | class;
> 
> Then, to decode it, we'd mask off the class and sign-extend to recover
> the offset.
> 
> In asm, we can't do that, because this all gets laundered through the
> linker, and there's no relocation type that supports this chicanery.
> Instead we cheat a bit.  We first add a large number to the offset
> (0x20000000).  The result is still nominally signed, but now it's
> always positive, and the two high bits are always clear.  We can then
> set high bits by ordinary addition or subtraction instead of using
> bitwise operations.  As far as the linker is concerned, all we're
> doing is adding a large constant to the difference between here (".")
> and the target, and that's a valid relocation type.
> 
> In the C code, we just mask off the class bits and subtract 0x20000000
> to get the offset.
> 
> ----- end comment -----

Yeah, that makes more sense, thanks.

That nasty "." current position thing stays in the way to do it cleanly. :-)

Anyway, ok, I see it now. It still feels a bit hacky to me. I probably
would've added a third int to the exception table instead. It would've
been much more straightforward and cleaner that way, and I'd gladly pay
the additional 6K growth.
Andy Lutomirski Jan. 4, 2016, 10:29 p.m. UTC | #9
On Mon, Jan 4, 2016 at 1:02 PM, Borislav Petkov <bp@alien8.de> wrote:
> On Mon, Jan 04, 2016 at 10:08:43AM -0800, Andy Lutomirski wrote:
>> All of that's correct, including the part where it's confusing.  The
>> comments aren't the best.
>>
>> How about adding a comment like:
>>
>> ----- begin comment -----
>>
>> The offset to the fixup is signed, and we're trying to use the high
>> bits for a different purpose.  In C, we could just do:
>>
>> u32 class_and_offset = ((target - here) & 0x3fffffff) | class;
>>
>> Then, to decode it, we'd mask off the class and sign-extend to recover
>> the offset.
>>
>> In asm, we can't do that, because this all gets laundered through the
>> linker, and there's no relocation type that supports this chicanery.
>> Instead we cheat a bit.  We first add a large number to the offset
>> (0x20000000).  The result is still nominally signed, but now it's
>> always positive, and the two high bits are always clear.  We can then
>> set high bits by ordinary addition or subtraction instead of using
>> bitwise operations.  As far as the linker is concerned, all we're
>> doing is adding a large constant to the difference between here (".")
>> and the target, and that's a valid relocation type.
>>
>> In the C code, we just mask off the class bits and subtract 0x20000000
>> to get the offset.
>>
>> ----- end comment -----
>
> Yeah, that makes more sense, thanks.
>
> That nasty "." current position thing stays in the way to do it cleanly. :-)
>
> Anyway, ok, I see it now. It still feels a bit hacky to me. I probably
> would've added the third int to the exception table instead. It would've
> been much more straightforward and clean this way and I'd gladly pay the
> additional 6K growth.

Josh will argue with you if he sees that :)

We could maybe come up with a way to compress the table and get that
space and more back, but maybe that should be a follow-up that someone
else can do if they're inspired.

--Andy
Borislav Petkov Jan. 4, 2016, 11:02 p.m. UTC | #10
On Mon, Jan 04, 2016 at 02:29:09PM -0800, Andy Lutomirski wrote:
> Josh will argue with you if he sees that :)

Except Josh doesn't need allyesconfigs. tinyconfig's __ex_table is 2K.

> We could maybe come up with a way to compress the table and get that
> space and more back, but maybe that should be a follow-up that someone
> else can do if they're inspired.

Yeah, one needs to look after one's TODO list. :-)
Borislav Petkov Jan. 4, 2016, 11:04 p.m. UTC | #11
On Tue, Jan 05, 2016 at 12:02:46AM +0100, Borislav Petkov wrote:
> Except Josh doesn't need allyesconfigs. tinyconfig's __ex_table is 2K.

Besides, I just saved him 1.5K:

https://lkml.kernel.org/r/1449481182-27541-1-git-send-email-bp@alien8.de

:-)
Tony Luck Jan. 4, 2016, 11:11 p.m. UTC | #12
On Mon, Jan 4, 2016 at 10:08 AM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Mon, Jan 4, 2016 at 9:26 AM, Tony Luck <tony.luck@gmail.com> wrote:
>> On Mon, Jan 4, 2016 at 4:07 AM, Borislav Petkov <bp@alien8.de> wrote:

>>> Why not simply:
>>>
>>>         .long (to) - . + (bias) ;
>>>
>>> and
>>>
>>>         " .long (" #to ") - . + "(" #bias ") "\n"
>>>
>>> below and get rid of that _EXPAND_EXTABLE_BIAS()?
>>
>> Andy - this part is your code and I'm not sure what the trick is here.
>
> I don't remember.  I think it was just some preprocessor crud to force
> all the macros to expand fully before the assembler sees it.  If it
> builds without it, feel free to delete it.

The trick is definitely needed in the case of

# define _EXPAND_EXTABLE_BIAS(x) #x

Trying to expand it inline and get rid of the macro led to horrible failure.
The __ASSEMBLY__ case, where the macro does nothing, isn't required ... but it
does provide a certain amount of symmetry when looking at the two versions of
_ASM_EXTABLE_CLASS.
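
For the record, this is the usual stringify-vs-expand-then-stringify issue.
A tiny standalone example (made-up names, not from the patch):

	#define BIAS        0x20000000
	#define STR_RAW(x)  #x          /* STR_RAW(BIAS) -> "BIAS"       */
	#define STR_EXP(x)  STR_RAW(x)  /* STR_EXP(BIAS) -> "0x20000000" */

The inline-asm string needs the expanded form, since the assembler has no
idea what a name like _EXTABLE_BIAS_DEFAULT means.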

-Tony
Andy Lutomirski Jan. 4, 2016, 11:25 p.m. UTC | #13
On Mon, Jan 4, 2016 at 3:02 PM, Borislav Petkov <bp@alien8.de> wrote:
> On Mon, Jan 04, 2016 at 02:29:09PM -0800, Andy Lutomirski wrote:
>> Josh will argue with you if he sees that :)
>
> Except Josh doesn't need allyesconfigs. tinyconfig's __ex_table is 2K.

If we do the make-it-bigger approach, we get a really nice
simplification.  Screw the whole 'class' idea -- just store an offset
to a handler.

bool extable_handler_default(struct pt_regs *regs, unsigned int fault,
			     unsigned long error_code, unsigned long info)
{
	if (fault == X86_TRAP_MC)
		return false;

	...
}

bool extable_handler_mc_copy(struct pt_regs *regs, unsigned int fault,
			     unsigned long error_code, unsigned long info);
bool extable_handler_getput_ex(struct pt_regs *regs, unsigned int fault,
			       unsigned long error_code, unsigned long info);

and then shove ".long extable_handler_whatever - ." into the extable entry.
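
A minimal sketch of the dispatch side (hypothetical: assumes a new relative
'handler' field in struct exception_table_entry, and that the fault number
and error code get plumbed into fixup_exception()):

	typedef bool (*ex_handler_t)(struct pt_regs *, unsigned int,
				     unsigned long, unsigned long);

	static inline ex_handler_t
	ex_handler(const struct exception_table_entry *x)
	{
		/* same relative-offset trick as ex_insn_addr()/ex_fixup_addr() */
		return (ex_handler_t)((unsigned long)&x->handler + x->handler);
	}

	/* ... and fixup_exception() would collapse to roughly: */
	fixup = search_exception_tables(regs->ip);
	if (fixup)
		return ex_handler(fixup)(regs, fault, error_code, 0);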

Major bonus points to whoever can figure out how to make
extable_handler_iret work -- the current implementation of that is a
real turd.  (Hint: it's not clear to me that it's even possible
without preserving at least part of the asm special case.)

--Andy
Borislav Petkov Jan. 5, 2016, 11:20 a.m. UTC | #14
On Mon, Jan 04, 2016 at 03:25:58PM -0800, Andy Lutomirski wrote:
> On Mon, Jan 4, 2016 at 3:02 PM, Borislav Petkov <bp@alien8.de> wrote:
> > On Mon, Jan 04, 2016 at 02:29:09PM -0800, Andy Lutomirski wrote:
> >> Josh will argue with you if he sees that :)
> >
> > Except Josh doesn't need allyesconfigs. tinyconfig's __ex_table is 2K.
> 
> If we do the make-it-bigger approach, we get a really nice
> simplification.  Screw the whole 'class' idea -- just store an offset
> to a handler.
> 
> bool extable_handler_default(struct pt_regs *regs, unsigned int fault,
> 			     unsigned long error_code, unsigned long info)
> {
> 	if (fault == X86_TRAP_MC)
> 		return false;
> 
> 	...
> }
> 
> bool extable_handler_mc_copy(struct pt_regs *regs, unsigned int fault,
> 			     unsigned long error_code, unsigned long info);
> bool extable_handler_getput_ex(struct pt_regs *regs, unsigned int fault,
> 			       unsigned long error_code, unsigned long info);
> 
> and then shove ".long extable_handler_whatever - ." into the extable entry.

And to make it even cooler and more generic, you can make the exception
table entry look like this:

{ <offset to fault address>, <offset to handler>, <offset to an opaque pointer> }

and that opaque pointer would be a void * to some struct we pass to
that handler, filled with whatever it needs. For starters, it would
contain the return address where the fixup wants us to go.

The struct will have to be statically allocated but ok, that's fine.

And this way you can do all the sophisticated handling you desire.
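
Concretely (a rough sketch; struct and field names invented here), something
along the lines of:

	struct extable_fixup_data {
		unsigned long fixup_ip;	/* where the handler resumes, for starters */
		/* ... whatever else a particular handler needs ... */
	};

	struct exception_table_entry {
		int insn;	/* offset to the potentially faulting instruction */
		int handler;	/* offset to the handler function                 */
		int data;	/* offset to a struct extable_fixup_data          */
	};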

> Major bonus points to whoever can figure out how to make
> extable_handler_iret work -- the current implementation of that is a
> real turd.  (Hint: it's not clear to me that it's even possible
> without preserving at least part of the asm special case.)

What's extable_handler_iret? IRET-ting from an exception? Where do we do
that?

Patch

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 189679aba703..b64121ffb2da 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -43,19 +43,47 @@ 
 #define _ASM_DI		__ASM_REG(di)
 
 /* Exception table entry */
-#ifdef __ASSEMBLY__
-# define _ASM_EXTABLE(from,to)					\
-	.pushsection "__ex_table","a" ;				\
-	.balign 8 ;						\
-	.long (from) - . ;					\
-	.long (to) - . ;					\
-	.popsection
 
-# define _ASM_EXTABLE_EX(from,to)				\
-	.pushsection "__ex_table","a" ;				\
-	.balign 8 ;						\
-	.long (from) - . ;					\
-	.long (to) - . + 0x7ffffff0 ;				\
+/*
+ * An exception table entry is 64 bits.  The first 32 bits are the offset
+ * from that entry to the potentially faulting instruction.  sortextable
+ * relies on that exact encoding.  The second 32 bits encode the fault
+ * handler address.
+ *
+ * We want to stick two extra bits of handler class into the fault handler
+ * address.  All of these are generated by relocations, so we can only
+ * rely on addition.  We therefore emit:
+ *
+ * (target - here) + (class) + 0x20000000
+ *
+ * This has the property that the two high bits are the class and the
+ * rest is easy to decode.
+ */
+
+/* There are two bits of extable entry class, added to a signed offset. */
+#define _EXTABLE_CLASS_DEFAULT	0		/* standard uaccess fixup */
+#define _EXTABLE_CLASS_EX	0x80000000	/* uaccess + set uaccess_err */
+
+/*
+ * The biases are the class constants + 0x20000000, as signed integers.
+ * This can't use ordinary arithmetic -- the assembler isn't that smart.
+ */
+#define _EXTABLE_BIAS_DEFAULT	0x20000000
+#define _EXTABLE_BIAS_EX	0x20000000 - 0x80000000
+
+#define _ASM_EXTABLE(from,to)						\
+	_ASM_EXTABLE_CLASS(from, to, _EXTABLE_BIAS_DEFAULT)
+
+#define _ASM_EXTABLE_EX(from,to)					\
+	_ASM_EXTABLE_CLASS(from, to, _EXTABLE_BIAS_EX)
+
+#ifdef __ASSEMBLY__
+# define _EXPAND_EXTABLE_BIAS(x) x
+# define _ASM_EXTABLE_CLASS(from,to,bias)				\
+	.pushsection "__ex_table","a" ;					\
+	.balign 8 ;							\
+	.long (from) - . ;						\
+	.long (to) - . + _EXPAND_EXTABLE_BIAS(bias) ;			\
 	.popsection
 
 # define _ASM_NOKPROBE(entry)					\
@@ -89,18 +117,12 @@ 
 	.endm
 
 #else
-# define _ASM_EXTABLE(from,to)					\
-	" .pushsection \"__ex_table\",\"a\"\n"			\
-	" .balign 8\n"						\
-	" .long (" #from ") - .\n"				\
-	" .long (" #to ") - .\n"				\
-	" .popsection\n"
-
-# define _ASM_EXTABLE_EX(from,to)				\
-	" .pushsection \"__ex_table\",\"a\"\n"			\
-	" .balign 8\n"						\
-	" .long (" #from ") - .\n"				\
-	" .long (" #to ") - . + 0x7ffffff0\n"			\
+# define _EXPAND_EXTABLE_BIAS(x) #x
+# define _ASM_EXTABLE_CLASS(from,to,bias)				\
+	" .pushsection \"__ex_table\",\"a\"\n"				\
+	" .balign 8\n"							\
+	" .long (" #from ") - .\n"					\
+	" .long (" #to ") - . + " _EXPAND_EXTABLE_BIAS(bias) "\n"	\
 	" .popsection\n"
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 903ec1e9c326..95e2ede71206 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -8,16 +8,24 @@  ex_insn_addr(const struct exception_table_entry *x)
 {
 	return (unsigned long)&x->insn + x->insn;
 }
+static inline unsigned int
+ex_class(const struct exception_table_entry *x)
+{
+	return (unsigned int)x->fixup & 0xC0000000;
+}
+
 static inline unsigned long
 ex_fixup_addr(const struct exception_table_entry *x)
 {
-	return (unsigned long)&x->fixup + x->fixup;
+	long offset = (long)((u32)x->fixup & 0x3fffffff) - (long)0x20000000;
+	return (unsigned long)&x->fixup + offset;
 }
 
 int fixup_exception(struct pt_regs *regs)
 {
 	const struct exception_table_entry *fixup;
 	unsigned long new_ip;
+	unsigned int class;
 
 #ifdef CONFIG_PNPBIOS
 	if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
@@ -35,12 +43,12 @@  int fixup_exception(struct pt_regs *regs)
 
 	fixup = search_exception_tables(regs->ip);
 	if (fixup) {
+		class = ex_class(fixup);
 		new_ip = ex_fixup_addr(fixup);
 
-		if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
+		if (class == _EXTABLE_CLASS_EX) {
 			/* Special hack for uaccess_err */
 			current_thread_info()->uaccess_err = 1;
-			new_ip -= 0x7ffffff0;
 		}
 		regs->ip = new_ip;
 		return 1;
@@ -53,18 +61,15 @@  int fixup_exception(struct pt_regs *regs)
 int __init early_fixup_exception(unsigned long *ip)
 {
 	const struct exception_table_entry *fixup;
-	unsigned long new_ip;
 
 	fixup = search_exception_tables(*ip);
 	if (fixup) {
-		new_ip = ex_fixup_addr(fixup);
-
-		if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
+		if (ex_class(fixup) == _EXTABLE_CLASS_EX) {
 			/* uaccess handling not supported during early boot */
 			return 0;
 		}
 
-		*ip = new_ip;
+		*ip = ex_fixup_addr(fixup);
 		return 1;
 	}