@@ -159,7 +159,6 @@ SECTIONS
__alt_instructions = .;
*(.altinstructions)
__alt_instructions_end = .;
- . = ALIGN(4);
*(.altinstr_replacement)
#endif
@@ -202,7 +202,7 @@ SECTIONS
* "Alternative instructions for different CPU types or capabilities"
* Think locking instructions on spinlocks.
*/
- . = ALIGN(8);
+ . = ALIGN(4);
__alt_instructions = .;
*(.altinstructions)
__alt_instructions_end = .;
@@ -28,6 +28,7 @@ void __init apply_alternatives_all(void);
int apply_alternatives(const struct alt_instr *start, const struct alt_instr *end);
#define ALTINSTR_ENTRY(feature) \
+ " .p2align 2\n" \
" .word 661b - .\n" /* label */ \
" .word 663f - .\n" /* new instruction */ \
" .hword " __stringify(feature) "\n" /* feature bit */ \
@@ -57,6 +58,7 @@ int apply_alternatives(const struct alt_instr *start, const struct alt_instr *en
ALTINSTR_ENTRY(feature) \
".popsection\n" \
".pushsection .altinstr_replacement, \"a\"\n" \
+ ".p2align 2\n" \
"663:\n\t" \
newinstr "\n" \
"664:\n\t" \
@@ -73,6 +75,7 @@ int apply_alternatives(const struct alt_instr *start, const struct alt_instr *en
#include <asm/asm_defns.h>
.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
+ .p2align 2
.word \orig_offset - .
.word \alt_offset - .
.hword \feature
@@ -103,6 +106,7 @@ int apply_alternatives(const struct alt_instr *start, const struct alt_instr *en
.macro alternative_if_not cap, enable = 1
.if \enable
.pushsection .altinstructions, "a"
+ .p2align 2
altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
.popsection
661:
@@ -5,6 +5,7 @@
#ifdef __ASSEMBLY__
.macro altinstruction_entry orig alt feature orig_len alt_len
+ .p2align 2
.long \orig - .
.long \alt - .
.word \feature
@@ -42,6 +43,7 @@ extern void alternative_instructions(void);
#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
#define ALTINSTR_ENTRY(feature, number) \
+ " .p2align 2\n" \
" .long 661b - .\n" /* label */ \
" .long " b_replacement(number)"f - .\n" /* new instruction */ \
" .word " __stringify(feature) "\n" /* feature bit */ \
This is very similar to 137c59b9ff3f7a214f03b52d9c00a0a02374af1f "bug/x86/arm: Align bug_frames sections." On ARM and on x86 the C and assembler macros don't include any alignment information - hence they end up being the default byte granularity. On ARM32 it is paramount that the alignment is word-size (4) otherwise if one tries to use (uint32_t*) access (such as livepatch ELF relocations) we get a Data Abort. Specifically this issue was observed on ARM32 with a cross compiler for the livepatches. Mainly the livepatches .data section size was not padded to the section alignment: ARM32 native: Contents of section .rodata: 0000 68695f66 756e6300 63686563 6b5f666e hi_func.check_fn 0010 63000000 78656e5f 65787472 615f7665 c...xen_extra_ve 0020 7273696f 6e000000 rsion... ARM32 cross compiler: Contents of section .rodata: 0000 68695f66 756e6300 63686563 6b5f666e hi_func.check_fn 0010 63000000 78656e5f 65787472 615f7665 c...xen_extra_ve 0020 7273696f 6e00 rsion. And when we loaded it the next section would be put right behind it: native: (XEN) livepatch.c:413: livepatch: xen_hello_world: Loaded .text at 00a02000 (XEN) livepatch.c:413: livepatch: xen_hello_world: Loaded .rodata at 00a04024 (XEN) livepatch.c:413: livepatch: xen_hello_world: Loaded .altinstructions at 00a0404c cross compiler: (XEN) livepatch.c:413: livepatch: xen_hello_world: Loaded .text at 00a02000 (XEN) livepatch.c:413: livepatch: xen_hello_world: Loaded .rodata at 00a04024 (XEN) livepatch.c:413: livepatch: xen_hello_world: Loaded .altinstructions at 00a0404a (See 4a vs 4c) native readelf: [ 4] .rodata PROGBITS 00000000 000164 000028 00 A 0 0 4 [ 5] .altinstructions PROGBITS 00000000 00018c 00000c 00 A 0 0 1 cross compiler readelf --sections: [ 4] .rodata PROGBITS 00000000 000164 000026 00 A 0 0 4 [ 5] .altinstructions PROGBITS 00000000 00018a 00000c 00 A 0 0 1 And as can be seen the .altinstructions have alignment of 1 which from 'man elf' is: "Values of zero and one mean no alignment is required." which means we can ignore it. Enforcing .altinstructions (and also .altinstr_replacement for completness on ARM) to have the proper alignment across all architectures and in both C and x86 makes them all the same. On x86 the bloat-o-meter detects that with this change the file shrinks: add/remove: 1/0 grow/shrink: 0/2 up/down: 156/-367 (-211) function old new delta get_page_from_gfn - 156 +156 do_mmu_update 4578 4569 -9 do_mmuext_op 5604 5246 -358 Total: Before=3170439, After=3170228, chg -0.01% But as found adding even "#Hi!\n" will casue this optimization, so the bloat-o-meter value here is useless. While on ARM 32/64: add/remove: 0/0 grow/shrink: 0/0 up/down: 0/0 (0) function old new delta Total: Before=822563, After=822563, chg +0.00% Also since the macros have the alignment coded in them there is no need to do that for the xen.lds.S anymore. Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- Cc: Ross Lagerwall <ross.lagerwall@citrix.com> Cc: Stefano Stabellini <sstabellini@kernel.org> Cc: Julien Grall <julien.grall@arm.com> Cc: Jan Beulich <jbeulich@suse.com> Cc: Andrew Cooper <andrew.cooper3@citrix.com> v2: - First posting. v3: - Figured out the x86 bloat-o-meter results. - Removed the .ALIGN from xen.lds.S - Removed the .p2align on .altinstr_replacement per Jan's request. - Put most of the commit description for the original issue v4: - Added one .ALIGN back on xen.lds.S (arm) - Changed the .ALIGN(8) to ALIGN(4) on xen.lds.S (x86) - Moved p2align inside of the macros (ALTINSTR_ENTRY and altinstruction_entry) --- xen/arch/arm/xen.lds.S | 1 - xen/arch/x86/xen.lds.S | 2 +- xen/include/asm-arm/alternative.h | 4 ++++ xen/include/asm-x86/alternative.h | 2 ++ 4 files changed, 7 insertions(+), 2 deletions(-)