Message ID | 20190821163542.172063-2-dwmw2@infradead.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Clean up x86_64 boot code | expand |
On 21.08.2019 18:35, David Woodhouse wrote: > --- a/xen/arch/x86/boot/head.S > +++ b/xen/arch/x86/boot/head.S > @@ -699,14 +699,30 @@ trampoline_setup: > cmp $sym_offs(__trampoline_rel_stop),%edi > jb 1b > > - /* Patch in the trampoline segment. */ > + mov $sym_offs(__trampoline32_rel_start),%edi > +1: > + mov %fs:(%edi),%eax > + add %edx,%fs:(%edi,%eax) > + add $4,%edi > + cmp $sym_offs(__trampoline32_rel_stop),%edi > + jb 1b > + > + mov $sym_offs(__bootsym_rel_start),%edi > +1: > + mov %fs:(%edi),%eax > + add %edx,%fs:(%edi,%eax) > + add $4,%edi > + cmp $sym_offs(__bootsym_rel_stop),%edi > + jb 1b With the smaller sets now - are we risking misbehavior if one of the relocation sets ends up empty? This wasn't reasonable to expect before, but I think it would be nice to have a build-time check rather than a hard to debug crash in case this happens. > --- a/xen/arch/x86/boot/trampoline.S > +++ b/xen/arch/x86/boot/trampoline.S > @@ -16,21 +16,62 @@ > * not guaranteed to persist. > */ > > -/* NB. bootsym() is only usable in real mode, or via BOOT_PSEUDORM_DS. */ > +/* > + * There are four sets of relocations: > + * > + * bootsym(): Boot-time code relocated to low memory and run only once. > + * Only usable at boot; in real mode or via BOOT_PSEUDORM_DS. > + * bootdatasym(): Boot-time BIOS-discovered data, relocated back up to Xen > + * image after discovery. > + * trampsym(): Permanent trampoline code relocated into low memory for AP > + * startup and wakeup. > + * tramp32sym(): 32-bit trampoline code which at boot can be used directly > + * from the Xen image in memory, but which will need to be > + * relocated into low (well, into *mapped*) memory in order > + * to be used for AP startup. > + */ > #undef bootsym > #define bootsym(s) ((s)-trampoline_start) > > #define bootsym_rel(sym, off, opnd...) 
\ > bootsym(sym),##opnd; \ > 111:; \ > - .pushsection .trampoline_rel, "a"; \ > + .pushsection .bootsym_rel, "a"; \ > .long 111b - (off) - .; \ > .popsection > > #define bootsym_segrel(sym, off) \ > $0,$bootsym(sym); \ > 111:; \ > - .pushsection .trampoline_seg, "a"; \ > + .pushsection .bootsym_seg, "a"; \ > + .long 111b - (off) - .; \ > + .popsection > + > +#define bootdatasym(s) ((s)-trampoline_start) > +#define bootdatasym_rel(sym, off, opnd...) \ > + bootdatasym(sym),##opnd; \ > +111:; \ > + .pushsection .bootdatasym_rel, "a";\ > + .long 111b - (off) - .; \ > + .popsection > + > +#undef trampsym Why this and ... > +#define trampsym(s) ((s)-trampoline_start) > + > +#define trampsym_rel(sym, off, opnd...) \ > + trampsym(sym),##opnd; \ > +111:; \ > + .pushsection .trampsym_rel, "a"; \ > + .long 111b - (off) - .; \ > + .popsection > + > +#undef tramp32sym ... this #undef? You have none ahead of the bootdatasym #define-s, and (other than for bootsym) there's not conflicting C level one afaics. > +#define tramp32sym(s) ((s)-trampoline_start) > + > +#define tramp32sym_rel(sym, off, opnd...) \ > + tramp32sym(sym),##opnd; \ > +111:; \ > + .pushsection .tramp32sym_rel, "a"; \ > .long 111b - (off) - .; \ > .popsection After your reply to my comment regarding the redundancy here I've checked (in your git branch) how things end up. Am I mistaken, or are the trampsym and tramp32sym #define-s entirely identical (except for the relocations section name)? Even between the others there's little enough difference, so it continues to be unclear to me why you think it's better to have four instances of about the same (not entirely trivial) thing. 
> @@ -48,16 +89,19 @@ > GLOBAL(trampoline_realmode_entry) > mov %cs,%ax > mov %ax,%ds > - movb $0xA5,bootsym(trampoline_cpu_started) > + movb $0xA5,trampsym(trampoline_cpu_started) > cld > cli > - lidt bootsym(idt_48) > - lgdt bootsym(gdt_48) > + lidt trampsym(idt_48) > + lgdt trampsym(gdt_48) > mov $1,%bl # EBX != 0 indicates we are an AP > xor %ax, %ax > inc %ax > lmsw %ax # CR0.PE = 1 (enter protected mode) > - ljmpl $BOOT_CS32,$bootsym_rel(trampoline_protmode_entry,6) > + ljmpl $BOOT_CS32,$tramp32sym_rel(trampoline_protmode_entry,6) > + > +GLOBAL(trampoline_cpu_started) > + .byte 0 The movement of this item here seems unrelated to this change; it's also not mentioned in the description. > @@ -115,10 +115,10 @@ static void __init relocate_trampoline(unsigned long phys) > trampoline_ptr < __trampoline_rel_stop; > ++trampoline_ptr ) > *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys; > - for ( trampoline_ptr = __trampoline_seg_start; > - trampoline_ptr < __trampoline_seg_stop; > + for ( trampoline_ptr = __trampoline32_rel_start; > + trampoline_ptr < __trampoline32_rel_stop; > ++trampoline_ptr ) > - *(u16 *)(*trampoline_ptr + (long)trampoline_ptr) = phys >> 4; > + *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys; > } Seeing this and adding in the comment about the redundant tramp*sym macros I wonder why the relocations can't be put together in a single section, and there be just a single loop here. (I realize this entire function gets deleted from here later on, but anyway.) Jan
On Fri, 2019-08-30 at 17:10 +0200, Jan Beulich wrote: > On 21.08.2019 18:35, David Woodhouse wrote: > > --- a/xen/arch/x86/boot/head.S > > +++ b/xen/arch/x86/boot/head.S > > @@ -699,14 +699,30 @@ trampoline_setup: > > cmp $sym_offs(__trampoline_rel_stop),%edi > > jb 1b > > > > - /* Patch in the trampoline segment. */ > > + mov $sym_offs(__trampoline32_rel_start),%edi > > +1: > > + mov %fs:(%edi),%eax > > + add %edx,%fs:(%edi,%eax) > > + add $4,%edi > > + cmp $sym_offs(__trampoline32_rel_stop),%edi > > + jb 1b > > + > > + mov $sym_offs(__bootsym_rel_start),%edi > > +1: > > + mov %fs:(%edi),%eax > > + add %edx,%fs:(%edi,%eax) > > + add $4,%edi > > + cmp $sym_offs(__bootsym_rel_stop),%edi > > + jb 1b > > With the smaller sets now - are we risking misbehavior if one > of the relocation sets ends up empty? This wasn't reasonable to > expect before, but I think it would be nice to have a build-time > check rather than a hard to debug crash in case this happens. Or just code it differently as a while() instead of a do{}while() so that it actually copes with a zero-length section. > > --- a/xen/arch/x86/boot/trampoline.S > > +++ b/xen/arch/x86/boot/trampoline.S > > @@ -16,21 +16,62 @@ > > * not guaranteed to persist. > > */ > > > > -/* NB. bootsym() is only usable in real mode, or via BOOT_PSEUDORM_DS. */ > > +/* > > + * There are four sets of relocations: > > + * > > + * bootsym(): Boot-time code relocated to low memory and run only once. > > + * Only usable at boot; in real mode or via BOOT_PSEUDORM_DS. > > + * bootdatasym(): Boot-time BIOS-discovered data, relocated back up to Xen > > + * image after discovery. > > + * trampsym(): Permanent trampoline code relocated into low memory for AP > > + * startup and wakeup. > > + * tramp32sym(): 32-bit trampoline code which at boot can be used directly > > + * from the Xen image in memory, but which will need to be > > + * relocated into low (well, into *mapped*) memory in order > > + * to be used for AP startup. 
> > + */ > > #undef bootsym > > #define bootsym(s) ((s)-trampoline_start) > > > > #define bootsym_rel(sym, off, opnd...) \ > > bootsym(sym),##opnd; \ > > 111:; \ > > - .pushsection .trampoline_rel, "a"; \ > > + .pushsection .bootsym_rel, "a"; \ > > .long 111b - (off) - .; \ > > .popsection > > > > #define bootsym_segrel(sym, off) \ > > $0,$bootsym(sym); \ > > 111:; \ > > - .pushsection .trampoline_seg, "a"; \ > > + .pushsection .bootsym_seg, "a"; \ > > + .long 111b - (off) - .; \ > > + .popsection > > + > > +#define bootdatasym(s) ((s)-trampoline_start) > > +#define bootdatasym_rel(sym, off, opnd...) \ > > + bootdatasym(sym),##opnd; \ > > +111:; \ > > + .pushsection .bootdatasym_rel, "a";\ > > + .long 111b - (off) - .; \ > > + .popsection > > + > > +#undef trampsym > > Why this and ... > > > +#define trampsym(s) ((s)-trampoline_start) > > + > > +#define trampsym_rel(sym, off, opnd...) \ > > + trampsym(sym),##opnd; \ > > +111:; \ > > + .pushsection .trampsym_rel, "a"; \ > > + .long 111b - (off) - .; \ > > + .popsection > > + > > +#undef tramp32sym > > ... this #undef? You have none ahead of the bootdatasym #define-s, > and (other than for bootsym) there's not conflicting C level one > afaics. > > > +#define tramp32sym(s) ((s)-trampoline_start) > > + > > +#define tramp32sym_rel(sym, off, opnd...) \ > > + tramp32sym(sym),##opnd; \ > > +111:; \ > > + .pushsection .tramp32sym_rel, "a"; \ > > .long 111b - (off) - .; \ > > .popsection > > After your reply to my comment regarding the redundancy here I've > checked (in your git branch) how things end up. Am I mistaken, or > are the trampsym and tramp32sym #define-s entirely identical > (except for the relocations section name)? Even between the others > there's little enough difference, so it continues to be unclear to > me why you think it's better to have four instances of about the > same (not entirely trivial) thing. 
The distinction is that in a no-real-mode boot tramp32 is used in place
in the Xen image at the physical address it happened to be loaded at,
and then *again* later in the AP/wakeup path. In the latter case it
needs to be moved to low memory (or we need to put the physical
location into idle_pg_table which seemed to be harder, as discussed).

So tramp32 symbols get relocated *twice*, while the plain tramp symbols
don't, but actually we could probably ditch the distinction and treat
them all the same, which would reduce the four categories to three.

I'll take a look.

I suppose we could also combine bootsym and bootdatasym, and copy that
*whole* section back up to the Xen image; both code and data. But I'm
inclined to prefer keeping them separate and only copying the data back
up.

> > @@ -48,16 +89,19 @@
> >  GLOBAL(trampoline_realmode_entry)
> >          mov     %cs,%ax
> >          mov     %ax,%ds
> > -        movb    $0xA5,bootsym(trampoline_cpu_started)
> > +        movb    $0xA5,trampsym(trampoline_cpu_started)
> >          cld
> >          cli
> > -        lidt    bootsym(idt_48)
> > -        lgdt    bootsym(gdt_48)
> > +        lidt    trampsym(idt_48)
> > +        lgdt    trampsym(gdt_48)
> >          mov     $1,%bl                    # EBX != 0 indicates we are an AP
> >          xor     %ax, %ax
> >          inc     %ax
> >          lmsw    %ax                       # CR0.PE = 1 (enter protected mode)
> > -        ljmpl   $BOOT_CS32,$bootsym_rel(trampoline_protmode_entry,6)
> > +        ljmpl   $BOOT_CS32,$tramp32sym_rel(trampoline_protmode_entry,6)
> > +
> > +GLOBAL(trampoline_cpu_started)
> > +        .byte 0
>
> The movement of this item here seems unrelated to this change; it's
> also not mentioned in the description.

Andy's already moved that elsewhere anyway; I'll undo that as I rebase.

> > @@ -115,10 +115,10 @@ static void __init relocate_trampoline(unsigned long phys)
> >            trampoline_ptr < __trampoline_rel_stop;
> >            ++trampoline_ptr )
> >          *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys;
> > -    for ( trampoline_ptr = __trampoline_seg_start;
> > -          trampoline_ptr < __trampoline_seg_stop;
> > +    for ( trampoline_ptr = __trampoline32_rel_start;
> > +          trampoline_ptr < __trampoline32_rel_stop;
> >            ++trampoline_ptr )
> > -        *(u16 *)(*trampoline_ptr + (long)trampoline_ptr) = phys >> 4;
> > +        *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys;
> >  }
>
> Seeing this and adding in the comment about the redundant tramp*sym
> macros I wonder why the relocations can't be put together in a single
> section, and there be just a single loop here. (I realize this
> entire function gets deleted from here later on, but anyway.)

Yeah, I think it's worth the harmless double-relocation in the non-EFI
case to treat everything (well tramp vs. tramp32) the same there. I'll
do that. Thanks.
On 30.08.2019 18:12, David Woodhouse wrote: > On Fri, 2019-08-30 at 17:10 +0200, Jan Beulich wrote: >> On 21.08.2019 18:35, David Woodhouse wrote: >>> --- a/xen/arch/x86/boot/trampoline.S >>> +++ b/xen/arch/x86/boot/trampoline.S >>> @@ -16,21 +16,62 @@ >>> * not guaranteed to persist. >>> */ >>> >>> -/* NB. bootsym() is only usable in real mode, or via BOOT_PSEUDORM_DS. */ >>> +/* >>> + * There are four sets of relocations: >>> + * >>> + * bootsym(): Boot-time code relocated to low memory and run only once. >>> + * Only usable at boot; in real mode or via BOOT_PSEUDORM_DS. >>> + * bootdatasym(): Boot-time BIOS-discovered data, relocated back up to Xen >>> + * image after discovery. >>> + * trampsym(): Permanent trampoline code relocated into low memory for AP >>> + * startup and wakeup. >>> + * tramp32sym(): 32-bit trampoline code which at boot can be used directly >>> + * from the Xen image in memory, but which will need to be >>> + * relocated into low (well, into *mapped*) memory in order >>> + * to be used for AP startup. >>> + */ >>> #undef bootsym >>> #define bootsym(s) ((s)-trampoline_start) >>> >>> #define bootsym_rel(sym, off, opnd...) \ >>> bootsym(sym),##opnd; \ >>> 111:; \ >>> - .pushsection .trampoline_rel, "a"; \ >>> + .pushsection .bootsym_rel, "a"; \ >>> .long 111b - (off) - .; \ >>> .popsection >>> >>> #define bootsym_segrel(sym, off) \ >>> $0,$bootsym(sym); \ >>> 111:; \ >>> - .pushsection .trampoline_seg, "a"; \ >>> + .pushsection .bootsym_seg, "a"; \ >>> + .long 111b - (off) - .; \ >>> + .popsection >>> + >>> +#define bootdatasym(s) ((s)-trampoline_start) >>> +#define bootdatasym_rel(sym, off, opnd...) \ >>> + bootdatasym(sym),##opnd; \ >>> +111:; \ >>> + .pushsection .bootdatasym_rel, "a";\ >>> + .long 111b - (off) - .; \ >>> + .popsection >>> + >>> +#undef trampsym >>> +#define trampsym(s) ((s)-trampoline_start) >>> + >>> +#define trampsym_rel(sym, off, opnd...) 
\ >>> + trampsym(sym),##opnd; \ >>> +111:; \ >>> + .pushsection .trampsym_rel, "a"; \ >>> + .long 111b - (off) - .; \ >>> + .popsection >>> + >>> +#undef tramp32sym >>> +#define tramp32sym(s) ((s)-trampoline_start) >>> + >>> +#define tramp32sym_rel(sym, off, opnd...) \ >>> + tramp32sym(sym),##opnd; \ >>> +111:; \ >>> + .pushsection .tramp32sym_rel, "a"; \ >>> .long 111b - (off) - .; \ >>> .popsection >> >> After your reply to my comment regarding the redundancy here I've >> checked (in your git branch) how things end up. Am I mistaken, or >> are the trampsym and tramp32sym #define-s entirely identical >> (except for the relocations section name)? Even between the others >> there's little enough difference, so it continues to be unclear to >> me why you think it's better to have four instances of about the >> same (not entirely trivial) thing. > > The distinction is that in a no-real-mode boot tramp32 is used in place > in the Xen image at the physical address it happened to be loaded at, > and then *again* later in the AP/wakeup path. In the latter case it > needs to be moved to low memory (or we need to put the physical > location into idle_pg_table which seemed to be harder, as discussed). > > So tramp32 symbols get relocated *twice*, while the plain tramp symbols > don't, but actually we could probably ditch the distinction and treat > them all the same, which would reduce the four categories to three. > > I'll take a look. > > I suppose we could also combine bootsym and bootdatasym, and copy that > *whole* section back up to the Xen image; both code and data. But I'm > inclined to prefer keeping them separate and only copying the data back > up. My remark here was and is not so much about reducing the number of instances of separate reloc macros/sections, but about reducing the redundancy in their definition. 
At the very least this part

    111:;                                  \
        .pushsection .bootdatasym_rel, "a";\
        .long 111b - (off) - .;            \
        .popsection

is identical between all of them, except for the section name, and hence I'd prefer it to be spelled out just once, and the "actual" macros then simply using the resulting (helper) macro.

Jan
diff --git a/xen/arch/x86/boot/edd.S b/xen/arch/x86/boot/edd.S index 3df712bce1..434bbbd960 100644 --- a/xen/arch/x86/boot/edd.S +++ b/xen/arch/x86/boot/edd.S @@ -41,7 +41,7 @@ get_edd: # This code is sensitive to the size of the structs in edd.h edd_start: /* ds:si points at fn48 results. Fn41 results go immediately before. */ - movw $bootsym(boot_edd_info)+EDDEXTSIZE, %si + movw $bootdatasym(boot_edd_info)+EDDEXTSIZE, %si movb $0x80, %dl # BIOS device 0x80 edd_check_ext: @@ -56,7 +56,7 @@ edd_check_ext: movb %dl, %ds:-8(%si) # store device number movb %ah, %ds:-7(%si) # store version movw %cx, %ds:-6(%si) # store extensions - incb bootsym(boot_edd_info_nr) # note that we stored something + incb bootdatasym(boot_edd_info_nr) # note that we stored something edd_get_device_params: movw $EDDPARMSIZE, %ds:(%si) # put size @@ -97,7 +97,7 @@ edd_legacy_done: edd_next: incb %dl # increment to next device jz edd_done - cmpb $EDD_INFO_MAX,bootsym(boot_edd_info_nr) + cmpb $EDD_INFO_MAX,bootdatasym(boot_edd_info_nr) jb edd_check_ext edd_done: @@ -108,11 +108,11 @@ edd_done: .Ledd_mbr_sig_start: pushw %es movb $0x80, %dl # from device 80 - movw $bootsym(boot_mbr_signature), %bx # store buffer ptr in bx + movw $bootdatasym(boot_mbr_signature), %bx # store buffer ptr in bx .Ledd_mbr_sig_read: pushw %bx - movw $bootsym(boot_edd_info), %bx - movzbw bootsym(boot_edd_info_nr), %cx + movw $bootdatasym(boot_edd_info), %bx + movzbw bootdatasym(boot_edd_info_nr), %cx jcxz .Ledd_mbr_sig_default .Ledd_mbr_sig_find_info: cmpb %dl, (%bx) @@ -151,12 +151,12 @@ edd_done: jne .Ledd_mbr_sig_next movb %dl, (%bx) # store BIOS drive number movl %ecx, 4(%bx) # store signature from MBR - incb bootsym(boot_mbr_signature_nr) # note that we stored something + incb bootdatasym(boot_mbr_signature_nr) # note that we stored something addw $8, %bx # increment sig buffer ptr .Ledd_mbr_sig_next: incb %dl # increment to next device jz .Ledd_mbr_sig_done - cmpb $EDD_MBR_SIG_MAX, bootsym(boot_mbr_signature_nr) 
+ cmpb $EDD_MBR_SIG_MAX, bootdatasym(boot_mbr_signature_nr) jb .Ledd_mbr_sig_read .Ledd_mbr_sig_done: popw %es diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S index d303379083..e19b31fb85 100644 --- a/xen/arch/x86/boot/head.S +++ b/xen/arch/x86/boot/head.S @@ -699,14 +699,30 @@ trampoline_setup: cmp $sym_offs(__trampoline_rel_stop),%edi jb 1b - /* Patch in the trampoline segment. */ + mov $sym_offs(__trampoline32_rel_start),%edi +1: + mov %fs:(%edi),%eax + add %edx,%fs:(%edi,%eax) + add $4,%edi + cmp $sym_offs(__trampoline32_rel_stop),%edi + jb 1b + + mov $sym_offs(__bootsym_rel_start),%edi +1: + mov %fs:(%edi),%eax + add %edx,%fs:(%edi,%eax) + add $4,%edi + cmp $sym_offs(__bootsym_rel_stop),%edi + jb 1b + + /* Patch in the boot trampoline segment. */ shr $4,%edx - mov $sym_offs(__trampoline_seg_start),%edi + mov $sym_offs(__bootsym_seg_start),%edi 1: mov %fs:(%edi),%eax mov %dx,%fs:(%edi,%eax) add $4,%edi - cmp $sym_offs(__trampoline_seg_stop),%edi + cmp $sym_offs(__bootsym_seg_stop),%edi jb 1b /* Do not parse command line on EFI platform here. */ diff --git a/xen/arch/x86/boot/mem.S b/xen/arch/x86/boot/mem.S index 5b9ab5c1de..c5bc774325 100644 --- a/xen/arch/x86/boot/mem.S +++ b/xen/arch/x86/boot/mem.S @@ -7,7 +7,7 @@ get_memory_map: .Lmeme820: xorl %ebx, %ebx # continuation counter - movw $bootsym(bios_e820map), %di # point into the whitelist + movw $bootdatasym(bios_e820map), %di # point into the whitelist # so we can have the bios # directly write into it. 
@@ -22,8 +22,8 @@ get_memory_map: cmpl $SMAP,%eax # check the return is `SMAP' jne .Lmem88 - incw bootsym(bios_e820nr) - cmpw $E820_BIOS_MAX, bootsym(bios_e820nr) # up to this many entries + incw bootdatasym(bios_e820nr) + cmpw $E820_BIOS_MAX, bootdatasym(bios_e820nr) # up to this many entries jae .Lmem88 movw %di,%ax @@ -35,7 +35,7 @@ get_memory_map: .Lmem88: movb $0x88, %ah int $0x15 - movw %ax,bootsym(highmem_kb) + movw %ax,bootdatasym(highmem_kb) .Lmeme801: stc # fix to work around buggy @@ -58,11 +58,11 @@ get_memory_map: shll $6,%edx # and go from 64k to 1k chunks movzwl %cx, %ecx addl %ecx, %edx # add in lower memory - movl %edx,bootsym(highmem_kb) # store extended memory size + movl %edx,bootdatasym(highmem_kb) # store extended memory size .Lint12: int $0x12 - movw %ax,bootsym(lowmem_kb) + movw %ax,bootdatasym(lowmem_kb) ret diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S index 429a088b19..8537aeb917 100644 --- a/xen/arch/x86/boot/trampoline.S +++ b/xen/arch/x86/boot/trampoline.S @@ -16,21 +16,62 @@ * not guaranteed to persist. */ -/* NB. bootsym() is only usable in real mode, or via BOOT_PSEUDORM_DS. */ +/* + * There are four sets of relocations: + * + * bootsym(): Boot-time code relocated to low memory and run only once. + * Only usable at boot; in real mode or via BOOT_PSEUDORM_DS. + * bootdatasym(): Boot-time BIOS-discovered data, relocated back up to Xen + * image after discovery. + * trampsym(): Permanent trampoline code relocated into low memory for AP + * startup and wakeup. + * tramp32sym(): 32-bit trampoline code which at boot can be used directly + * from the Xen image in memory, but which will need to be + * relocated into low (well, into *mapped*) memory in order + * to be used for AP startup. + */ #undef bootsym #define bootsym(s) ((s)-trampoline_start) #define bootsym_rel(sym, off, opnd...) 
\ bootsym(sym),##opnd; \ 111:; \ - .pushsection .trampoline_rel, "a"; \ + .pushsection .bootsym_rel, "a"; \ .long 111b - (off) - .; \ .popsection #define bootsym_segrel(sym, off) \ $0,$bootsym(sym); \ 111:; \ - .pushsection .trampoline_seg, "a"; \ + .pushsection .bootsym_seg, "a"; \ + .long 111b - (off) - .; \ + .popsection + +#define bootdatasym(s) ((s)-trampoline_start) +#define bootdatasym_rel(sym, off, opnd...) \ + bootdatasym(sym),##opnd; \ +111:; \ + .pushsection .bootdatasym_rel, "a";\ + .long 111b - (off) - .; \ + .popsection + +#undef trampsym +#define trampsym(s) ((s)-trampoline_start) + +#define trampsym_rel(sym, off, opnd...) \ + trampsym(sym),##opnd; \ +111:; \ + .pushsection .trampsym_rel, "a"; \ + .long 111b - (off) - .; \ + .popsection + +#undef tramp32sym +#define tramp32sym(s) ((s)-trampoline_start) + +#define tramp32sym_rel(sym, off, opnd...) \ + tramp32sym(sym),##opnd; \ +111:; \ + .pushsection .tramp32sym_rel, "a"; \ .long 111b - (off) - .; \ .popsection @@ -48,16 +89,19 @@ GLOBAL(trampoline_realmode_entry) mov %cs,%ax mov %ax,%ds - movb $0xA5,bootsym(trampoline_cpu_started) + movb $0xA5,trampsym(trampoline_cpu_started) cld cli - lidt bootsym(idt_48) - lgdt bootsym(gdt_48) + lidt trampsym(idt_48) + lgdt trampsym(gdt_48) mov $1,%bl # EBX != 0 indicates we are an AP xor %ax, %ax inc %ax lmsw %ax # CR0.PE = 1 (enter protected mode) - ljmpl $BOOT_CS32,$bootsym_rel(trampoline_protmode_entry,6) + ljmpl $BOOT_CS32,$tramp32sym_rel(trampoline_protmode_entry,6) + +GLOBAL(trampoline_cpu_started) + .byte 0 trampoline_gdt: /* 0x0000: unused */ @@ -79,8 +123,12 @@ trampoline_gdt: * address is computed at runtime. */ .quad 0x00c0920000000fff - - .pushsection .trampoline_rel, "a" + /* + * BOOT_PSEUDORM_CS and BOOT_PSEUDORM_DS are usable only at boot time, + * and their base addresses must reference the low location to which + * the boot-time code was loaded. Hence bootsym. + */ + .pushsection .bootsym_rel, "a" .long trampoline_gdt + BOOT_PSEUDORM_CS + 2 - . 
.long trampoline_gdt + BOOT_PSEUDORM_DS + 2 - . .popsection @@ -94,9 +142,6 @@ GLOBAL(cpuid_ext_features) GLOBAL(trampoline_xen_phys_start) .long 0 -GLOBAL(trampoline_cpu_started) - .byte 0 - .code32 trampoline_protmode_entry: /* Set up a few descriptors: on entry only CS is guaranteed good. */ @@ -113,12 +158,12 @@ trampoline_protmode_entry: /* Load pagetable base register. */ mov $sym_offs(idle_pg_table),%eax - add bootsym_rel(trampoline_xen_phys_start,4,%eax) + add tramp32sym_rel(trampoline_xen_phys_start,4,%eax) mov %eax,%cr3 /* Adjust IA32_MISC_ENABLE if needed (for NX enabling below). */ - mov bootsym_rel(trampoline_misc_enable_off,4,%esi) - mov bootsym_rel(trampoline_misc_enable_off+4,4,%edi) + mov tramp32sym_rel(trampoline_misc_enable_off,4,%esi) + mov tramp32sym_rel(trampoline_misc_enable_off+4,4,%edi) mov %esi,%eax or %edi,%eax jz 1f @@ -132,7 +177,7 @@ trampoline_protmode_entry: 1: /* Set up EFER (Extended Feature Enable Register). */ - mov bootsym_rel(cpuid_ext_features,4,%edi) + mov tramp32sym_rel(cpuid_ext_features,4,%edi) movl $MSR_EFER,%ecx rdmsr or $EFER_LME|EFER_SCE,%eax /* Long Mode + SYSCALL/SYSRET */ @@ -148,7 +193,7 @@ trampoline_protmode_entry: 1: /* Now in compatibility mode. Long-jump into 64-bit mode. */ - ljmp $BOOT_CS64,$bootsym_rel(start64,6) + ljmp $BOOT_CS64,$tramp32sym_rel(start64,6) .code64 start64: @@ -183,7 +228,7 @@ start64: idt_48: .word 0, 0, 0 # base = limit = 0 .word 0 gdt_48: .word 6*8-1 - .long bootsym_rel(trampoline_gdt,4) + .long tramp32sym_rel(trampoline_gdt,4) /* The first page of trampoline is permanent, the rest boot-time only. */ /* Reuse the boot trampoline on the 1st trampoline page as stack for wakeup. */ @@ -249,7 +294,7 @@ trampoline_boot_cpu_entry: mov $0x0200,%ax int $0x16 - mov %al,bootsym(kbd_shift_flags) + mov %al,bootdatasym(kbd_shift_flags) /* Disable irqs before returning to protected mode. 
*/ cli @@ -294,7 +339,7 @@ opt_edid: .byte 0 #ifdef CONFIG_VIDEO -GLOBAL(boot_vid_mode) +boot_vid_mode: .word VIDEO_80x25 /* If we don't run at all, assume basic video mode 3 at 80x25. */ vesa_size: .word 0,0,0 /* width x depth x height */ diff --git a/xen/arch/x86/boot/video.S b/xen/arch/x86/boot/video.S index 335a51c9b5..03907e9e9a 100644 --- a/xen/arch/x86/boot/video.S +++ b/xen/arch/x86/boot/video.S @@ -45,7 +45,7 @@ #define PARAM_VESAPM_SEG 0x24 #define PARAM_VESAPM_OFF 0x26 #define PARAM_VESA_ATTRIB 0x28 -#define _param(param) bootsym(boot_vid_info)+(param) +#define _param(param) bootdatasym(boot_vid_info)+(param) video: xorw %ax, %ax movw %ax, %gs # GS is zero @@ -917,7 +917,7 @@ store_edid: cmpw $0x004f, %ax # Call failed? jne .Lno_edid - movw %bx, bootsym(boot_edid_caps) + movw %bx, bootdatasym(boot_edid_caps) cmpb $2, bootsym(opt_edid) # EDID forced on cmdline (edid=force)? je .Lforce_edid @@ -933,7 +933,7 @@ store_edid: movw $0x01, %bx movw $0x00, %cx movw $0x00, %dx - movw $bootsym(boot_edid_info), %di + movw $bootdatasym(boot_edid_info), %di int $0x10 .Lno_edid: diff --git a/xen/arch/x86/boot/wakeup.S b/xen/arch/x86/boot/wakeup.S index 090487ba78..35cf83b80e 100644 --- a/xen/arch/x86/boot/wakeup.S +++ b/xen/arch/x86/boot/wakeup.S @@ -53,7 +53,7 @@ ENTRY(wakeup_start) movw $1, %ax lmsw %ax # Turn on CR0.PE - ljmpl $BOOT_CS32, $bootsym_rel(wakeup_32, 6) + ljmpl $BOOT_CS32, $trampsym_rel(wakeup_32, 6) /* This code uses an extended set of video mode numbers. 
These include: * Aliases for standard modes @@ -118,11 +118,11 @@ wakeup_32: mov $BOOT_DS, %eax mov %eax, %ds mov %eax, %ss - mov $bootsym_rel(wakeup_stack, 4, %esp) + mov $trampsym_rel(wakeup_stack, 4, %esp) # check saved magic again mov $sym_offs(saved_magic),%eax - add bootsym_rel(trampoline_xen_phys_start, 4, %eax) + add trampsym_rel(trampoline_xen_phys_start, 4, %eax) mov (%eax), %eax cmp $0x9abcdef0, %eax jne bogus_saved_magic @@ -135,12 +135,12 @@ wakeup_32: /* Load pagetable base register */ mov $sym_offs(idle_pg_table),%eax - add bootsym_rel(trampoline_xen_phys_start,4,%eax) + add trampsym_rel(trampoline_xen_phys_start,4,%eax) mov %eax,%cr3 /* Reapply IA32_MISC_ENABLE modifications from early_init_intel(). */ - mov bootsym_rel(trampoline_misc_enable_off, 4, %esi) - mov bootsym_rel(trampoline_misc_enable_off + 4, 4, %edi) + mov trampsym_rel(trampoline_misc_enable_off, 4, %esi) + mov trampsym_rel(trampoline_misc_enable_off + 4, 4, %edi) mov %esi, %eax or %edi, %eax jz 1f @@ -155,7 +155,7 @@ wakeup_32: /* Will cpuid feature change after resume? */ /* Set up EFER (Extended Feature Enable Register). */ - mov bootsym_rel(cpuid_ext_features,4,%edi) + mov trampsym_rel(cpuid_ext_features,4,%edi) test $0x20100800,%edi /* SYSCALL/SYSRET, No Execute, Long Mode? */ jz .Lskip_eferw movl $MSR_EFER,%ecx @@ -177,7 +177,7 @@ wakeup_32: 1: /* Now in compatibility mode. 
Long-jump to 64-bit mode */ - ljmp $BOOT_CS64, $bootsym_rel(wakeup_64,6) + ljmp $BOOT_CS64, $trampsym_rel(wakeup_64,6) .code64 wakeup_64: diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h index a0737f98c3..8aefd7bfb2 100644 --- a/xen/arch/x86/efi/efi-boot.h +++ b/xen/arch/x86/efi/efi-boot.h @@ -99,7 +99,7 @@ static void __init efi_arch_relocate_image(unsigned long delta) } extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[]; -extern const s32 __trampoline_seg_start[], __trampoline_seg_stop[]; +extern const s32 __trampoline32_rel_start[], __trampoline32_rel_stop[]; static void __init relocate_trampoline(unsigned long phys) { @@ -115,10 +115,10 @@ static void __init relocate_trampoline(unsigned long phys) trampoline_ptr < __trampoline_rel_stop; ++trampoline_ptr ) *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys; - for ( trampoline_ptr = __trampoline_seg_start; - trampoline_ptr < __trampoline_seg_stop; + for ( trampoline_ptr = __trampoline32_rel_start; + trampoline_ptr < __trampoline32_rel_stop; ++trampoline_ptr ) - *(u16 *)(*trampoline_ptr + (long)trampoline_ptr) = phys >> 4; + *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys; } static void __init place_string(u32 *addr, const char *s) diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S index 87fa02b9b5..abf46347ec 100644 --- a/xen/arch/x86/xen.lds.S +++ b/xen/arch/x86/xen.lds.S @@ -236,11 +236,18 @@ SECTIONS *(.init.data.rel.*) . = ALIGN(4); __trampoline_rel_start = .; - *(.trampoline_rel) + *(.trampsym_rel) __trampoline_rel_stop = .; - __trampoline_seg_start = .; - *(.trampoline_seg) - __trampoline_seg_stop = .; + __trampoline32_rel_start = .; + *(.tramp32sym_rel) + __trampoline32_rel_stop = .; + __bootsym_rel_start = .; + *(.bootsym_rel) + *(.bootdatasym_rel) + __bootsym_rel_stop = .; + __bootsym_seg_start = .; + *(.bootsym_seg) + __bootsym_seg_stop = .; /* * struct alt_inst entries. 
From the header (alternative.h): * "Alternative instructions for different CPU types or capabilities"