diff mbox series

[v2,5/8] tools/hvmloader: Retrieve (x2)APIC IDs from the APs themselves

Message ID b2d4109cd30c82e0af153d36f8dce77c59f03695.1715102098.git.alejandro.vallejo@cloud.com (mailing list archive)
State Superseded
Headers show
Series x86: Expose consistent topology to guests | expand

Commit Message

Alejandro Vallejo May 8, 2024, 12:39 p.m. UTC
Make it so the APs expose their own APIC IDs in a LUT. We can use that LUT to
populate the MADT, decoupling the algorithm that relates CPU IDs and APIC IDs
from hvmloader.

While at this also remove ap_callin, as writing the APIC ID may serve the same
purpose.

Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
---
v2:
  * New patch. Replaces adding cpu policy to hvmloader in v1.
---
 tools/firmware/hvmloader/config.h    |  6 ++++-
 tools/firmware/hvmloader/hvmloader.c |  4 +--
 tools/firmware/hvmloader/smp.c       | 40 +++++++++++++++++++++++-----
 tools/firmware/hvmloader/util.h      |  5 ++++
 xen/arch/x86/include/asm/hvm/hvm.h   |  1 +
 5 files changed, 47 insertions(+), 9 deletions(-)

Comments

Roger Pau Monne May 23, 2024, 4:13 p.m. UTC | #1
On Wed, May 08, 2024 at 01:39:24PM +0100, Alejandro Vallejo wrote:
> Make it so the APs expose their own APIC IDs in a LUT. We can use that LUT to
> populate the MADT, decoupling the algorithm that relates CPU IDs and APIC IDs
> from hvmloader.
> 
> While at this also remove ap_callin, as writing the APIC ID may serve the same
> purpose.
> 
> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
> ---
> v2:
>   * New patch. Replaces adding cpu policy to hvmloader in v1.
> ---
>  tools/firmware/hvmloader/config.h    |  6 ++++-
>  tools/firmware/hvmloader/hvmloader.c |  4 +--
>  tools/firmware/hvmloader/smp.c       | 40 +++++++++++++++++++++++-----
>  tools/firmware/hvmloader/util.h      |  5 ++++
>  xen/arch/x86/include/asm/hvm/hvm.h   |  1 +
>  5 files changed, 47 insertions(+), 9 deletions(-)
> 
> diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h
> index c82adf6dc508..edf6fa9c908c 100644
> --- a/tools/firmware/hvmloader/config.h
> +++ b/tools/firmware/hvmloader/config.h
> @@ -4,6 +4,8 @@
>  #include <stdint.h>
>  #include <stdbool.h>
>  
> +#include <xen/hvm/hvm_info_table.h>
> +
>  enum virtual_vga { VGA_none, VGA_std, VGA_cirrus, VGA_pt };
>  extern enum virtual_vga virtual_vga;
>  
> @@ -49,8 +51,10 @@ extern uint8_t ioapic_version;
>  
>  #define IOAPIC_ID           0x01
>  
> +extern uint32_t CPU_TO_X2APICID[HVM_MAX_VCPUS];
> +
>  #define LAPIC_BASE_ADDRESS  0xfee00000
> -#define LAPIC_ID(vcpu_id)   ((vcpu_id) * 2)
> +#define LAPIC_ID(vcpu_id)   (CPU_TO_X2APICID[(vcpu_id)])
>  
>  #define PCI_ISA_DEVFN       0x08    /* dev 1, fn 0 */
>  #define PCI_ISA_IRQ_MASK    0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
> diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c
> index c58841e5b556..1eba92229925 100644
> --- a/tools/firmware/hvmloader/hvmloader.c
> +++ b/tools/firmware/hvmloader/hvmloader.c
> @@ -342,11 +342,11 @@ int main(void)
>  
>      printf("CPU speed is %u MHz\n", get_cpu_mhz());
>  
> +    smp_initialise();
> +
>      apic_setup();
>      pci_setup();
>  
> -    smp_initialise();
> -
>      perform_tests();
>  
>      if ( bios->bios_info_setup )
> diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c
> index a668f15d7e1f..4d75f239c2f5 100644
> --- a/tools/firmware/hvmloader/smp.c
> +++ b/tools/firmware/hvmloader/smp.c
> @@ -29,7 +29,34 @@
>  
>  #include <xen/vcpu.h>
>  
> -static int ap_callin, ap_cpuid;
> +static int ap_cpuid;
> +
> +/**
> + * Lookup table of x2APIC IDs.
> + *
> + * Each entry is populated its respective CPU as they come online. This is required
> + * for generating the MADT with minimal assumptions about ID relationships.
> + */
> +uint32_t CPU_TO_X2APICID[HVM_MAX_VCPUS];
> +
> +static uint32_t read_apic_id(void)
> +{
> +    uint32_t apic_id;
> +
> +    cpuid(1, NULL, &apic_id, NULL, NULL);
> +    apic_id >>= 24;
> +
> +    /*
> +     * APIC IDs over 255 are represented by 255 in leaf 1 and are meant to be
> +     * read from topology leaves instead. Xen exposes x2APIC IDs in leaf 0xb,
> +     * but only if the x2APIC feature is present. If there are that many CPUs
> +     * it's guaranteed to be there so we can avoid checking for it specifically
> +     */

Maybe I'm missing something, but given the current code won't Xen just
return the low 8 bits from the x2APIC ID?  I don't see any code in
guest_cpuid() that adjusts the IDs to be 255 when > 255.

> +    if ( apic_id == 255 )
> +        cpuid(0xb, NULL, NULL, NULL, &apic_id);

Won't the correct logic be to check if x2APIC is set in CPUID, and
then fetch the APIC ID from leaf 0xb, otherwise fallback to fetching
the APID ID from leaf 1?

> +
> +    return apic_id;
> +}
>  
>  static void ap_start(void)
>  {
> @@ -37,12 +64,12 @@ static void ap_start(void)
>      cacheattr_init();
>      printf("done.\n");
>  
> +    wmb();
> +    ACCESS_ONCE(CPU_TO_X2APICID[ap_cpuid]) = read_apic_id();

A comment would be helpful here, that CPU_TO_X2APICID[ap_cpuid] is
used as synchronization that the AP has started.

You probably want to assert that read_apic_id() doesn't return 0,
otherwise we are skewed.

> +
>      if ( !ap_cpuid )
>          return;
>  
> -    wmb();
> -    ap_callin = 1;
> -
>      while ( 1 )
>          asm volatile ( "hlt" );
>  }
> @@ -86,10 +113,11 @@ static void boot_cpu(unsigned int cpu)
>          BUG();
>  
>      /*
> -     * Wait for the secondary processor to complete initialisation.
> +     * Wait for the secondary processor to complete initialisation,
> +     * which is signaled by its x2APIC ID being writted to the LUT.
>       * Do not touch shared resources meanwhile.
>       */
> -    while ( !ap_callin )
> +    while ( !ACCESS_ONCE(CPU_TO_X2APICID[cpu]) )
>          cpu_relax();

As a further improvement, we could launch all APs in pararell, and use
a for loop to wait until all positions of the CPU_TO_X2APICID array
are set.

>  
>      /* Take the secondary processor offline. */
> diff --git a/tools/firmware/hvmloader/util.h b/tools/firmware/hvmloader/util.h
> index 14078bde1e30..51e9003bc615 100644
> --- a/tools/firmware/hvmloader/util.h
> +++ b/tools/firmware/hvmloader/util.h
> @@ -23,6 +23,11 @@ enum {
>  #define __STR(...) #__VA_ARGS__
>  #define STR(...) __STR(__VA_ARGS__)
>  
> +#define __ACCESS_ONCE(x) ({                             \
> +            (void)(typeof(x))0; /* Scalar typecheck. */ \
> +            (volatile typeof(x) *)&(x); })
> +#define ACCESS_ONCE(x) (*__ACCESS_ONCE(x))

Might be better for this to be placed in common-macros.h?

> +
>  /* GDT selector values. */
>  #define SEL_CODE16          0x0008
>  #define SEL_DATA16          0x0010
> diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
> index 84911f3ebcb4..6c005f0b0b38 100644
> --- a/xen/arch/x86/include/asm/hvm/hvm.h
> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> @@ -16,6 +16,7 @@
>  #include <asm/current.h>
>  #include <asm/x86_emulate.h>
>  #include <asm/hvm/asid.h>
> +#include <asm/hvm/vlapic.h>

Is this a stray change?  Otherwise I don't see why this need to be
part of this patch when the rest of the changes are exclusively to
hvmloader.

Thanks, Roger.
Roger Pau Monne May 24, 2024, 7:21 a.m. UTC | #2
On Wed, May 08, 2024 at 01:39:24PM +0100, Alejandro Vallejo wrote:
> Make it so the APs expose their own APIC IDs in a LUT. We can use that LUT to
> populate the MADT, decoupling the algorithm that relates CPU IDs and APIC IDs
> from hvmloader.
> 
> While at this also remove ap_callin, as writing the APIC ID may serve the same
> purpose.
> 
> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
> ---
> v2:
>   * New patch. Replaces adding cpu policy to hvmloader in v1.
> ---
>  tools/firmware/hvmloader/config.h    |  6 ++++-
>  tools/firmware/hvmloader/hvmloader.c |  4 +--
>  tools/firmware/hvmloader/smp.c       | 40 +++++++++++++++++++++++-----
>  tools/firmware/hvmloader/util.h      |  5 ++++
>  xen/arch/x86/include/asm/hvm/hvm.h   |  1 +
>  5 files changed, 47 insertions(+), 9 deletions(-)
> 
> diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h
> index c82adf6dc508..edf6fa9c908c 100644
> --- a/tools/firmware/hvmloader/config.h
> +++ b/tools/firmware/hvmloader/config.h
> @@ -4,6 +4,8 @@
>  #include <stdint.h>
>  #include <stdbool.h>
>  
> +#include <xen/hvm/hvm_info_table.h>
> +
>  enum virtual_vga { VGA_none, VGA_std, VGA_cirrus, VGA_pt };
>  extern enum virtual_vga virtual_vga;
>  
> @@ -49,8 +51,10 @@ extern uint8_t ioapic_version;
>  
>  #define IOAPIC_ID           0x01
>  
> +extern uint32_t CPU_TO_X2APICID[HVM_MAX_VCPUS];
> +
>  #define LAPIC_BASE_ADDRESS  0xfee00000
> -#define LAPIC_ID(vcpu_id)   ((vcpu_id) * 2)
> +#define LAPIC_ID(vcpu_id)   (CPU_TO_X2APICID[(vcpu_id)])
>  
>  #define PCI_ISA_DEVFN       0x08    /* dev 1, fn 0 */
>  #define PCI_ISA_IRQ_MASK    0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
> diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c
> index c58841e5b556..1eba92229925 100644
> --- a/tools/firmware/hvmloader/hvmloader.c
> +++ b/tools/firmware/hvmloader/hvmloader.c
> @@ -342,11 +342,11 @@ int main(void)
>  
>      printf("CPU speed is %u MHz\n", get_cpu_mhz());
>  
> +    smp_initialise();
> +
>      apic_setup();
>      pci_setup();
>  
> -    smp_initialise();
> -
>      perform_tests();
>  
>      if ( bios->bios_info_setup )
> diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c
> index a668f15d7e1f..4d75f239c2f5 100644
> --- a/tools/firmware/hvmloader/smp.c
> +++ b/tools/firmware/hvmloader/smp.c
> @@ -29,7 +29,34 @@
>  
>  #include <xen/vcpu.h>
>  
> -static int ap_callin, ap_cpuid;
> +static int ap_cpuid;
> +
> +/**
> + * Lookup table of x2APIC IDs.
> + *
> + * Each entry is populated its respective CPU as they come online. This is required
> + * for generating the MADT with minimal assumptions about ID relationships.
> + */
> +uint32_t CPU_TO_X2APICID[HVM_MAX_VCPUS];
> +
> +static uint32_t read_apic_id(void)
> +{
> +    uint32_t apic_id;
> +
> +    cpuid(1, NULL, &apic_id, NULL, NULL);
> +    apic_id >>= 24;
> +
> +    /*
> +     * APIC IDs over 255 are represented by 255 in leaf 1 and are meant to be
> +     * read from topology leaves instead. Xen exposes x2APIC IDs in leaf 0xb,
> +     * but only if the x2APIC feature is present. If there are that many CPUs
> +     * it's guaranteed to be there so we can avoid checking for it specifically
> +     */
> +    if ( apic_id == 255 )
> +        cpuid(0xb, NULL, NULL, NULL, &apic_id);
> +
> +    return apic_id;
> +}
>  
>  static void ap_start(void)
>  {
> @@ -37,12 +64,12 @@ static void ap_start(void)
>      cacheattr_init();
>      printf("done.\n");
>  
> +    wmb();
> +    ACCESS_ONCE(CPU_TO_X2APICID[ap_cpuid]) = read_apic_id();

Further thinking about this: do we really need the wmb(), given the
usage of ACCESS_ONCE()?

wmb() is a compiler barrier, and the usage of volatile in
ACCESS_ONCE() should already prevent any compiler re-ordering.

Thanks, Roger.
Alejandro Vallejo May 24, 2024, 3:15 p.m. UTC | #3
On 24/05/2024 08:21, Roger Pau Monné wrote:
> On Wed, May 08, 2024 at 01:39:24PM +0100, Alejandro Vallejo wrote:
>> Make it so the APs expose their own APIC IDs in a LUT. We can use that LUT to
>> populate the MADT, decoupling the algorithm that relates CPU IDs and APIC IDs
>> from hvmloader.
>>
>> While at this also remove ap_callin, as writing the APIC ID may serve the same
>> purpose.
>>
>> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
>> ---
>> v2:
>>   * New patch. Replaces adding cpu policy to hvmloader in v1.
>> ---
>>  tools/firmware/hvmloader/config.h    |  6 ++++-
>>  tools/firmware/hvmloader/hvmloader.c |  4 +--
>>  tools/firmware/hvmloader/smp.c       | 40 +++++++++++++++++++++++-----
>>  tools/firmware/hvmloader/util.h      |  5 ++++
>>  xen/arch/x86/include/asm/hvm/hvm.h   |  1 +
>>  5 files changed, 47 insertions(+), 9 deletions(-)
>>
>> diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h
>> index c82adf6dc508..edf6fa9c908c 100644
>> --- a/tools/firmware/hvmloader/config.h
>> +++ b/tools/firmware/hvmloader/config.h
>> @@ -4,6 +4,8 @@
>>  #include <stdint.h>
>>  #include <stdbool.h>
>>  
>> +#include <xen/hvm/hvm_info_table.h>
>> +
>>  enum virtual_vga { VGA_none, VGA_std, VGA_cirrus, VGA_pt };
>>  extern enum virtual_vga virtual_vga;
>>  
>> @@ -49,8 +51,10 @@ extern uint8_t ioapic_version;
>>  
>>  #define IOAPIC_ID           0x01
>>  
>> +extern uint32_t CPU_TO_X2APICID[HVM_MAX_VCPUS];
>> +
>>  #define LAPIC_BASE_ADDRESS  0xfee00000
>> -#define LAPIC_ID(vcpu_id)   ((vcpu_id) * 2)
>> +#define LAPIC_ID(vcpu_id)   (CPU_TO_X2APICID[(vcpu_id)])
>>  
>>  #define PCI_ISA_DEVFN       0x08    /* dev 1, fn 0 */
>>  #define PCI_ISA_IRQ_MASK    0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
>> diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c
>> index c58841e5b556..1eba92229925 100644
>> --- a/tools/firmware/hvmloader/hvmloader.c
>> +++ b/tools/firmware/hvmloader/hvmloader.c
>> @@ -342,11 +342,11 @@ int main(void)
>>  
>>      printf("CPU speed is %u MHz\n", get_cpu_mhz());
>>  
>> +    smp_initialise();
>> +
>>      apic_setup();
>>      pci_setup();
>>  
>> -    smp_initialise();
>> -
>>      perform_tests();
>>  
>>      if ( bios->bios_info_setup )
>> diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c
>> index a668f15d7e1f..4d75f239c2f5 100644
>> --- a/tools/firmware/hvmloader/smp.c
>> +++ b/tools/firmware/hvmloader/smp.c
>> @@ -29,7 +29,34 @@
>>  
>>  #include <xen/vcpu.h>
>>  
>> -static int ap_callin, ap_cpuid;
>> +static int ap_cpuid;
>> +
>> +/**
>> + * Lookup table of x2APIC IDs.
>> + *
>> + * Each entry is populated its respective CPU as they come online. This is required
>> + * for generating the MADT with minimal assumptions about ID relationships.
>> + */
>> +uint32_t CPU_TO_X2APICID[HVM_MAX_VCPUS];
>> +
>> +static uint32_t read_apic_id(void)
>> +{
>> +    uint32_t apic_id;
>> +
>> +    cpuid(1, NULL, &apic_id, NULL, NULL);
>> +    apic_id >>= 24;
>> +
>> +    /*
>> +     * APIC IDs over 255 are represented by 255 in leaf 1 and are meant to be
>> +     * read from topology leaves instead. Xen exposes x2APIC IDs in leaf 0xb,
>> +     * but only if the x2APIC feature is present. If there are that many CPUs
>> +     * it's guaranteed to be there so we can avoid checking for it specifically
>> +     */
>> +    if ( apic_id == 255 )
>> +        cpuid(0xb, NULL, NULL, NULL, &apic_id);
>> +
>> +    return apic_id;
>> +}
>>  
>>  static void ap_start(void)
>>  {
>> @@ -37,12 +64,12 @@ static void ap_start(void)
>>      cacheattr_init();
>>      printf("done.\n");
>>  
>> +    wmb();
>> +    ACCESS_ONCE(CPU_TO_X2APICID[ap_cpuid]) = read_apic_id();
> 
> Further thinking about this: do we really need the wmb(), given the
> usage of ACCESS_ONCE()?
> 
> wmb() is a compiler barrier, and the usage of volatile in
> ACCESS_ONCE() should already prevent any compiler re-ordering.
> 
> Thanks, Roger.

volatile reads/writes cannot be reordered with other volatile
reads/writes, but volatile reads/writes can be reordered with
non-volatile reads/writes, AFAIR.

My intent here was to prevent the compiler omitting the write (though in
practice it didn't). I think the barrier is still required to prevent
reordering according to the spec.

Cheers,
Alejandro
Alejandro Vallejo May 24, 2024, 3:16 p.m. UTC | #4
On 23/05/2024 17:13, Roger Pau Monné wrote:
> On Wed, May 08, 2024 at 01:39:24PM +0100, Alejandro Vallejo wrote:
>> Make it so the APs expose their own APIC IDs in a LUT. We can use that LUT to
>> populate the MADT, decoupling the algorithm that relates CPU IDs and APIC IDs
>> from hvmloader.
>>
>> While at this also remove ap_callin, as writing the APIC ID may serve the same
>> purpose.
>>
>> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
>> ---
>> v2:
>>   * New patch. Replaces adding cpu policy to hvmloader in v1.
>> ---
>>  tools/firmware/hvmloader/config.h    |  6 ++++-
>>  tools/firmware/hvmloader/hvmloader.c |  4 +--
>>  tools/firmware/hvmloader/smp.c       | 40 +++++++++++++++++++++++-----
>>  tools/firmware/hvmloader/util.h      |  5 ++++
>>  xen/arch/x86/include/asm/hvm/hvm.h   |  1 +
>>  5 files changed, 47 insertions(+), 9 deletions(-)
>>
>> diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h
>> index c82adf6dc508..edf6fa9c908c 100644
>> --- a/tools/firmware/hvmloader/config.h
>> +++ b/tools/firmware/hvmloader/config.h
>> @@ -4,6 +4,8 @@
>>  #include <stdint.h>
>>  #include <stdbool.h>
>>  
>> +#include <xen/hvm/hvm_info_table.h>
>> +
>>  enum virtual_vga { VGA_none, VGA_std, VGA_cirrus, VGA_pt };
>>  extern enum virtual_vga virtual_vga;
>>  
>> @@ -49,8 +51,10 @@ extern uint8_t ioapic_version;
>>  
>>  #define IOAPIC_ID           0x01
>>  
>> +extern uint32_t CPU_TO_X2APICID[HVM_MAX_VCPUS];
>> +
>>  #define LAPIC_BASE_ADDRESS  0xfee00000
>> -#define LAPIC_ID(vcpu_id)   ((vcpu_id) * 2)
>> +#define LAPIC_ID(vcpu_id)   (CPU_TO_X2APICID[(vcpu_id)])
>>  
>>  #define PCI_ISA_DEVFN       0x08    /* dev 1, fn 0 */
>>  #define PCI_ISA_IRQ_MASK    0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
>> diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c
>> index c58841e5b556..1eba92229925 100644
>> --- a/tools/firmware/hvmloader/hvmloader.c
>> +++ b/tools/firmware/hvmloader/hvmloader.c
>> @@ -342,11 +342,11 @@ int main(void)
>>  
>>      printf("CPU speed is %u MHz\n", get_cpu_mhz());
>>  
>> +    smp_initialise();
>> +
>>      apic_setup();
>>      pci_setup();
>>  
>> -    smp_initialise();
>> -
>>      perform_tests();
>>  
>>      if ( bios->bios_info_setup )
>> diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c
>> index a668f15d7e1f..4d75f239c2f5 100644
>> --- a/tools/firmware/hvmloader/smp.c
>> +++ b/tools/firmware/hvmloader/smp.c
>> @@ -29,7 +29,34 @@
>>  
>>  #include <xen/vcpu.h>
>>  
>> -static int ap_callin, ap_cpuid;
>> +static int ap_cpuid;
>> +
>> +/**
>> + * Lookup table of x2APIC IDs.
>> + *
>> + * Each entry is populated its respective CPU as they come online. This is required
>> + * for generating the MADT with minimal assumptions about ID relationships.
>> + */
>> +uint32_t CPU_TO_X2APICID[HVM_MAX_VCPUS];
>> +
>> +static uint32_t read_apic_id(void)
>> +{
>> +    uint32_t apic_id;
>> +
>> +    cpuid(1, NULL, &apic_id, NULL, NULL);
>> +    apic_id >>= 24;
>> +
>> +    /*
>> +     * APIC IDs over 255 are represented by 255 in leaf 1 and are meant to be
>> +     * read from topology leaves instead. Xen exposes x2APIC IDs in leaf 0xb,
>> +     * but only if the x2APIC feature is present. If there are that many CPUs
>> +     * it's guaranteed to be there so we can avoid checking for it specifically
>> +     */
> 
> Maybe I'm missing something, but given the current code won't Xen just
> return the low 8 bits from the x2APIC ID?  I don't see any code in
> guest_cpuid() that adjusts the IDs to be 255 when > 255.>
>> +    if ( apic_id == 255 )
>> +        cpuid(0xb, NULL, NULL, NULL, &apic_id);
> 
> Won't the correct logic be to check if x2APIC is set in CPUID, and
> then fetch the APIC ID from leaf 0xb, otherwise fallback to fetching
> the APID ID from leaf 1?

I was pretty sure that was the behaviour of real HW, but clearly I was
wrong. Just checked on a beefy machine and that's indeed the low 8 bits,
just as Xen emulates. Got confused with the core count, that does clip
to 255.

Will adjust by explicitly checking for the x2apic_id bit.

> 
>> +
>> +    return apic_id;
>> +}
>>  
>>  static void ap_start(void)
>>  {
>> @@ -37,12 +64,12 @@ static void ap_start(void)
>>      cacheattr_init();
>>      printf("done.\n");
>>  
>> +    wmb();
>> +    ACCESS_ONCE(CPU_TO_X2APICID[ap_cpuid]) = read_apic_id();
> 
> A comment would be helpful here, that CPU_TO_X2APICID[ap_cpuid] is
> used as synchronization that the AP has started.
> 
> You probably want to assert that read_apic_id() doesn't return 0,
> otherwise we are skewed.

Not a bad idea. Sure

> 
>> +
>>      if ( !ap_cpuid )
>>          return;
>>  
>> -    wmb();
>> -    ap_callin = 1;
>> -
>>      while ( 1 )
>>          asm volatile ( "hlt" );
>>  }
>> @@ -86,10 +113,11 @@ static void boot_cpu(unsigned int cpu)
>>          BUG();
>>  
>>      /*
>> -     * Wait for the secondary processor to complete initialisation.
>> +     * Wait for the secondary processor to complete initialisation,
>> +     * which is signaled by its x2APIC ID being writted to the LUT.
>>       * Do not touch shared resources meanwhile.
>>       */
>> -    while ( !ap_callin )
>> +    while ( !ACCESS_ONCE(CPU_TO_X2APICID[cpu]) )
>>          cpu_relax();
> 
> As a further improvement, we could launch all APs in pararell, and use
> a for loop to wait until all positions of the CPU_TO_X2APICID array
> are set.

I thought about it, but then we'd need locking for the prints as well,
or refactor things so only the BSP prints on success.

The time taken is truly negligible, so I reckon it's better left for
another patch.

> 
>>  
>>      /* Take the secondary processor offline. */
>> diff --git a/tools/firmware/hvmloader/util.h b/tools/firmware/hvmloader/util.h
>> index 14078bde1e30..51e9003bc615 100644
>> --- a/tools/firmware/hvmloader/util.h
>> +++ b/tools/firmware/hvmloader/util.h
>> @@ -23,6 +23,11 @@ enum {
>>  #define __STR(...) #__VA_ARGS__
>>  #define STR(...) __STR(__VA_ARGS__)
>>  
>> +#define __ACCESS_ONCE(x) ({                             \
>> +            (void)(typeof(x))0; /* Scalar typecheck. */ \
>> +            (volatile typeof(x) *)&(x); })
>> +#define ACCESS_ONCE(x) (*__ACCESS_ONCE(x))
> 
> Might be better for this to be placed in common-macros.h?

Sure.

> 
>> +
>>  /* GDT selector values. */
>>  #define SEL_CODE16          0x0008
>>  #define SEL_DATA16          0x0010
>> diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
>> index 84911f3ebcb4..6c005f0b0b38 100644
>> --- a/xen/arch/x86/include/asm/hvm/hvm.h
>> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
>> @@ -16,6 +16,7 @@
>>  #include <asm/current.h>
>>  #include <asm/x86_emulate.h>
>>  #include <asm/hvm/asid.h>
>> +#include <asm/hvm/vlapic.h>
> 
> Is this a stray change?  Otherwise I don't see why this need to be
> part of this patch when the rest of the changes are exclusively to
> hvmloader.
> 
> Thanks, Roger.

Should've been part of the vlapic_policy_update change. That line got
lost in the v1->v2 conversion. I just moved to where it logically
belongs now.

Cheers,
Alejandro
Roger Pau Monne May 27, 2024, 8:52 a.m. UTC | #5
On Fri, May 24, 2024 at 04:15:34PM +0100, Alejandro Vallejo wrote:
> On 24/05/2024 08:21, Roger Pau Monné wrote:
> > On Wed, May 08, 2024 at 01:39:24PM +0100, Alejandro Vallejo wrote:
> >> Make it so the APs expose their own APIC IDs in a LUT. We can use that LUT to
> >> populate the MADT, decoupling the algorithm that relates CPU IDs and APIC IDs
> >> from hvmloader.
> >>
> >> While at this also remove ap_callin, as writing the APIC ID may serve the same
> >> purpose.
> >>
> >> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
> >> ---
> >> v2:
> >>   * New patch. Replaces adding cpu policy to hvmloader in v1.
> >> ---
> >>  tools/firmware/hvmloader/config.h    |  6 ++++-
> >>  tools/firmware/hvmloader/hvmloader.c |  4 +--
> >>  tools/firmware/hvmloader/smp.c       | 40 +++++++++++++++++++++++-----
> >>  tools/firmware/hvmloader/util.h      |  5 ++++
> >>  xen/arch/x86/include/asm/hvm/hvm.h   |  1 +
> >>  5 files changed, 47 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h
> >> index c82adf6dc508..edf6fa9c908c 100644
> >> --- a/tools/firmware/hvmloader/config.h
> >> +++ b/tools/firmware/hvmloader/config.h
> >> @@ -4,6 +4,8 @@
> >>  #include <stdint.h>
> >>  #include <stdbool.h>
> >>  
> >> +#include <xen/hvm/hvm_info_table.h>
> >> +
> >>  enum virtual_vga { VGA_none, VGA_std, VGA_cirrus, VGA_pt };
> >>  extern enum virtual_vga virtual_vga;
> >>  
> >> @@ -49,8 +51,10 @@ extern uint8_t ioapic_version;
> >>  
> >>  #define IOAPIC_ID           0x01
> >>  
> >> +extern uint32_t CPU_TO_X2APICID[HVM_MAX_VCPUS];
> >> +
> >>  #define LAPIC_BASE_ADDRESS  0xfee00000
> >> -#define LAPIC_ID(vcpu_id)   ((vcpu_id) * 2)
> >> +#define LAPIC_ID(vcpu_id)   (CPU_TO_X2APICID[(vcpu_id)])
> >>  
> >>  #define PCI_ISA_DEVFN       0x08    /* dev 1, fn 0 */
> >>  #define PCI_ISA_IRQ_MASK    0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
> >> diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c
> >> index c58841e5b556..1eba92229925 100644
> >> --- a/tools/firmware/hvmloader/hvmloader.c
> >> +++ b/tools/firmware/hvmloader/hvmloader.c
> >> @@ -342,11 +342,11 @@ int main(void)
> >>  
> >>      printf("CPU speed is %u MHz\n", get_cpu_mhz());
> >>  
> >> +    smp_initialise();
> >> +
> >>      apic_setup();
> >>      pci_setup();
> >>  
> >> -    smp_initialise();
> >> -
> >>      perform_tests();
> >>  
> >>      if ( bios->bios_info_setup )
> >> diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c
> >> index a668f15d7e1f..4d75f239c2f5 100644
> >> --- a/tools/firmware/hvmloader/smp.c
> >> +++ b/tools/firmware/hvmloader/smp.c
> >> @@ -29,7 +29,34 @@
> >>  
> >>  #include <xen/vcpu.h>
> >>  
> >> -static int ap_callin, ap_cpuid;
> >> +static int ap_cpuid;
> >> +
> >> +/**
> >> + * Lookup table of x2APIC IDs.
> >> + *
> >> + * Each entry is populated its respective CPU as they come online. This is required
> >> + * for generating the MADT with minimal assumptions about ID relationships.
> >> + */
> >> +uint32_t CPU_TO_X2APICID[HVM_MAX_VCPUS];
> >> +
> >> +static uint32_t read_apic_id(void)
> >> +{
> >> +    uint32_t apic_id;
> >> +
> >> +    cpuid(1, NULL, &apic_id, NULL, NULL);
> >> +    apic_id >>= 24;
> >> +
> >> +    /*
> >> +     * APIC IDs over 255 are represented by 255 in leaf 1 and are meant to be
> >> +     * read from topology leaves instead. Xen exposes x2APIC IDs in leaf 0xb,
> >> +     * but only if the x2APIC feature is present. If there are that many CPUs
> >> +     * it's guaranteed to be there so we can avoid checking for it specifically
> >> +     */
> >> +    if ( apic_id == 255 )
> >> +        cpuid(0xb, NULL, NULL, NULL, &apic_id);
> >> +
> >> +    return apic_id;
> >> +}
> >>  
> >>  static void ap_start(void)
> >>  {
> >> @@ -37,12 +64,12 @@ static void ap_start(void)
> >>      cacheattr_init();
> >>      printf("done.\n");
> >>  
> >> +    wmb();
> >> +    ACCESS_ONCE(CPU_TO_X2APICID[ap_cpuid]) = read_apic_id();
> > 
> > Further thinking about this: do we really need the wmb(), given the
> > usage of ACCESS_ONCE()?
> > 
> > wmb() is a compiler barrier, and the usage of volatile in
> > ACCESS_ONCE() should already prevent any compiler re-ordering.
> > 
> > Thanks, Roger.
> 
> volatile reads/writes cannot be reordered with other volatile
> reads/writes, but volatile reads/writes can be reordered with
> non-volatile reads/writes, AFAIR.

Right, I've read the C99 spec and it does indeed only guarantee the
state of volatile objects to be as expected at sequence points.  I
think however this specific code would still be fine since the
function calls are considered to have side-effects, and those must all
be completed before the volatile access.

> My intent here was to prevent the compiler omitting the write (though in
> practice it didn't). I think the barrier is still required to prevent
> reordering according to the spec.

Yes, my understating is that you added the ACCESS_ONCE() to possibly
prevent the compiler from re-ordering the CPU_TO_X2APICID[ap_cpuid]) =
read_apic_id() after the 'hlt' loop?

AFAICT the expressions in the `for` statement are considered sequence
points, and the calling `read_apic_id()` could have side-effects, so
the compiler won't be able to elide or move the setting of
CPU_TO_X2APICID[] due to all side-effects of previous evaluations must
be complete at sequence points.

I'm fine with leaving the wmb() + ACCESS_ONCE().

Thanks, Roger.
Roger Pau Monne May 27, 2024, 8:55 a.m. UTC | #6
On Fri, May 24, 2024 at 04:16:01PM +0100, Alejandro Vallejo wrote:
> On 23/05/2024 17:13, Roger Pau Monné wrote:
> > On Wed, May 08, 2024 at 01:39:24PM +0100, Alejandro Vallejo wrote:
> >> @@ -86,10 +113,11 @@ static void boot_cpu(unsigned int cpu)
> >>          BUG();
> >>  
> >>      /*
> >> -     * Wait for the secondary processor to complete initialisation.
> >> +     * Wait for the secondary processor to complete initialisation,
> >> +     * which is signaled by its x2APIC ID being writted to the LUT.
> >>       * Do not touch shared resources meanwhile.
> >>       */
> >> -    while ( !ap_callin )
> >> +    while ( !ACCESS_ONCE(CPU_TO_X2APICID[cpu]) )
> >>          cpu_relax();
> > 
> > As a further improvement, we could launch all APs in pararell, and use
> > a for loop to wait until all positions of the CPU_TO_X2APICID array
> > are set.
> 
> I thought about it, but then we'd need locking for the prints as well,
> or refactor things so only the BSP prints on success.

Hm, I see, yes, we would likely need to refactor the printing a bit so
each AP only prints one line, and then add locking around the calls in
`cpu_setup()`.

> The time taken is truly negligible, so I reckon it's better left for
> another patch.

Oh, indeed, sorry if I made it look it should be part of this patch,
that wasn't my intention.  Just something that might be worth looking
into doing in the future.

Thanks, Roger.
Alejandro Vallejo May 28, 2024, 12:35 p.m. UTC | #7
On 27/05/2024 09:52, Roger Pau Monné wrote:
>> My intent here was to prevent the compiler omitting the write (though in
>> practice it didn't). I think the barrier is still required to prevent
>> reordering according to the spec.
> 
> Yes, my understating is that you added the ACCESS_ONCE() to possibly
> prevent the compiler from re-ordering the CPU_TO_X2APICID[ap_cpuid]) =
> read_apic_id() after the 'hlt' loop?

Yes, but not only that. Also preventing the compiler from eliding the
write altogether on the basis that it's not read after within the
function. I have suffered that particular behaviour on older versions of
GCC and it stung very much.

> 
> AFAICT the expressions in the `for` statement are considered sequence
> points, and the calling `read_apic_id()` could have side-effects, so
> the compiler won't be able to elide or move the setting of
> CPU_TO_X2APICID[] due to all side-effects of previous evaluations must
> be complete at sequence points.
> > I'm fine with leaving the wmb() + ACCESS_ONCE().
> 
> Thanks, Roger.

Cheers,
Alejandro
diff mbox series

Patch

diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h
index c82adf6dc508..edf6fa9c908c 100644
--- a/tools/firmware/hvmloader/config.h
+++ b/tools/firmware/hvmloader/config.h
@@ -4,6 +4,8 @@ 
 #include <stdint.h>
 #include <stdbool.h>
 
+#include <xen/hvm/hvm_info_table.h>
+
 enum virtual_vga { VGA_none, VGA_std, VGA_cirrus, VGA_pt };
 extern enum virtual_vga virtual_vga;
 
@@ -49,8 +51,10 @@  extern uint8_t ioapic_version;
 
 #define IOAPIC_ID           0x01
 
+extern uint32_t CPU_TO_X2APICID[HVM_MAX_VCPUS];
+
 #define LAPIC_BASE_ADDRESS  0xfee00000
-#define LAPIC_ID(vcpu_id)   ((vcpu_id) * 2)
+#define LAPIC_ID(vcpu_id)   (CPU_TO_X2APICID[(vcpu_id)])
 
 #define PCI_ISA_DEVFN       0x08    /* dev 1, fn 0 */
 #define PCI_ISA_IRQ_MASK    0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c
index c58841e5b556..1eba92229925 100644
--- a/tools/firmware/hvmloader/hvmloader.c
+++ b/tools/firmware/hvmloader/hvmloader.c
@@ -342,11 +342,11 @@  int main(void)
 
     printf("CPU speed is %u MHz\n", get_cpu_mhz());
 
+    smp_initialise();
+
     apic_setup();
     pci_setup();
 
-    smp_initialise();
-
     perform_tests();
 
     if ( bios->bios_info_setup )
diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c
index a668f15d7e1f..4d75f239c2f5 100644
--- a/tools/firmware/hvmloader/smp.c
+++ b/tools/firmware/hvmloader/smp.c
@@ -29,7 +29,34 @@ 
 
 #include <xen/vcpu.h>
 
-static int ap_callin, ap_cpuid;
+static int ap_cpuid;
+
+/**
+ * Lookup table of x2APIC IDs.
+ *
+ * Each entry is populated its respective CPU as they come online. This is required
+ * for generating the MADT with minimal assumptions about ID relationships.
+ */
+uint32_t CPU_TO_X2APICID[HVM_MAX_VCPUS];
+
+static uint32_t read_apic_id(void)
+{
+    uint32_t apic_id;
+
+    cpuid(1, NULL, &apic_id, NULL, NULL);
+    apic_id >>= 24;
+
+    /*
+     * APIC IDs over 255 are represented by 255 in leaf 1 and are meant to be
+     * read from topology leaves instead. Xen exposes x2APIC IDs in leaf 0xb,
+     * but only if the x2APIC feature is present. If there are that many CPUs
+     * it's guaranteed to be there so we can avoid checking for it specifically
+     */
+    if ( apic_id == 255 )
+        cpuid(0xb, NULL, NULL, NULL, &apic_id);
+
+    return apic_id;
+}
 
 static void ap_start(void)
 {
@@ -37,12 +64,12 @@  static void ap_start(void)
     cacheattr_init();
     printf("done.\n");
 
+    wmb();
+    ACCESS_ONCE(CPU_TO_X2APICID[ap_cpuid]) = read_apic_id();
+
     if ( !ap_cpuid )
         return;
 
-    wmb();
-    ap_callin = 1;
-
     while ( 1 )
         asm volatile ( "hlt" );
 }
@@ -86,10 +113,11 @@  static void boot_cpu(unsigned int cpu)
         BUG();
 
     /*
-     * Wait for the secondary processor to complete initialisation.
+     * Wait for the secondary processor to complete initialisation,
+     * which is signaled by its x2APIC ID being writted to the LUT.
      * Do not touch shared resources meanwhile.
      */
-    while ( !ap_callin )
+    while ( !ACCESS_ONCE(CPU_TO_X2APICID[cpu]) )
         cpu_relax();
 
     /* Take the secondary processor offline. */
diff --git a/tools/firmware/hvmloader/util.h b/tools/firmware/hvmloader/util.h
index 14078bde1e30..51e9003bc615 100644
--- a/tools/firmware/hvmloader/util.h
+++ b/tools/firmware/hvmloader/util.h
@@ -23,6 +23,11 @@  enum {
 #define __STR(...) #__VA_ARGS__
 #define STR(...) __STR(__VA_ARGS__)
 
+#define __ACCESS_ONCE(x) ({                             \
+            (void)(typeof(x))0; /* Scalar typecheck. */ \
+            (volatile typeof(x) *)&(x); })
+#define ACCESS_ONCE(x) (*__ACCESS_ONCE(x))
+
 /* GDT selector values. */
 #define SEL_CODE16          0x0008
 #define SEL_DATA16          0x0010
diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
index 84911f3ebcb4..6c005f0b0b38 100644
--- a/xen/arch/x86/include/asm/hvm/hvm.h
+++ b/xen/arch/x86/include/asm/hvm/hvm.h
@@ -16,6 +16,7 @@ 
 #include <asm/current.h>
 #include <asm/x86_emulate.h>
 #include <asm/hvm/asid.h>
+#include <asm/hvm/vlapic.h>
 
 struct pirq; /* needed by pi_update_irte */