diff mbox series

[v4,4/7] xen/riscv: introduce functionality to work with CPU info

Message ID 452e2960d1f064a2e3abcaae1ac354f48aafd24c.1723214540.git.oleksii.kurochko@gmail.com (mailing list archive)
State Superseded
Headers show
Series RISCV device tree mapping | expand

Commit Message

Oleksii Kurochko Aug. 9, 2024, 4:19 p.m. UTC
Introduce struct pcpu_info to store pCPU-related information.
Initially, it includes only processor_id, but it will be extended
to include guest CPU information and temporary variables for
saving/restoring vCPU registers.

Add set_processor_id() and get_processor_id() functions to set
and retrieve the processor_id stored in pcpu_info.

Introduce cpuid_to_hartid_map[NR_CPUS] to map Xen logical CPUs to
hart IDs (physical CPU IDs). Add an auxiliary macro, cpuid_to_hartid(),
for convenient access to this mapping.

Define smp_processor_id() to provide accurate information,
replacing the previous "dummy" value of 0.

Initialize the tp register to point to pcpu_info[0].
Set processor_id to 0 for logical CPU 0 and store the physical CPU ID
for the current logical CPU in cpuid_to_hartid_map[].

Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in V4:
 - wrap id with () inside set_processor_id().
 - code style fixes
 - update BUG_ON(id > NR_CPUS) in smp_processor_id() and drop the comment
   above BUG_ON().
 - s/__cpuid_to_hartid_map/cpuid_to_hartid_map
 - s/cpuid_to_hartid_map/cpuid_to_hartid ( here cpuid_to_hartid_map is the name
   of the macro ).
 - update the comment above the code of TP register initialization in
   start_xen().
 - s/smp_setup_processor_id/smp_set_bootcpu_id
 - update the commit message.
 - cleanup headers which are included in <asm/processor.h>
---
Changes in V3:
 - new patch.
---
 xen/arch/riscv/Makefile                |  2 ++
 xen/arch/riscv/include/asm/processor.h | 28 ++++++++++++++++++++++++--
 xen/arch/riscv/include/asm/smp.h       | 10 +++++++++
 xen/arch/riscv/setup.c                 | 14 +++++++++++++
 xen/arch/riscv/smp.c                   |  4 ++++
 xen/arch/riscv/smpboot.c               | 12 +++++++++++
 6 files changed, 68 insertions(+), 2 deletions(-)
 create mode 100644 xen/arch/riscv/smp.c
 create mode 100644 xen/arch/riscv/smpboot.c

Comments

Jan Beulich Aug. 13, 2024, 8:54 a.m. UTC | #1
On 09.08.2024 18:19, Oleksii Kurochko wrote:
> Introduce struct pcpu_info to store pCPU-related information.
> Initially, it includes only processor_id, but it will be extended
> to include guest CPU information and temporary variables for
> saving/restoring vCPU registers.
> 
> Add set_processor_id() and get_processor_id() functions to set
> and retrieve the processor_id stored in pcpu_info.
> 
> Introduce cpuid_to_hartid_map[NR_CPUS] to map Xen logical CPUs to
> hart IDs (physical CPU IDs). Add auxiliary macros cpuid_to_hartid()
> for convenient access to this mapping.
> 
> Define smp_processor_id() to provide accurate information,
> replacing the previous "dummy" value of 0.
> 
> Initialize tp registers to point to pcpu_info[0].
> Set processor_id to 0 for logical CPU 0 and store the physical CPU ID
> for the current logical CPU in cpuid_to_hartid_map[].
> 
> Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
> ---
> Changes in V4:
>  - wrap id with () inside set_processor_id().
>  - code style fixes
>  - update BUG_ON(id > NR_CPUS) in smp_processor_id() and drop the comment
>    above BUG_ON().
>  - s/__cpuid_to_hartid_map/cpuid_to_hartid_map
>  - s/cpuid_to_hartid_map/cpuid_to_harti ( here cpuid_to_hartid_map is the name
>    of the macros ).
>  - update the commit message above the code of TP register initialization in
>    start_xen().

I guess that's once again "comment", not "commit message"?

> --- a/xen/arch/riscv/include/asm/processor.h
> +++ b/xen/arch/riscv/include/asm/processor.h
> @@ -12,8 +12,32 @@
>  
>  #ifndef __ASSEMBLY__
>  
> -/* TODO: need to be implemeted */
> -#define smp_processor_id() 0
> +#include <xen/bug.h>
> +
> +register struct pcpu_info *tp asm ("tp");
> +
> +struct pcpu_info {
> +    unsigned int processor_id;
> +};
> +
> +/* tp points to one of these */
> +extern struct pcpu_info pcpu_info[NR_CPUS];
> +
> +#define get_processor_id()      (tp->processor_id)
> +#define set_processor_id(id)    do { \
> +    tp->processor_id = (id);         \
> +} while (0)
> +
> +static inline unsigned int smp_processor_id(void)
> +{
> +    unsigned int id;
> +
> +    id = get_processor_id();
> +
> +    BUG_ON(id > NR_CPUS);

>= ?

> --- a/xen/arch/riscv/include/asm/smp.h
> +++ b/xen/arch/riscv/include/asm/smp.h
> @@ -5,6 +5,8 @@
>  #include <xen/cpumask.h>
>  #include <xen/percpu.h>
>  
> +#define INVALID_HARTID UINT_MAX
> +
>  DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_mask);
>  DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask);
>  
> @@ -14,6 +16,14 @@ DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask);
>   */
>  #define park_offline_cpus false
>  
> +void smp_set_bootcpu_id(unsigned long boot_cpu_hartid);
> +
> +/*
> + * Mapping between linux logical cpu index and hartid.
> + */
> +extern unsigned long cpuid_to_hartid_map[NR_CPUS];
> +#define cpuid_to_hartid(cpu) cpuid_to_hartid_map[cpu]

How wide can hart IDs be? Wider than 32 bits? If not, why unsigned long?
If so, what about RV32 (which generally you at least try cover where
easily possible)?

Is there a reason this needs to be a separate array, rather than being
part of struct pcpu_info?

> @@ -40,6 +41,19 @@ void __init noreturn start_xen(unsigned long bootcpu_id,
>  {
>      remove_identity_mapping();
>  
> +    /*
> +     * tp register contains an address of physical cpu information.
> +     * So write physical CPU info of boot cpu to tp register
> +     * It will be used later by get_processor_id() ( look at
> +     * <asm/processor.h> ):
> +     *   #define get_processor_id()    (tp->processor_id)
> +     */
> +    asm volatile ( "mv tp, %0" : : "r"((unsigned long)&pcpu_info[0]) );

Perhaps be on the safe side and also add a memory barrier?

Perhaps be on the safe side and do this absolutely first in the function,
or even in assembly (such that initializers of future variables declared
at the top of the function end up safe, too)?

Also nit: Blank please between closing quote and opening parenthesis.
(Otoh you could omit the blank between the two colons.)

> --- /dev/null
> +++ b/xen/arch/riscv/smp.c
> @@ -0,0 +1,4 @@
> +#include <xen/smp.h>
> +
> +/* tp points to one of these per cpu */
> +struct pcpu_info pcpu_info[NR_CPUS];

And they all need setting up statically? Is there a plan to make this
dynamic (which could be recorded in a "fixme" in the comment)?

Jan
Oleksii Kurochko Aug. 14, 2024, 2:45 p.m. UTC | #2
On Tue, 2024-08-13 at 10:54 +0200, Jan Beulich wrote:
> On 09.08.2024 18:19, Oleksii Kurochko wrote:
> > Introduce struct pcpu_info to store pCPU-related information.
> > Initially, it includes only processor_id, but it will be extended
> > to include guest CPU information and temporary variables for
> > saving/restoring vCPU registers.
> > 
> > Add set_processor_id() and get_processor_id() functions to set
> > and retrieve the processor_id stored in pcpu_info.
> > 
> > Introduce cpuid_to_hartid_map[NR_CPUS] to map Xen logical CPUs to
> > hart IDs (physical CPU IDs). Add auxiliary macros cpuid_to_hartid()
> > for convenient access to this mapping.
> > 
> > Define smp_processor_id() to provide accurate information,
> > replacing the previous "dummy" value of 0.
> > 
> > Initialize tp registers to point to pcpu_info[0].
> > Set processor_id to 0 for logical CPU 0 and store the physical CPU
> > ID
> > for the current logical CPU in cpuid_to_hartid_map[].
> > 
> > Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
> > ---
> > Changes in V4:
> >  - wrap id with () inside set_processor_id().
> >  - code style fixes
> >  - update BUG_ON(id > NR_CPUS) in smp_processor_id() and drop the
> > comment
> >    above BUG_ON().
> >  - s/__cpuid_to_hartid_map/cpuid_to_hartid_map
> >  - s/cpuid_to_hartid_map/cpuid_to_harti ( here cpuid_to_hartid_map
> > is the name
> >    of the macros ).
> >  - update the commit message above the code of TP register
> > initialization in
> >    start_xen().
> 
> I guess that's once again "comment", not "commit message"?
Yes, sorry for confusion. It should be comment.

> 
> > --- a/xen/arch/riscv/include/asm/processor.h
> > +++ b/xen/arch/riscv/include/asm/processor.h
> > @@ -12,8 +12,32 @@
> >  
> >  #ifndef __ASSEMBLY__
> >  
> > -/* TODO: need to be implemeted */
> > -#define smp_processor_id() 0
> > +#include <xen/bug.h>
> > +
> > +register struct pcpu_info *tp asm ("tp");
> > +
> > +struct pcpu_info {
> > +    unsigned int processor_id;
> > +};
> > +
> > +/* tp points to one of these */
> > +extern struct pcpu_info pcpu_info[NR_CPUS];
> > +
> > +#define get_processor_id()      (tp->processor_id)
> > +#define set_processor_id(id)    do { \
> > +    tp->processor_id = (id);         \
> > +} while (0)
> > +
> > +static inline unsigned int smp_processor_id(void)
> > +{
> > +    unsigned int id;
> > +
> > +    id = get_processor_id();
> > +
> > +    BUG_ON(id > NR_CPUS);
> 
> > = ?
> 
> > --- a/xen/arch/riscv/include/asm/smp.h
> > +++ b/xen/arch/riscv/include/asm/smp.h
> > @@ -5,6 +5,8 @@
> >  #include <xen/cpumask.h>
> >  #include <xen/percpu.h>
> >  
> > +#define INVALID_HARTID UINT_MAX
> > +
> >  DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_mask);
> >  DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask);
> >  
> > @@ -14,6 +16,14 @@ DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask);
> >   */
> >  #define park_offline_cpus false
> >  
> > +void smp_set_bootcpu_id(unsigned long boot_cpu_hartid);
> > +
> > +/*
> > + * Mapping between linux logical cpu index and hartid.
> > + */
> > +extern unsigned long cpuid_to_hartid_map[NR_CPUS];
> > +#define cpuid_to_hartid(cpu) cpuid_to_hartid_map[cpu]
> 
> How wide can hart IDs be? Wider than 32 bits? If not, why unsigned
> long?
According to the spec:
```
The mhartid CSR is an MXLEN-bit read-only register containing the
integer ID of the hardware thread running the code
```
where MXLEN can be 32 or 64.

> If so, what about RV32 (which generally you at least try cover where
> easily possible)?
On RV32 MXLEN will be 32 and unsigned long will be 32-bit too.

> 
> Is there a reason this needs to be a separate array, rather than
> being
> part of struct pcpu_info?
> 
> > @@ -40,6 +41,19 @@ void __init noreturn start_xen(unsigned long
> > bootcpu_id,
> >  {
> >      remove_identity_mapping();
> >  
> > +    /*
> > +     * tp register contains an address of physical cpu
> > information.
> > +     * So write physical CPU info of boot cpu to tp register
> > +     * It will be used later by get_processor_id() ( look at
> > +     * <asm/processor.h> ):
> > +     *   #define get_processor_id()    (tp->processor_id)
> > +     */
> > +    asm volatile ( "mv tp, %0" : : "r"((unsigned
> > long)&pcpu_info[0]) );
> 
> Perhaps be on the safe side and also add a memory barrier?
Do you mean compiler barrier? ( asm volatile ( "..." :: ... : "memory"
)? )

> 
> Perhaps be on the safe side and do this absolutely first in the
> function,
> or even in assembly (such that initializers of future variables
> declared
> at the top of the function end up safe, too)?
I am not sure I follow the part about putting this code in assembly
( instructions in assembly code could still be re-ordered, which could
affect the time at which tp is set to the correct value ), nor what you
mean by "initializers of future variables declared at the top of
the function end up safe".
> 
> Also nit: Blank please between closing quote and opening parenthesis.
> (Otoh you could omit the blank between the two colons.)
> 
> > --- /dev/null
> > +++ b/xen/arch/riscv/smp.c
> > @@ -0,0 +1,4 @@
> > +#include <xen/smp.h>
> > +
> > +/* tp points to one of these per cpu */
> > +struct pcpu_info pcpu_info[NR_CPUS];
> 
> And they all need setting up statically? Is there a plan to make this
> dynamic (which could be recorded in a "fixme" in the comment)?
I didn't plan to make the allocation of this array dynamic. I don't expect
that NR_CPUS will be big. I can add a "fixme", but I don't really
understand what the advantages would be if pcpu_info[] were allocated
dynamically.

~ Oleksii
Jan Beulich Aug. 14, 2024, 3:22 p.m. UTC | #3
On 14.08.2024 16:45, oleksii.kurochko@gmail.com wrote:
> On Tue, 2024-08-13 at 10:54 +0200, Jan Beulich wrote:
>> On 09.08.2024 18:19, Oleksii Kurochko wrote:
>>> --- a/xen/arch/riscv/include/asm/smp.h
>>> +++ b/xen/arch/riscv/include/asm/smp.h
>>> @@ -5,6 +5,8 @@
>>>  #include <xen/cpumask.h>
>>>  #include <xen/percpu.h>
>>>  
>>> +#define INVALID_HARTID UINT_MAX
>>> +
>>>  DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_mask);
>>>  DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask);
>>>  
>>> @@ -14,6 +16,14 @@ DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask);
>>>   */
>>>  #define park_offline_cpus false
>>>  
>>> +void smp_set_bootcpu_id(unsigned long boot_cpu_hartid);
>>> +
>>> +/*
>>> + * Mapping between linux logical cpu index and hartid.
>>> + */
>>> +extern unsigned long cpuid_to_hartid_map[NR_CPUS];
>>> +#define cpuid_to_hartid(cpu) cpuid_to_hartid_map[cpu]
>>
>> How wide can hart IDs be? Wider than 32 bits? If not, why unsigned
>> long?
> According to the spec:
> ```
> The mhartid CSR is an MXLEN-bit read-only register containing the
> integer ID of the hardware thread running the code
> ```
> where MXLEN can bit 32 and 64.

Hmm, okay. If the full MXLEN bits can be used, then using unsigned long
is indeed the right thing here.

>>> @@ -40,6 +41,19 @@ void __init noreturn start_xen(unsigned long
>>> bootcpu_id,
>>>  {
>>>      remove_identity_mapping();
>>>  
>>> +    /*
>>> +     * tp register contains an address of physical cpu
>>> information.
>>> +     * So write physical CPU info of boot cpu to tp register
>>> +     * It will be used later by get_processor_id() ( look at
>>> +     * <asm/processor.h> ):
>>> +     *   #define get_processor_id()    (tp->processor_id)
>>> +     */
>>> +    asm volatile ( "mv tp, %0" : : "r"((unsigned
>>> long)&pcpu_info[0]) );
>>
>> Perhaps be on the safe side and also add a memory barrier?
> Do you mean compiler barrier? ( asm volatile ( "..." :: ... : "memory"
> )? )

Yes.

>> Perhaps be on the safe side and do this absolutely first in the
>> function,
>> or even in assembly (such that initializers of future variables
>> declared
>> at the top of the function end up safe, too)?
> I am not sure that I am following your part related to put this code in
> assembler ( instructions in assembly code still code be re-ordered what
> can affect a time when tp will be set with correct value )

I'm afraid I don't understand this. Instructions can be re-ordered, sure,
but later instructions are guaranteed to observe the effects on register
state that earlier instructions caused.

> and what do
> you mean by "initializers of future variables declared at the top of
> the function end up safe"

With

void start_xen()
{
    int var = func();

    asm volatile ( "mv tp, %0" :: ...);
    ...

you end up with the requirement that func() must not use anything that
depends on tp being set. In this simple example it may be easy to say
"don't use an initializer and call the function afterwards". But that is
going to be a risky game to play. Look at x86'es __start_xen(). An
insertion into such a big set of declarations may not pay attention to
tp not being set yet, when _all_ other C code may reasonably assume tp
is set.

>>> --- /dev/null
>>> +++ b/xen/arch/riscv/smp.c
>>> @@ -0,0 +1,4 @@
>>> +#include <xen/smp.h>
>>> +
>>> +/* tp points to one of these per cpu */
>>> +struct pcpu_info pcpu_info[NR_CPUS];
>>
>> And they all need setting up statically? Is there a plan to make this
>> dynamic (which could be recorded in a "fixme" in the comment)?
> I didn't plan to make allocation of this array dynamic. I don't expect
> that NR_CPUS will be big.

What is this expectation of yours based on? Other architectures permit
systems with hundreds or even thousands of CPUs; why would RISC-V be
different there?

> I can add "fixme" but I am not really
> understand what will be advantages if pcpu_info[] will be allocated
> dynamically.

Where possible it's better to avoid static allocations, of which on
some systems only a very small part may be used. Even if you put yourself
on the position that many take - memory being cheap - you then still
waste cache and TLB bandwidth. Furthermore as long as struct pcpu_info
isn't big enough (and properly aligned) for two successive array entries
to not share cache lines, you may end up playing cacheline ping-pong
when a CPU writes to its own array slot.

Jan
Oleksii Kurochko Aug. 15, 2024, 8:55 a.m. UTC | #4
On Wed, 2024-08-14 at 17:22 +0200, Jan Beulich wrote:
> On 14.08.2024 16:45, oleksii.kurochko@gmail.com wrote:
> > On Tue, 2024-08-13 at 10:54 +0200, Jan Beulich wrote:
> > > On 09.08.2024 18:19, Oleksii Kurochko wrote:
> > > > --- a/xen/arch/riscv/include/asm/smp.h
> > > > +++ b/xen/arch/riscv/include/asm/smp.h
> > > > @@ -5,6 +5,8 @@
> > > >  #include <xen/cpumask.h>
> > > >  #include <xen/percpu.h>
> > > >  
> > > > +#define INVALID_HARTID UINT_MAX
> > > > +
> > > >  DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_mask);
> > > >  DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask);
> > > >  
> > > > @@ -14,6 +16,14 @@ DECLARE_PER_CPU(cpumask_var_t,
> > > > cpu_core_mask);
> > > >   */
> > > >  #define park_offline_cpus false
> > > >  
> > > > +void smp_set_bootcpu_id(unsigned long boot_cpu_hartid);
> > > > +
> > > > +/*
> > > > + * Mapping between linux logical cpu index and hartid.
> > > > + */
> > > > +extern unsigned long cpuid_to_hartid_map[NR_CPUS];
> > > > +#define cpuid_to_hartid(cpu) cpuid_to_hartid_map[cpu]
> > > 
> > > How wide can hart IDs be? Wider than 32 bits? If not, why
> > > unsigned
> > > long?
> > According to the spec:
> > ```
> > The mhartid CSR is an MXLEN-bit read-only register containing the
> > integer ID of the hardware thread running the code
> > ```
> > where MXLEN can bit 32 and 64.
> 
> Hmm, okay. If the full MXLEN bits can be used, then using unsigned
> long
> is indeed the right thing here.
> 
> > > > @@ -40,6 +41,19 @@ void __init noreturn start_xen(unsigned long
> > > > bootcpu_id,
> > > >  {
> > > >      remove_identity_mapping();
> > > >  
> > > > +    /*
> > > > +     * tp register contains an address of physical cpu
> > > > information.
> > > > +     * So write physical CPU info of boot cpu to tp register
> > > > +     * It will be used later by get_processor_id() ( look at
> > > > +     * <asm/processor.h> ):
> > > > +     *   #define get_processor_id()    (tp->processor_id)
> > > > +     */
> > > > +    asm volatile ( "mv tp, %0" : : "r"((unsigned
> > > > long)&pcpu_info[0]) );
> > > 
> > > Perhaps be on the safe side and also add a memory barrier?
> > Do you mean compiler barrier? ( asm volatile ( "..." :: ... :
> > "memory"
> > )? )
> 
> Yes.
> 
> > > Perhaps be on the safe side and do this absolutely first in the
> > > function,
> > > or even in assembly (such that initializers of future variables
> > > declared
> > > at the top of the function end up safe, too)?
> > I am not sure that I am following your part related to put this
> > code in
> > assembler ( instructions in assembly code still code be re-ordered
> > what
> > can affect a time when tp will be set with correct value )
> 
> I'm afraid I don't understand this. Instructions can be re-ordered,
> sure,
> but later instructions are guaranteed to observe the effects on
> register
> state that earlier instructions caused.
> 
> > and what do
> > you mean by "initializers of future variables declared at the top
> > of
> > the function end up safe"
> 
> With
> 
> void start_xen()
> {
>     int var = func();
> 
>     asm volatile ( "mv tp, %0" :: ...);
>     ...
> 
> you end up with the requirement that func() must not use anything
> that
> depends on tp being set. In this simple example it may be easy to say
> "don't use an initializer and call the function afterwards". But that
> is
> going to be a risky game to play. Look at x86'es __start_xen(). An
> insertion into such a big set of declarations may not pay attention
> to
> tp not being set yet, when _all_ other C code may reasonably assume
> tp
> is set.
Thanks for the clarification. I missed the possibility that someone
might accidentally use tp before it is set. In this case, I agree that
it would be better to create a setup_tp() function and call it before
start_xen().

> 
> > > > --- /dev/null
> > > > +++ b/xen/arch/riscv/smp.c
> > > > @@ -0,0 +1,4 @@
> > > > +#include <xen/smp.h>
> > > > +
> > > > +/* tp points to one of these per cpu */
> > > > +struct pcpu_info pcpu_info[NR_CPUS];
> > > 
> > > And they all need setting up statically? Is there a plan to make
> > > this
> > > dynamic (which could be recorded in a "fixme" in the comment)?
> > I didn't plan to make allocation of this array dynamic. I don't
> > expect
> > that NR_CPUS will be big.
> 
> What is this expectation of yours based on? Other architectures
> permit
> systems with hundreds or even thousands of CPUs; why would RISC-V be
> different there?
Based on available dev boards. ( which isn't really a strong argument )

I checked other architectures and they are using static allocation too:
   struct cpuinfo_x86 cpu_data[NR_CPUS];
   
   u32 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
   	{ [0 ... NR_CPUS-1] = BAD_APICID };
   
   ... /* Arm */
   
   struct cpuinfo_arm cpu_data[NR_CPUS];

I wanted to check which API should be used to allocate this array
dynamically. xmalloc?

And I am curious how I can use xmalloc() at this stage, given that the
page allocator (?) needs to be initialized first, which isn't the case yet.
Or should I statically allocate pcpu_info only for the boot cpu and then
use xmalloc() for the others?

> 
> > I can add "fixme" but I am not really
> > understand what will be advantages if pcpu_info[] will be allocated
> > dynamically.
> 
> Where possible it's better to avoid static allocations, of which on
> some systems only a very small part may be used. Even if you put
> yourself
> on the position that many take - memory being cheap - you then still
> waste cache and TLB bandwidth. Furthermore as long as struct
> pcpu_info
> isn't big enough (and properly aligned) for two successive array
> entries
> to not share cache lines, you may end up playing cacheline ping-pong
> when a CPU writes to its own array slot.
Why don't the mentioned issues apply to dynamic memory? We would still be
allocating sizeof(pcpu_info) * NR_CPUS bytes; when this allocated memory
is accessed it will still go through the cache, the CPU/MMU will still use
the TLB for address translation of this region, and without proper
alignment of the pcpu_info size there could still be cache line sharing.

~ Oleksii
Jan Beulich Aug. 15, 2024, 9:02 a.m. UTC | #5
On 15.08.2024 10:55, oleksii.kurochko@gmail.com wrote:
> On Wed, 2024-08-14 at 17:22 +0200, Jan Beulich wrote:
>> On 14.08.2024 16:45, oleksii.kurochko@gmail.com wrote:
>>> On Tue, 2024-08-13 at 10:54 +0200, Jan Beulich wrote:
>>>> On 09.08.2024 18:19, Oleksii Kurochko wrote:
>>>>> --- /dev/null
>>>>> +++ b/xen/arch/riscv/smp.c
>>>>> @@ -0,0 +1,4 @@
>>>>> +#include <xen/smp.h>
>>>>> +
>>>>> +/* tp points to one of these per cpu */
>>>>> +struct pcpu_info pcpu_info[NR_CPUS];
>>>>
>>>> And they all need setting up statically? Is there a plan to make
>>>> this
>>>> dynamic (which could be recorded in a "fixme" in the comment)?
>>> I didn't plan to make allocation of this array dynamic. I don't
>>> expect
>>> that NR_CPUS will be big.
>>
>> What is this expectation of yours based on? Other architectures
>> permit
>> systems with hundreds or even thousands of CPUs; why would RISC-V be
>> different there?
> Based on available dev boards. ( what isn't really strong argument )
> 
> I checked other architectures and they are using static allocation too:
>    struct cpuinfo_x86 cpu_data[NR_CPUS];
>    
>    u32 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
>    	{ [0 ... NR_CPUS-1] = BAD_APICID };
>    
>    ... /* Arm */
>    
>    struct cpuinfo_arm cpu_data[NR_CPUS];
> 
> I wanted to check to understand which one API should be used to
> allocate this array dynamically. xmalloc?

As of a few days ago xvmalloc() (or friends thereof), as long as ...

> And I am curious how I can use xmalloc() at this stage if page
> allocator (?) should be initialized what isn't true now.

... this happens late enough in the boot process. Indeed ...

> Or just to allocate pcpu_info only for boot cpu and for other then use
> xmalloc()?

... statically allocating space for the boot CPU only is another option.

>>> I can add "fixme" but I am not really
>>> understand what will be advantages if pcpu_info[] will be allocated
>>> dynamically.
>>
>> Where possible it's better to avoid static allocations, of which on
>> some systems only a very small part may be used. Even if you put
>> yourself
>> on the position that many take - memory being cheap - you then still
>> waste cache and TLB bandwidth. Furthermore as long as struct
>> pcpu_info
>> isn't big enough (and properly aligned) for two successive array
>> entries
>> to not share cache lines, you may end up playing cacheline ping-pong
>> when a CPU writes to its own array slot.
> Why the mentioned issues aren't work for dynamic memory? We still
> allocating memory for sizeof(pcpu_info) * NR_CPUS

Why NR_CPUS? At runtime you know how many CPUs the system you're
running on has. You only need to allocate as many then. Just like e.g.
dynamically allocated CPU mask variables (cpumask_var_t) deliberately
use less than NR_CPUS bits unless on really big iron.

Jan

> and when this
> allocated memory access it will go to cache, CPU/MMU still will use TLB
> for address translation for this region and without a proper alignment
> of pcpu_info size it still could be an issue with cache line sharing.
> 
> ~ Oleksii
>
Oleksii Kurochko Aug. 15, 2024, 1:29 p.m. UTC | #6
On Thu, 2024-08-15 at 11:02 +0200, Jan Beulich wrote:
> On 15.08.2024 10:55, oleksii.kurochko@gmail.com wrote:
> > On Wed, 2024-08-14 at 17:22 +0200, Jan Beulich wrote:
> > > On 14.08.2024 16:45, oleksii.kurochko@gmail.com wrote:
> > > > On Tue, 2024-08-13 at 10:54 +0200, Jan Beulich wrote:
> > > > > On 09.08.2024 18:19, Oleksii Kurochko wrote:
> > > > > > --- /dev/null
> > > > > > +++ b/xen/arch/riscv/smp.c
> > > > > > @@ -0,0 +1,4 @@
> > > > > > +#include <xen/smp.h>
> > > > > > +
> > > > > > +/* tp points to one of these per cpu */
> > > > > > +struct pcpu_info pcpu_info[NR_CPUS];
> > > > > 
> > > > > And they all need setting up statically? Is there a plan to
> > > > > make
> > > > > this
> > > > > dynamic (which could be recorded in a "fixme" in the
> > > > > comment)?
> > > > I didn't plan to make allocation of this array dynamic. I don't
> > > > expect
> > > > that NR_CPUS will be big.
> > > 
> > > What is this expectation of yours based on? Other architectures
> > > permit
> > > systems with hundreds or even thousands of CPUs; why would RISC-V
> > > be
> > > different there?
> > Based on available dev boards. ( what isn't really strong argument
> > )
> > 
> > I checked other architectures and they are using static allocation
> > too:
> >    struct cpuinfo_x86 cpu_data[NR_CPUS];
> >    
> >    u32 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
> >    	{ [0 ... NR_CPUS-1] = BAD_APICID };
> >    
> >    ... /* Arm */
> >    
> >    struct cpuinfo_arm cpu_data[NR_CPUS];
> > 
> > I wanted to check to understand which one API should be used to
> > allocate this array dynamically. xmalloc?
> 
> As of a few days ago xvmalloc() (or friends thereof), as long as ...
> 
> > And I am curious how I can use xmalloc() at this stage if page
> > allocator (?) should be initialized what isn't true now.
> 
> ... this happens late enough in the boot process. Indeed ...
> 
> > Or just to allocate pcpu_info only for boot cpu and for other then
> > use
> > xmalloc()?
> 
> ... statically allocating space for the boot CPU only is another
> option.
> 
> > > > I can add "fixme" but I am not really
> > > > understand what will be advantages if pcpu_info[] will be
> > > > allocated
> > > > dynamically.
> > > 
> > > Where possible it's better to avoid static allocations, of which
> > > on
> > > some systems only a very small part may be used. Even if you put
> > > yourself
> > > on the position that many take - memory being cheap - you then
> > > still
> > > waste cache and TLB bandwidth. Furthermore as long as struct
> > > pcpu_info
> > > isn't big enough (and properly aligned) for two successive array
> > > entries
> > > to not share cache lines, you may end up playing cacheline ping-
> > > pong
> > > when a CPU writes to its own array slot.
> > Why the mentioned issues aren't work for dynamic memory? We still
> > allocating memory for sizeof(pcpu_info) * NR_CPUS
> 
> Why NR_CPUS? At runtime you know how may CPUs the system has you're
> running on. You only need to allocate as much then. Just like e.g.
> dynamically allocated CPU mask variables (cpumask_var_t) deliberately
> use less than NR_CPUS bits unless on really big iron.
I thought that NR_CPUS tells me how many CPUs the system has.

The other option is to read that from the DTB, but then pcpu_info[] can be
allocated only after the DTB has been mapped.

~ Oleksii

> 
> > and when this
> > allocated memory access it will go to cache, CPU/MMU still will use
> > TLB
> > for address translation for this region and without a proper
> > alignment
> > of pcpu_info size it still could be an issue with cache line
> > sharing.
> > 
> > ~ Oleksii
> > 
>
Jan Beulich Aug. 15, 2024, 3:24 p.m. UTC | #7
On 15.08.2024 15:29, oleksii.kurochko@gmail.com wrote:
> On Thu, 2024-08-15 at 11:02 +0200, Jan Beulich wrote:
>> On 15.08.2024 10:55, oleksii.kurochko@gmail.com wrote:
>>> On Wed, 2024-08-14 at 17:22 +0200, Jan Beulich wrote:
>>>> On 14.08.2024 16:45, oleksii.kurochko@gmail.com wrote:
>>>>> On Tue, 2024-08-13 at 10:54 +0200, Jan Beulich wrote:
>>>>>> On 09.08.2024 18:19, Oleksii Kurochko wrote:
>>>>>>> --- /dev/null
>>>>>>> +++ b/xen/arch/riscv/smp.c
>>>>>>> @@ -0,0 +1,4 @@
>>>>>>> +#include <xen/smp.h>
>>>>>>> +
>>>>>>> +/* tp points to one of these per cpu */
>>>>>>> +struct pcpu_info pcpu_info[NR_CPUS];
>>>>>>
>>>>>> And they all need setting up statically? Is there a plan to
>>>>>> make
>>>>>> this
>>>>>> dynamic (which could be recorded in a "fixme" in the
>>>>>> comment)?
>>>>> I didn't plan to make allocation of this array dynamic. I don't
>>>>> expect
>>>>> that NR_CPUS will be big.
>>>>
>>>> What is this expectation of yours based on? Other architectures
>>>> permit
>>>> systems with hundreds or even thousands of CPUs; why would RISC-V
>>>> be
>>>> different there?
>>> Based on available dev boards. ( what isn't really strong argument
>>> )
>>>
>>> I checked other architectures and they are using static allocation
>>> too:
>>>    struct cpuinfo_x86 cpu_data[NR_CPUS];
>>>    
>>>    u32 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
>>>    	{ [0 ... NR_CPUS-1] = BAD_APICID };
>>>    
>>>    ... /* Arm */
>>>    
>>>    struct cpuinfo_arm cpu_data[NR_CPUS];
>>>
>>> I wanted to check to understand which one API should be used to
>>> allocate this array dynamically. xmalloc?
>>
>> As of a few days ago xvmalloc() (or friends thereof), as long as ...
>>
>>> And I am curious how I can use xmalloc() at this stage if page
>>> allocator (?) should be initialized what isn't true now.
>>
>> ... this happens late enough in the boot process. Indeed ...
>>
>>> Or just to allocate pcpu_info only for boot cpu and for other then
>>> use
>>> xmalloc()?
>>
>> ... statically allocating space for the boot CPU only is another
>> option.
>>
>>>>> I can add "fixme" but I am not really
>>>>> understand what will be advantages if pcpu_info[] will be
>>>>> allocated
>>>>> dynamically.
>>>>
>>>> Where possible it's better to avoid static allocations, of which
>>>> on
>>>> some systems only a very small part may be used. Even if you put
>>>> yourself
>>>> on the position that many take - memory being cheap - you then
>>>> still
>>>> waste cache and TLB bandwidth. Furthermore as long as struct
>>>> pcpu_info
>>>> isn't big enough (and properly aligned) for two successive array
>>>> entries
>>>> to not share cache lines, you may end up playing cacheline ping-
>>>> pong
>>>> when a CPU writes to its own array slot.
>>> Why the mentioned issues aren't work for dynamic memory? We still
>>> allocating memory for sizeof(pcpu_info) * NR_CPUS
>>
>> Why NR_CPUS? At runtime you know how may CPUs the system has you're
>> running on. You only need to allocate as much then. Just like e.g.
>> dynamically allocated CPU mask variables (cpumask_var_t) deliberately
>> use less than NR_CPUS bits unless on really big iron.
> I thought that NR_CPUS tells me how many CPU the system has.

Oh, no, that's not what it says (and it really can't, being a compile time
constant) - it says how many CPUs the specific build of Xen is going to
support at most.

Jan
diff mbox series

Patch

diff --git a/xen/arch/riscv/Makefile b/xen/arch/riscv/Makefile
index 81b77b13d6..334fd24547 100644
--- a/xen/arch/riscv/Makefile
+++ b/xen/arch/riscv/Makefile
@@ -4,6 +4,8 @@  obj-y += mm.o
 obj-$(CONFIG_RISCV_64) += riscv64/
 obj-y += sbi.o
 obj-y += setup.o
+obj-y += smp.o
+obj-y += smpboot.o
 obj-y += stubs.o
 obj-y += traps.o
 obj-y += vm_event.o
diff --git a/xen/arch/riscv/include/asm/processor.h b/xen/arch/riscv/include/asm/processor.h
index 3ae164c265..fd4e9b4a37 100644
--- a/xen/arch/riscv/include/asm/processor.h
+++ b/xen/arch/riscv/include/asm/processor.h
@@ -12,8 +12,32 @@ 
 
 #ifndef __ASSEMBLY__
 
-/* TODO: need to be implemeted */
-#define smp_processor_id() 0
+#include <xen/bug.h>
+
+register struct pcpu_info *tp asm ("tp"); /* bound to the tp register */
+
+struct pcpu_info {
+    unsigned int processor_id; /* ID returned by smp_processor_id() */
+};
+
+/* Per-pCPU info array; tp points to one of these entries. */
+extern struct pcpu_info pcpu_info[NR_CPUS];
+
+#define get_processor_id()      (tp->processor_id)
+#define set_processor_id(id)    do { \
+    tp->processor_id = (id);         \
+} while (0)
+
+/* Return the current pCPU's logical ID; valid IDs are 0 .. NR_CPUS - 1. */
+static inline unsigned int smp_processor_id(void)
+{
+    unsigned int id = get_processor_id();
+
+    /* pcpu_info[] has NR_CPUS entries, so NR_CPUS itself is out of range. */
+    BUG_ON(id >= NR_CPUS);
+
+    return id;
+}
 
 /* On stack VCPU state */
 struct cpu_user_regs
diff --git a/xen/arch/riscv/include/asm/smp.h b/xen/arch/riscv/include/asm/smp.h
index b1ea91b1eb..9f49d2bc8b 100644
--- a/xen/arch/riscv/include/asm/smp.h
+++ b/xen/arch/riscv/include/asm/smp.h
@@ -5,6 +5,8 @@ 
 #include <xen/cpumask.h>
 #include <xen/percpu.h>
 
+#define INVALID_HARTID ULONG_MAX /* same width as cpuid_to_hartid_map[] */
+
 DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_mask);
 DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask);
 
@@ -14,6 +16,14 @@  DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask);
  */
 #define park_offline_cpus false
 
+void smp_set_bootcpu_id(unsigned long boot_cpu_hartid);
+
+/*
+ * Mapping between Xen logical CPU index and hartid (physical CPU ID).
+ */
+extern unsigned long cpuid_to_hartid_map[NR_CPUS];
+#define cpuid_to_hartid(cpu) cpuid_to_hartid_map[cpu]
+
 #endif
 
 /*
diff --git a/xen/arch/riscv/setup.c b/xen/arch/riscv/setup.c
index 13f0e8c77d..c9446e6038 100644
--- a/xen/arch/riscv/setup.c
+++ b/xen/arch/riscv/setup.c
@@ -8,6 +8,7 @@ 
 #include <public/version.h>
 
 #include <asm/early_printk.h>
+#include <asm/smp.h>
 #include <asm/traps.h>
 
 void arch_get_xen_caps(xen_capabilities_info_t *info)
@@ -40,6 +41,19 @@  void __init noreturn start_xen(unsigned long bootcpu_id,
 {
     remove_identity_mapping();
 
+    /*
+     * The tp register holds the address of a pCPU's pcpu_info entry, so
+     * point it at the boot CPU's entry (pcpu_info[0]) first.
+     * get_processor_id() in <asm/processor.h> then reads the ID via tp:
+     *   #define get_processor_id()    (tp->processor_id)
+     * set_processor_id() below writes through tp too, so tp comes first.
+     */
+    asm volatile ( "mv tp, %0" : : "r"((unsigned long)&pcpu_info[0]) );
+
+    set_processor_id(0);
+
+    smp_set_bootcpu_id(bootcpu_id);
+
     trap_init();
 
 #ifdef CONFIG_SELF_TESTS
diff --git a/xen/arch/riscv/smp.c b/xen/arch/riscv/smp.c
new file mode 100644
index 0000000000..779d955e3a
--- /dev/null
+++ b/xen/arch/riscv/smp.c
@@ -0,0 +1,4 @@ 
+#include <xen/smp.h>
+
+/* Per-pCPU info array; tp points to one of these entries. */
+struct pcpu_info pcpu_info[NR_CPUS];
diff --git a/xen/arch/riscv/smpboot.c b/xen/arch/riscv/smpboot.c
new file mode 100644
index 0000000000..6690522a52
--- /dev/null
+++ b/xen/arch/riscv/smpboot.c
@@ -0,0 +1,12 @@ 
+#include <xen/init.h>
+#include <xen/sections.h>
+#include <xen/smp.h>
+
+unsigned long cpuid_to_hartid_map[NR_CPUS] __ro_after_init = {
+    [0 ... NR_CPUS - 1] = INVALID_HARTID /* every slot starts out invalid */
+};
+
+void __init smp_set_bootcpu_id(unsigned long boot_cpu_hartid)
+{
+    cpuid_to_hartid(0) = boot_cpu_hartid; /* slot 0 is used for the boot CPU */
+}