[v3,7/9] xen/riscv: introduce and init SBI RFENCE extension

Message ID fb2d24731f870378d79077be39b1bc19cc655327.1721834549.git.oleksii.kurochko@gmail.com (mailing list archive)
State Superseded
Series RISCV device tree mapping

Commit Message

Oleksii Kurochko July 24, 2024, 3:31 p.m. UTC
Introduce functions to work with the SBI RFENCE extension,
which will be used to issue various kinds of fences on
remote (non-local) CPUs.

In addition, the sbi_init() function and auxiliary functions and
macro definitions are introduced for proper initialization and
for checking the availability of SBI extensions.
At the moment, this is only done for RFENCE.

sbi_remote_sfence_vma() is introduced to send SFENCE_VMA to
a set of target HARTs; flush_xen_tlb_range_va() will be
implemented on top of it (a sketch follows the diffstat below).

Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in V3:
 - new patch.
---
 xen/arch/riscv/include/asm/sbi.h |  57 +++++++
 xen/arch/riscv/sbi.c             | 256 +++++++++++++++++++++++++++++++
 xen/arch/riscv/setup.c           |   3 +
 3 files changed, 316 insertions(+)
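
For context, here is a minimal sketch (not part of the patch) of how flush_xen_tlb_range_va() might sit on top of sbi_remote_sfence_vma(); treating a NULL hart_mask as "all online harts" is an assumption read out of __sbi_rfence_v02() below:

```c
/*
 * Hypothetical sketch only. Passing NULL as hart_mask is assumed to
 * mean "all online harts", matching __sbi_rfence_v02()'s fallback to
 * cpu_online_map in this patch.
 */
static inline void flush_xen_tlb_range_va(unsigned long va,
                                          unsigned long size)
{
    sbi_remote_sfence_vma(NULL, va, size);
}
```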

Comments

Jan Beulich July 29, 2024, 3:52 p.m. UTC | #1
On 24.07.2024 17:31, Oleksii Kurochko wrote:
> --- a/xen/arch/riscv/include/asm/sbi.h
> +++ b/xen/arch/riscv/include/asm/sbi.h
> @@ -14,6 +14,38 @@
>  
>  #define SBI_EXT_0_1_CONSOLE_PUTCHAR		0x1
>  
> +#define SBI_EXT_BASE                    0x10
> +#define SBI_EXT_RFENCE                  0x52464E43
> +
> +/* SBI function IDs for BASE extension */
> +#define SBI_EXT_BASE_GET_SPEC_VERSION   0x0
> +#define SBI_EXT_BASE_GET_IMP_ID         0x1
> +#define SBI_EXT_BASE_GET_IMP_VERSION    0x2
> +#define SBI_EXT_BASE_PROBE_EXT          0x3
> +
> +/* SBI function IDs for RFENCE extension */
> +#define SBI_EXT_RFENCE_REMOTE_FENCE_I           0x0
> +#define SBI_EXT_RFENCE_REMOTE_SFENCE_VMA        0x1
> +#define SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID   0x2
> +#define SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA       0x3
> +#define SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID  0x4
> +#define SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA       0x5
> +#define SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID  0x6
> +
> +#define SBI_SPEC_VERSION_MAJOR_SHIFT    24
> +#define SBI_SPEC_VERSION_MAJOR_MASK     0x7f
> +#define SBI_SPEC_VERSION_MINOR_MASK     0xffffff
> +
> +/* SBI return error codes */
> +#define SBI_SUCCESS             0
> +#define SBI_ERR_FAILURE         -1
> +#define SBI_ERR_NOT_SUPPORTED   -2
> +#define SBI_ERR_INVALID_PARAM   -3
> +#define SBI_ERR_DENIED          -4
> +#define SBI_ERR_INVALID_ADDRESS -5

Parentheses around all the negative numbers please.

> +#define SBI_SPEC_VERSION_DEFAULT        0x1
> +
>  struct sbiret {
>      long error;
>      long value;
> @@ -31,4 +63,29 @@ struct sbiret sbi_ecall(unsigned long ext, unsigned long fid,
>   */
>  void sbi_console_putchar(int ch);
>  
> +/*
> + * Check underlying SBI implementation has RFENCE
> + *
> + * @return 1 for supported AND 0 for not-supported

In which case ...

> + */
> +int sbi_has_rfence(void);

... bool please.

> +/*
> + * Send SFENCE_VMA to a set of target HARTs.
> + *
> + * @param hart_mask mask representing set of target HARTs
> + * @param start virtual address start
> + * @param size virtual address size

Are these really virtual addresses, not somehow a bias and a number
of bits (CPUs) or elements? From the rest of the patch I can't deduce
what these two parameters express.

> + */
> +void sbi_remote_sfence_vma(const unsigned long *hart_mask,

Maybe better hart_mask[]? It's not clear to me though what the upper
bound of the array is.

> @@ -38,7 +48,253 @@ struct sbiret sbi_ecall(unsigned long ext, unsigned long fid,
>      return ret;
>  }
>  
> +static int sbi_err_map_xen_errno(int err)
> +{
> +    switch ( err )
> +    {
> +    case SBI_SUCCESS:
> +        return 0;
> +    case SBI_ERR_DENIED:
> +        return -EACCES;
> +    case SBI_ERR_INVALID_PARAM:
> +        return -EINVAL;
> +    case SBI_ERR_INVALID_ADDRESS:
> +        return -EFAULT;
> +    case SBI_ERR_NOT_SUPPORTED:
> +    case SBI_ERR_FAILURE:
> +    default:
> +        return -EOPNOTSUPP;

Mapping FAILURE to -EOPNOTSUPP may end up confusing.

> +    };
> +}
> +
>  void sbi_console_putchar(int ch)
>  {
>      sbi_ecall(SBI_EXT_0_1_CONSOLE_PUTCHAR, 0, ch, 0, 0, 0, 0, 0);
>  }
> +
> +static unsigned long sbi_major_version(void)
> +{
> +    return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) &
> +        SBI_SPEC_VERSION_MAJOR_MASK;
> +}
> +
> +static unsigned long sbi_minor_version(void)
> +{
> +    return sbi_spec_version & SBI_SPEC_VERSION_MINOR_MASK;
> +}
> +
> +static long sbi_ext_base_func(long fid)
> +{
> +    struct sbiret ret;
> +
> +    ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0);
> +    if (!ret.error)

Nit: Blanks missing.

> +        return ret.value;
> +    else
> +        return ret.error;
> +}
> +
> +static void sbi_cpumask_to_hartmask(const struct cpumask *cmask,
> +                 struct cpumask *hmask)

I doubt it is valid to re-use struct cpumask for hart maps.

> +{
> +    u32 cpu;

uint32_t or yet better unsigned int please.

> +    unsigned long hart = INVALID_HARTID;
> +
> +    if (!cmask || !hmask)
> +        return;
> +
> +    cpumask_clear(hmask);
> +    for_each_cpu(cpu, cmask)
> +    {
> +        if ( CONFIG_NR_CPUS <= cpu )
> +        {
> +            printk(XENLOG_ERR "SBI: invalid hart=%lu for cpu=%d\n",

%u for the CPU please.

> +                     hart, cpu);

"hart" wasn't set yet and hence is invalid or at least misleading to
log.

Nit: Indentation.

> +            continue;

Can you really sensibly continue in such a case?

> +        }
> +
> +        hart = cpuid_to_hartid_map(pcpu_info[cpu].processor_id);

What does "_map" in the function/macro name signify?

> +        cpumask_set_cpu(hart, hmask);
> +    }
> +}
> +
> +static int __sbi_rfence_v02_real(unsigned long fid,
> +                 unsigned long hmask, unsigned long hbase,
> +                 unsigned long start, unsigned long size,
> +                 unsigned long arg4)

Nit: Indentation (more similar issues elsewhere).

> +{
> +    struct sbiret ret = {0};
> +    int result = 0;
> +
> +    switch ( fid )
> +    {
> +    case SBI_EXT_RFENCE_REMOTE_FENCE_I:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                0, 0, 0, 0);
> +        break;
> +    case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                start, size, 0, 0);
> +        break;
> +    case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                start, size, arg4, 0);
> +        break;
> +    case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                start, size, 0, 0);
> +        break;
> +    case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                start, size, arg4, 0);
> +        break;
> +    case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                start, size, 0, 0);
> +        break;
> +    case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                start, size, arg4, 0);
> +        break;
> +
> +    default:
> +        printk("%s: unknown function ID [%lu]\n",
> +               __func__, fid);
> +        result = -EINVAL;
> +        break;
> +    };
> +
> +    if ( ret.error )
> +    {
> +        result = sbi_err_map_xen_errno(ret.error);
> +        printk("%s: hbase=%lu hmask=0x%lx failed (error %d)\n",

%#lx (also elsewhere)

> +               __func__, hbase, hmask, result);
> +    }
> +
> +    return result;
> +}
> +
> +static int __sbi_rfence_v02(unsigned long fid,
> +                const unsigned long *hart_mask,
> +                unsigned long start, unsigned long size,
> +                unsigned long arg4, unsigned long arg5)
> +{
> +    struct cpumask tmask;
> +    unsigned long hart, hmask, hbase;
> +    int result;
> +
> +    if (!hart_mask)

Nit: Style (and more below). And I'm afraid I'm tired of making such
remarks. Please check style yourself before submitting.

> +static int sbi_spec_is_0_1(void)
> +{
> +    return (sbi_spec_version == SBI_SPEC_VERSION_DEFAULT) ? 1 : 0;

bool and no conditional operator

Jan
Oleksii Kurochko July 30, 2024, 8:44 a.m. UTC | #2
On Mon, 2024-07-29 at 17:52 +0200, Jan Beulich wrote:
> On 24.07.2024 17:31, Oleksii Kurochko wrote:
> 
> 
> > +/*
> > + * Send SFENCE_VMA to a set of target HARTs.
> > + *
> > + * @param hart_mask mask representing set of target HARTs
> > + * @param start virtual address start
> > + * @param size virtual address size
> 
> Are these really virtual addresses, not somehow a bias and a number
> of bits (CPUs) or elements? From the rest of the patch I can't deduce
> what these two parameters express.
According to the SBI doc, start is a virtual address:
https://github.com/riscv-non-isa/riscv-sbi-doc/blob/master/src/ext-rfence.adoc?plain=1#L44

and hart_mask is:
• unsigned long hart_mask is a scalar bit-vector containing hartids


> 
> > + */
> > +void sbi_remote_sfence_vma(const unsigned long *hart_mask,
> 
> Maybe better hart_mask[]? It's not clear to me though what the upper
> bound of the array is.
Theoretically it is ULONG_MAX, but we don't look beyond
CONFIG_NR_CPUS.
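
For illustration, the caller-side sizing I have in mind is something like the sketch below; note, though, that it embodies exactly the [0, NR_CPUS) assumption questioned elsewhere in this thread:

```c
/* Assumed caller-side sizing; the API itself doesn't enforce a bound. */
unsigned long hart_mask[BITS_TO_LONGS(CONFIG_NR_CPUS)] = { 0 };

__set_bit(hart, hart_mask);                    /* mark one target hart */
sbi_remote_sfence_vma(hart_mask, start, size); /* flush [start, start+size) */
```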

> 
> > +
> > +static void sbi_cpumask_to_hartmask(const struct cpumask *cmask,
> > +                 struct cpumask *hmask)
> 
> I doubt it is valud to re-use struct cpumask for hart maps.
Why not? Would it be better to use unsigned long *hmask?

> 
> > +{
> > +    u32 cpu;
> 
> uint32_t or yet better unsigned int please.
> 
> > +    unsigned long hart = INVALID_HARTID;
> > +
> > +    if (!cmask || !hmask)
> > +        return;
> > +
> > +    cpumask_clear(hmask);
> > +    for_each_cpu(cpu, cmask)
> > +    {
> > +        if ( CONFIG_NR_CPUS <= cpu )
> > +        {
> > +            printk(XENLOG_ERR "SBI: invalid hart=%lu for
> > cpu=%d\n",
> 
> %u for the CPU please.
> 
> > +                     hart, cpu);
> 
> "hart" wasn't set yet and hence is invalid or at least misleading to
> log.
That's why it will print INVALID_HARTID, which the user will identify
as an invalid hartid.
Do you mean that there is no sense in the message "invalid hart=%lu",
as it is obviously invalid?

> 
> Nit: Indentation.
> 
> > +            continue;
> 
> 
> Can you really sensibly continue in such a case?
I think yes; the worst thing we will have is that the "bad" CPU won't
be used.
But it might be better to switch to BUG_ON(), as if we are inside the
"if CONFIG_NR_CPUS <= cpu" branch, it could tell us that something
went wrong.


> 
> > +        }
> > +
> > +        hart = cpuid_to_hartid_map(pcpu_info[cpu].processor_id);
> 
> What does "_map" in the function/macro name signify?
It is the interconnection/correlation between Xen's CPU ID and the hart's ID.


~ Oleksii
Jan Beulich July 30, 2024, 9:17 a.m. UTC | #3
On 30.07.2024 10:44, oleksii.kurochko@gmail.com wrote:
> On Mon, 2024-07-29 at 17:52 +0200, Jan Beulich wrote:
>> On 24.07.2024 17:31, Oleksii Kurochko wrote:
>>
>>
>>> +/*
>>> + * Send SFENCE_VMA to a set of target HARTs.
>>> + *
>>> + * @param hart_mask mask representing set of target HARTs
>>> + * @param start virtual address start
>>> + * @param size virtual address size
>>
>> Are these really virtual addresses, not somehow a bias and a number
>> of bits (CPUs) or elements? From the rest of the patch I can't deduce
>> what these two parameters express.
> According to the SBI doc, start is a virtual address:
> https://github.com/riscv-non-isa/riscv-sbi-doc/blob/master/src/ext-rfence.adoc?plain=1#L44

Oh, so these are describing the VA range to be flushed. Okay.

> and hart_mask is:
> • unsigned long hart_mask is a scalar bit-vector containing hartids

Biased by hart_mask_base in the actual SBI call.

>>> + */
>>> +void sbi_remote_sfence_vma(const unsigned long *hart_mask,
>>
>> Maybe better hart_mask[]? It's not clear to me though what the upper
>> bound of the array is.
> Theoretically it is ULONG_MAX, but we don't look beyond
> CONFIG_NR_CPUS.

See my comment elsewhere about possibly sparse numbering of hart IDs.

>>> +
>>> +static void sbi_cpumask_to_hartmask(const struct cpumask *cmask,
>>> +                 struct cpumask *hmask)
>>
>> I doubt it is valid to re-use struct cpumask for hart maps.
> Why not? Would it be better to use unsigned long *hmask?

It's not only better, but imo a requirement. Unless there's a guarantee
by the spec that hart IDs for any subset of harts are sequential and
starting from 0, you just can't assume they fall in the [0,NR_CPUS) or
really [0,nr_cpu_ids) range. Yet without that you simply can't (ab)use
struct cpumask (and btw it wants to be cpumask_t everywhere).

You may want to take a look at struct physid_mask that we have on x86
for the equivalent purpose.
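
To make that concrete, a dedicated mask type might look like the sketch below (entirely illustrative; the names, width, and helpers are made up here, loosely following x86's struct physid_mask):

```c
/* Illustrative only: the headroom factor mirrors x86's MAX_APICS idea. */
#define MAX_HARTS (4 * CONFIG_NR_CPUS)

typedef struct {
    unsigned long bits[BITS_TO_LONGS(MAX_HARTS)];
} hartmask_t;

#define hartmask_set(h, m)   __set_bit(h, (m)->bits)
#define hartmask_test(h, m)  test_bit(h, (m)->bits)
#define hartmask_clear(m)    memset((m)->bits, 0, sizeof((m)->bits))
```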

>>> +{
>>> +    u32 cpu;
>>
>> uint32_t or yet better unsigned int please.
>>
>>> +    unsigned long hart = INVALID_HARTID;
>>> +
>>> +    if (!cmask || !hmask)
>>> +        return;
>>> +
>>> +    cpumask_clear(hmask);
>>> +    for_each_cpu(cpu, cmask)
>>> +    {
>>> +        if ( CONFIG_NR_CPUS <= cpu )
>>> +        {
>>> +            printk(XENLOG_ERR "SBI: invalid hart=%lu for
>>> cpu=%d\n",
>>
>> %u for the CPU please.
>>
>>> +                     hart, cpu);
>>
>> "hart" wasn't set yet and hence is invalid or at least misleading to
>> log.
> That's why it will print INVALID_HARTID, which the user will
> identify as an invalid hartid.
> Do you mean that there is no sense in the message "invalid hart=%lu",
> as it is obviously invalid?

Yes. Plus the problem in this case isn't that there's no hart ID, but
that the CPU ID is out of range. That's what the message wants to say.
If such a check and message are necessary in the first place. I don't
think we have anything like that on x86. And for_each_cpu() also can't
possibly produce any ID at or beyond nr_cpu_ids, where nr_cpu_ids <=
NR_CPUS.

>>> +            continue;
>>
>>
>> Can you really sensibly continue in such a case?
> I think yes; the worst thing we will have is that the "bad" CPU
> won't be used.
> But it might be better to switch to BUG_ON(), as if we are inside the
> "if CONFIG_NR_CPUS <= cpu" branch, it could tell us that something
> went wrong.

Again - your problem here isn't the range of "cpu". What you instead
want to check is ...

>>> +        }
>>> +
>>> +        hart = cpuid_to_hartid_map(pcpu_info[cpu].processor_id);

the hart ID that you get back. If that's INVALID_HARTID, you're in
fact in trouble.
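
Put differently, a reworked loop might look like this sketch (illustrative only; the error handling is a placeholder, and whether the destination stays a cpumask is the separate question discussed above):

```c
unsigned int cpu;

cpumask_clear(hmask);
for_each_cpu ( cpu, cmask )
{
    unsigned long hart = cpuid_to_hartid_map(pcpu_info[cpu].processor_id);

    /* The meaningful failure is an unmapped CPU, not an out-of-range one. */
    if ( hart == INVALID_HARTID )
        return -ENOENT; /* placeholder; a BUG_ON() may fit better */

    cpumask_set_cpu(hart, hmask);
}
```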

>> What does "_map" in the function/macro name signify?
>> It is the interconnection/correlation between Xen's CPU ID and the hart's ID.

Well. What the function does internally is consult the map. Yet that's
not relevant to any caller? They only care about the translation from
one ID space to the other? No matter whether a map, radix tree, rbtree,
or what not is used behind the scenes?

Jan
Oleksii Kurochko July 30, 2024, 11:57 a.m. UTC | #4
On Tue, 2024-07-30 at 11:17 +0200, Jan Beulich wrote:
> On 30.07.2024 10:44, oleksii.kurochko@gmail.com wrote:
> > On Mon, 2024-07-29 at 17:52 +0200, Jan Beulich wrote:
> > > On 24.07.2024 17:31, Oleksii Kurochko wrote:
> > > 
> > > 
> > > > +/*
> > > > + * Send SFENCE_VMA to a set of target HARTs.
> > > > + *
> > > > + * @param hart_mask mask representing set of target HARTs
> > > > + * @param start virtual address start
> > > > + * @param size virtual address size
> > > 
> > > Are these really virtual addresses, not somehow a bias and a
> > > number
> > > of bits (CPUs) or elements? From the rest of the patch I can't
> > > deduce
> > > what these two parameters express.
> > According to the SBI doc, start is a virtual address:
> > https://github.com/riscv-non-isa/riscv-sbi-doc/blob/master/src/ext-rfence.adoc?plain=1#L44
> 
> Oh, so these are describing the VA range to be flushed. Okay.
> 
> > and hart_mask is:
> > • unsigned long hart_mask is a scalar bit-vector containing hartids
> 
> Biased by hart_mask_base in the actual SBI call.
What word "biased" mean here?

> 
> > > > +
> > > > +static void sbi_cpumask_to_hartmask(const struct cpumask
> > > > *cmask,
> > > > +                 struct cpumask *hmask)
> > > 
> > > I doubt it is valid to re-use struct cpumask for hart maps.
> > Why not? Would it be better to use unsigned long *hmask?
> 
> It's not only better, but imo a requirement. Unless there's a
> guarantee
> by the spec that hart IDs for any subset of harts are sequential and
> starting from 0, you just can't assume they fall in the [0,NR_CPUS)
> or
> really [0,nr_cpu_ids) range. Yet without that you simply can't
> (ab)use
> struct cpumask (and btw it wants to be cpumask_t everywhere).
It is guaranteed that hart ID 0 will be present, but not that hart IDs
are numbered contiguously:
```
Hart IDs might
not necessarily be numbered contiguously in a multiprocessor system,
but at least one hart must
have a hart ID of zero. Hart IDs must be unique within the execution
environment.
```
I then have to rework these things around sbi_cpumask_to_hartmask().

> 
> You may want to take a look at struct physid_mask that we have on x86
> for the equivalent purpose.
Thanks, I will look at it.

> > > > +            continue;
> > > 
> > > 
> > > Can you really sensibly continue in such a case?
> > I think yes; the worst thing we will have is that the "bad" CPU
> > won't be used.
> > But it might be better to switch to BUG_ON(), as if we are inside
> > the "if CONFIG_NR_CPUS <= cpu" branch, it could tell us that
> > something went wrong.
> 
> Again - your problem here isn't the range of "cpu". What you instead
> want to check is ...
> 
> > > > +        }
> > > > +
> > > > +        hart =
> > > > cpuid_to_hartid_map(pcpu_info[cpu].processor_id);
> 
> the hart ID that you get back. If that's INVALID_HARTID, you're in
> fact in trouble.
> 
> > > What does "_map" in the function/macro name signify?
> > It is the interconnection/correlation between Xen's CPU ID and
> > the hart's ID.
> 
> Well. What the function does internally is consult the map. Yet
> that's
> not relevant to any caller? They only care about the translation from
> one ID space to the other? No matter whether a map, radix tree,
> rbtree,
> or what not is used behind the scenes?
Agree, the "_map" in cpuid_to_hartid_map() is useless. Thanks.

~ Oleksii
Jan Beulich July 30, 2024, 12:17 p.m. UTC | #5
On 30.07.2024 13:57, oleksii.kurochko@gmail.com wrote:
> On Tue, 2024-07-30 at 11:17 +0200, Jan Beulich wrote:
>> On 30.07.2024 10:44, oleksii.kurochko@gmail.com wrote:
>>> On Mon, 2024-07-29 at 17:52 +0200, Jan Beulich wrote:
>>>> On 24.07.2024 17:31, Oleksii Kurochko wrote:
>>>>
>>>>
>>>>> +/*
>>>>> + * Send SFENCE_VMA to a set of target HARTs.
>>>>> + *
>>>>> + * @param hart_mask mask representing set of target HARTs
>>>>> + * @param start virtual address start
>>>>> + * @param size virtual address size
>>>>
>>>> Are these really virtual addresses, not somehow a bias and a
>>>> number
>>>> of bits (CPUs) or elements? From the rest of the patch I can't
>>>> deduce
>>>> what these two parameters express.
>>> According to the SBI doc, start is a virtual address:
>>> https://github.com/riscv-non-isa/riscv-sbi-doc/blob/master/src/ext-rfence.adoc?plain=1#L44
>>
>> Oh, so these are describing the VA range to be flushed. Okay.
>>
>>> and hart_mask is:
>>> • unsigned long hart_mask is a scalar bit-vector containing hartids
>>
>> Biased by hart_mask_base in the actual SBI call.
> What word "biased" mean here?

The meaning of e.g. bit 0 in hart_mask will, aiui, be determined by
hart_mask_base. If the latter is non-zero, the bit will not name hart 0.
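
A worked example (numbers made up): with hart_mask_base = 64, bit 0 of hart_mask names hart 64 and bit 2 names hart 66, so a single call can cover harts 64 and 66:

```c
/* Illustrative fragment; start/size describe the VA range to flush. */
unsigned long hbase = 64;                      /* hart_mask_base */
unsigned long hmask = (1UL << 0) | (1UL << 2); /* harts 64 and 66 */

sbi_ecall(SBI_EXT_RFENCE, SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
          hmask, hbase, start, size, 0, 0);
```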

Jan
Oleksii Kurochko Aug. 7, 2024, 11:29 a.m. UTC | #6
On Tue, 2024-07-30 at 11:17 +0200, Jan Beulich wrote:
> > > > +
> > > > +static void sbi_cpumask_to_hartmask(const struct cpumask
> > > > *cmask,
> > > > +                 struct cpumask *hmask)
> > > 
> > > I doubt it is valid to re-use struct cpumask for hart maps.
> > Why not? Would it be better to use unsigned long *hmask?
> 
> It's not only better, but imo a requirement. Unless there's a
> guarantee
> by the spec that hart IDs for any subset of harts are sequential and
> starting from 0, you just can't assume they fall in the [0,NR_CPUS)
> or
> really [0,nr_cpu_ids) range. Yet without that you simply can't
> (ab)use
> struct cpumask (and btw it wants to be cpumask_t everywhere).
> 
> You may want to take a look at struct physid_mask that we have on x86
> for the equivalent purpose.
Could you please explain to me why a factor of 4 is used in the
definition of MAX_APICS, and why 256 (is it the maximum number of APIC
IDs that can be supported in the system?):
  #define MAX_APICS     MAX(256, 4 * NR_CPUS)

~ Oleksii
Jan Beulich Aug. 7, 2024, 11:37 a.m. UTC | #7
On 07.08.2024 13:29, oleksii.kurochko@gmail.com wrote:
> On Tue, 2024-07-30 at 11:17 +0200, Jan Beulich wrote:
>>>>> +
>>>>> +static void sbi_cpumask_to_hartmask(const struct cpumask
>>>>> *cmask,
>>>>> +                 struct cpumask *hmask)
>>>>
>>>> I doubt it is valid to re-use struct cpumask for hart maps.
>>> Why not? Would it be better to use unsigned long *hmask?
>>
>> It's not only better, but imo a requirement. Unless there's a
>> guarantee
>> by the spec that hart IDs for any subset of harts are sequential and
>> starting from 0, you just can't assume they fall in the [0,NR_CPUS)
>> or
>> really [0,nr_cpu_ids) range. Yet without that you simply can't
>> (ab)use
>> struct cpumask (and btw it wants to be cpumask_t everywhere).
>>
>> You may want to take a look at struct physid_mask that we have on x86
>> for the equivalent purpose.
> Could you please explain to me why a factor of 4 is used in the
> definition of MAX_APICS, and why 256 (is it the maximum number of
> APIC IDs that can be supported in the system?):
>   #define MAX_APICS     MAX(256, 4 * NR_CPUS)

Both numbers are pretty arbitrary, I suppose. APIC IDs need not be
contiguous, which is what the multiplication by 4 is trying to cover
for. Plus on old (non-x2APIC) systems APIC IDs are only 8 bits wide,
and allowing for all possible values in that case is cheap enough -
hence the 256.
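
Worked numbers, just to illustrate the bound:

```c
/*
 * NR_CPUS =  32 -> MAX(256,  128) =  256 (the 8-bit APIC ID space dominates)
 * NR_CPUS = 512 -> MAX(256, 2048) = 2048 (4x headroom for sparse IDs)
 */
#define MAX_APICS MAX(256, 4 * NR_CPUS)
```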

Jan
Patch

diff --git a/xen/arch/riscv/include/asm/sbi.h b/xen/arch/riscv/include/asm/sbi.h
index 0e6820a4ed..0985fbb1aa 100644
--- a/xen/arch/riscv/include/asm/sbi.h
+++ b/xen/arch/riscv/include/asm/sbi.h
@@ -14,6 +14,38 @@ 
 
 #define SBI_EXT_0_1_CONSOLE_PUTCHAR		0x1
 
+#define SBI_EXT_BASE                    0x10
+#define SBI_EXT_RFENCE                  0x52464E43
+
+/* SBI function IDs for BASE extension */
+#define SBI_EXT_BASE_GET_SPEC_VERSION   0x0
+#define SBI_EXT_BASE_GET_IMP_ID         0x1
+#define SBI_EXT_BASE_GET_IMP_VERSION    0x2
+#define SBI_EXT_BASE_PROBE_EXT          0x3
+
+/* SBI function IDs for RFENCE extension */
+#define SBI_EXT_RFENCE_REMOTE_FENCE_I           0x0
+#define SBI_EXT_RFENCE_REMOTE_SFENCE_VMA        0x1
+#define SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID   0x2
+#define SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA       0x3
+#define SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID  0x4
+#define SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA       0x5
+#define SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID  0x6
+
+#define SBI_SPEC_VERSION_MAJOR_SHIFT    24
+#define SBI_SPEC_VERSION_MAJOR_MASK     0x7f
+#define SBI_SPEC_VERSION_MINOR_MASK     0xffffff
+
+/* SBI return error codes */
+#define SBI_SUCCESS             0
+#define SBI_ERR_FAILURE         -1
+#define SBI_ERR_NOT_SUPPORTED   -2
+#define SBI_ERR_INVALID_PARAM   -3
+#define SBI_ERR_DENIED          -4
+#define SBI_ERR_INVALID_ADDRESS -5
+
+#define SBI_SPEC_VERSION_DEFAULT        0x1
+
 struct sbiret {
     long error;
     long value;
@@ -31,4 +63,29 @@  struct sbiret sbi_ecall(unsigned long ext, unsigned long fid,
  */
 void sbi_console_putchar(int ch);
 
+/*
+ * Check underlying SBI implementation has RFENCE
+ *
+ * @return 1 for supported AND 0 for not-supported
+ */
+int sbi_has_rfence(void);
+
+/*
+ * Send SFENCE_VMA to a set of target HARTs.
+ *
+ * @param hart_mask mask representing set of target HARTs
+ * @param start virtual address start
+ * @param size virtual address size
+ */
+void sbi_remote_sfence_vma(const unsigned long *hart_mask,
+                           unsigned long start,
+                           unsigned long size);
+
+/*
+ * Initialize SBI library
+ *
+ * @return 0 on success, otherwise negative errno on failure
+ */
+int sbi_init(void);
+
 #endif /* __ASM_RISCV_SBI_H__ */
diff --git a/xen/arch/riscv/sbi.c b/xen/arch/riscv/sbi.c
index 0ae166c861..04d878d1e2 100644
--- a/xen/arch/riscv/sbi.c
+++ b/xen/arch/riscv/sbi.c
@@ -10,8 +10,18 @@ 
  * Copyright (c) 2021-2023 Vates SAS.
  */
 
+#include <xen/cpumask.h>
+#include <xen/errno.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/smp.h>
+
+#include <asm/processor.h>
 #include <asm/sbi.h>
 
+static unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT;
+static unsigned long sbi_fw_id, sbi_fw_version;
+
 struct sbiret sbi_ecall(unsigned long ext, unsigned long fid,
                         unsigned long arg0, unsigned long arg1,
                         unsigned long arg2, unsigned long arg3,
@@ -38,7 +48,253 @@  struct sbiret sbi_ecall(unsigned long ext, unsigned long fid,
     return ret;
 }
 
+static int sbi_err_map_xen_errno(int err)
+{
+    switch ( err )
+    {
+    case SBI_SUCCESS:
+        return 0;
+    case SBI_ERR_DENIED:
+        return -EACCES;
+    case SBI_ERR_INVALID_PARAM:
+        return -EINVAL;
+    case SBI_ERR_INVALID_ADDRESS:
+        return -EFAULT;
+    case SBI_ERR_NOT_SUPPORTED:
+    case SBI_ERR_FAILURE:
+    default:
+        return -EOPNOTSUPP;
+    };
+}
+
 void sbi_console_putchar(int ch)
 {
     sbi_ecall(SBI_EXT_0_1_CONSOLE_PUTCHAR, 0, ch, 0, 0, 0, 0, 0);
 }
+
+static unsigned long sbi_major_version(void)
+{
+    return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) &
+        SBI_SPEC_VERSION_MAJOR_MASK;
+}
+
+static unsigned long sbi_minor_version(void)
+{
+    return sbi_spec_version & SBI_SPEC_VERSION_MINOR_MASK;
+}
+
+static long sbi_ext_base_func(long fid)
+{
+    struct sbiret ret;
+
+    ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0);
+    if (!ret.error)
+        return ret.value;
+    else
+        return ret.error;
+}
+
+static void sbi_cpumask_to_hartmask(const struct cpumask *cmask,
+                 struct cpumask *hmask)
+{
+    u32 cpu;
+    unsigned long hart = INVALID_HARTID;
+
+    if (!cmask || !hmask)
+        return;
+
+    cpumask_clear(hmask);
+    for_each_cpu(cpu, cmask)
+    {
+        if ( CONFIG_NR_CPUS <= cpu )
+        {
+            printk(XENLOG_ERR "SBI: invalid hart=%lu for cpu=%d\n",
+                     hart, cpu);
+            continue;
+        }
+
+        hart = cpuid_to_hartid_map(pcpu_info[cpu].processor_id);
+        cpumask_set_cpu(hart, hmask);
+    }
+}
+
+static int __sbi_rfence_v02_real(unsigned long fid,
+                 unsigned long hmask, unsigned long hbase,
+                 unsigned long start, unsigned long size,
+                 unsigned long arg4)
+{
+    struct sbiret ret = {0};
+    int result = 0;
+
+    switch ( fid )
+    {
+    case SBI_EXT_RFENCE_REMOTE_FENCE_I:
+        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
+                0, 0, 0, 0);
+        break;
+    case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
+        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
+                start, size, 0, 0);
+        break;
+    case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
+        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
+                start, size, arg4, 0);
+        break;
+    case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
+        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
+                start, size, 0, 0);
+        break;
+    case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
+        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
+                start, size, arg4, 0);
+        break;
+    case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
+        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
+                start, size, 0, 0);
+        break;
+    case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
+        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
+                start, size, arg4, 0);
+        break;
+
+    default:
+        printk("%s: unknown function ID [%lu]\n",
+               __func__, fid);
+        result = -EINVAL;
+        break;
+    };
+
+    if ( ret.error )
+    {
+        result = sbi_err_map_xen_errno(ret.error);
+        printk("%s: hbase=%lu hmask=0x%lx failed (error %d)\n",
+               __func__, hbase, hmask, result);
+    }
+
+    return result;
+}
+
+static int __sbi_rfence_v02(unsigned long fid,
+                const unsigned long *hart_mask,
+                unsigned long start, unsigned long size,
+                unsigned long arg4, unsigned long arg5)
+{
+    struct cpumask tmask;
+    unsigned long hart, hmask, hbase;
+    int result;
+
+    if (!hart_mask)
+    {
+        sbi_cpumask_to_hartmask(&cpu_online_map, &tmask);
+        hart_mask = cpumask_bits(&tmask);
+    }
+
+    hmask = hbase = 0;
+    for_each_set_bit(hart, hart_mask, CONFIG_NR_CPUS)
+    {
+        if (hmask && ((hbase + BITS_PER_LONG) <= hart))
+        {
+            result = __sbi_rfence_v02_real(fid, hmask, hbase,
+                            start, size, arg4);
+            if (result)
+                return result;
+            hmask = hbase = 0;
+        }
+
+        if (!hmask)
+            hbase = hart;
+
+        hmask |= 1UL << (hart - hbase);
+    }
+
+    if (hmask)
+    {
+        result = __sbi_rfence_v02_real(fid, hmask, hbase,
+                        start, size, arg4);
+        if (result)
+            return result;
+    }
+
+    return 0;
+}
+
+static int (*__sbi_rfence)(unsigned long fid,
+        const unsigned long *hart_mask,
+        unsigned long start, unsigned long size,
+        unsigned long arg4, unsigned long arg5) = NULL;
+
+void sbi_remote_sfence_vma(const unsigned long *hart_mask,
+                           unsigned long start,
+                           unsigned long size)
+{
+    __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
+             hart_mask, start, size, 0, 0);
+}
+
+#define sbi_get_spec_version()		\
+    sbi_ext_base_func(SBI_EXT_BASE_GET_SPEC_VERSION)
+
+#define sbi_get_firmware_id()		\
+    sbi_ext_base_func(SBI_EXT_BASE_GET_IMP_ID)
+
+#define sbi_get_firmware_version()	\
+    sbi_ext_base_func(SBI_EXT_BASE_GET_IMP_VERSION)
+
+int sbi_probe_extension(long extid)
+{
+    struct sbiret ret;
+
+    ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid,
+            0, 0, 0, 0, 0);
+    if (!ret.error && ret.value)
+        return ret.value;
+
+    return -EOPNOTSUPP;
+}
+
+static int sbi_spec_is_0_1(void)
+{
+    return (sbi_spec_version == SBI_SPEC_VERSION_DEFAULT) ? 1 : 0;
+}
+
+int sbi_has_rfence(void)
+{
+    return __sbi_rfence ? 1 : 0;
+}
+
+int __init sbi_init(void)
+{
+    int ret;
+
+    ret = sbi_get_spec_version();
+    if (ret > 0)
+        sbi_spec_version = ret;
+
+    printk("SBI specification v%lu.%lu detected\n",
+            sbi_major_version(), sbi_minor_version());
+
+    if ( !sbi_spec_is_0_1() )
+    {
+        sbi_fw_id = sbi_get_firmware_id();
+        sbi_fw_version = sbi_get_firmware_version();
+
+        printk("SBI implementation ID=0x%lx Version=0x%lx\n",
+            sbi_fw_id, sbi_fw_version);
+
+        if ( sbi_probe_extension(SBI_EXT_RFENCE) > 0 )
+        {
+            __sbi_rfence = __sbi_rfence_v02;
+            printk("SBI v0.2 RFENCE extension detected\n");
+        }
+    } else {
+        BUG_ON("Ooops. SBI spec veriosn 0.1 detected. Need to add support");
+    }
+
+    if ( !sbi_has_rfence() )
+    {
+        BUG_ON("At the moment flush_xen_tlb_range_va() uses SBI rfence to "
+               "flush TLB for all CPUS!");
+    }
+
+    return 0;
+}
diff --git a/xen/arch/riscv/setup.c b/xen/arch/riscv/setup.c
index 37b234360c..497e273081 100644
--- a/xen/arch/riscv/setup.c
+++ b/xen/arch/riscv/setup.c
@@ -8,6 +8,7 @@ 
 #include <public/version.h>
 
 #include <asm/early_printk.h>
+#include <asm/sbi.h>
 #include <asm/traps.h>
 
 void arch_get_xen_caps(xen_capabilities_info_t *info)
@@ -55,6 +56,8 @@  void __init noreturn start_xen(unsigned long bootcpu_id,
 
     trap_init();
 
+    sbi_init();
+
 #ifdef CONFIG_SELF_TESTS
     test_macros_from_bug_h();
 #endif