
[v4,5/7] xen/riscv: introduce and initialize SBI RFENCE extension

Message ID 1a863d06866f86761d23e6da3f4a6ee58c474473.1723214540.git.oleksii.kurochko@gmail.com (mailing list archive)
State Superseded
Series RISCV device tree mapping

Commit Message

Oleksii Kurochko Aug. 9, 2024, 4:19 p.m. UTC
Introduce functions to work with the SBI RFENCE extension for issuing
various fence operations to remote CPUs.

Add the sbi_init() function along with auxiliary functions and macro
definitions for proper initialization and checking the availability of
SBI extensions. Currently, this is implemented only for RFENCE.

Introduce sbi_remote_sfence_vma() to send SFENCE_VMA instructions to
a set of target HARTs. This will support the implementation of
flush_xen_tlb_range_va().
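
A rough sketch of the intended use (flush_xen_tlb_range_va() itself is not
part of this patch, so its exact shape here is only an assumption):

```c
/* Illustrative only: how the new helper is expected to be used. */
static inline void flush_xen_tlb_range_va(unsigned long va, unsigned long size)
{
    /* A NULL cpu_mask means "all harts" (see __sbi_rfence_v02()). */
    sbi_remote_sfence_vma(NULL, va, size);
}
```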

Integrate __sbi_rfence_v02 from Linux kernel 6.6.0-rc4 with minimal
modifications:
 - Adapt to Xen code style.
 - Use cpuid_to_hartid() instead of cpuid_to_hartid_map[].
 - Update BIT(...) to BIT(..., UL).
 - Rename __sbi_rfence_v02_call to __sbi_rfence_v02_real and
   remove the unused arg5.
 - Handle NULL cpu_mask to execute rfence on all CPUs by calling
   __sbi_rfence_v02_real(..., 0UL, -1UL, ...) instead of creating hmask.

Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in V4:
 - update the commit message.
 - code style fixes
 - update return type of sbi_has_rfence() from int to bool and drop
   conditional operator inside implementation.
 - Update mapping of SBI_ERR_FAILURE in sbi_err_map_xen_errno().
 - Update return type of sbi_spec_is_0_1() and drop conditional operator
   inside implementation.
 - s/0x%lx/%#lx
 - update the comment above the declaration of sbi_remote_sfence_vma() with
   a more detailed explanation of what the function does.
 - update prototype of sbi_remote_sfence_vma(). Now it receives cpumask_t
   and returns int.
 - refactor __sbi_rfence_v02() taken from the Linux kernel so that it takes
   into account the case where hart IDs could belong to different hbase
   windows, e.g. hart IDs such as 0, 3, 65, 2, or unsorted hart IDs such as
   0, 3, 1, 2.
 - drop sbi_cpumask_to_hartmask() as it is not needed anymore
 - Update the prototype of sbi_remote_sfence_vma() and its implementation
   to reflect the fact that it returns 'int'.
 - s/flush_xen_tlb_one_local/flush_tlb_one_local
---
Changes in V3:
 - new patch.
---
 xen/arch/riscv/include/asm/sbi.h |  64 ++++++++
 xen/arch/riscv/sbi.c             | 252 ++++++++++++++++++++++++++++++-
 xen/arch/riscv/setup.c           |   3 +
 3 files changed, 318 insertions(+), 1 deletion(-)

Comments

Jan Beulich Aug. 13, 2024, 9:34 a.m. UTC | #1
On 09.08.2024 18:19, Oleksii Kurochko wrote:
> @@ -31,4 +65,34 @@ struct sbiret sbi_ecall(unsigned long ext, unsigned long fid,
>   */
>  void sbi_console_putchar(int ch);
>  
> +/*
> + * Check underlying SBI implementation has RFENCE
> + *
> + * @return 1 for supported AND 0 for not-supported
> + */
> +bool sbi_has_rfence(void);

Nit: With bool return type, true and false in the comment?

> +/*
> + * Instructs the remote harts to execute one or more SFENCE.VMA
> + * instructions, covering the range of virtual addresses between
> + * start_addr and start_addr + size.

It's relatively clear what is meant here, but maybe still better to
make things explicit by using a mathematical range:
[start_addr, start_addr + size).

> + * Returns 0 if IPI was sent to all the targeted harts successfully
> + * or negative value if start_addr or size is not valid.
> + *
> + * @hart_mask a cpu mask containing all the target harts.
> + * @param start virtual address start
> + * @param size virtual address range size
> + */
> +int sbi_remote_sfence_vma(cpumask_t *cpu_mask,

pointer-to-const?

> --- a/xen/arch/riscv/sbi.c
> +++ b/xen/arch/riscv/sbi.c
> @@ -7,11 +7,23 @@
>   * Modified by Bobby Eshleman (bobby.eshleman@gmail.com).
>   *
>   * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> - * Copyright (c) 2021-2023 Vates SAS.
> + * Copyright (c) 2021-2024 Vates SAS.
>   */
>  
> +#include <xen/compiler.h>
> +#include <xen/const.h>
> +#include <xen/cpumask.h>
> +#include <xen/errno.h>
> +#include <xen/init.h>
> +#include <xen/lib.h>
> +#include <xen/smp.h>
> +
> +#include <asm/processor.h>
>  #include <asm/sbi.h>
>  
> +static unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT;
> +static unsigned long sbi_fw_id, sbi_fw_version;

__ro_after_init for perhaps all three of them?

Considering SBI_SPEC_VERSION_{MAJOR,MINOR}_MASK, at least the first
of them also doesn't need to be unsigned long, but could be unsigned
int?
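
i.e. something along these lines (just a sketch; attribute placement and
types are illustrative):

```c
/* Sketch of the suggestion. */
static unsigned int __ro_after_init sbi_spec_version = SBI_SPEC_VERSION_DEFAULT;
static unsigned long __ro_after_init sbi_fw_id, sbi_fw_version;
```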

> @@ -38,7 +50,245 @@ struct sbiret sbi_ecall(unsigned long ext, unsigned long fid,
>      return ret;
>  }
>  
> +static int sbi_err_map_xen_errno(int err)
> +{
> +    switch ( err )
> +    {
> +    case SBI_SUCCESS:
> +        return 0;
> +    case SBI_ERR_DENIED:
> +        return -EACCES;
> +    case SBI_ERR_INVALID_PARAM:
> +        return -EINVAL;
> +    case SBI_ERR_INVALID_ADDRESS:
> +        return -EFAULT;
> +    case SBI_ERR_NOT_SUPPORTED:
> +        return -EOPNOTSUPP;
> +    case SBI_ERR_FAILURE:
> +        fallthrough;
> +    default:
> +        return -ENOSYS;
> +    };
> +}
> +
>  void sbi_console_putchar(int ch)
>  {
>      sbi_ecall(SBI_EXT_0_1_CONSOLE_PUTCHAR, 0, ch, 0, 0, 0, 0, 0);
>  }
> +
> +static unsigned long sbi_major_version(void)
> +{
> +    return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) &
> +        SBI_SPEC_VERSION_MAJOR_MASK;
> +}

Can you use MASK_EXTR() here, allowing to even drop the separate
SBI_SPEC_VERSION_MAJOR_SHIFT?

> +static unsigned long sbi_minor_version(void)
> +{
> +    return sbi_spec_version & SBI_SPEC_VERSION_MINOR_MASK;
> +}

For consistency here then, too.

> +static long sbi_ext_base_func(long fid)
> +{
> +    struct sbiret ret;
> +
> +    ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0);
> +    if ( !ret.error )
> +        return ret.value;
> +    else
> +        return ret.error;

With the folding of two distinct values, how is the caller going to
tell failure from success?

> +}
> +
> +static int __sbi_rfence_v02_real(unsigned long fid,

Are the double leading underscores really necessary here? (Same again
further down.)

> +                                 unsigned long hmask, unsigned long hbase,
> +                                 unsigned long start, unsigned long size,
> +                                 unsigned long arg4)
> +{
> +    struct sbiret ret = {0};
> +    int result = 0;
> +
> +    switch ( fid )
> +    {
> +    case SBI_EXT_RFENCE_REMOTE_FENCE_I:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                0, 0, 0, 0);
> +        break;
> +    case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                start, size, 0, 0);
> +        break;
> +    case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                start, size, arg4, 0);
> +        break;
> +    case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                start, size, 0, 0);
> +        break;

How is e.g. this different from SBI_EXT_RFENCE_REMOTE_SFENCE_VMA
handling? To ease recognition, I think you want to fold cases with
identical handling.
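
Folding the identical cases could look roughly like this (a sketch, not
taken from the patch):

```c
    case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
    case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
    case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
                        start, size, 0, 0);
        break;

    case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
    case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
    case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
                        start, size, arg4, 0);
        break;
```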

> +    case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                start, size, arg4, 0);
> +        break;
> +    case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                start, size, 0, 0);
> +        break;
> +    case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                start, size, arg4, 0);
> +        break;
> +
> +    default:

Nit: Just like you have it here, blank lines please between any non-
fallthrough case blocks.

> +        printk("%s: unknown function ID [%lu]\n",
> +               __func__, fid);
> +        result = -EINVAL;
> +        break;
> +    };
> +
> +    if ( ret.error )
> +    {
> +        result = sbi_err_map_xen_errno(ret.error);
> +        printk("%s: hbase=%lu hmask=%#lx failed (error %d)\n",
> +               __func__, hbase, hmask, result);
> +    }
> +
> +    return result;
> +}
> +
> +static int __sbi_rfence_v02(unsigned long fid,

This has its address taken for storing in a function pointer. I'd like
to suggest that from the very beginning you mark such functions cf_check.

> +                            const cpumask_t *cpu_mask,
> +                            unsigned long start, unsigned long size,
> +                            unsigned long arg4, unsigned long arg5)
> +{
> +    unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0;
> +    int result;
> +
> +    /*
> +     * hart_mask_base can be set to -1 to indicate that hart_mask can be
> +     * ignored and all available harts must be considered.
> +     */
> +    if ( !cpu_mask )
> +        return __sbi_rfence_v02_real(fid, 0UL, -1UL, start, size, arg4);
> +
> +    for_each_cpu( cpuid, cpu_mask )

Nit: Either

    for_each_cpu ( cpuid, cpu_mask )

(if you deem for_each_cpu a pseudo-keyword) or

    for_each_cpu(cpuid, cpu_mask)

(if you don't).

> +    {
> +        hartid = cpuid_to_hartid(cpuid);
> +        if ( hmask )
> +        {
> +            if ( hartid + BITS_PER_LONG <= htop ||
> +                 hbase + BITS_PER_LONG <= hartid )
> +            {
> +                result = __sbi_rfence_v02_real(fid, hmask, hbase,
> +                                               start, size, arg4);
> +                if ( result )
> +                    return result;
> +                hmask = 0;
> +            }
> +            else if ( hartid < hbase )
> +            {
> +                /* shift the mask to fit lower hartid */
> +                hmask <<= hbase - hartid;
> +                hbase = hartid;
> +            }
> +        }
> +
> +        if ( !hmask )
> +        {
> +            hbase = hartid;
> +            htop = hartid;
> +        } else if ( hartid > htop )

Nit: Closing brace on its own line please.

> +            htop = hartid;
> +
> +        hmask |= BIT(hartid - hbase, UL);
> +    }

I can see how you will successfully batch for certain configurations
this way. When hart ID mapping is something like (0,64,1,65,2,66,...)
you won't batch at all though. Which may be fine at least for now, but
then I think a comment wants to state what is or is not intended to be
covered. It is only this way that people will know that certain
shortcomings aren't bugs.
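
Something along these lines perhaps (wording purely illustrative):

```c
/*
 * Harts are batched into a single SBI call as long as their IDs fall
 * into a BITS_PER_LONG wide window based at hbase.  For interleaved ID
 * layouts such as (0, 64, 1, 65, 2, 66, ...) no batching happens and
 * one call per hart is made; this is accepted for now.
 */
```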

> +    if ( hmask )
> +    {
> +        result = __sbi_rfence_v02_real(fid, hmask, hbase,
> +                                       start, size, arg4);
> +        if ( result )
> +            return result;
> +    }
> +
> +    return 0;
> +}
> +
> +static int (*__sbi_rfence)(unsigned long fid,
> +                           const cpumask_t *cpu_mask,
> +                           unsigned long start, unsigned long size,
> +                           unsigned long arg4, unsigned long arg5) = NULL;

__ro_after_init and no need for an initializer.

> +int sbi_remote_sfence_vma(cpumask_t *cpu_mask,
> +                          unsigned long start_addr,
> +                          unsigned long size)
> +{
> +    return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
> +                        cpu_mask, start_addr, size, 0, 0);

No check (not even an assertion) here for __sbi_rfence still being NULL?

> +int __init sbi_init(void)
> +{
> +    int ret;
> +
> +    ret = sbi_get_spec_version();
> +    if ( ret > 0 )
> +        sbi_spec_version = ret;

Truncation from long to int is not an issue here?

> +    printk("SBI specification v%lu.%lu detected\n",
> +            sbi_major_version(), sbi_minor_version());
> +
> +    if ( !sbi_spec_is_0_1() )
> +    {
> +        sbi_fw_id = sbi_get_firmware_id();
> +        sbi_fw_version = sbi_get_firmware_version();

These cannot return errors?

> +        printk("SBI implementation ID=%#lx Version=%#lx\n",
> +            sbi_fw_id, sbi_fw_version);
> +
> +        if ( sbi_probe_extension(SBI_EXT_RFENCE) > 0 )
> +        {
> +            __sbi_rfence = __sbi_rfence_v02;
> +            printk("SBI v0.2 RFENCE extension detected\n");
> +        }
> +    } else {

Nit: Style (and I think I said so before).

> +        BUG_ON("Ooops. SBI spec veriosn 0.1 detected. Need to add support");

Besides the typo (version) here and ...

> +    }
> +
> +    if ( !sbi_has_rfence() )
> +    {
> +        BUG_ON("At the moment flush_xen_tlb_range_va() uses SBI rfence to "
> +               "flush TLB for all CPUS!");

... here better panic()? A call trace doesn't really add any value for these.
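
E.g. (a sketch):

```c
    if ( sbi_spec_is_0_1() )
        panic("SBI spec version 0.1 detected: not supported\n");

    if ( !sbi_has_rfence() )
        panic("flush_xen_tlb_range_va() requires the SBI RFENCE extension\n");
```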

Jan
Oleksii Kurochko Aug. 14, 2024, 3:41 p.m. UTC | #2
On Tue, 2024-08-13 at 11:34 +0200, Jan Beulich wrote:
> On 09.08.2024 18:19, Oleksii Kurochko wrote:
> 
> > --- a/xen/arch/riscv/sbi.c
> > +++ b/xen/arch/riscv/sbi.c
> > @@ -7,11 +7,23 @@
> >   * Modified by Bobby Eshleman (bobby.eshleman@gmail.com).
> >   *
> >   * Copyright (c) 2019 Western Digital Corporation or its
> > affiliates.
> > - * Copyright (c) 2021-2023 Vates SAS.
> > + * Copyright (c) 2021-2024 Vates SAS.
> >   */
> >  
> > +#include <xen/compiler.h>
> > +#include <xen/const.h>
> > +#include <xen/cpumask.h>
> > +#include <xen/errno.h>
> > +#include <xen/init.h>
> > +#include <xen/lib.h>
> > +#include <xen/smp.h>
> > +
> > +#include <asm/processor.h>
> >  #include <asm/sbi.h>
> >  
> > +static unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT;
> > +static unsigned long sbi_fw_id, sbi_fw_version;
> 
> __ro_after_init for perhaps all three of them?
> 
> Considering SBI_SPEC_VERSION_{MAJOR,MINOR}_MASK, at least the first
> of them also doesn't need to be unsigned long, but could be unsigned
> int?
It could be unsigned int; this part is declared as unsigned long in the
Linux kernel, so it was taken as is. But based on the possible values for
these variables we can go with unsigned long.

> 
> > @@ -38,7 +50,245 @@ struct sbiret sbi_ecall(unsigned long ext,
> > unsigned long fid,
> >      return ret;
> >  }
> >  
> > +static int sbi_err_map_xen_errno(int err)
> > +{
> > +    switch ( err )
> > +    {
> > +    case SBI_SUCCESS:
> > +        return 0;
> > +    case SBI_ERR_DENIED:
> > +        return -EACCES;
> > +    case SBI_ERR_INVALID_PARAM:
> > +        return -EINVAL;
> > +    case SBI_ERR_INVALID_ADDRESS:
> > +        return -EFAULT;
> > +    case SBI_ERR_NOT_SUPPORTED:
> > +        return -EOPNOTSUPP;
> > +    case SBI_ERR_FAILURE:
> > +        fallthrough;
> > +    default:
> > +        return -ENOSYS;
> > +    };
> > +}
> > +
> >  void sbi_console_putchar(int ch)
> >  {
> >      sbi_ecall(SBI_EXT_0_1_CONSOLE_PUTCHAR, 0, ch, 0, 0, 0, 0, 0);
> >  }
> > +
> > +static unsigned long sbi_major_version(void)
> > +{
> > +    return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) &
> > +        SBI_SPEC_VERSION_MAJOR_MASK;
> > +}
> 
> Can you use MASK_EXTR() here, allowing to even drop the separate
> SBI_SPEC_VERSION_MAJOR_SHIFT?
I am not sure that:
(sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) & 
SBI_SPEC_VERSION_MAJOR_MASK == MASK_EXTR(sbi_spec_version,
SBI_SPEC_VERSION_MAJOR_MASK)

> 
> > +static unsigned long sbi_minor_version(void)
> > +{
> > +    return sbi_spec_version & SBI_SPEC_VERSION_MINOR_MASK;
> > +}
> 
> For consistency here then, too.
Here we can use MASK_EXTR.

> 
> > +static long sbi_ext_base_func(long fid)
> > +{
> > +    struct sbiret ret;
> > +
> > +    ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0);
> > +    if ( !ret.error )
> > +        return ret.value;
> > +    else
> > +        return ret.error;
> 
> With the folding of two distinct values, how is the caller going to
> tell failure from success?
By checking if the return value is < 0.
According to the SBI spec [
https://github.com/riscv-non-isa/riscv-sbi-doc/blob/master/src/binary-encoding.adoc?plain=1#L32
], ret.error can be either 0 (which means success) or something < 0 if it
was a failure.
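
A minimal sketch of the caller-side convention this implies (illustrative
fragment; in the patch the caller is sbi_init()):

```c
    long v = sbi_ext_base_func(SBI_EXT_BASE_GET_SPEC_VERSION);

    if ( v < 0 )
        return sbi_err_map_xen_errno(v);  /* folded ret.error, always < 0 */

    sbi_spec_version = v;                 /* folded ret.value, assumed >= 0 */
```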

> 
> > +}
> > +
> > +static int __sbi_rfence_v02_real(unsigned long fid,
> 
> Are the double leading underscores really necessary here? (Same again
> further down.)
Not at all, I'll drop it.

> 
> > +                                 unsigned long hmask, unsigned
> > long hbase,
> > +                                 unsigned long start, unsigned
> > long size,
> > +                                 unsigned long arg4)
> > +{
> > +    struct sbiret ret = {0};
> > +    int result = 0;
> > +
> > +    switch ( fid )
> > +    {
> > +    case SBI_EXT_RFENCE_REMOTE_FENCE_I:
> > +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> > +                0, 0, 0, 0);
> > +        break;
> > +    case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
> > +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> > +                start, size, 0, 0);
> > +        break;
> > +    case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
> > +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> > +                start, size, arg4, 0);
> > +        break;
> > +    case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
> > +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> > +                start, size, 0, 0);
> > +        break;
> 
> How is e.g. this different from SBI_EXT_RFENCE_REMOTE_SFENCE_VMA
> handling? To ease recognition, I think you want to fold cases with
> identical handling.
Agree, it could be folded for some cases.

> 
> > +int sbi_remote_sfence_vma(cpumask_t *cpu_mask,
> > +                          unsigned long start_addr,
> > +                          unsigned long size)
> > +{
> > +    return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
> > +                        cpu_mask, start_addr, size, 0, 0);
> 
> No check (not even an assertion) here for __sbi_rfence still being
> NULL?
I thought that it would be enough to have BUG_ON() in sbi_init(), but
then it would probably be better to update the message inside BUG_ON():
    int __init sbi_init(void)
    {
    ....

        if ( !sbi_has_rfence() )
        {
            BUG_ON("At the moment flush_xen_tlb_range_va() uses SBI rfence to "
                   "flush TLB for all CPUS!");
        }

        return 0;
    }
> 
> > +int __init sbi_init(void)
> > +{
> > +    int ret;
> > +
> > +    ret = sbi_get_spec_version();
> > +    if ( ret > 0 )
> > +        sbi_spec_version = ret;
> 
> Truncation from long to int is not an issue here?
No, spec_version doesn't have such big numbers, the latest version is
v2.0.

> 
> > +    printk("SBI specification v%lu.%lu detected\n",
> > +            sbi_major_version(), sbi_minor_version());
> > +
> > +    if ( !sbi_spec_is_0_1() )
> > +    {
> > +        sbi_fw_id = sbi_get_firmware_id();
> > +        sbi_fw_version = sbi_get_firmware_version();
> 
> These cannot return errors?
At least, the SBI spec doesn't say so directly:
https://github.com/riscv-non-isa/riscv-sbi-doc/blob/master/src/ext-base.adoc?plain=1#L31

~ Oleksii
Jan Beulich Aug. 14, 2024, 3:53 p.m. UTC | #3
On 14.08.2024 17:41, oleksii.kurochko@gmail.com wrote:
> On Tue, 2024-08-13 at 11:34 +0200, Jan Beulich wrote:
>> On 09.08.2024 18:19, Oleksii Kurochko wrote:
>>> +static unsigned long sbi_major_version(void)
>>> +{
>>> +    return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) &
>>> +        SBI_SPEC_VERSION_MAJOR_MASK;
>>> +}
>>
>> Can you use MASK_EXTR() here, allowing to even drop the separate
>> SBI_SPEC_VERSION_MAJOR_SHIFT?
> I am not sure that:
> (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) & 
> SBI_SPEC_VERSION_MAJOR_MASK == MASK_EXTR(sbi_spec_version,
> SBI_SPEC_VERSION_MAJOR_MASK)

How come you're not sure? That's precisely what MASK_EXTR() is for.

>>> +static long sbi_ext_base_func(long fid)
>>> +{
>>> +    struct sbiret ret;
>>> +
>>> +    ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0);
>>> +    if ( !ret.error )
>>> +        return ret.value;
>>> +    else
>>> +        return ret.error;
>>
>> With the folding of two distinct values, how is the caller going to
>> tell failure from success?
> By checking if the return value is < 0.
> According to the SBI spec [
> https://github.com/riscv-non-isa/riscv-sbi-doc/blob/master/src/binary-encoding.adoc?plain=1#L32
> ] ret.error can be or 0 ( which means success ) or something < 0 if it
> was a failure.

Right. And what if ret.error was 0, but ret.value was negative?

>>> +int sbi_remote_sfence_vma(cpumask_t *cpu_mask,
>>> +                          unsigned long start_addr,
>>> +                          unsigned long size)
>>> +{
>>> +    return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
>>> +                        cpu_mask, start_addr, size, 0, 0);
>>
>> No check (not even an assertion) here for __sbi_rfence still being
>> NULL?
> I thought that it would be enough to have BUG_ON() in sbi_init() but
> then probably would be better to update the message inside BUG_ON():
>    int __init sbi_init(void)
>    {
>    ....
>    
>        if ( !sbi_has_rfence() )
>        {
>            BUG_ON("At the moment flush_xen_tlb_range_va() uses SBI rfence
>    to "
>                   "flush TLB for all CPUS!");

I never really liked this kind of BUG_ON(). I leave it uncommented in
code which clearly is only temporary. Plus imo such BUG_ON()s want to
be next to where the risk is, i.e. in this case ahead of the possible
NULL deref. Then again, without PV guests and without anything mapped
at VA 0, you'll crash cleanly anyway. So perhaps my request to add a
check went too far.

Jan
Oleksii Kurochko Aug. 15, 2024, 10 a.m. UTC | #4
On Wed, 2024-08-14 at 17:53 +0200, Jan Beulich wrote:
> On 14.08.2024 17:41, oleksii.kurochko@gmail.com wrote:
> > On Tue, 2024-08-13 at 11:34 +0200, Jan Beulich wrote:
> > > On 09.08.2024 18:19, Oleksii Kurochko wrote:
> > > > +static unsigned long sbi_major_version(void)
> > > > +{
> > > > +    return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT)
> > > > &
> > > > +        SBI_SPEC_VERSION_MAJOR_MASK;
> > > > +}
> > > 
> > > Can you use MASK_EXTR() here, allowing to even drop the separate
> > > SBI_SPEC_VERSION_MAJOR_SHIFT?
> > I am not sure that:
> > (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) & 
> > SBI_SPEC_VERSION_MAJOR_MASK == MASK_EXTR(sbi_spec_version,
> > SBI_SPEC_VERSION_MAJOR_MASK)
> 
> How come you're not sure? That's precisely what MASK_EXTR() is for.
Because the order matters here: the shift should happen first, then
SBI_SPEC_VERSION_MAJOR_MASK should be applied, as
SBI_SPEC_VERSION_MAJOR_MASK is currently defined as 0x7f. If
SBI_SPEC_VERSION_MAJOR_MASK were set to 0x7F000000 then it would work.
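
A minimal sketch of that rework (assuming MASK_EXTR() divides by the
mask's lowest set bit, as in Xen; the exact definitions are illustrative):

```c
#define SBI_SPEC_VERSION_MAJOR_MASK     0x7f000000UL
#define SBI_SPEC_VERSION_MINOR_MASK     0x00ffffffUL

static unsigned long sbi_major_version(void)
{
    return MASK_EXTR(sbi_spec_version, SBI_SPEC_VERSION_MAJOR_MASK);
}

static unsigned long sbi_minor_version(void)
{
    return MASK_EXTR(sbi_spec_version, SBI_SPEC_VERSION_MINOR_MASK);
}
```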

> 
> > > > +static long sbi_ext_base_func(long fid)
> > > > +{
> > > > +    struct sbiret ret;
> > > > +
> > > > +    ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0);
> > > > +    if ( !ret.error )
> > > > +        return ret.value;
> > > > +    else
> > > > +        return ret.error;
> > > 
> > > With the folding of two distinct values, how is the caller going
> > > to
> > > tell failure from success?
> > By checking if the return value is < 0.
> > According to the SBI spec [
> > https://github.com/riscv-non-isa/riscv-sbi-doc/blob/master/src/binary-encoding.adoc?plain=1#L32
> > ] ret.error can be or 0 ( which means success ) or something < 0 if
> > it
> > was a failure.
> 
> Right. And what if ret.error was 0, but ret.value was negative?
I wasn't able to find a case in the SBI spec where sbiret.value could
be negative. Unfortunately, the spec does not specify the possible
values of sbiret.value, but based on the description of the SBI
function, it only returns a negative value in the case of an error.
Otherwise, ret.value is always greater than or equal to 0.

> 
> > > > +int sbi_remote_sfence_vma(cpumask_t *cpu_mask,
> > > > +                          unsigned long start_addr,
> > > > +                          unsigned long size)
> > > > +{
> > > > +    return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
> > > > +                        cpu_mask, start_addr, size, 0, 0);
> > > 
> > > No check (not even an assertion) here for __sbi_rfence still
> > > being
> > > NULL?
> > I thought that it would be enough to have BUG_ON() in sbi_init()
> > but
> > then probably would be better to update the message inside
> > BUG_ON():
> >    int __init sbi_init(void)
> >    {
> >    ....
> >    
> >        if ( !sbi_has_rfence() )
> >        {
> >            BUG_ON("At the moment flush_xen_tlb_range_va() uses SBI
> > rfence
> >    to "
> >                   "flush TLB for all CPUS!");
> 
> I never really liked this kind of BUG_ON(). I leave it uncommented in
> code which clearly is only temporary. Plus imo such BUG_ON()s want to
> be next to where the risk is, i.e. in this case ahead of the possible
> NULL deref. Then again, without PV guests and without anything mapped
> at VA 0, you'll crash cleanly anyway. So perhaps my request to add a
> check went too far.
But then there is no big difference whether a BUG_ON() or an ASSERT()
is present inside sbi_remote_sfence_vma(). Even with neither check
present it will crash anyway.

And the other reason why I thought it would be better to have BUG_ON()
in sbi_init() is that only one BUG_ON() is needed instead of having an
ASSERT() in each function which will use __sbi_rfence().
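
For comparison, the per-call-site variant would be (sketch):

```c
int sbi_remote_sfence_vma(cpumask_t *cpu_mask,
                          unsigned long start_addr,
                          unsigned long size)
{
    ASSERT(__sbi_rfence);

    return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
                        cpu_mask, start_addr, size, 0, 0);
}
```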

And according to coding-best-practices.pandoc:
```
 * Programmers can use ASSERT(), which will cause the check to be
   executed in DEBUG builds, and cause the hypervisor to crash if it's
   violated
 * Programmers can use BUG_ON(), which will cause the check to be
   executed in both DEBUG and non-DEBUG builds, and cause the
hypervisor
   to crash if it's violated.
```
The only difference between them is whether the check is active only in
DEBUG builds or in both DEBUG and non-DEBUG builds.

~ Oleksii
Jan Beulich Aug. 15, 2024, 10:32 a.m. UTC | #5
On 15.08.2024 12:00, oleksii.kurochko@gmail.com wrote:
> On Wed, 2024-08-14 at 17:53 +0200, Jan Beulich wrote:
>> On 14.08.2024 17:41, oleksii.kurochko@gmail.com wrote:
>>> On Tue, 2024-08-13 at 11:34 +0200, Jan Beulich wrote:
>>>> On 09.08.2024 18:19, Oleksii Kurochko wrote:
>>>>> +static unsigned long sbi_major_version(void)
>>>>> +{
>>>>> +    return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT)
>>>>> &
>>>>> +        SBI_SPEC_VERSION_MAJOR_MASK;
>>>>> +}
>>>>
>>>> Can you use MASK_EXTR() here, allowing to even drop the separate
>>>> SBI_SPEC_VERSION_MAJOR_SHIFT?
>>> I am not sure that:
>>> (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) & 
>>> SBI_SPEC_VERSION_MAJOR_MASK == MASK_EXTR(sbi_spec_version,
>>> SBI_SPEC_VERSION_MAJOR_MASK)
>>
>> How come you're not sure? That's precisely what MASK_EXTR() is for.
> As the order matter here. First should be shift then applying
> SBI_SPEC_VERSION_MAJOR_MASK, as SBI_SPEC_VERSION_MAJOR_MASK is defined
> now as 7f. If SBI_SPEC_VERSION_MAJOR_MASK set to 0x7F000000 then it
> will work.

The assumption was of course that you'd adjust SBI_SPEC_VERSION_MAJOR_MASK.
That's the underlying idea after all: When the mask is shifted to its
appropriate position, the shift amount needed can be calculated from it.
Hence the lack of a need for SBI_SPEC_VERSION_MAJOR_SHIFT (and the like).

Jan
Oleksii Kurochko Aug. 16, 2024, 12:06 p.m. UTC | #6
On Tue, 2024-08-13 at 11:34 +0200, Jan Beulich wrote:
> >   
> > +static unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT;
> > +static unsigned long sbi_fw_id, sbi_fw_version;
> 
> __ro_after_init for perhaps all three of them?
> 
> Considering SBI_SPEC_VERSION_{MAJOR,MINOR}_MASK, at least the first
> of them also doesn't need to be unsigned long, but could be unsigned
> int?

sbi_spec_version can really be unsigned int as, according to the spec,
only 32 bits are used:
```
   struct sbiret sbi_get_spec_version(void);
   Returns the current SBI specification version. This function must
   always succeed. The minor number of the SBI specification is encoded
   in the low 24 bits, with the major number encoded in the next 7 bits.
   Bit 31 must be 0 and is reserved for future expansion.
```

For sbi_fw_id and sbi_fw_version the same thing is not mentioned, so we
can just assume (despite the fact that these values are currently very
small and unlikely to be bigger than UINT_MAX) that they will always fit
in uint32_t.

But I think it would be better to leave unsigned long for all of them
as, according to the specification, these functions return an sbiret
structure which consists of 2 longs (error and value), and it is a good
idea to follow the specification.

~ Oleksii
Jan Beulich Aug. 16, 2024, 12:32 p.m. UTC | #7
On 16.08.2024 14:06, oleksii.kurochko@gmail.com wrote:
> On Tue, 2024-08-13 at 11:34 +0200, Jan Beulich wrote:
>>>   
>>> +static unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT;
>>> +static unsigned long sbi_fw_id, sbi_fw_version;
>>
>> __ro_after_init for perhaps all three of them?
>>
>> Considering SBI_SPEC_VERSION_{MAJOR,MINOR}_MASK, at least the first
>> of them also doesn't need to be unsigned long, but could be unsigned
>> int?
> 
> sbi_spec_version can be really unsigned int as according to the spec
> only 32 bits are used:
> ```
>    struct sbiret sbi_get_spec_version(void);
>    Returns the current SBI specification version. This function must
>    always succeed. The minor number
>    of the SBI specification is encoded in the low 24 bits, with the
>    major number encoded in the next 7
>    bits. Bit 31 must be 0 and is reserved for future expansion.
> ```
> 
> For sbi_fw_id, sbi_fw_version it is not mentioned the same thing, so we
> can just assume ( despite of the fact that now this values are very
> small and it is unlikely to be bigger the UINT_MAX ) that it will be
> always fit in uint32_t.
> 
> But I think it would be better to leave unsigned long for everyone as
> according to the specification this functions returns sbiret structure
> which consist of 2 longs ( error and value ) and it is good idea to
> follow the specification.

Okay with me (for these two) then.

Jan

Patch

diff --git a/xen/arch/riscv/include/asm/sbi.h b/xen/arch/riscv/include/asm/sbi.h
index 0e6820a4ed..931613646d 100644
--- a/xen/arch/riscv/include/asm/sbi.h
+++ b/xen/arch/riscv/include/asm/sbi.h
@@ -12,8 +12,42 @@ 
 #ifndef __ASM_RISCV_SBI_H__
 #define __ASM_RISCV_SBI_H__
 
+#include <xen/cpumask.h>
+
 #define SBI_EXT_0_1_CONSOLE_PUTCHAR		0x1
 
+#define SBI_EXT_BASE                    0x10
+#define SBI_EXT_RFENCE                  0x52464E43
+
+/* SBI function IDs for BASE extension */
+#define SBI_EXT_BASE_GET_SPEC_VERSION   0x0
+#define SBI_EXT_BASE_GET_IMP_ID         0x1
+#define SBI_EXT_BASE_GET_IMP_VERSION    0x2
+#define SBI_EXT_BASE_PROBE_EXT          0x3
+
+/* SBI function IDs for RFENCE extension */
+#define SBI_EXT_RFENCE_REMOTE_FENCE_I           0x0
+#define SBI_EXT_RFENCE_REMOTE_SFENCE_VMA        0x1
+#define SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID   0x2
+#define SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA       0x3
+#define SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID  0x4
+#define SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA       0x5
+#define SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID  0x6
+
+#define SBI_SPEC_VERSION_MAJOR_SHIFT    24
+#define SBI_SPEC_VERSION_MAJOR_MASK     0x7f
+#define SBI_SPEC_VERSION_MINOR_MASK     0xffffff
+
+/* SBI return error codes */
+#define SBI_SUCCESS             0
+#define SBI_ERR_FAILURE         (-1)
+#define SBI_ERR_NOT_SUPPORTED   (-2)
+#define SBI_ERR_INVALID_PARAM   (-3)
+#define SBI_ERR_DENIED          (-4)
+#define SBI_ERR_INVALID_ADDRESS (-5)
+
+#define SBI_SPEC_VERSION_DEFAULT        0x1
+
 struct sbiret {
     long error;
     long value;
@@ -31,4 +65,34 @@  struct sbiret sbi_ecall(unsigned long ext, unsigned long fid,
  */
 void sbi_console_putchar(int ch);
 
+/*
+ * Check underlying SBI implementation has RFENCE
+ *
+ * @return 1 for supported AND 0 for not-supported
+ */
+bool sbi_has_rfence(void);
+
+/*
+ * Instructs the remote harts to execute one or more SFENCE.VMA
+ * instructions, covering the range of virtual addresses between
+ * start_addr and start_addr + size.
+ *
+ * Returns 0 if IPI was sent to all the targeted harts successfully
+ * or negative value if start_addr or size is not valid.
+ *
+ * @hart_mask a cpu mask containing all the target harts.
+ * @param start virtual address start
+ * @param size virtual address range size
+ */
+int sbi_remote_sfence_vma(cpumask_t *cpu_mask,
+                          unsigned long start_addr,
+                          unsigned long size);
+
+/*
+ * Initialize SBI library
+ *
+ * @return 0 on success, otherwise negative errno on failure
+ */
+int sbi_init(void);
+
 #endif /* __ASM_RISCV_SBI_H__ */
diff --git a/xen/arch/riscv/sbi.c b/xen/arch/riscv/sbi.c
index 0ae166c861..39e46ef859 100644
--- a/xen/arch/riscv/sbi.c
+++ b/xen/arch/riscv/sbi.c
@@ -7,11 +7,23 @@ 
  * Modified by Bobby Eshleman (bobby.eshleman@gmail.com).
  *
  * Copyright (c) 2019 Western Digital Corporation or its affiliates.
- * Copyright (c) 2021-2023 Vates SAS.
+ * Copyright (c) 2021-2024 Vates SAS.
  */
 
+#include <xen/compiler.h>
+#include <xen/const.h>
+#include <xen/cpumask.h>
+#include <xen/errno.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/smp.h>
+
+#include <asm/processor.h>
 #include <asm/sbi.h>
 
+static unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT;
+static unsigned long sbi_fw_id, sbi_fw_version;
+
 struct sbiret sbi_ecall(unsigned long ext, unsigned long fid,
                         unsigned long arg0, unsigned long arg1,
                         unsigned long arg2, unsigned long arg3,
@@ -38,7 +50,245 @@  struct sbiret sbi_ecall(unsigned long ext, unsigned long fid,
     return ret;
 }
 
+static int sbi_err_map_xen_errno(int err)
+{
+    switch ( err )
+    {
+    case SBI_SUCCESS:
+        return 0;
+    case SBI_ERR_DENIED:
+        return -EACCES;
+    case SBI_ERR_INVALID_PARAM:
+        return -EINVAL;
+    case SBI_ERR_INVALID_ADDRESS:
+        return -EFAULT;
+    case SBI_ERR_NOT_SUPPORTED:
+        return -EOPNOTSUPP;
+    case SBI_ERR_FAILURE:
+        fallthrough;
+    default:
+        return -ENOSYS;
+    };
+}
+
 void sbi_console_putchar(int ch)
 {
     sbi_ecall(SBI_EXT_0_1_CONSOLE_PUTCHAR, 0, ch, 0, 0, 0, 0, 0);
 }
+
+static unsigned long sbi_major_version(void)
+{
+    return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) &
+        SBI_SPEC_VERSION_MAJOR_MASK;
+}
+
+static unsigned long sbi_minor_version(void)
+{
+    return sbi_spec_version & SBI_SPEC_VERSION_MINOR_MASK;
+}
+
+static long sbi_ext_base_func(long fid)
+{
+    struct sbiret ret;
+
+    ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0);
+    if ( !ret.error )
+        return ret.value;
+    else
+        return ret.error;
+}
+
+static int __sbi_rfence_v02_real(unsigned long fid,
+                                 unsigned long hmask, unsigned long hbase,
+                                 unsigned long start, unsigned long size,
+                                 unsigned long arg4)
+{
+    struct sbiret ret = {0};
+    int result = 0;
+
+    switch ( fid )
+    {
+    case SBI_EXT_RFENCE_REMOTE_FENCE_I:
+        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
+                0, 0, 0, 0);
+        break;
+    case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
+        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
+                start, size, 0, 0);
+        break;
+    case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
+        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
+                start, size, arg4, 0);
+        break;
+    case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
+        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
+                start, size, 0, 0);
+        break;
+    case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
+        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
+                start, size, arg4, 0);
+        break;
+    case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
+        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
+                start, size, 0, 0);
+        break;
+    case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
+        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
+                start, size, arg4, 0);
+        break;
+
+    default:
+        printk("%s: unknown function ID [%lu]\n",
+               __func__, fid);
+        result = -EINVAL;
+        break;
+    };
+
+    if ( ret.error )
+    {
+        result = sbi_err_map_xen_errno(ret.error);
+        printk("%s: hbase=%lu hmask=%#lx failed (error %d)\n",
+               __func__, hbase, hmask, result);
+    }
+
+    return result;
+}
+
+static int __sbi_rfence_v02(unsigned long fid,
+                            const cpumask_t *cpu_mask,
+                            unsigned long start, unsigned long size,
+                            unsigned long arg4, unsigned long arg5)
+{
+    unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0;
+    int result;
+
+    /*
+     * hart_mask_base can be set to -1 to indicate that hart_mask can be
+     * ignored and all available harts must be considered.
+     */
+    if ( !cpu_mask )
+        return __sbi_rfence_v02_real(fid, 0UL, -1UL, start, size, arg4);
+
+    for_each_cpu( cpuid, cpu_mask )
+    {
+        hartid = cpuid_to_hartid(cpuid);
+        if ( hmask )
+        {
+            if ( hartid + BITS_PER_LONG <= htop ||
+                 hbase + BITS_PER_LONG <= hartid )
+            {
+                result = __sbi_rfence_v02_real(fid, hmask, hbase,
+                                               start, size, arg4);
+                if ( result )
+                    return result;
+                hmask = 0;
+            }
+            else if ( hartid < hbase )
+            {
+                /* shift the mask to fit lower hartid */
+                hmask <<= hbase - hartid;
+                hbase = hartid;
+            }
+        }
+
+        if ( !hmask )
+        {
+            hbase = hartid;
+            htop = hartid;
+        } else if ( hartid > htop )
+            htop = hartid;
+
+        hmask |= BIT(hartid - hbase, UL);
+    }
+
+    if ( hmask )
+    {
+        result = __sbi_rfence_v02_real(fid, hmask, hbase,
+                                       start, size, arg4);
+        if ( result )
+            return result;
+    }
+
+    return 0;
+}
+
+static int (*__sbi_rfence)(unsigned long fid,
+                           const cpumask_t *cpu_mask,
+                           unsigned long start, unsigned long size,
+                           unsigned long arg4, unsigned long arg5) = NULL;
+
+int sbi_remote_sfence_vma(cpumask_t *cpu_mask,
+                          unsigned long start_addr,
+                          unsigned long size)
+{
+    return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
+                        cpu_mask, start_addr, size, 0, 0);
+}
+
+#define sbi_get_spec_version()  \
+    sbi_ext_base_func(SBI_EXT_BASE_GET_SPEC_VERSION)
+
+#define sbi_get_firmware_id()   \
+    sbi_ext_base_func(SBI_EXT_BASE_GET_IMP_ID)
+
+#define sbi_get_firmware_version()  \
+    sbi_ext_base_func(SBI_EXT_BASE_GET_IMP_VERSION)
+
+int sbi_probe_extension(long extid)
+{
+    struct sbiret ret;
+
+    ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid,
+                    0, 0, 0, 0, 0);
+    if ( !ret.error && ret.value )
+        return ret.value;
+
+    return -EOPNOTSUPP;
+}
+
+static bool sbi_spec_is_0_1(void)
+{
+    return (sbi_spec_version == SBI_SPEC_VERSION_DEFAULT);
+}
+
+bool sbi_has_rfence(void)
+{
+    return (__sbi_rfence != NULL);
+}
+
+int __init sbi_init(void)
+{
+    int ret;
+
+    ret = sbi_get_spec_version();
+    if ( ret > 0 )
+        sbi_spec_version = ret;
+
+    printk("SBI specification v%lu.%lu detected\n",
+            sbi_major_version(), sbi_minor_version());
+
+    if ( !sbi_spec_is_0_1() )
+    {
+        sbi_fw_id = sbi_get_firmware_id();
+        sbi_fw_version = sbi_get_firmware_version();
+
+        printk("SBI implementation ID=%#lx Version=%#lx\n",
+            sbi_fw_id, sbi_fw_version);
+
+        if ( sbi_probe_extension(SBI_EXT_RFENCE) > 0 )
+        {
+            __sbi_rfence = __sbi_rfence_v02;
+            printk("SBI v0.2 RFENCE extension detected\n");
+        }
+    } else {
+        BUG_ON("Ooops. SBI spec veriosn 0.1 detected. Need to add support");
+    }
+
+    if ( !sbi_has_rfence() )
+    {
+        BUG_ON("At the moment flush_xen_tlb_range_va() uses SBI rfence to "
+               "flush TLB for all CPUS!");
+    }
+
+    return 0;
+}
diff --git a/xen/arch/riscv/setup.c b/xen/arch/riscv/setup.c
index c9446e6038..a49a8eee90 100644
--- a/xen/arch/riscv/setup.c
+++ b/xen/arch/riscv/setup.c
@@ -8,6 +8,7 @@ 
 #include <public/version.h>
 
 #include <asm/early_printk.h>
+#include <asm/sbi.h>
 #include <asm/smp.h>
 #include <asm/traps.h>
 
@@ -56,6 +57,8 @@  void __init noreturn start_xen(unsigned long bootcpu_id,
 
     trap_init();
 
+    sbi_init();
+
 #ifdef CONFIG_SELF_TESTS
     test_macros_from_bug_h();
 #endif