Message ID | 1a863d06866f86761d23e6da3f4a6ee58c474473.1723214540.git.oleksii.kurochko@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | RISCV device tree mapping | expand |
On 09.08.2024 18:19, Oleksii Kurochko wrote: > @@ -31,4 +65,34 @@ struct sbiret sbi_ecall(unsigned long ext, unsigned long fid, > */ > void sbi_console_putchar(int ch); > > +/* > + * Check underlying SBI implementation has RFENCE > + * > + * @return 1 for supported AND 0 for not-supported > + */ > +bool sbi_has_rfence(void); Nit: With bool return type, true and false in the comment? > +/* > + * Instructs the remote harts to execute one or more SFENCE.VMA > + * instructions, covering the range of virtual addresses between > + * start_addr and start_addr + size. It's relatively clear what is meant here, but maybe still better to make things explicit by using a mathematical range: [start_addr, start_addr + size). > + * Returns 0 if IPI was sent to all the targeted harts successfully > + * or negative value if start_addr or size is not valid. > + * > + * @hart_mask a cpu mask containing all the target harts. > + * @param start virtual address start > + * @param size virtual address range size > + */ > +int sbi_remote_sfence_vma(cpumask_t *cpu_mask, pointer-to-const? > --- a/xen/arch/riscv/sbi.c > +++ b/xen/arch/riscv/sbi.c > @@ -7,11 +7,23 @@ > * Modified by Bobby Eshleman (bobby.eshleman@gmail.com). > * > * Copyright (c) 2019 Western Digital Corporation or its affiliates. > - * Copyright (c) 2021-2023 Vates SAS. > + * Copyright (c) 2021-2024 Vates SAS. > */ > > +#include <xen/compiler.h> > +#include <xen/const.h> > +#include <xen/cpumask.h> > +#include <xen/errno.h> > +#include <xen/init.h> > +#include <xen/lib.h> > +#include <xen/smp.h> > + > +#include <asm/processor.h> > #include <asm/sbi.h> > > +static unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT; > +static unsigned long sbi_fw_id, sbi_fw_version; __ro_after_init for perhaps all three of them? Considering SBI_SPEC_VERSION_{MAJOR,MINOR}_MASK, at least the first of them also doesn't need to be unsigned long, but could be unsigned int? 
> @@ -38,7 +50,245 @@ struct sbiret sbi_ecall(unsigned long ext, unsigned long fid, > return ret; > } > > +static int sbi_err_map_xen_errno(int err) > +{ > + switch ( err ) > + { > + case SBI_SUCCESS: > + return 0; > + case SBI_ERR_DENIED: > + return -EACCES; > + case SBI_ERR_INVALID_PARAM: > + return -EINVAL; > + case SBI_ERR_INVALID_ADDRESS: > + return -EFAULT; > + case SBI_ERR_NOT_SUPPORTED: > + return -EOPNOTSUPP; > + case SBI_ERR_FAILURE: > + fallthrough; > + default: > + return -ENOSYS; > + }; > +} > + > void sbi_console_putchar(int ch) > { > sbi_ecall(SBI_EXT_0_1_CONSOLE_PUTCHAR, 0, ch, 0, 0, 0, 0, 0); > } > + > +static unsigned long sbi_major_version(void) > +{ > + return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) & > + SBI_SPEC_VERSION_MAJOR_MASK; > +} Can you use MASK_EXTR() here, allowing to even drop the separate SBI_SPEC_VERSION_MAJOR_SHIFT? > +static unsigned long sbi_minor_version(void) > +{ > + return sbi_spec_version & SBI_SPEC_VERSION_MINOR_MASK; > +} For consistency here then, too. > +static long sbi_ext_base_func(long fid) > +{ > + struct sbiret ret; > + > + ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0); > + if ( !ret.error ) > + return ret.value; > + else > + return ret.error; With the folding of two distinct values, how is the caller going to tell failure from success? > +} > + > +static int __sbi_rfence_v02_real(unsigned long fid, Are the double leading underscores really necessary here? (Same again further down.) 
> + unsigned long hmask, unsigned long hbase, > + unsigned long start, unsigned long size, > + unsigned long arg4) > +{ > + struct sbiret ret = {0}; > + int result = 0; > + > + switch ( fid ) > + { > + case SBI_EXT_RFENCE_REMOTE_FENCE_I: > + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, > + 0, 0, 0, 0); > + break; > + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: > + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, > + start, size, 0, 0); > + break; > + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: > + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, > + start, size, arg4, 0); > + break; > + case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA: > + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, > + start, size, 0, 0); > + break; How is e.g. this different from SBI_EXT_RFENCE_REMOTE_SFENCE_VMA handling? To ease recognition, I think you want to fold cases with identical handling. > + case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID: > + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, > + start, size, arg4, 0); > + break; > + case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA: > + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, > + start, size, 0, 0); > + break; > + case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID: > + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, > + start, size, arg4, 0); > + break; > + > + default: Nit: Just like you have it here, blank lines please between any non- fallthrough case blocks. > + printk("%s: unknown function ID [%lu]\n", > + __func__, fid); > + result = -EINVAL; > + break; > + }; > + > + if ( ret.error ) > + { > + result = sbi_err_map_xen_errno(ret.error); > + printk("%s: hbase=%lu hmask=%#lx failed (error %d)\n", > + __func__, hbase, hmask, result); > + } > + > + return result; > +} > + > +static int __sbi_rfence_v02(unsigned long fid, This has its address taken for storing in a function pointer. I'd like to suggest that from the very beginning you mark such functions cf_check. 
> + const cpumask_t *cpu_mask, > + unsigned long start, unsigned long size, > + unsigned long arg4, unsigned long arg5) > +{ > + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; > + int result; > + > + /* > + * hart_mask_base can be set to -1 to indicate that hart_mask can be > + * ignored and all available harts must be considered. > + */ > + if ( !cpu_mask ) > + return __sbi_rfence_v02_real(fid, 0UL, -1UL, start, size, arg4); > + > + for_each_cpu( cpuid, cpu_mask ) Nit: Either for_each_cpu ( cpuid, cpu_mask ) (if you deem for_each_cpu a pseudo-keyword) or for_each_cpu(cpuid, cpu_mask) (if you don't). > + { > + hartid = cpuid_to_hartid(cpuid); > + if ( hmask ) > + { > + if ( hartid + BITS_PER_LONG <= htop || > + hbase + BITS_PER_LONG <= hartid ) > + { > + result = __sbi_rfence_v02_real(fid, hmask, hbase, > + start, size, arg4); > + if ( result ) > + return result; > + hmask = 0; > + } > + else if ( hartid < hbase ) > + { > + /* shift the mask to fit lower hartid */ > + hmask <<= hbase - hartid; > + hbase = hartid; > + } > + } > + > + if ( !hmask ) > + { > + hbase = hartid; > + htop = hartid; > + } else if ( hartid > htop ) Nit: Closing brace on its own line please. > + htop = hartid; > + > + hmask |= BIT(hartid - hbase, UL); > + } I can see how you will successfully batch for certain configurations this way. When hart ID mapping is something like (0,64,1,65,2,66,...) you won't batch at all though. Which may be fine at least for now, but then I think a comment wants to state what is or is not intended to be covered. It is only this way that people will know that certain shortcomings aren't bugs. 
> + if ( hmask ) > + { > + result = __sbi_rfence_v02_real(fid, hmask, hbase, > + start, size, arg4); > + if ( result ) > + return result; > + } > + > + return 0; > +} > + > +static int (*__sbi_rfence)(unsigned long fid, > + const cpumask_t *cpu_mask, > + unsigned long start, unsigned long size, > + unsigned long arg4, unsigned long arg5) = NULL; __ro_after_init and no need for an initializer. > +int sbi_remote_sfence_vma(cpumask_t *cpu_mask, > + unsigned long start_addr, > + unsigned long size) > +{ > + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, > + cpu_mask, start_addr, size, 0, 0); No check (not even an assertion) here for __sbi_rfence still being NULL? > +int __init sbi_init(void) > +{ > + int ret; > + > + ret = sbi_get_spec_version(); > + if ( ret > 0 ) > + sbi_spec_version = ret; Truncation from long to int is not an issue here? > + printk("SBI specification v%lu.%lu detected\n", > + sbi_major_version(), sbi_minor_version()); > + > + if ( !sbi_spec_is_0_1() ) > + { > + sbi_fw_id = sbi_get_firmware_id(); > + sbi_fw_version = sbi_get_firmware_version(); These cannot return errors? > + printk("SBI implementation ID=%#lx Version=%#lx\n", > + sbi_fw_id, sbi_fw_version); > + > + if ( sbi_probe_extension(SBI_EXT_RFENCE) > 0 ) > + { > + __sbi_rfence = __sbi_rfence_v02; > + printk("SBI v0.2 RFENCE extension detected\n"); > + } > + } else { Nit: Style (and I think I said so before). > + BUG_ON("Ooops. SBI spec veriosn 0.1 detected. Need to add support"); Besides the typo (version) here and ... > + } > + > + if ( !sbi_has_rfence() ) > + { > + BUG_ON("At the moment flush_xen_tlb_range_va() uses SBI rfence to " > + "flush TLB for all CPUS!"); ... here better panic()? A call trace doesn't really add any value for these. Jan
On Tue, 2024-08-13 at 11:34 +0200, Jan Beulich wrote: > On 09.08.2024 18:19, Oleksii Kurochko wrote: > > > --- a/xen/arch/riscv/sbi.c > > +++ b/xen/arch/riscv/sbi.c > > @@ -7,11 +7,23 @@ > > * Modified by Bobby Eshleman (bobby.eshleman@gmail.com). > > * > > * Copyright (c) 2019 Western Digital Corporation or its > > affiliates. > > - * Copyright (c) 2021-2023 Vates SAS. > > + * Copyright (c) 2021-2024 Vates SAS. > > */ > > > > +#include <xen/compiler.h> > > +#include <xen/const.h> > > +#include <xen/cpumask.h> > > +#include <xen/errno.h> > > +#include <xen/init.h> > > +#include <xen/lib.h> > > +#include <xen/smp.h> > > + > > +#include <asm/processor.h> > > #include <asm/sbi.h> > > > > +static unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT; > > +static unsigned long sbi_fw_id, sbi_fw_version; > > __ro_after_init for perhaps all three of them? > > Considering SBI_SPEC_VERSION_{MAJOR,MINOR}_MASK, at least the first > of them also doesn't need to be unsigned long, but could be unsigned > int? It could be unsigned int, this part is declared as unsigned long in Linux kernel so was taken as is. But based on the possible values for these variables we can go with unsigned long. 
> > > @@ -38,7 +50,245 @@ struct sbiret sbi_ecall(unsigned long ext, > > unsigned long fid, > > return ret; > > } > > > > +static int sbi_err_map_xen_errno(int err) > > +{ > > + switch ( err ) > > + { > > + case SBI_SUCCESS: > > + return 0; > > + case SBI_ERR_DENIED: > > + return -EACCES; > > + case SBI_ERR_INVALID_PARAM: > > + return -EINVAL; > > + case SBI_ERR_INVALID_ADDRESS: > > + return -EFAULT; > > + case SBI_ERR_NOT_SUPPORTED: > > + return -EOPNOTSUPP; > > + case SBI_ERR_FAILURE: > > + fallthrough; > > + default: > > + return -ENOSYS; > > + }; > > +} > > + > > void sbi_console_putchar(int ch) > > { > > sbi_ecall(SBI_EXT_0_1_CONSOLE_PUTCHAR, 0, ch, 0, 0, 0, 0, 0); > > } > > + > > +static unsigned long sbi_major_version(void) > > +{ > > + return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) & > > + SBI_SPEC_VERSION_MAJOR_MASK; > > +} > > Can you use MASK_EXTR() here, allowing to even drop the separate > SBI_SPEC_VERSION_MAJOR_SHIFT? I am not sure that: (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) & SBI_SPEC_VERSION_MAJOR_MASK == MASK_EXTR(sbi_spec_version, SBI_SPEC_VERSION_MAJOR_MASK) > > > +static unsigned long sbi_minor_version(void) > > +{ > > + return sbi_spec_version & SBI_SPEC_VERSION_MINOR_MASK; > > +} > > For consistency here then, too. Here we can use MASK_EXTR. > > > +static long sbi_ext_base_func(long fid) > > +{ > > + struct sbiret ret; > > + > > + ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0); > > + if ( !ret.error ) > > + return ret.value; > > + else > > + return ret.error; > > With the folding of two distinct values, how is the caller going to > tell failure from success? By checking if the return value is < 0. According to the SBI spec [ https://github.com/riscv-non-isa/riscv-sbi-doc/blob/master/src/binary-encoding.adoc?plain=1#L32 ] ret.error can be or 0 ( which means success ) or something < 0 if it was a failure. 
> > > +} > > + > > +static int __sbi_rfence_v02_real(unsigned long fid, > > Are the double leading underscores really necessary here? (Same again > further down.) No at all, I'll drop it. > > > + unsigned long hmask, unsigned > > long hbase, > > + unsigned long start, unsigned > > long size, > > + unsigned long arg4) > > +{ > > + struct sbiret ret = {0}; > > + int result = 0; > > + > > + switch ( fid ) > > + { > > + case SBI_EXT_RFENCE_REMOTE_FENCE_I: > > + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, > > + 0, 0, 0, 0); > > + break; > > + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: > > + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, > > + start, size, 0, 0); > > + break; > > + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: > > + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, > > + start, size, arg4, 0); > > + break; > > + case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA: > > + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, > > + start, size, 0, 0); > > + break; > > How is e.g. this different from SBI_EXT_RFENCE_REMOTE_SFENCE_VMA > handling? To ease recognition, I think you want to fold cases with > identical handling. Agree, it could be folded for some cases. > > > +int sbi_remote_sfence_vma(cpumask_t *cpu_mask, > > + unsigned long start_addr, > > + unsigned long size) > > +{ > > + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, > > + cpu_mask, start_addr, size, 0, 0); > > No check (not even an assertion) here for __sbi_rfence still being > NULL? I thought that it would be enough to have BUG_ON() in sbi_init() but then probably would be better to update the message inside BUG_ON(): int __init sbi_init(void) { .... if ( !sbi_has_rfence() ) { BUG_ON("At the moment flush_xen_tlb_range_va() uses SBI rfence to " "flush TLB for all CPUS!"); } return 0; } > > > +int __init sbi_init(void) > > +{ > > + int ret; > > + > > + ret = sbi_get_spec_version(); > > + if ( ret > 0 ) > > + sbi_spec_version = ret; > > Truncation from long to int is not an issue here? 
No, spec_version doesn't have such big numbers, the latest version is v2.0. > > > + printk("SBI specification v%lu.%lu detected\n", > > + sbi_major_version(), sbi_minor_version()); > > + > > + if ( !sbi_spec_is_0_1() ) > > + { > > + sbi_fw_id = sbi_get_firmware_id(); > > + sbi_fw_version = sbi_get_firmware_version(); > > These cannot return errors? At least, SBI specs don't tell that directly: https://github.com/riscv-non-isa/riscv-sbi-doc/blob/master/src/ext-base.adoc?plain=1#L31 ~ Oleksii
On 14.08.2024 17:41, oleksii.kurochko@gmail.com wrote: > On Tue, 2024-08-13 at 11:34 +0200, Jan Beulich wrote: >> On 09.08.2024 18:19, Oleksii Kurochko wrote: >>> +static unsigned long sbi_major_version(void) >>> +{ >>> + return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) & >>> + SBI_SPEC_VERSION_MAJOR_MASK; >>> +} >> >> Can you use MASK_EXTR() here, allowing to even drop the separate >> SBI_SPEC_VERSION_MAJOR_SHIFT? > I am not sure that: > (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) & > SBI_SPEC_VERSION_MAJOR_MASK == MASK_EXTR(sbi_spec_version, > SBI_SPEC_VERSION_MAJOR_MASK) How come you're not sure? That's precisely what MASK_EXTR() is for. >>> +static long sbi_ext_base_func(long fid) >>> +{ >>> + struct sbiret ret; >>> + >>> + ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0); >>> + if ( !ret.error ) >>> + return ret.value; >>> + else >>> + return ret.error; >> >> With the folding of two distinct values, how is the caller going to >> tell failure from success? > By checking if the return value is < 0. > According to the SBI spec [ > https://github.com/riscv-non-isa/riscv-sbi-doc/blob/master/src/binary-encoding.adoc?plain=1#L32 > ] ret.error can be or 0 ( which means success ) or something < 0 if it > was a failure. Right. And what if ret.error was 0, but ret.value was negative? >>> +int sbi_remote_sfence_vma(cpumask_t *cpu_mask, >>> + unsigned long start_addr, >>> + unsigned long size) >>> +{ >>> + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, >>> + cpu_mask, start_addr, size, 0, 0); >> >> No check (not even an assertion) here for __sbi_rfence still being >> NULL? > I thought that it would be enough to have BUG_ON() in sbi_init() but > then probably would be better to update the message inside BUG_ON(): > int __init sbi_init(void) > { > .... > > if ( !sbi_has_rfence() ) > { > BUG_ON("At the moment flush_xen_tlb_range_va() uses SBI rfence > to " > "flush TLB for all CPUS!"); I never really liked this kind of BUG_ON(). 
I leave it uncommented in code which clearly is only temporary. Plus imo such BUG_ON()s want to be next to where the risk is, i.e. in this case ahead of the possible NULL deref. Then again, without PV guests and without anything mapped at VA 0, you'll crash cleanly anyway. So perhaps my request to add a check went too far. Jan
On Wed, 2024-08-14 at 17:53 +0200, Jan Beulich wrote: > On 14.08.2024 17:41, oleksii.kurochko@gmail.com wrote: > > On Tue, 2024-08-13 at 11:34 +0200, Jan Beulich wrote: > > > On 09.08.2024 18:19, Oleksii Kurochko wrote: > > > > +static unsigned long sbi_major_version(void) > > > > +{ > > > > + return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) > > > > & > > > > + SBI_SPEC_VERSION_MAJOR_MASK; > > > > +} > > > > > > Can you use MASK_EXTR() here, allowing to even drop the separate > > > SBI_SPEC_VERSION_MAJOR_SHIFT? > > I am not sure that: > > (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) & > > SBI_SPEC_VERSION_MAJOR_MASK == MASK_EXTR(sbi_spec_version, > > SBI_SPEC_VERSION_MAJOR_MASK) > > How come you're not sure? That's precisely what MASK_EXTR() is for. As the order matter here. First should be shift then applying SBI_SPEC_VERSION_MAJOR_MASK, as SBI_SPEC_VERSION_MAJOR_MASK is defined now as 7f. If SBI_SPEC_VERSION_MAJOR_MASK set to 0x7F000000 then it will work. > > > > > +static long sbi_ext_base_func(long fid) > > > > +{ > > > > + struct sbiret ret; > > > > + > > > > + ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0); > > > > + if ( !ret.error ) > > > > + return ret.value; > > > > + else > > > > + return ret.error; > > > > > > With the folding of two distinct values, how is the caller going > > > to > > > tell failure from success? > > By checking if the return value is < 0. > > According to the SBI spec [ > > https://github.com/riscv-non-isa/riscv-sbi-doc/blob/master/src/binary-encoding.adoc?plain=1#L32 > > ] ret.error can be or 0 ( which means success ) or something < 0 if > > it > > was a failure. > > Right. And what if ret.error was 0, but ret.value was negative? I wasn't able to find a case in the SBI spec where sbiret.value could be negative. Unfortunately, the spec does not specify the possible values of sbiret.value, but based on the description of the SBI function, it only returns a negative value in the case of an error. 
Otherwise, ret.value is always greater than or equal to 0. > > > > > +int sbi_remote_sfence_vma(cpumask_t *cpu_mask, > > > > + unsigned long start_addr, > > > > + unsigned long size) > > > > +{ > > > > + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, > > > > + cpu_mask, start_addr, size, 0, 0); > > > > > > No check (not even an assertion) here for __sbi_rfence still > > > being > > > NULL? > > I thought that it would be enough to have BUG_ON() in sbi_init() > > but > > then probably would be better to update the message inside > > BUG_ON(): > > int __init sbi_init(void) > > { > > .... > > > > if ( !sbi_has_rfence() ) > > { > > BUG_ON("At the moment flush_xen_tlb_range_va() uses SBI > > rfence > > to " > > "flush TLB for all CPUS!"); > > I never really liked this kind of BUG_ON(). I leave it uncommented in > code which clearly is only temporary. Plus imo such BUG_ON()s want to > be next to where the risk is, i.e. in this case ahead of the possible > NULL deref. Then again, without PV guests and without anything mapped > at VA 0, you'll crash cleanly anyway. So perhaps my request to add a > check went too far. But then there is no big difference whether BUG_ON() or ASSERT() inside sbi_remote_sfence_vma() is present. Even if none of the checks is present, it will crash anyway. And the other reason why I thought it would be better to have BUG_ON() in sbi_init() is that it will need only one BUG_ON() instead of having ASSERT() in each function which will use __sbi_rfence(). And according to coding-best-practices.pandoc: ``` * Programmers can use ASSERT(), which will cause the check to be executed in DEBUG builds, and cause the hypervisor to crash if it's violated * Programmers can use BUG_ON(), which will cause the check to be executed in both DEBUG and non-DEBUG builds, and cause the hypervisor to crash if it's violated. ``` The difference between them is only whether the check works only in DEBUG builds or in both DEBUG and non-DEBUG builds. ~ Oleksii
On 15.08.2024 12:00, oleksii.kurochko@gmail.com wrote: > On Wed, 2024-08-14 at 17:53 +0200, Jan Beulich wrote: >> On 14.08.2024 17:41, oleksii.kurochko@gmail.com wrote: >>> On Tue, 2024-08-13 at 11:34 +0200, Jan Beulich wrote: >>>> On 09.08.2024 18:19, Oleksii Kurochko wrote: >>>>> +static unsigned long sbi_major_version(void) >>>>> +{ >>>>> + return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) >>>>> & >>>>> + SBI_SPEC_VERSION_MAJOR_MASK; >>>>> +} >>>> >>>> Can you use MASK_EXTR() here, allowing to even drop the separate >>>> SBI_SPEC_VERSION_MAJOR_SHIFT? >>> I am not sure that: >>> (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) & >>> SBI_SPEC_VERSION_MAJOR_MASK == MASK_EXTR(sbi_spec_version, >>> SBI_SPEC_VERSION_MAJOR_MASK) >> >> How come you're not sure? That's precisely what MASK_EXTR() is for. > As the order matter here. First should be shift then applying > SBI_SPEC_VERSION_MAJOR_MASK, as SBI_SPEC_VERSION_MAJOR_MASK is defined > now as 7f. If SBI_SPEC_VERSION_MAJOR_MASK set to 0x7F000000 then it > will work. The assumption was of course that you'd adjust SBI_SPEC_VERSION_MAJOR_MASK. That's the underlying idea after all: When the mask is shifted to its appropriate position, the shift amount needed can be calculated from it. Hence the lack of a need for SBI_SPEC_VERSION_MAJOR_SHIFT (and alike). Jan
On Tue, 2024-08-13 at 11:34 +0200, Jan Beulich wrote: > > > > +static unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT; > > +static unsigned long sbi_fw_id, sbi_fw_version; > > __ro_after_init for perhaps all three of them? > > Considering SBI_SPEC_VERSION_{MAJOR,MINOR}_MASK, at least the first > of them also doesn't need to be unsigned long, but could be unsigned > int? sbi_spec_version can really be unsigned int as according to the spec only 32 bits are used: ``` struct sbiret sbi_get_spec_version(void); Returns the current SBI specification version. This function must always succeed. The minor number of the SBI specification is encoded in the low 24 bits, with the major number encoded in the next 7 bits. Bit 31 must be 0 and is reserved for future expansion. ``` For sbi_fw_id, sbi_fw_version the same thing is not mentioned, so we can just assume ( despite the fact that these values are currently very small and unlikely to be bigger than UINT_MAX ) that they will always fit in uint32_t. But I think it would be better to leave unsigned long for all of them as according to the specification these functions return an sbiret structure which consists of 2 longs ( error and value ) and it is a good idea to follow the specification. ~ Oleksii
On 16.08.2024 14:06, oleksii.kurochko@gmail.com wrote: > On Tue, 2024-08-13 at 11:34 +0200, Jan Beulich wrote: >>> >>> +static unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT; >>> +static unsigned long sbi_fw_id, sbi_fw_version; >> >> __ro_after_init for perhaps all three of them? >> >> Considering SBI_SPEC_VERSION_{MAJOR,MINOR}_MASK, at least the first >> of them also doesn't need to be unsigned long, but could be unsigned >> int? > > sbi_spec_version can be really unsigned int as according to the spec > only 32 bits are used: > ``` > struct sbiret sbi_get_spec_version(void); > Returns the current SBI specification version. This function must > always succeed. The minor number > of the SBI specification is encoded in the low 24 bits, with the > major number encoded in the next 7 > bits. Bit 31 must be 0 and is reserved for future expansion. > ``` > > For sbi_fw_id, sbi_fw_version it is not mentioned the same thing, so we > can just assume ( despite of the fact that now this values are very > small and it is unlikely to be bigger the UINT_MAX ) that it will be > always fit in uint32_t. > > But I think it would be better to leave unsigned long for everyone as > according to the specification this functions returns sbiret structure > which consist of 2 longs ( error and value ) and it is good idea to > follow the specification. Okay with me (for these two) then. Jan
diff --git a/xen/arch/riscv/include/asm/sbi.h b/xen/arch/riscv/include/asm/sbi.h index 0e6820a4ed..931613646d 100644 --- a/xen/arch/riscv/include/asm/sbi.h +++ b/xen/arch/riscv/include/asm/sbi.h @@ -12,8 +12,42 @@ #ifndef __ASM_RISCV_SBI_H__ #define __ASM_RISCV_SBI_H__ +#include <xen/cpumask.h> + #define SBI_EXT_0_1_CONSOLE_PUTCHAR 0x1 +#define SBI_EXT_BASE 0x10 +#define SBI_EXT_RFENCE 0x52464E43 + +/* SBI function IDs for BASE extension */ +#define SBI_EXT_BASE_GET_SPEC_VERSION 0x0 +#define SBI_EXT_BASE_GET_IMP_ID 0x1 +#define SBI_EXT_BASE_GET_IMP_VERSION 0x2 +#define SBI_EXT_BASE_PROBE_EXT 0x3 + +/* SBI function IDs for RFENCE extension */ +#define SBI_EXT_RFENCE_REMOTE_FENCE_I 0x0 +#define SBI_EXT_RFENCE_REMOTE_SFENCE_VMA 0x1 +#define SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID 0x2 +#define SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA 0x3 +#define SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID 0x4 +#define SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA 0x5 +#define SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID 0x6 + +#define SBI_SPEC_VERSION_MAJOR_SHIFT 24 +#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f +#define SBI_SPEC_VERSION_MINOR_MASK 0xffffff + +/* SBI return error codes */ +#define SBI_SUCCESS 0 +#define SBI_ERR_FAILURE (-1) +#define SBI_ERR_NOT_SUPPORTED (-2) +#define SBI_ERR_INVALID_PARAM (-3) +#define SBI_ERR_DENIED (-4) +#define SBI_ERR_INVALID_ADDRESS (-5) + +#define SBI_SPEC_VERSION_DEFAULT 0x1 + struct sbiret { long error; long value; @@ -31,4 +65,34 @@ struct sbiret sbi_ecall(unsigned long ext, unsigned long fid, */ void sbi_console_putchar(int ch); +/* + * Check underlying SBI implementation has RFENCE + * + * @return 1 for supported AND 0 for not-supported + */ +bool sbi_has_rfence(void); + +/* + * Instructs the remote harts to execute one or more SFENCE.VMA + * instructions, covering the range of virtual addresses between + * start_addr and start_addr + size. + * + * Returns 0 if IPI was sent to all the targeted harts successfully + * or negative value if start_addr or size is not valid. 
+ * + * @hart_mask a cpu mask containing all the target harts. + * @param start virtual address start + * @param size virtual address range size + */ +int sbi_remote_sfence_vma(cpumask_t *cpu_mask, + unsigned long start_addr, + unsigned long size); + +/* + * Initialize SBI library + * + * @return 0 on success, otherwise negative errno on failure + */ +int sbi_init(void); + #endif /* __ASM_RISCV_SBI_H__ */ diff --git a/xen/arch/riscv/sbi.c b/xen/arch/riscv/sbi.c index 0ae166c861..39e46ef859 100644 --- a/xen/arch/riscv/sbi.c +++ b/xen/arch/riscv/sbi.c @@ -7,11 +7,23 @@ * Modified by Bobby Eshleman (bobby.eshleman@gmail.com). * * Copyright (c) 2019 Western Digital Corporation or its affiliates. - * Copyright (c) 2021-2023 Vates SAS. + * Copyright (c) 2021-2024 Vates SAS. */ +#include <xen/compiler.h> +#include <xen/const.h> +#include <xen/cpumask.h> +#include <xen/errno.h> +#include <xen/init.h> +#include <xen/lib.h> +#include <xen/smp.h> + +#include <asm/processor.h> #include <asm/sbi.h> +static unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT; +static unsigned long sbi_fw_id, sbi_fw_version; + struct sbiret sbi_ecall(unsigned long ext, unsigned long fid, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, @@ -38,7 +50,245 @@ struct sbiret sbi_ecall(unsigned long ext, unsigned long fid, return ret; } +static int sbi_err_map_xen_errno(int err) +{ + switch ( err ) + { + case SBI_SUCCESS: + return 0; + case SBI_ERR_DENIED: + return -EACCES; + case SBI_ERR_INVALID_PARAM: + return -EINVAL; + case SBI_ERR_INVALID_ADDRESS: + return -EFAULT; + case SBI_ERR_NOT_SUPPORTED: + return -EOPNOTSUPP; + case SBI_ERR_FAILURE: + fallthrough; + default: + return -ENOSYS; + }; +} + void sbi_console_putchar(int ch) { sbi_ecall(SBI_EXT_0_1_CONSOLE_PUTCHAR, 0, ch, 0, 0, 0, 0, 0); } + +static unsigned long sbi_major_version(void) +{ + return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) & + SBI_SPEC_VERSION_MAJOR_MASK; +} + +static unsigned long 
sbi_minor_version(void) +{ + return sbi_spec_version & SBI_SPEC_VERSION_MINOR_MASK; +} + +static long sbi_ext_base_func(long fid) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0); + if ( !ret.error ) + return ret.value; + else + return ret.error; +} + +static int __sbi_rfence_v02_real(unsigned long fid, + unsigned long hmask, unsigned long hbase, + unsigned long start, unsigned long size, + unsigned long arg4) +{ + struct sbiret ret = {0}; + int result = 0; + + switch ( fid ) + { + case SBI_EXT_RFENCE_REMOTE_FENCE_I: + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, + 0, 0, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, + start, size, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, + start, size, arg4, 0); + break; + case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA: + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, + start, size, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID: + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, + start, size, arg4, 0); + break; + case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA: + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, + start, size, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID: + ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase, + start, size, arg4, 0); + break; + + default: + printk("%s: unknown function ID [%lu]\n", + __func__, fid); + result = -EINVAL; + break; + }; + + if ( ret.error ) + { + result = sbi_err_map_xen_errno(ret.error); + printk("%s: hbase=%lu hmask=%#lx failed (error %d)\n", + __func__, hbase, hmask, result); + } + + return result; +} + +static int __sbi_rfence_v02(unsigned long fid, + const cpumask_t *cpu_mask, + unsigned long start, unsigned long size, + unsigned long arg4, unsigned long arg5) +{ + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; + int result; + + /* + * hart_mask_base can be set to -1 to 
indicate that hart_mask can be + * ignored and all available harts must be considered. + */ + if ( !cpu_mask ) + return __sbi_rfence_v02_real(fid, 0UL, -1UL, start, size, arg4); + + for_each_cpu( cpuid, cpu_mask ) + { + hartid = cpuid_to_hartid(cpuid); + if ( hmask ) + { + if ( hartid + BITS_PER_LONG <= htop || + hbase + BITS_PER_LONG <= hartid ) + { + result = __sbi_rfence_v02_real(fid, hmask, hbase, + start, size, arg4); + if ( result ) + return result; + hmask = 0; + } + else if ( hartid < hbase ) + { + /* shift the mask to fit lower hartid */ + hmask <<= hbase - hartid; + hbase = hartid; + } + } + + if ( !hmask ) + { + hbase = hartid; + htop = hartid; + } else if ( hartid > htop ) + htop = hartid; + + hmask |= BIT(hartid - hbase, UL); + } + + if ( hmask ) + { + result = __sbi_rfence_v02_real(fid, hmask, hbase, + start, size, arg4); + if ( result ) + return result; + } + + return 0; +} + +static int (*__sbi_rfence)(unsigned long fid, + const cpumask_t *cpu_mask, + unsigned long start, unsigned long size, + unsigned long arg4, unsigned long arg5) = NULL; + +int sbi_remote_sfence_vma(cpumask_t *cpu_mask, + unsigned long start_addr, + unsigned long size) +{ + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, + cpu_mask, start_addr, size, 0, 0); +} + +#define sbi_get_spec_version() \ + sbi_ext_base_func(SBI_EXT_BASE_GET_SPEC_VERSION) + +#define sbi_get_firmware_id() \ + sbi_ext_base_func(SBI_EXT_BASE_GET_IMP_ID) + +#define sbi_get_firmware_version() \ + sbi_ext_base_func(SBI_EXT_BASE_GET_IMP_VERSION) + +int sbi_probe_extension(long extid) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid, + 0, 0, 0, 0, 0); + if ( !ret.error && ret.value ) + return ret.value; + + return -EOPNOTSUPP; +} + +static bool sbi_spec_is_0_1(void) +{ + return (sbi_spec_version == SBI_SPEC_VERSION_DEFAULT); +} + +bool sbi_has_rfence(void) +{ + return (__sbi_rfence != NULL); +} + +int __init sbi_init(void) +{ + int ret; + + ret = sbi_get_spec_version(); 
+ if ( ret > 0 ) + sbi_spec_version = ret; + + printk("SBI specification v%lu.%lu detected\n", + sbi_major_version(), sbi_minor_version()); + + if ( !sbi_spec_is_0_1() ) + { + sbi_fw_id = sbi_get_firmware_id(); + sbi_fw_version = sbi_get_firmware_version(); + + printk("SBI implementation ID=%#lx Version=%#lx\n", + sbi_fw_id, sbi_fw_version); + + if ( sbi_probe_extension(SBI_EXT_RFENCE) > 0 ) + { + __sbi_rfence = __sbi_rfence_v02; + printk("SBI v0.2 RFENCE extension detected\n"); + } + } else { + BUG_ON("Oops. SBI spec version 0.1 detected. Need to add support"); + } + + if ( !sbi_has_rfence() ) + { + BUG_ON("At the moment flush_xen_tlb_range_va() uses SBI rfence to " + "flush TLB for all CPUs!"); + } + + return 0; +} diff --git a/xen/arch/riscv/setup.c b/xen/arch/riscv/setup.c index c9446e6038..a49a8eee90 100644 --- a/xen/arch/riscv/setup.c +++ b/xen/arch/riscv/setup.c @@ -8,6 +8,7 @@ #include <public/version.h> #include <asm/early_printk.h> +#include <asm/sbi.h> #include <asm/smp.h> #include <asm/traps.h> @@ -56,6 +57,8 @@ void __init noreturn start_xen(unsigned long bootcpu_id, trap_init(); + sbi_init(); + #ifdef CONFIG_SELF_TESTS test_macros_from_bug_h(); #endif
Introduce functions to work with the SBI RFENCE extension for issuing various fence operations to remote CPUs. Add the sbi_init() function along with auxiliary functions and macro definitions for proper initialization and checking the availability of SBI extensions. Currently, this is implemented only for RFENCE. Introduce sbi_remote_sfence_vma() to send SFENCE_VMA instructions to a set of target HARTs. This will support the implementation of flush_xen_tlb_range_va(). Integrate __sbi_rfence_v02 from Linux kernel 6.6.0-rc4 with minimal modifications: - Adapt to Xen code style. - Use cpuid_to_hartid() instead of cpuid_to_hartid_map[]. - Update BIT(...) to BIT(..., UL). - Rename __sbi_rfence_v02_call to __sbi_rfence_v02_real and remove the unused arg5. - Handle NULL cpu_mask to execute rfence on all CPUs by calling __sbi_rfence_v02_real(..., 0UL, -1UL,...) instead of creating hmask. Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com> --- Changes in V4: - update the commit message. - code style fixes - update return type of sbi_has_rfence() from int to bool and drop conditional operator inside implementation. - Update mapping of SBI_ERR_FAILURE in sbi_err_map_xen_errno(). - Update return type of sbi_spec_is_0_1() and drop conditional operator inside implementation. - s/0x%lx/%#lx - update the comment above declaration of sbi_remote_sfence_vma() with a more detailed explanation of what the function does. - update prototype of sbi_remote_sfence_vma(). Now it receives cpumask_t and returns int. - refactor __sbi_rfence_v02() taken from the Linux kernel as it takes into account a case that hart id could be from different hbase. For example, the case when hart IDs are the following 0, 3, 65, 2. Or the case when hart IDs are unsorted: 0 3 1 2. - drop sbi_cpumask_to_hartmask() as it is not needed anymore - Update the prototype of sbi_remote_sfence_vma() and implementation according to the fact it returns 'int'.
- s/flush_xen_tlb_one_local/flush_tlb_one_local --- Changes in V3: - new patch. --- xen/arch/riscv/include/asm/sbi.h | 64 ++++++++ xen/arch/riscv/sbi.c | 252 ++++++++++++++++++++++++++++++- xen/arch/riscv/setup.c | 3 + 3 files changed, 318 insertions(+), 1 deletion(-)