diff mbox series

[v1,3/6] RISC-V: hwprobe: Introduce which-cpus flag

Message ID 20231011135610.122850-11-ajones@ventanamicro.com (mailing list archive)
State Superseded
Headers show
Series RISC-V: hwprobe: Introduce which-cpus | expand

Checks

Context Check Description
conchuod/vmtest-fixes-PR fail merge-conflict

Commit Message

Andrew Jones Oct. 11, 2023, 1:56 p.m. UTC
Introduce the first flag for the hwprobe syscall. The flag basically
reverses its behavior, i.e. instead of populating the values of keys
for a given set of cpus, the set of cpus after the call is the result
of finding a set which supports the values of the keys. In order to
do this, we implement a pair compare function which takes the type of
value (a single value vs. a bitmask of booleans) into consideration.
We also implement vdso support for the new flag.

Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
 Documentation/riscv/hwprobe.rst       | 16 ++++-
 arch/riscv/include/asm/hwprobe.h      | 24 +++++++
 arch/riscv/include/uapi/asm/hwprobe.h |  3 +
 arch/riscv/kernel/sys_hwprobe.c       | 93 +++++++++++++++++++++++++--
 arch/riscv/kernel/vdso/hwprobe.c      | 68 +++++++++++++++++---
 5 files changed, 190 insertions(+), 14 deletions(-)

Comments

Evan Green Oct. 12, 2023, 5:40 p.m. UTC | #1
On Wed, Oct 11, 2023 at 6:56 AM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> Introduce the first flag for the hwprobe syscall. The flag basically
> reverses its behavior, i.e. instead of populating the values of keys
> for a given set of cpus, the set of cpus after the call is the result
> of finding a set which supports the values of the keys. In order to
> do this, we implement a pair compare function which takes the type of
> value (a single value vs. a bitmask of booleans) into consideration.
> We also implement vdso support for the new flag.
>
> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> ---
>  Documentation/riscv/hwprobe.rst       | 16 ++++-
>  arch/riscv/include/asm/hwprobe.h      | 24 +++++++
>  arch/riscv/include/uapi/asm/hwprobe.h |  3 +
>  arch/riscv/kernel/sys_hwprobe.c       | 93 +++++++++++++++++++++++++--
>  arch/riscv/kernel/vdso/hwprobe.c      | 68 +++++++++++++++++---
>  5 files changed, 190 insertions(+), 14 deletions(-)
>
> diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst
> index c57437e40ffb..576aa03f56bb 100644
> --- a/Documentation/riscv/hwprobe.rst
> +++ b/Documentation/riscv/hwprobe.rst
> @@ -25,8 +25,20 @@ arch, impl), the returned value will only be valid if all CPUs in the given set
>  have the same value. Otherwise -1 will be returned. For boolean-like keys, the
>  value returned will be a logical AND of the values for the specified CPUs.
>  Usermode can supply NULL for ``cpus`` and 0 for ``cpusetsize`` as a shortcut for
> -all online CPUs. There are currently no flags, this value must be zero for
> -future compatibility.
> +all online CPUs. The currently supported flags are:
> +
> +* :c:macro:`RISCV_HWPROBE_WHICH_CPUS`: This flag basically reverses the behavior
> +  of sys_riscv_hwprobe().  Instead of populating the values of keys for a given
> +  set of CPUs, the set of CPUs is initially all unset and the values of each key

This isn't quite right anymore, I reckon. The cpuset passed in is used
as an initial set, and this function removes CPUs from the set that
have differing values from those given in the array of pairs. If an
empty cpuset is passed in, then the initial set used is all online
cpus.

> +  are given.  Upon return, the CPUs which all match each of the given key-value
> +  pairs are set in ``cpus``.  How matching is done depends on the key type.  For
> +  value-like keys, matching means to be the exact same as the value.  For
> +  boolean-like keys, matching means the result of a logical AND of the pair's
> +  value with the CPU's value is exactly the same as the pair's value.  ``cpus``
> +  may also initially have set bits, in which case the bits of any CPUs which do
> +  not match the pairs will be cleared, but no other bits will be set.
> +
> +All other flags are reserved for future compatibility and must be zero.
>
>  On success 0 is returned, on failure a negative error code is returned.
>
> diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
> index 7cad513538d8..a68764149e51 100644
> --- a/arch/riscv/include/asm/hwprobe.h
> +++ b/arch/riscv/include/asm/hwprobe.h
> @@ -15,4 +15,28 @@ static inline bool riscv_hwprobe_key_is_valid(__s64 key)
>         return key >= 0 && key <= RISCV_HWPROBE_MAX_KEY;
>  }
>
> +static inline bool hwprobe_key_is_bitmask(__s64 key)
> +{
> +       switch (key) {
> +       case RISCV_HWPROBE_KEY_BASE_BEHAVIOR:
> +       case RISCV_HWPROBE_KEY_IMA_EXT_0:
> +       case RISCV_HWPROBE_KEY_CPUPERF_0:
> +               return true;
> +       }
> +
> +       return false;
> +}
> +
> +static inline bool riscv_hwprobe_pair_cmp(struct riscv_hwprobe *pair,
> +                                         struct riscv_hwprobe *other_pair)
> +{
> +       if (pair->key != other_pair->key)
> +               return false;
> +
> +       if (hwprobe_key_is_bitmask(pair->key))
> +               return (pair->value & other_pair->value) == other_pair->value;
> +
> +       return pair->value == other_pair->value;
> +}
> +
>  #endif
> diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
> index 006bfb48343d..1d4134befc48 100644
> --- a/arch/riscv/include/uapi/asm/hwprobe.h
> +++ b/arch/riscv/include/uapi/asm/hwprobe.h
> @@ -38,4 +38,7 @@ struct riscv_hwprobe {
>  #define                RISCV_HWPROBE_MISALIGNED_MASK           (7 << 0)
>  /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
>
> +/* Flags */
> +#define RISCV_HWPROBE_WHICH_CPUS       (1 << 0)
> +
>  #endif
> diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
> index 69ad5f793374..de294538ca25 100644
> --- a/arch/riscv/kernel/sys_hwprobe.c
> +++ b/arch/riscv/kernel/sys_hwprobe.c
> @@ -166,10 +166,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
>         }
>  }
>
> -static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
> -                           size_t pair_count, size_t cpusetsize,
> -                           unsigned long __user *cpus_user,
> -                           unsigned int flags)
> +static int hwprobe_get_values(struct riscv_hwprobe __user *pairs,
> +                             size_t pair_count, size_t cpusetsize,
> +                             unsigned long __user *cpus_user,
> +                             unsigned int flags)
>  {
>         size_t out;
>         int ret;
> @@ -223,6 +223,91 @@ static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
>         return 0;
>  }
>
> +static int hwprobe_get_cpus(struct riscv_hwprobe __user *pairs,
> +                           size_t pair_count, size_t cpusetsize,
> +                           unsigned long __user *cpus_user,
> +                           unsigned int flags)
> +{
> +       cpumask_t cpus, one_cpu;
> +       bool clear_all = false;
> +       size_t i;
> +       int ret;
> +
> +       if (flags != RISCV_HWPROBE_WHICH_CPUS)
> +               return -EINVAL;

We'll have to be careful if we add another flag to deal with how it
behaves here too. I think the choice you made here is correct as it's
the most defensive. It's (usually) easy to change a failure to a
non-failure, but an ABI compatibility issue to change a
weird-but-nonfailing behavior to a different behavior. This might be
worth a comment, but I also tend to love comments more than most, so
up to you.

> +
> +       if (!cpusetsize || !cpus_user)
> +               return -EINVAL;
> +
> +       if (cpusetsize > cpumask_size())
> +               cpusetsize = cpumask_size();
> +
> +       ret = copy_from_user(&cpus, cpus_user, cpusetsize);
> +       if (ret)
> +               return -EFAULT;
> +
> +       cpumask_and(&cpus, &cpus, cpu_online_mask);
> +       if (cpumask_empty(&cpus))
> +               cpumask_copy(&cpus, cpu_online_mask);

I worry this is accident prone. If the caller asks for some set of
CPUs that don't happen to be online right now, they get an answer for
a completely different set of CPUs. I'm all for having a shorthand for
"all online CPUs", but I think it should be more explicit. If you
moved the cpumask_and() below the if(cpumask_empty()), then IMO it
would be a clean shorthand: pass in an empty set to get all online
CPUs.

> +
> +       cpumask_clear(&one_cpu);
> +
> +       for (i = 0; i < pair_count; i++) {
> +               struct riscv_hwprobe pair, tmp;
> +               int cpu;
> +
> +               ret = copy_from_user(&pair, &pairs[i], sizeof(pair));
> +               if (ret)
> +                       return -EFAULT;
> +
> +               if (!riscv_hwprobe_key_is_valid(pair.key)) {
> +                       clear_all = true;
> +                       pair = (struct riscv_hwprobe){ .key = -1, };
> +                       ret = copy_to_user(&pairs[i], &pair, sizeof(pair));
> +                       if (ret)
> +                               return -EFAULT;
> +               }
> +
> +               if (clear_all)
> +                       continue;
> +
> +               tmp = (struct riscv_hwprobe){ .key = pair.key, };
> +
> +               for_each_cpu(cpu, &cpus) {
> +                       cpumask_set_cpu(cpu, &one_cpu);
> +
> +                       hwprobe_one_pair(&tmp, &one_cpu);
> +
> +                       if (!riscv_hwprobe_pair_cmp(&tmp, &pair))
> +                               cpumask_clear_cpu(cpu, &cpus);
> +
> +                       cpumask_clear_cpu(cpu, &one_cpu);
> +               }
> +       }
> +
> +       if (clear_all)
> +               cpumask_clear(&cpus);
> +
> +       ret = copy_to_user(cpus_user, &cpus, cpusetsize);
> +       if (ret)
> +               return -EFAULT;
> +
> +       return 0;
> +}
> +
> +static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
> +                           size_t pair_count, size_t cpusetsize,
> +                           unsigned long __user *cpus_user,
> +                           unsigned int flags)
> +{
> +       if (flags & RISCV_HWPROBE_WHICH_CPUS)
> +               return hwprobe_get_cpus(pairs, pair_count, cpusetsize,
> +                                       cpus_user, flags);
> +
> +       return hwprobe_get_values(pairs, pair_count, cpusetsize,
> +                                 cpus_user, flags);
> +}
> +
>  #ifdef CONFIG_MMU
>
>  static int __init init_hwprobe_vdso_data(void)
> diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c
> index 026b7645c5ab..e6c324d64544 100644
> --- a/arch/riscv/kernel/vdso/hwprobe.c
> +++ b/arch/riscv/kernel/vdso/hwprobe.c
> @@ -3,6 +3,7 @@
>   * Copyright 2023 Rivos, Inc
>   */
>
> +#include <linux/string.h>
>  #include <linux/types.h>
>  #include <vdso/datapage.h>
>  #include <vdso/helpers.h>
> @@ -11,14 +12,9 @@ extern int riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
>                          size_t cpusetsize, unsigned long *cpus,
>                          unsigned int flags);
>
> -/* Add a prototype to avoid -Wmissing-prototypes warning. */
> -int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
> -                        size_t cpusetsize, unsigned long *cpus,
> -                        unsigned int flags);
> -
> -int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
> -                        size_t cpusetsize, unsigned long *cpus,
> -                        unsigned int flags)
> +static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count,
> +                                size_t cpusetsize, unsigned long *cpus,
> +                                unsigned int flags)
>  {
>         const struct vdso_data *vd = __arch_get_vdso_data();
>         const struct arch_vdso_data *avd = &vd->arch_data;
> @@ -50,3 +46,59 @@ int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
>
>         return 0;
>  }
> +
> +static int riscv_vdso_get_cpus(struct riscv_hwprobe *pairs, size_t pair_count,
> +                              size_t cpusetsize, unsigned long *cpus,
> +                              unsigned int flags)
> +{
> +       const struct vdso_data *vd = __arch_get_vdso_data();
> +       const struct arch_vdso_data *avd = &vd->arch_data;
> +       struct riscv_hwprobe *p = pairs;
> +       struct riscv_hwprobe *end = pairs + pair_count;
> +       bool clear_all = false;
> +
> +       if (!cpusetsize || !cpus)
> +               return -EINVAL;
> +
> +       if (flags != RISCV_HWPROBE_WHICH_CPUS || !avd->homogeneous_cpus)
> +               return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags);
> +

Hm, I realize now that doing the shorthand of "empty set == all online
cpus" leaves the VDSO in a slight lurch, as we now have to figure out
how to come up with the cpu_online_mask in usermode. It'd be a bummer
if the shorthand always bounced to the system call, as I think this
will be the most common case. Is there a way we could stash the
cpu_online_mask in the vDSO data without it going stale? Any other
ideas?

> +       while (p < end) {
> +               if (riscv_hwprobe_key_is_valid(p->key)) {
> +                       struct riscv_hwprobe t = {
> +                               .key = p->key,
> +                               .value = avd->all_cpu_hwprobe_values[p->key],
> +                       };
> +
> +                       if (!riscv_hwprobe_pair_cmp(&t, p))
> +                               clear_all = true;
> +               } else {
> +                       clear_all = true;
> +                       p->key = -1;
> +                       p->value = 0;
> +               }
> +               p++;
> +       }
> +
> +       if (clear_all)
> +               memset(cpus, 0, cpusetsize);
> +
> +       return 0;
> +}
> +
> +/* Add a prototype to avoid -Wmissing-prototypes warning. */
> +int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
> +                        size_t cpusetsize, unsigned long *cpus,
> +                        unsigned int flags);
> +
> +int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
> +                        size_t cpusetsize, unsigned long *cpus,
> +                        unsigned int flags)
> +{
> +       if (flags & RISCV_HWPROBE_WHICH_CPUS)
> +               return riscv_vdso_get_cpus(pairs, pair_count, cpusetsize,
> +                                          cpus, flags);
> +
> +       return riscv_vdso_get_values(pairs, pair_count, cpusetsize,
> +                                    cpus, flags);
> +}
> --
> 2.41.0
>
Andrew Jones Oct. 13, 2023, 7:20 a.m. UTC | #2
On Thu, Oct 12, 2023 at 10:40:21AM -0700, Evan Green wrote:
> On Wed, Oct 11, 2023 at 6:56 AM Andrew Jones <ajones@ventanamicro.com> wrote:
> >
> > Introduce the first flag for the hwprobe syscall. The flag basically
> > reverses its behavior, i.e. instead of populating the values of keys
> > for a given set of cpus, the set of cpus after the call is the result
> > of finding a set which supports the values of the keys. In order to
> > do this, we implement a pair compare function which takes the type of
> > value (a single value vs. a bitmask of booleans) into consideration.
> > We also implement vdso support for the new flag.
> >
> > Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> > ---
> >  Documentation/riscv/hwprobe.rst       | 16 ++++-
> >  arch/riscv/include/asm/hwprobe.h      | 24 +++++++
> >  arch/riscv/include/uapi/asm/hwprobe.h |  3 +
> >  arch/riscv/kernel/sys_hwprobe.c       | 93 +++++++++++++++++++++++++--
> >  arch/riscv/kernel/vdso/hwprobe.c      | 68 +++++++++++++++++---
> >  5 files changed, 190 insertions(+), 14 deletions(-)
> >
> > diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst
> > index c57437e40ffb..576aa03f56bb 100644
> > --- a/Documentation/riscv/hwprobe.rst
> > +++ b/Documentation/riscv/hwprobe.rst
> > @@ -25,8 +25,20 @@ arch, impl), the returned value will only be valid if all CPUs in the given set
> >  have the same value. Otherwise -1 will be returned. For boolean-like keys, the
> >  value returned will be a logical AND of the values for the specified CPUs.
> >  Usermode can supply NULL for ``cpus`` and 0 for ``cpusetsize`` as a shortcut for
> > -all online CPUs. There are currently no flags, this value must be zero for
> > -future compatibility.
> > +all online CPUs. The currently supported flags are:
> > +
> > +* :c:macro:`RISCV_HWPROBE_WHICH_CPUS`: This flag basically reverses the behavior
> > +  of sys_riscv_hwprobe().  Instead of populating the values of keys for a given
> > +  set of CPUs, the set of CPUs is initially all unset and the values of each key
> 
> This isn't quite right anymore, I reckon. The cpuset passed in is used
> as an initial set, and this function removes CPUs from the set that
> have differing values from those given in the array of pairs. If an
> empty cpuset is passed in, then the initial set used is all online
> cpus.

Indeed. I'll add more text describing the "which cpus of this cpuset"
behavior for v2.

> 
> > +  are given.  Upon return, the CPUs which all match each of the given key-value
> > +  pairs are set in ``cpus``.  How matching is done depends on the key type.  For
> > +  value-like keys, matching means to be the exact same as the value.  For
> > +  boolean-like keys, matching means the result of a logical AND of the pair's
> > +  value with the CPU's value is exactly the same as the pair's value.  ``cpus``
> > +  may also initially have set bits, in which case the bits of any CPUs which do
> > +  not match the pairs will be cleared, but no other bits will be set.
> > +
> > +All other flags are reserved for future compatibility and must be zero.
> >
> >  On success 0 is returned, on failure a negative error code is returned.
> >
> > diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
> > index 7cad513538d8..a68764149e51 100644
> > --- a/arch/riscv/include/asm/hwprobe.h
> > +++ b/arch/riscv/include/asm/hwprobe.h
> > @@ -15,4 +15,28 @@ static inline bool riscv_hwprobe_key_is_valid(__s64 key)
> >         return key >= 0 && key <= RISCV_HWPROBE_MAX_KEY;
> >  }
> >
> > +static inline bool hwprobe_key_is_bitmask(__s64 key)
> > +{
> > +       switch (key) {
> > +       case RISCV_HWPROBE_KEY_BASE_BEHAVIOR:
> > +       case RISCV_HWPROBE_KEY_IMA_EXT_0:
> > +       case RISCV_HWPROBE_KEY_CPUPERF_0:
> > +               return true;
> > +       }
> > +
> > +       return false;
> > +}
> > +
> > +static inline bool riscv_hwprobe_pair_cmp(struct riscv_hwprobe *pair,
> > +                                         struct riscv_hwprobe *other_pair)
> > +{
> > +       if (pair->key != other_pair->key)
> > +               return false;
> > +
> > +       if (hwprobe_key_is_bitmask(pair->key))
> > +               return (pair->value & other_pair->value) == other_pair->value;
> > +
> > +       return pair->value == other_pair->value;
> > +}
> > +
> >  #endif
> > diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
> > index 006bfb48343d..1d4134befc48 100644
> > --- a/arch/riscv/include/uapi/asm/hwprobe.h
> > +++ b/arch/riscv/include/uapi/asm/hwprobe.h
> > @@ -38,4 +38,7 @@ struct riscv_hwprobe {
> >  #define                RISCV_HWPROBE_MISALIGNED_MASK           (7 << 0)
> >  /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
> >
> > +/* Flags */
> > +#define RISCV_HWPROBE_WHICH_CPUS       (1 << 0)
> > +
> >  #endif
> > diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
> > index 69ad5f793374..de294538ca25 100644
> > --- a/arch/riscv/kernel/sys_hwprobe.c
> > +++ b/arch/riscv/kernel/sys_hwprobe.c
> > @@ -166,10 +166,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
> >         }
> >  }
> >
> > -static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
> > -                           size_t pair_count, size_t cpusetsize,
> > -                           unsigned long __user *cpus_user,
> > -                           unsigned int flags)
> > +static int hwprobe_get_values(struct riscv_hwprobe __user *pairs,
> > +                             size_t pair_count, size_t cpusetsize,
> > +                             unsigned long __user *cpus_user,
> > +                             unsigned int flags)
> >  {
> >         size_t out;
> >         int ret;
> > @@ -223,6 +223,91 @@ static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
> >         return 0;
> >  }
> >
> > +static int hwprobe_get_cpus(struct riscv_hwprobe __user *pairs,
> > +                           size_t pair_count, size_t cpusetsize,
> > +                           unsigned long __user *cpus_user,
> > +                           unsigned int flags)
> > +{
> > +       cpumask_t cpus, one_cpu;
> > +       bool clear_all = false;
> > +       size_t i;
> > +       int ret;
> > +
> > +       if (flags != RISCV_HWPROBE_WHICH_CPUS)
> > +               return -EINVAL;
> 
> We'll have to be careful if we add another flag to deal with how it
> behaves here too. I think the choice you made here is correct as it's
> the most defensive. It's (usually) easy to change a failure to a
> non-failure, but an ABI compatibility issue to change a
> weird-but-nonfailing behavior to a different behavior. This might be
> worth a comment, but I also tend to love comments more than most, so
> up to you.

I'll probably leave it without a comment. My thinking is that if we
add another flag which applies to which-cpus, then the selftests,
which should get updated with new tests for the new flag, should
quickly point this out. I prefer avoiding comments when possible,
since neither the compiler nor test cases test them, allowing them
to easily go stale.

> 
> > +
> > +       if (!cpusetsize || !cpus_user)
> > +               return -EINVAL;
> > +
> > +       if (cpusetsize > cpumask_size())
> > +               cpusetsize = cpumask_size();
> > +
> > +       ret = copy_from_user(&cpus, cpus_user, cpusetsize);
> > +       if (ret)
> > +               return -EFAULT;
> > +
> > +       cpumask_and(&cpus, &cpus, cpu_online_mask);
> > +       if (cpumask_empty(&cpus))
> > +               cpumask_copy(&cpus, cpu_online_mask);
> 
> I worry this is accident prone. If the caller asks for some set of
> CPUs that don't happen to be online right now, they get an answer for
> a completely different set of CPUs. I'm all for having a shorthand for
> "all online CPUs", but I think it should be more explicit. If you
> moved the cpumask_and() below the if(cpumask_empty()), then IMO it
> would be a clean shorthand: pass in an empty set to get all online
> CPUs.

Good catch. The way it is now is indeed a bug and your suggestion is
the fix. Will do for v2.

> 
> > +
> > +       cpumask_clear(&one_cpu);
> > +
> > +       for (i = 0; i < pair_count; i++) {
> > +               struct riscv_hwprobe pair, tmp;
> > +               int cpu;
> > +
> > +               ret = copy_from_user(&pair, &pairs[i], sizeof(pair));
> > +               if (ret)
> > +                       return -EFAULT;
> > +
> > +               if (!riscv_hwprobe_key_is_valid(pair.key)) {
> > +                       clear_all = true;
> > +                       pair = (struct riscv_hwprobe){ .key = -1, };
> > +                       ret = copy_to_user(&pairs[i], &pair, sizeof(pair));
> > +                       if (ret)
> > +                               return -EFAULT;
> > +               }
> > +
> > +               if (clear_all)
> > +                       continue;
> > +
> > +               tmp = (struct riscv_hwprobe){ .key = pair.key, };
> > +
> > +               for_each_cpu(cpu, &cpus) {
> > +                       cpumask_set_cpu(cpu, &one_cpu);
> > +
> > +                       hwprobe_one_pair(&tmp, &one_cpu);
> > +
> > +                       if (!riscv_hwprobe_pair_cmp(&tmp, &pair))
> > +                               cpumask_clear_cpu(cpu, &cpus);
> > +
> > +                       cpumask_clear_cpu(cpu, &one_cpu);
> > +               }
> > +       }
> > +
> > +       if (clear_all)
> > +               cpumask_clear(&cpus);
> > +
> > +       ret = copy_to_user(cpus_user, &cpus, cpusetsize);
> > +       if (ret)
> > +               return -EFAULT;
> > +
> > +       return 0;
> > +}
> > +
> > +static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
> > +                           size_t pair_count, size_t cpusetsize,
> > +                           unsigned long __user *cpus_user,
> > +                           unsigned int flags)
> > +{
> > +       if (flags & RISCV_HWPROBE_WHICH_CPUS)
> > +               return hwprobe_get_cpus(pairs, pair_count, cpusetsize,
> > +                                       cpus_user, flags);
> > +
> > +       return hwprobe_get_values(pairs, pair_count, cpusetsize,
> > +                                 cpus_user, flags);
> > +}
> > +
> >  #ifdef CONFIG_MMU
> >
> >  static int __init init_hwprobe_vdso_data(void)
> > diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c
> > index 026b7645c5ab..e6c324d64544 100644
> > --- a/arch/riscv/kernel/vdso/hwprobe.c
> > +++ b/arch/riscv/kernel/vdso/hwprobe.c
> > @@ -3,6 +3,7 @@
> >   * Copyright 2023 Rivos, Inc
> >   */
> >
> > +#include <linux/string.h>
> >  #include <linux/types.h>
> >  #include <vdso/datapage.h>
> >  #include <vdso/helpers.h>
> > @@ -11,14 +12,9 @@ extern int riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
> >                          size_t cpusetsize, unsigned long *cpus,
> >                          unsigned int flags);
> >
> > -/* Add a prototype to avoid -Wmissing-prototypes warning. */
> > -int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
> > -                        size_t cpusetsize, unsigned long *cpus,
> > -                        unsigned int flags);
> > -
> > -int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
> > -                        size_t cpusetsize, unsigned long *cpus,
> > -                        unsigned int flags)
> > +static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count,
> > +                                size_t cpusetsize, unsigned long *cpus,
> > +                                unsigned int flags)
> >  {
> >         const struct vdso_data *vd = __arch_get_vdso_data();
> >         const struct arch_vdso_data *avd = &vd->arch_data;
> > @@ -50,3 +46,59 @@ int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
> >
> >         return 0;
> >  }
> > +
> > +static int riscv_vdso_get_cpus(struct riscv_hwprobe *pairs, size_t pair_count,
> > +                              size_t cpusetsize, unsigned long *cpus,
> > +                              unsigned int flags)
> > +{
> > +       const struct vdso_data *vd = __arch_get_vdso_data();
> > +       const struct arch_vdso_data *avd = &vd->arch_data;
> > +       struct riscv_hwprobe *p = pairs;
> > +       struct riscv_hwprobe *end = pairs + pair_count;
> > +       bool clear_all = false;
> > +
> > +       if (!cpusetsize || !cpus)
> > +               return -EINVAL;
> > +
> > +       if (flags != RISCV_HWPROBE_WHICH_CPUS || !avd->homogeneous_cpus)
> > +               return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags);
> > +
> 
> Hm, I realize now that doing the shorthand of "empty set == all online
> cpus" leaves the VDSO in a slight lurch, as we now have to figure out
> how to come up with the cpu_online_mask in usermode. It'd be a bummer
> if the shorthand always bounced to the system call,

Huh, I recall thinking about this while working on it, but now, looking at
the code I wrote, I see that I apparently forgot to address it. I had
intended to bounce to the system call to handle the empty set case, but
I'm missing the check for that!

> as I think this
> will be the most common case. Is there a way we could stash the
> cpu_online_mask in the vDSO data without it going stale? Any other
> ideas?

I don't think we can stash the data and keep it synchronized and, while
the empty set case may be useful for platform description utilities, I
actually suspect applications which want to set the affinity of their
threads based on cpu features will want to start by setting their set
to the result of sched_getaffinity(). This is because it wouldn't be
useful for an application to know that cpu A has the extension they need
if their process has been confined to a set excluding cpu A with cgroups.

I'll fix this lack of empty set check for v2.

Thanks,
drew
Andrew Jones Oct. 19, 2023, 5:16 p.m. UTC | #3
On Wed, Oct 11, 2023 at 03:56:14PM +0200, Andrew Jones wrote:
...
> diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c
> index 026b7645c5ab..e6c324d64544 100644
> --- a/arch/riscv/kernel/vdso/hwprobe.c
> +++ b/arch/riscv/kernel/vdso/hwprobe.c
> @@ -3,6 +3,7 @@
>   * Copyright 2023 Rivos, Inc
>   */
>  
> +#include <linux/string.h>
>  #include <linux/types.h>
>  #include <vdso/datapage.h>
>  #include <vdso/helpers.h>
> @@ -11,14 +12,9 @@ extern int riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
>  			 size_t cpusetsize, unsigned long *cpus,
>  			 unsigned int flags);
>  
> -/* Add a prototype to avoid -Wmissing-prototypes warning. */
> -int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
> -			 size_t cpusetsize, unsigned long *cpus,
> -			 unsigned int flags);
> -
> -int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
> -			 size_t cpusetsize, unsigned long *cpus,
> -			 unsigned int flags)
> +static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count,
> +				 size_t cpusetsize, unsigned long *cpus,
> +				 unsigned int flags)
>  {
>  	const struct vdso_data *vd = __arch_get_vdso_data();
>  	const struct arch_vdso_data *avd = &vd->arch_data;
> @@ -50,3 +46,59 @@ int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
>  
>  	return 0;
>  }
> +
> +static int riscv_vdso_get_cpus(struct riscv_hwprobe *pairs, size_t pair_count,
> +			       size_t cpusetsize, unsigned long *cpus,
> +			       unsigned int flags)
> +{
> +	const struct vdso_data *vd = __arch_get_vdso_data();
> +	const struct arch_vdso_data *avd = &vd->arch_data;
> +	struct riscv_hwprobe *p = pairs;
> +	struct riscv_hwprobe *end = pairs + pair_count;
> +	bool clear_all = false;
> +
> +	if (!cpusetsize || !cpus)
> +		return -EINVAL;
> +
> +	if (flags != RISCV_HWPROBE_WHICH_CPUS || !avd->homogeneous_cpus)
> +		return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags);
> +
> +	while (p < end) {
> +		if (riscv_hwprobe_key_is_valid(p->key)) {
> +			struct riscv_hwprobe t = {
> +				.key = p->key,
> +				.value = avd->all_cpu_hwprobe_values[p->key],
> +			};
> +
> +			if (!riscv_hwprobe_pair_cmp(&t, p))
> +				clear_all = true;
> +		} else {
> +			clear_all = true;
> +			p->key = -1;
> +			p->value = 0;
> +		}
> +		p++;
> +	}
> +
> +	if (clear_all)
> +		memset(cpus, 0, cpusetsize);

This memset will go away in v2. It wasn't very smart of me to put it there
in the first place as there's no memset available to vdso code, which
means it would segfault when attempting to use it. My initial testing
failed to find this, because I had forgotten that avd->homogeneous_cpus
would be false on a default QEMU machine since it doesn't set mvendorid to
anything. I actually found this problem when trying to add a memcmp which
wasn't skipped with !avd->homogeneous_cpus, and that promptly segfaulted.
(I won't be adding that memcmp either :-)

Thanks,
drew

> +
> +	return 0;
> +}
> +
> +/* Add a prototype to avoid -Wmissing-prototypes warning. */
> +int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
> +			 size_t cpusetsize, unsigned long *cpus,
> +			 unsigned int flags);
> +
> +int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
> +			 size_t cpusetsize, unsigned long *cpus,
> +			 unsigned int flags)
> +{
> +	if (flags & RISCV_HWPROBE_WHICH_CPUS)
> +		return riscv_vdso_get_cpus(pairs, pair_count, cpusetsize,
> +					   cpus, flags);
> +
> +	return riscv_vdso_get_values(pairs, pair_count, cpusetsize,
> +				     cpus, flags);
> +}
> -- 
> 2.41.0
>
diff mbox series

Patch

diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst
index c57437e40ffb..576aa03f56bb 100644
--- a/Documentation/riscv/hwprobe.rst
+++ b/Documentation/riscv/hwprobe.rst
@@ -25,8 +25,20 @@  arch, impl), the returned value will only be valid if all CPUs in the given set
 have the same value. Otherwise -1 will be returned. For boolean-like keys, the
 value returned will be a logical AND of the values for the specified CPUs.
 Usermode can supply NULL for ``cpus`` and 0 for ``cpusetsize`` as a shortcut for
-all online CPUs. There are currently no flags, this value must be zero for
-future compatibility.
+all online CPUs. The currently supported flags are:
+
+* :c:macro:`RISCV_HWPROBE_WHICH_CPUS`: This flag basically reverses the behavior
+  of sys_riscv_hwprobe().  Instead of populating the values of keys for a given
+  set of CPUs, the set of CPUs is initially all unset and the values of each key
+  are given.  Upon return, the CPUs which all match each of the given key-value
+  pairs are set in ``cpus``.  How matching is done depends on the key type.  For
+  value-like keys, matching means to be the exact same as the value.  For
+  boolean-like keys, matching means the result of a logical AND of the pair's
+  value with the CPU's value is exactly the same as the pair's value.  ``cpus``
+  may also initially have set bits, in which case the bits of any CPUs which do
+  not match the pairs will be cleared, but no other bits will be set.
+
+All other flags are reserved for future compatibility and must be zero.
 
 On success 0 is returned, on failure a negative error code is returned.
 
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 7cad513538d8..a68764149e51 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -15,4 +15,28 @@  static inline bool riscv_hwprobe_key_is_valid(__s64 key)
 	return key >= 0 && key <= RISCV_HWPROBE_MAX_KEY;
 }
 
+static inline bool hwprobe_key_is_bitmask(__s64 key)
+{
+	switch (key) {
+	case RISCV_HWPROBE_KEY_BASE_BEHAVIOR:
+	case RISCV_HWPROBE_KEY_IMA_EXT_0:
+	case RISCV_HWPROBE_KEY_CPUPERF_0:
+		return true;
+	}
+
+	return false;
+}
+
+static inline bool riscv_hwprobe_pair_cmp(struct riscv_hwprobe *pair,
+					  struct riscv_hwprobe *other_pair)
+{
+	if (pair->key != other_pair->key)
+		return false;
+
+	if (hwprobe_key_is_bitmask(pair->key))
+		return (pair->value & other_pair->value) == other_pair->value;
+
+	return pair->value == other_pair->value;
+}
+
 #endif
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index 006bfb48343d..1d4134befc48 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -38,4 +38,7 @@  struct riscv_hwprobe {
 #define		RISCV_HWPROBE_MISALIGNED_MASK		(7 << 0)
 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
 
+/* Flags */
+#define RISCV_HWPROBE_WHICH_CPUS	(1 << 0)
+
 #endif
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index 69ad5f793374..de294538ca25 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -166,10 +166,10 @@  static void hwprobe_one_pair(struct riscv_hwprobe *pair,
 	}
 }
 
-static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
-			    size_t pair_count, size_t cpusetsize,
-			    unsigned long __user *cpus_user,
-			    unsigned int flags)
+static int hwprobe_get_values(struct riscv_hwprobe __user *pairs,
+			      size_t pair_count, size_t cpusetsize,
+			      unsigned long __user *cpus_user,
+			      unsigned int flags)
 {
 	size_t out;
 	int ret;
@@ -223,6 +223,91 @@  static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
 	return 0;
 }
 
+static int hwprobe_get_cpus(struct riscv_hwprobe __user *pairs,
+			    size_t pair_count, size_t cpusetsize,
+			    unsigned long __user *cpus_user,
+			    unsigned int flags)
+{
+	cpumask_t cpus, one_cpu;
+	bool clear_all = false;
+	size_t i;
+	int ret;
+
+	if (flags != RISCV_HWPROBE_WHICH_CPUS)
+		return -EINVAL;
+
+	if (!cpusetsize || !cpus_user)
+		return -EINVAL;
+
+	if (cpusetsize > cpumask_size())
+		cpusetsize = cpumask_size();
+
+	ret = copy_from_user(&cpus, cpus_user, cpusetsize);
+	if (ret)
+		return -EFAULT;
+
+	cpumask_and(&cpus, &cpus, cpu_online_mask);
+	if (cpumask_empty(&cpus))
+		cpumask_copy(&cpus, cpu_online_mask);
+
+	cpumask_clear(&one_cpu);
+
+	for (i = 0; i < pair_count; i++) {
+		struct riscv_hwprobe pair, tmp;
+		int cpu;
+
+		ret = copy_from_user(&pair, &pairs[i], sizeof(pair));
+		if (ret)
+			return -EFAULT;
+
+		if (!riscv_hwprobe_key_is_valid(pair.key)) {
+			clear_all = true;
+			pair = (struct riscv_hwprobe){ .key = -1, };
+			ret = copy_to_user(&pairs[i], &pair, sizeof(pair));
+			if (ret)
+				return -EFAULT;
+		}
+
+		if (clear_all)
+			continue;
+
+		tmp = (struct riscv_hwprobe){ .key = pair.key, };
+
+		for_each_cpu(cpu, &cpus) {
+			cpumask_set_cpu(cpu, &one_cpu);
+
+			hwprobe_one_pair(&tmp, &one_cpu);
+
+			if (!riscv_hwprobe_pair_cmp(&tmp, &pair))
+				cpumask_clear_cpu(cpu, &cpus);
+
+			cpumask_clear_cpu(cpu, &one_cpu);
+		}
+	}
+
+	if (clear_all)
+		cpumask_clear(&cpus);
+
+	ret = copy_to_user(cpus_user, &cpus, cpusetsize);
+	if (ret)
+		return -EFAULT;
+
+	return 0;
+}
+
+static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
+			    size_t pair_count, size_t cpusetsize,
+			    unsigned long __user *cpus_user,
+			    unsigned int flags)
+{
+	if (flags & RISCV_HWPROBE_WHICH_CPUS)
+		return hwprobe_get_cpus(pairs, pair_count, cpusetsize,
+					cpus_user, flags);
+
+	return hwprobe_get_values(pairs, pair_count, cpusetsize,
+				  cpus_user, flags);
+}
+
 #ifdef CONFIG_MMU
 
 static int __init init_hwprobe_vdso_data(void)
diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c
index 026b7645c5ab..e6c324d64544 100644
--- a/arch/riscv/kernel/vdso/hwprobe.c
+++ b/arch/riscv/kernel/vdso/hwprobe.c
@@ -3,6 +3,7 @@ 
  * Copyright 2023 Rivos, Inc
  */
 
+#include <linux/string.h>
 #include <linux/types.h>
 #include <vdso/datapage.h>
 #include <vdso/helpers.h>
@@ -11,14 +12,9 @@  extern int riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
 			 size_t cpusetsize, unsigned long *cpus,
 			 unsigned int flags);
 
-/* Add a prototype to avoid -Wmissing-prototypes warning. */
-int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
-			 size_t cpusetsize, unsigned long *cpus,
-			 unsigned int flags);
-
-int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
-			 size_t cpusetsize, unsigned long *cpus,
-			 unsigned int flags)
+static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count,
+				 size_t cpusetsize, unsigned long *cpus,
+				 unsigned int flags)
 {
 	const struct vdso_data *vd = __arch_get_vdso_data();
 	const struct arch_vdso_data *avd = &vd->arch_data;
@@ -50,3 +46,59 @@  int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
 
 	return 0;
 }
+
+static int riscv_vdso_get_cpus(struct riscv_hwprobe *pairs, size_t pair_count,
+			       size_t cpusetsize, unsigned long *cpus,
+			       unsigned int flags)
+{
+	const struct vdso_data *vd = __arch_get_vdso_data();
+	const struct arch_vdso_data *avd = &vd->arch_data;
+	struct riscv_hwprobe *p = pairs;
+	struct riscv_hwprobe *end = pairs + pair_count;
+	bool clear_all = false;
+
+	if (!cpusetsize || !cpus)
+		return -EINVAL;
+
+	if (flags != RISCV_HWPROBE_WHICH_CPUS || !avd->homogeneous_cpus)
+		return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags);
+
+	while (p < end) {
+		if (riscv_hwprobe_key_is_valid(p->key)) {
+			struct riscv_hwprobe t = {
+				.key = p->key,
+				.value = avd->all_cpu_hwprobe_values[p->key],
+			};
+
+			if (!riscv_hwprobe_pair_cmp(&t, p))
+				clear_all = true;
+		} else {
+			clear_all = true;
+			p->key = -1;
+			p->value = 0;
+		}
+		p++;
+	}
+
+	if (clear_all)
+		memset(cpus, 0, cpusetsize);
+
+	return 0;
+}
+
+/* Add a prototype to avoid -Wmissing-prototypes warning. */
+int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+			 size_t cpusetsize, unsigned long *cpus,
+			 unsigned int flags);
+
+int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+			 size_t cpusetsize, unsigned long *cpus,
+			 unsigned int flags)
+{
+	if (flags & RISCV_HWPROBE_WHICH_CPUS)
+		return riscv_vdso_get_cpus(pairs, pair_count, cpusetsize,
+					   cpus, flags);
+
+	return riscv_vdso_get_values(pairs, pair_count, cpusetsize,
+				     cpus, flags);
+}