lib/vdso: Use __arch_use_vsyscall() to indicate fallback
diff mbox series

Message ID 1571367619-13573-1-git-send-email-chenhc@lemote.com
State New
Headers show
Series
  • lib/vdso: Use __arch_use_vsyscall() to indicate fallback
Related show

Commit Message

Huacai Chen Oct. 18, 2019, 3 a.m. UTC
In do_hres(), we currently use whether the return value of __arch_get_
hw_counter() is negtive to indicate fallback, but this is not a good
idea. Because:

1, ARM64 returns ULL_MAX but MIPS returns 0 when clock_mode is invalid;
2, For a 64bit counter, a "negtive" value of counter is actually valid.

To solve this problem, we use U64_MAX as the only "invalid" return
value -- this is still not fully correct, but has no problem in most
cases. Moreover, all vdso time-related functions should rely on the
return value of __arch_use_vsyscall(), because update_vdso_data() and
update_vsyscall_tz() also rely on it. So, in the core functions of
__cvdso_gettimeofday(), __cvdso_clock_gettime() and __cvdso_clock_
getres(), if __arch_use_vsyscall() returns false, we use the fallback
functions directly.

Fixes: 00b26474c2f1613d7ab894c5 ("lib/vdso: Provide generic VDSO implementation")
Cc: stable@vger.kernel.org
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Paul Burton <paul.burton@mips.com>
Cc: linux-mips@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/arm64/include/asm/vdso/vsyscall.h |  2 +-
 arch/mips/include/asm/vdso/vsyscall.h  |  2 +-
 include/asm-generic/vdso/vsyscall.h    |  2 +-
 lib/vdso/gettimeofday.c                | 12 +++++++++++-
 4 files changed, 14 insertions(+), 4 deletions(-)

Comments

Andy Lutomirski Oct. 18, 2019, 3:15 a.m. UTC | #1
On Thu, Oct 17, 2019 at 7:57 PM Huacai Chen <chenhc@lemote.com> wrote:
>
> In do_hres(), we currently use whether the return value of __arch_get_
> hw_counter() is negtive to indicate fallback, but this is not a good
> idea. Because:
>
> 1, ARM64 returns ULL_MAX but MIPS returns 0 when clock_mode is invalid;
> 2, For a 64bit counter, a "negtive" value of counter is actually valid.

s/negtive/negative

What's the actual bug?  Is it that MIPS is returning 0 but the check
is < 0?  Sounds like MIPS should get fixed.

>
> To solve this problem, we use U64_MAX as the only "invalid" return
> value -- this is still not fully correct, but has no problem in most
> cases.

I'm sort of okay with that, but...

> Moreover, all vdso time-related functions should rely on the
> return value of __arch_use_vsyscall(), because update_vdso_data() and
> update_vsyscall_tz() also rely on it. So, in the core functions of
> __cvdso_gettimeofday(), __cvdso_clock_gettime() and __cvdso_clock_
> getres(), if __arch_use_vsyscall() returns false, we use the fallback
> functions directly.

__arch_use_vsyscall() is not currently intended for use in the vDSO at all.

>
> Fixes: 00b26474c2f1613d7ab894c5 ("lib/vdso: Provide generic VDSO implementation")
> Cc: stable@vger.kernel.org
> Cc: Arnd Bergmann <arnd@arndb.de>
> Cc: Paul Burton <paul.burton@mips.com>
> Cc: linux-mips@vger.kernel.org
> Cc: linux-arm-kernel@lists.infradead.org
> Signed-off-by: Huacai Chen <chenhc@lemote.com>
> ---
>  arch/arm64/include/asm/vdso/vsyscall.h |  2 +-
>  arch/mips/include/asm/vdso/vsyscall.h  |  2 +-
>  include/asm-generic/vdso/vsyscall.h    |  2 +-
>  lib/vdso/gettimeofday.c                | 12 +++++++++++-
>  4 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
> index 0c731bf..406e6de 100644
> --- a/arch/arm64/include/asm/vdso/vsyscall.h
> +++ b/arch/arm64/include/asm/vdso/vsyscall.h
> @@ -31,7 +31,7 @@ int __arm64_get_clock_mode(struct timekeeper *tk)
>  #define __arch_get_clock_mode __arm64_get_clock_mode
>
>  static __always_inline
> -int __arm64_use_vsyscall(struct vdso_data *vdata)
> +int __arm64_use_vsyscall(const struct vdso_data *vdata)
>  {
>         return !vdata[CS_HRES_COARSE].clock_mode;
>  }
> diff --git a/arch/mips/include/asm/vdso/vsyscall.h b/arch/mips/include/asm/vdso/vsyscall.h
> index 1953147..8b10dd7 100644
> --- a/arch/mips/include/asm/vdso/vsyscall.h
> +++ b/arch/mips/include/asm/vdso/vsyscall.h
> @@ -29,7 +29,7 @@ int __mips_get_clock_mode(struct timekeeper *tk)
>  #define __arch_get_clock_mode __mips_get_clock_mode
>
>  static __always_inline
> -int __mips_use_vsyscall(struct vdso_data *vdata)
> +int __mips_use_vsyscall(const struct vdso_data *vdata)
>  {
>         return (vdata[CS_HRES_COARSE].clock_mode != VDSO_CLOCK_NONE);
>  }
> diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h
> index e94b1978..ac05a625 100644
> --- a/include/asm-generic/vdso/vsyscall.h
> +++ b/include/asm-generic/vdso/vsyscall.h
> @@ -26,7 +26,7 @@ static __always_inline int __arch_get_clock_mode(struct timekeeper *tk)
>  #endif /* __arch_get_clock_mode */
>
>  #ifndef __arch_use_vsyscall
> -static __always_inline int __arch_use_vsyscall(struct vdso_data *vdata)
> +static __always_inline int __arch_use_vsyscall(const struct vdso_data *vdata)
>  {
>         return 1;
>  }
> diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
> index e630e7f..4ad062e 100644
> --- a/lib/vdso/gettimeofday.c
> +++ b/lib/vdso/gettimeofday.c
> @@ -9,6 +9,7 @@
>  #include <linux/hrtimer_defs.h>
>  #include <vdso/datapage.h>
>  #include <vdso/helpers.h>
> +#include <vdso/vsyscall.h>
>
>  /*
>   * The generic vDSO implementation requires that gettimeofday.h
> @@ -50,7 +51,7 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk,
>                 cycles = __arch_get_hw_counter(vd->clock_mode);
>                 ns = vdso_ts->nsec;
>                 last = vd->cycle_last;
> -               if (unlikely((s64)cycles < 0))
> +               if (unlikely(cycles == U64_MAX))
>                         return -1;

I would actually prefer:

if (unlikely(cycles < last))

or perhaps:

if (unlikely((s64)(cycles-last) < 0))

which would have the nice side effect of getting rid of the annoying
x86 special case in vdso_calc_delta().  The former version is
compatible with U64_MAX, whereas the latter version would need the
error case to return last-1 or similar.  The benefit of the latter
version is that it can survive wrap-around.

>
>                 ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
> @@ -91,6 +92,9 @@ __cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts)
>         if (unlikely((u32) clock >= MAX_CLOCKS))
>                 return -1;
>
> +       if (!__arch_use_vsyscall(vd))
> +               return -1;
> +

NAK.  I don't think this is helpful or correct.  It doesn't appear to
do anything valid, and it's racy.

>         /*
>          * Convert the clockid to a bitmask and use it to check which
>          * clocks are handled in the VDSO directly.
> @@ -145,6 +149,9 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
>  {
>         const struct vdso_data *vd = __arch_get_vdso_data();
>
> +       if (!__arch_use_vsyscall(vd))
> +               return gettimeofday_fallback(tv, tz);
> +

Ditto.

>         if (likely(tv != NULL)) {
>                 struct __kernel_timespec ts;
>
> @@ -189,6 +196,9 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
>         if (unlikely((u32) clock >= MAX_CLOCKS))
>                 return -1;
>
> +       if (!__arch_use_vsyscall(vd))
> +               return -1;
> +

Ditto.
Vincenzo Frascino Oct. 18, 2019, 8:32 a.m. UTC | #2
Hi Andy and Hucan,

On 10/18/19 4:15 AM, Andy Lutomirski wrote:
> On Thu, Oct 17, 2019 at 7:57 PM Huacai Chen <chenhc@lemote.com> wrote:
>>
>> In do_hres(), we currently use whether the return value of __arch_get_
>> hw_counter() is negtive to indicate fallback, but this is not a good
>> idea. Because:
>>
>> 1, ARM64 returns ULL_MAX but MIPS returns 0 when clock_mode is invalid;
>> 2, For a 64bit counter, a "negtive" value of counter is actually valid.
> 
> s/negtive/negative
> 
> What's the actual bug?  Is it that MIPS is returning 0 but the check
> is < 0?  Sounds like MIPS should get fixed.
> 

I submitted a patch for this yesterday to the MIPS maintainers [1]. The MIPS32
r1 implementation had a bug when VDSO_CLOCK_NONE was set.

The issue has been reported by Maxime Bizon who tested the fix as well.

[1] https://patchwork.kernel.org/patch/11193391/
Huacai Chen Oct. 19, 2019, 2:06 a.m. UTC | #3
Hi, Andy,

On Fri, Oct 18, 2019 at 11:15 AM Andy Lutomirski <luto@kernel.org> wrote:
>
> On Thu, Oct 17, 2019 at 7:57 PM Huacai Chen <chenhc@lemote.com> wrote:
> >
> > In do_hres(), we currently use whether the return value of __arch_get_
> > hw_counter() is negtive to indicate fallback, but this is not a good
> > idea. Because:
> >
> > 1, ARM64 returns ULL_MAX but MIPS returns 0 when clock_mode is invalid;
> > 2, For a 64bit counter, a "negtive" value of counter is actually valid.
>
> s/negtive/negative
>
> What's the actual bug?  Is it that MIPS is returning 0 but the check
> is < 0?  Sounds like MIPS should get fixed.
My original bug is what Vincenzo said, MIPS has a boot failure if no
valid clock_mode, and surely MIPS need to fix. However, when I try to
fix it, I found that clock_getres() has another problem, because
__cvdso_clock_getres_common() get vd[CS_HRES_COARSE].hrtimer_res, but
hrtimer_res is set in update_vdso_data() which relies on
__arch_use_vsyscall().

>
> >
> > To solve this problem, we use U64_MAX as the only "invalid" return
> > value -- this is still not fully correct, but has no problem in most
> > cases.
>
> I'm sort of okay with that, but...
>
> > Moreover, all vdso time-related functions should rely on the
> > return value of __arch_use_vsyscall(), because update_vdso_data() and
> > update_vsyscall_tz() also rely on it. So, in the core functions of
> > __cvdso_gettimeofday(), __cvdso_clock_gettime() and __cvdso_clock_
> > getres(), if __arch_use_vsyscall() returns false, we use the fallback
> > functions directly.
>
> __arch_use_vsyscall() is not currently intended for use in the vDSO at all.
>
> >
> > Fixes: 00b26474c2f1613d7ab894c5 ("lib/vdso: Provide generic VDSO implementation")
> > Cc: stable@vger.kernel.org
> > Cc: Arnd Bergmann <arnd@arndb.de>
> > Cc: Paul Burton <paul.burton@mips.com>
> > Cc: linux-mips@vger.kernel.org
> > Cc: linux-arm-kernel@lists.infradead.org
> > Signed-off-by: Huacai Chen <chenhc@lemote.com>
> > ---
> >  arch/arm64/include/asm/vdso/vsyscall.h |  2 +-
> >  arch/mips/include/asm/vdso/vsyscall.h  |  2 +-
> >  include/asm-generic/vdso/vsyscall.h    |  2 +-
> >  lib/vdso/gettimeofday.c                | 12 +++++++++++-
> >  4 files changed, 14 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
> > index 0c731bf..406e6de 100644
> > --- a/arch/arm64/include/asm/vdso/vsyscall.h
> > +++ b/arch/arm64/include/asm/vdso/vsyscall.h
> > @@ -31,7 +31,7 @@ int __arm64_get_clock_mode(struct timekeeper *tk)
> >  #define __arch_get_clock_mode __arm64_get_clock_mode
> >
> >  static __always_inline
> > -int __arm64_use_vsyscall(struct vdso_data *vdata)
> > +int __arm64_use_vsyscall(const struct vdso_data *vdata)
> >  {
> >         return !vdata[CS_HRES_COARSE].clock_mode;
> >  }
> > diff --git a/arch/mips/include/asm/vdso/vsyscall.h b/arch/mips/include/asm/vdso/vsyscall.h
> > index 1953147..8b10dd7 100644
> > --- a/arch/mips/include/asm/vdso/vsyscall.h
> > +++ b/arch/mips/include/asm/vdso/vsyscall.h
> > @@ -29,7 +29,7 @@ int __mips_get_clock_mode(struct timekeeper *tk)
> >  #define __arch_get_clock_mode __mips_get_clock_mode
> >
> >  static __always_inline
> > -int __mips_use_vsyscall(struct vdso_data *vdata)
> > +int __mips_use_vsyscall(const struct vdso_data *vdata)
> >  {
> >         return (vdata[CS_HRES_COARSE].clock_mode != VDSO_CLOCK_NONE);
> >  }
> > diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h
> > index e94b1978..ac05a625 100644
> > --- a/include/asm-generic/vdso/vsyscall.h
> > +++ b/include/asm-generic/vdso/vsyscall.h
> > @@ -26,7 +26,7 @@ static __always_inline int __arch_get_clock_mode(struct timekeeper *tk)
> >  #endif /* __arch_get_clock_mode */
> >
> >  #ifndef __arch_use_vsyscall
> > -static __always_inline int __arch_use_vsyscall(struct vdso_data *vdata)
> > +static __always_inline int __arch_use_vsyscall(const struct vdso_data *vdata)
> >  {
> >         return 1;
> >  }
> > diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
> > index e630e7f..4ad062e 100644
> > --- a/lib/vdso/gettimeofday.c
> > +++ b/lib/vdso/gettimeofday.c
> > @@ -9,6 +9,7 @@
> >  #include <linux/hrtimer_defs.h>
> >  #include <vdso/datapage.h>
> >  #include <vdso/helpers.h>
> > +#include <vdso/vsyscall.h>
> >
> >  /*
> >   * The generic vDSO implementation requires that gettimeofday.h
> > @@ -50,7 +51,7 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk,
> >                 cycles = __arch_get_hw_counter(vd->clock_mode);
> >                 ns = vdso_ts->nsec;
> >                 last = vd->cycle_last;
> > -               if (unlikely((s64)cycles < 0))
> > +               if (unlikely(cycles == U64_MAX))
> >                         return -1;
>
> I would actually prefer:
>
> if (unlikely(cycles < last))
>
> or perhaps:
>
> if (unlikely((s64)(cycles-last) < 0))
>
> which would have the nice side effect of getting rid of the annoying
> x86 special case in vdso_calc_delta().  The former version is
> compatible with U64_MAX, whereas the latter version would need the
> error case to return last-1 or similar.  The benefit of the latter
> version is that it can survive wrap-around.
When you say if (unlikely(cycles < last)), do you means if
(unlikely(cycles <= last))? If __arch_get_hw_counter() return U64_MAX
every time, I don't think cycles can be less than last.

Huacai
>
> >
> >                 ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
> > @@ -91,6 +92,9 @@ __cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts)
> >         if (unlikely((u32) clock >= MAX_CLOCKS))
> >                 return -1;
> >
> > +       if (!__arch_use_vsyscall(vd))
> > +               return -1;
> > +
>
> NAK.  I don't think this is helpful or correct.  It doesn't appear to
> do anything valid, and it's racy.
>
> >         /*
> >          * Convert the clockid to a bitmask and use it to check which
> >          * clocks are handled in the VDSO directly.
> > @@ -145,6 +149,9 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
> >  {
> >         const struct vdso_data *vd = __arch_get_vdso_data();
> >
> > +       if (!__arch_use_vsyscall(vd))
> > +               return gettimeofday_fallback(tv, tz);
> > +
>
> Ditto.
>
> >         if (likely(tv != NULL)) {
> >                 struct __kernel_timespec ts;
> >
> > @@ -189,6 +196,9 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
> >         if (unlikely((u32) clock >= MAX_CLOCKS))
> >                 return -1;
> >
> > +       if (!__arch_use_vsyscall(vd))
> > +               return -1;
> > +
>
> Ditto.
Thomas Gleixner Oct. 19, 2019, 10:01 a.m. UTC | #4
On Sat, 19 Oct 2019, Huacai Chen wrote:
> On Fri, Oct 18, 2019 at 11:15 AM Andy Lutomirski <luto@kernel.org> wrote:
> >
> > On Thu, Oct 17, 2019 at 7:57 PM Huacai Chen <chenhc@lemote.com> wrote:
> > >
> > > In do_hres(), we currently use whether the return value of __arch_get_
> > > hw_counter() is negtive to indicate fallback, but this is not a good
> > > idea. Because:
> > >
> > > 1, ARM64 returns ULL_MAX but MIPS returns 0 when clock_mode is invalid;
> > > 2, For a 64bit counter, a "negtive" value of counter is actually valid.
> >
> > s/negtive/negative
> >
> > What's the actual bug?  Is it that MIPS is returning 0 but the check
> > is < 0?  Sounds like MIPS should get fixed.
> My original bug is what Vincenzo said, MIPS has a boot failure if no
> valid clock_mode, and surely MIPS need to fix. However, when I try to
> fix it, I found that clock_getres() has another problem, because
> __cvdso_clock_getres_common() get vd[CS_HRES_COARSE].hrtimer_res, but
> hrtimer_res is set in update_vdso_data() which relies on
> __arch_use_vsyscall().

__arch_use_vsyscall() is a pointless exercise TBH. The VDSO data should be
updated unconditionally so all the trivial interfaces like time() and
getres() just work independently of the functions which depend on the
underlying clocksource.

This functions have a fallback operation already:

Let __arch_get_hw_counter() return U64_MAX and the syscall fallback is
invoked.

__arch_use_vsyscall() should just be removed.

Thanks,

	tglx
Andy Lutomirski Oct. 20, 2019, 10:12 p.m. UTC | #5
On Sat, Oct 19, 2019 at 3:01 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> On Sat, 19 Oct 2019, Huacai Chen wrote:
> > On Fri, Oct 18, 2019 at 11:15 AM Andy Lutomirski <luto@kernel.org> wrote:
> > >
> > > On Thu, Oct 17, 2019 at 7:57 PM Huacai Chen <chenhc@lemote.com> wrote:
> > > >
> > > > In do_hres(), we currently use whether the return value of __arch_get_
> > > > hw_counter() is negtive to indicate fallback, but this is not a good
> > > > idea. Because:
> > > >
> > > > 1, ARM64 returns ULL_MAX but MIPS returns 0 when clock_mode is invalid;
> > > > 2, For a 64bit counter, a "negtive" value of counter is actually valid.
> > >
> > > s/negtive/negative
> > >
> > > What's the actual bug?  Is it that MIPS is returning 0 but the check
> > > is < 0?  Sounds like MIPS should get fixed.
> > My original bug is what Vincenzo said, MIPS has a boot failure if no
> > valid clock_mode, and surely MIPS need to fix. However, when I try to
> > fix it, I found that clock_getres() has another problem, because
> > __cvdso_clock_getres_common() get vd[CS_HRES_COARSE].hrtimer_res, but
> > hrtimer_res is set in update_vdso_data() which relies on
> > __arch_use_vsyscall().
>
> __arch_use_vsyscall() is a pointless exercise TBH. The VDSO data should be
> updated unconditionally so all the trivial interfaces like time() and
> getres() just work independently of the functions which depend on the
> underlying clocksource.
>
> This functions have a fallback operation already:
>
> Let __arch_get_hw_counter() return U64_MAX and the syscall fallback is
> invoked.
>

My thought was that __arch_get_hw_counter() could return last-1 to
indicate failure, which would allow the two checks to be folded into
one check.  Or we could continue to use U64_MAX and rely on the fact
that (s64)U64_MAX < 0, not worry about the cycle counter overflowing,
and letting cycles < last catch it.

(And we should change it to return s64 at some point regardless -- all
the math is signed, so the underlying types should be too IMO.)
Thomas Gleixner Oct. 21, 2019, 9:32 a.m. UTC | #6
On Sun, 20 Oct 2019, Andy Lutomirski wrote:
> On Sat, Oct 19, 2019 at 3:01 AM Thomas Gleixner <tglx@linutronix.de> wrote:
> > __arch_use_vsyscall() is a pointless exercise TBH. The VDSO data should be
> > updated unconditionally so all the trivial interfaces like time() and
> > getres() just work independently of the functions which depend on the
> > underlying clocksource.
> >
> > This functions have a fallback operation already:
> >
> > Let __arch_get_hw_counter() return U64_MAX and the syscall fallback is
> > invoked.
> >
> 
> My thought was that __arch_get_hw_counter() could return last-1 to
> indicate failure, which would allow the two checks to be folded into
> one check.  Or we could continue to use U64_MAX and rely on the fact
> that (s64)U64_MAX < 0, not worry about the cycle counter overflowing,
> and letting cycles < last catch it.

This is not an overflow catch. It's solely to deal with the fact that on
X86 you can observe (cycles < last) on multi socket systems under rare
circumstances. Any other architecture does not have that issue AFAIK.

The wraparound of clocksources with a smaller width than 64bit is handled
by:

    delta = (cycles - last) & mask;

which operates on unsigned values for obvious reasons.

> (And we should change it to return s64 at some point regardless -- all
> the math is signed, so the underlying types should be too IMO.)

See above. delta is guaranteed to be >= 0 and the mult/shift is not signed
either. All the base values which are in the VDSO are unsigned as well.

The weird typecast there:

    if ((s64)cycles < 0)

could as well be

    if (cycles == U64_MAX)

but the typecasted version creates better code.

I tried to fold the two operations (see patch below) and on all machines I
tested on (various generations of Intel and AMD) the result is slower than
what we have now by a couple of cycles, which is a lot for these functions
(i.e. between 3-5%). I'm sure I tried that before and ended up with the
existing code as the fastest variant.

Why? That's subject to speculation :)

Thanks,

	tglx

8<----------
 arch/x86/include/asm/vdso/gettimeofday.h |   39 ++++++-------------------------
 lib/vdso/gettimeofday.c                  |   23 +++---------------
 2 files changed, 13 insertions(+), 49 deletions(-)

--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -235,10 +235,14 @@ static u64 vread_hvclock(void)
 }
 #endif
 
-static inline u64 __arch_get_hw_counter(s32 clock_mode)
+static inline u64 __arch_get_hw_counter(s32 clock_mode, u64 last, u64 mask)
 {
+	/*
+	 * Mask operation is not required as all VDSO clocksources are
+	 * 64bit wide.
+	 */
 	if (clock_mode == VCLOCK_TSC)
-		return (u64)rdtsc_ordered();
+		return (u64)rdtsc_ordered() - last;
 	/*
 	 * For any memory-mapped vclock type, we need to make sure that gcc
 	 * doesn't cleverly hoist a load before the mode check.  Otherwise we
@@ -248,13 +252,13 @@ static inline u64 __arch_get_hw_counter(
 #ifdef CONFIG_PARAVIRT_CLOCK
 	if (clock_mode == VCLOCK_PVCLOCK) {
 		barrier();
-		return vread_pvclock();
+		return vread_pvclock() - last;
 	}
 #endif
 #ifdef CONFIG_HYPERV_TIMER
 	if (clock_mode == VCLOCK_HVCLOCK) {
 		barrier();
-		return vread_hvclock();
+		return vread_hvclock() - last;
 	}
 #endif
 	return U64_MAX;
@@ -265,33 +269,6 @@ static __always_inline const struct vdso
 	return __vdso_data;
 }
 
-/*
- * x86 specific delta calculation.
- *
- * The regular implementation assumes that clocksource reads are globally
- * monotonic. The TSC can be slightly off across sockets which can cause
- * the regular delta calculation (@cycles - @last) to return a huge time
- * jump.
- *
- * Therefore it needs to be verified that @cycles are greater than
- * @last. If not then use @last, which is the base time of the current
- * conversion period.
- *
- * This variant also removes the masking of the subtraction because the
- * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX
- * which would result in a pointless operation. The compiler cannot
- * optimize it away as the mask comes from the vdso data and is not compile
- * time constant.
- */
-static __always_inline
-u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
-{
-	if (cycles > last)
-		return (cycles - last) * mult;
-	return 0;
-}
-#define vdso_calc_delta vdso_calc_delta
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -26,34 +26,21 @@
 #include <asm/vdso/gettimeofday.h>
 #endif /* ENABLE_COMPAT_VDSO */
 
-#ifndef vdso_calc_delta
-/*
- * Default implementation which works for all sane clocksources. That
- * obviously excludes x86/TSC.
- */
-static __always_inline
-u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
-{
-	return ((cycles - last) & mask) * mult;
-}
-#endif
-
 static int do_hres(const struct vdso_data *vd, clockid_t clk,
 		   struct __kernel_timespec *ts)
 {
 	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
-	u64 cycles, last, sec, ns;
+	u64 delta, sec, ns;
 	u32 seq;
 
 	do {
 		seq = vdso_read_begin(vd);
-		cycles = __arch_get_hw_counter(vd->clock_mode);
-		ns = vdso_ts->nsec;
-		last = vd->cycle_last;
-		if (unlikely((s64)cycles < 0))
+		delta = __arch_get_hw_counter(vd->clock_mode, vd->cycle_last,
+					      vd->mask);
+		if (unlikely((s64)delta < 0))
 			return -1;
 
-		ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
+		ns = vdso_ts->nsec + delta * vd->mult;
 		ns >>= vd->shift;
 		sec = vdso_ts->sec;
 	} while (unlikely(vdso_read_retry(vd, seq)));

Patch
diff mbox series

diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
index 0c731bf..406e6de 100644
--- a/arch/arm64/include/asm/vdso/vsyscall.h
+++ b/arch/arm64/include/asm/vdso/vsyscall.h
@@ -31,7 +31,7 @@  int __arm64_get_clock_mode(struct timekeeper *tk)
 #define __arch_get_clock_mode __arm64_get_clock_mode
 
 static __always_inline
-int __arm64_use_vsyscall(struct vdso_data *vdata)
+int __arm64_use_vsyscall(const struct vdso_data *vdata)
 {
 	return !vdata[CS_HRES_COARSE].clock_mode;
 }
diff --git a/arch/mips/include/asm/vdso/vsyscall.h b/arch/mips/include/asm/vdso/vsyscall.h
index 1953147..8b10dd7 100644
--- a/arch/mips/include/asm/vdso/vsyscall.h
+++ b/arch/mips/include/asm/vdso/vsyscall.h
@@ -29,7 +29,7 @@  int __mips_get_clock_mode(struct timekeeper *tk)
 #define __arch_get_clock_mode __mips_get_clock_mode
 
 static __always_inline
-int __mips_use_vsyscall(struct vdso_data *vdata)
+int __mips_use_vsyscall(const struct vdso_data *vdata)
 {
 	return (vdata[CS_HRES_COARSE].clock_mode != VDSO_CLOCK_NONE);
 }
diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h
index e94b1978..ac05a625 100644
--- a/include/asm-generic/vdso/vsyscall.h
+++ b/include/asm-generic/vdso/vsyscall.h
@@ -26,7 +26,7 @@  static __always_inline int __arch_get_clock_mode(struct timekeeper *tk)
 #endif /* __arch_get_clock_mode */
 
 #ifndef __arch_use_vsyscall
-static __always_inline int __arch_use_vsyscall(struct vdso_data *vdata)
+static __always_inline int __arch_use_vsyscall(const struct vdso_data *vdata)
 {
 	return 1;
 }
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index e630e7f..4ad062e 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -9,6 +9,7 @@ 
 #include <linux/hrtimer_defs.h>
 #include <vdso/datapage.h>
 #include <vdso/helpers.h>
+#include <vdso/vsyscall.h>
 
 /*
  * The generic vDSO implementation requires that gettimeofday.h
@@ -50,7 +51,7 @@  static int do_hres(const struct vdso_data *vd, clockid_t clk,
 		cycles = __arch_get_hw_counter(vd->clock_mode);
 		ns = vdso_ts->nsec;
 		last = vd->cycle_last;
-		if (unlikely((s64)cycles < 0))
+		if (unlikely(cycles == U64_MAX))
 			return -1;
 
 		ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
@@ -91,6 +92,9 @@  __cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts)
 	if (unlikely((u32) clock >= MAX_CLOCKS))
 		return -1;
 
+	if (!__arch_use_vsyscall(vd))
+		return -1;
+
 	/*
 	 * Convert the clockid to a bitmask and use it to check which
 	 * clocks are handled in the VDSO directly.
@@ -145,6 +149,9 @@  __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
 {
 	const struct vdso_data *vd = __arch_get_vdso_data();
 
+	if (!__arch_use_vsyscall(vd))
+		return gettimeofday_fallback(tv, tz);
+
 	if (likely(tv != NULL)) {
 		struct __kernel_timespec ts;
 
@@ -189,6 +196,9 @@  int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
 	if (unlikely((u32) clock >= MAX_CLOCKS))
 		return -1;
 
+	if (!__arch_use_vsyscall(vd))
+		return -1;
+
 	hrtimer_res = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res);
 	/*
 	 * Convert the clockid to a bitmask and use it to check which