diff mbox series

[v6,01/19] kernel: Standardize vdso_datapage

Message ID 20190530141531.43462-2-vincenzo.frascino@arm.com (mailing list archive)
State New, archived
Headers show
Series Unify vDSOs across more architectures | expand

Commit Message

Vincenzo Frascino May 30, 2019, 2:15 p.m. UTC
In an effort to unify the common code for managing the vdso library in
between all the architectures that support it, this patch tries to
provide a common format for the vdso datapage.

As a result of this, this patch generalized the data structures in vgtod.h
from x86 private includes to general includes (include/vdso).

Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
---
 include/vdso/datapage.h | 91 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 include/vdso/datapage.h

Comments

Arnd Bergmann May 31, 2019, 8:16 a.m. UTC | #1
On Thu, May 30, 2019 at 4:15 PM Vincenzo Frascino
<vincenzo.frascino@arm.com> wrote:

> + * vdso_data will be accessed by 64 bit and compat code at the same time
> + * so we should be careful before modifying this structure.
> + */
> +struct vdso_data {
> +       u32                     seq;
> +
> +       s32                     clock_mode;
> +       u64                     cycle_last;
> +       u64                     mask;
> +       u32                     mult;
> +       u32                     shift;
> +
> +       struct vdso_timestamp   basetime[VDSO_BASES];
> +
> +       s32                     tz_minuteswest;
> +       s32                     tz_dsttime;
> +       u32                     hrtimer_res;
> +};

The structure contains four padding bytes at the end, which is
something we try to avoid, at least if this ends up being used as
an ABI. Maybe add "u32 __unused" at the end?

     Arnd
Vincenzo Frascino June 4, 2019, 12:05 p.m. UTC | #2
On 31/05/2019 09:16, Arnd Bergmann wrote:
> On Thu, May 30, 2019 at 4:15 PM Vincenzo Frascino
> <vincenzo.frascino@arm.com> wrote:
> 
>> + * vdso_data will be accessed by 64 bit and compat code at the same time
>> + * so we should be careful before modifying this structure.
>> + */
>> +struct vdso_data {
>> +       u32                     seq;
>> +
>> +       s32                     clock_mode;
>> +       u64                     cycle_last;
>> +       u64                     mask;
>> +       u32                     mult;
>> +       u32                     shift;
>> +
>> +       struct vdso_timestamp   basetime[VDSO_BASES];
>> +
>> +       s32                     tz_minuteswest;
>> +       s32                     tz_dsttime;
>> +       u32                     hrtimer_res;
>> +};
> 
> The structure contains four padding bytes at the end, which is
> something we try to avoid, at least if this ends up being used as
> an ABI. Maybe add "u32 __unused" at the end?
> 

Agreed, I will fix this in v7.

>      Arnd
>
Huw Davies June 10, 2019, 9:27 a.m. UTC | #3
On Thu, May 30, 2019 at 03:15:13PM +0100, Vincenzo Frascino wrote:
> --- /dev/null
> +++ b/include/vdso/datapage.h
> @@ -0,0 +1,91 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __VDSO_DATAPAGE_H
> +#define __VDSO_DATAPAGE_H
> +
> +#ifdef __KERNEL__
> +
> +#ifndef __ASSEMBLY__
> +
> +#include <linux/bits.h>
> +#include <linux/time.h>
> +#include <linux/types.h>
> +
> +#define VDSO_BASES	(CLOCK_TAI + 1)
> +#define VDSO_HRES	(BIT(CLOCK_REALTIME)		| \
> +			 BIT(CLOCK_MONOTONIC)		| \
> +			 BIT(CLOCK_BOOTTIME)		| \
> +			 BIT(CLOCK_TAI))
> +#define VDSO_COARSE	(BIT(CLOCK_REALTIME_COARSE)	| \
> +			 BIT(CLOCK_MONOTONIC_COARSE))
> +#define VDSO_RAW	(BIT(CLOCK_MONOTONIC_RAW))
> +
> +#define CS_HRES_COARSE	0
> +#define CS_RAW		1

CS_HRES_COARSE seems like a confusing name choice to me.  What you
really mean is not RAW.

How about CS_ADJ to indicate that its updated by adjtime?
CS_XTIME might be another option.

Huw.
Vincenzo Frascino June 10, 2019, 10:17 a.m. UTC | #4
Hi Huw,

thank you for your review.

On 10/06/2019 10:27, Huw Davies wrote:
> On Thu, May 30, 2019 at 03:15:13PM +0100, Vincenzo Frascino wrote:
>> --- /dev/null
>> +++ b/include/vdso/datapage.h
>> @@ -0,0 +1,91 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef __VDSO_DATAPAGE_H
>> +#define __VDSO_DATAPAGE_H
>> +
>> +#ifdef __KERNEL__
>> +
>> +#ifndef __ASSEMBLY__
>> +
>> +#include <linux/bits.h>
>> +#include <linux/time.h>
>> +#include <linux/types.h>
>> +
>> +#define VDSO_BASES	(CLOCK_TAI + 1)
>> +#define VDSO_HRES	(BIT(CLOCK_REALTIME)		| \
>> +			 BIT(CLOCK_MONOTONIC)		| \
>> +			 BIT(CLOCK_BOOTTIME)		| \
>> +			 BIT(CLOCK_TAI))
>> +#define VDSO_COARSE	(BIT(CLOCK_REALTIME_COARSE)	| \
>> +			 BIT(CLOCK_MONOTONIC_COARSE))
>> +#define VDSO_RAW	(BIT(CLOCK_MONOTONIC_RAW))
>> +
>> +#define CS_HRES_COARSE	0
>> +#define CS_RAW		1
> 
> CS_HRES_COARSE seems like a confusing name choice to me.  What you
> really mean is not RAW.
> 
> How about CS_ADJ to indicate that its updated by adjtime?
> CS_XTIME might be another option.
> 

I divided the timers in 3 sets (HRES, COARSE, RAW), CS_HRES_COARSE refers to the
first two and CS_RAW to the third. I will ad a comment to explain the logic in
the next iteration.

> Huw.
>
Huw Davies June 10, 2019, 10:31 a.m. UTC | #5
On Mon, Jun 10, 2019 at 11:17:48AM +0100, Vincenzo Frascino wrote:
> On 10/06/2019 10:27, Huw Davies wrote:
> > On Thu, May 30, 2019 at 03:15:13PM +0100, Vincenzo Frascino wrote:
> >> --- /dev/null
> >> +++ b/include/vdso/datapage.h
> >> @@ -0,0 +1,91 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 */
> >> +#ifndef __VDSO_DATAPAGE_H
> >> +#define __VDSO_DATAPAGE_H
> >> +
> >> +#ifdef __KERNEL__
> >> +
> >> +#ifndef __ASSEMBLY__
> >> +
> >> +#include <linux/bits.h>
> >> +#include <linux/time.h>
> >> +#include <linux/types.h>
> >> +
> >> +#define VDSO_BASES	(CLOCK_TAI + 1)
> >> +#define VDSO_HRES	(BIT(CLOCK_REALTIME)		| \
> >> +			 BIT(CLOCK_MONOTONIC)		| \
> >> +			 BIT(CLOCK_BOOTTIME)		| \
> >> +			 BIT(CLOCK_TAI))
> >> +#define VDSO_COARSE	(BIT(CLOCK_REALTIME_COARSE)	| \
> >> +			 BIT(CLOCK_MONOTONIC_COARSE))
> >> +#define VDSO_RAW	(BIT(CLOCK_MONOTONIC_RAW))
> >> +
> >> +#define CS_HRES_COARSE	0
> >> +#define CS_RAW		1
> > 
> > CS_HRES_COARSE seems like a confusing name choice to me.  What you
> > really mean is not RAW.
> > 
> > How about CS_ADJ to indicate that its updated by adjtime?
> > CS_XTIME might be another option.
> > 
> 
> I divided the timers in 3 sets (HRES, COARSE, RAW), CS_HRES_COARSE refers to the
> first two and CS_RAW to the third. I will ad a comment to explain the logic in
> the next iteration.

I'm thinking ahead about a possible CLOCK_MONOTONIC_RAW_COARSE (which
would be useful at least for Wine).  In that case you'd have four clock
types non-raw and raw, each with either hres or coarse.

Huw.
Vincenzo Frascino June 10, 2019, 11:07 a.m. UTC | #6
Hi Huw,

On 10/06/2019 11:31, Huw Davies wrote:
> On Mon, Jun 10, 2019 at 11:17:48AM +0100, Vincenzo Frascino wrote:
>> On 10/06/2019 10:27, Huw Davies wrote:
>>> On Thu, May 30, 2019 at 03:15:13PM +0100, Vincenzo Frascino wrote:
>>>> --- /dev/null
>>>> +++ b/include/vdso/datapage.h
>>>> @@ -0,0 +1,91 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>> +#ifndef __VDSO_DATAPAGE_H
>>>> +#define __VDSO_DATAPAGE_H
>>>> +
>>>> +#ifdef __KERNEL__
>>>> +
>>>> +#ifndef __ASSEMBLY__
>>>> +
>>>> +#include <linux/bits.h>
>>>> +#include <linux/time.h>
>>>> +#include <linux/types.h>
>>>> +
>>>> +#define VDSO_BASES	(CLOCK_TAI + 1)
>>>> +#define VDSO_HRES	(BIT(CLOCK_REALTIME)		| \
>>>> +			 BIT(CLOCK_MONOTONIC)		| \
>>>> +			 BIT(CLOCK_BOOTTIME)		| \
>>>> +			 BIT(CLOCK_TAI))
>>>> +#define VDSO_COARSE	(BIT(CLOCK_REALTIME_COARSE)	| \
>>>> +			 BIT(CLOCK_MONOTONIC_COARSE))
>>>> +#define VDSO_RAW	(BIT(CLOCK_MONOTONIC_RAW))
>>>> +
>>>> +#define CS_HRES_COARSE	0
>>>> +#define CS_RAW		1
>>>
>>> CS_HRES_COARSE seems like a confusing name choice to me.  What you
>>> really mean is not RAW.
>>>
>>> How about CS_ADJ to indicate that its updated by adjtime?
>>> CS_XTIME might be another option.
>>>
>>
>> I divided the timers in 3 sets (HRES, COARSE, RAW), CS_HRES_COARSE refers to the
>> first two and CS_RAW to the third. I will ad a comment to explain the logic in
>> the next iteration.
> 
> I'm thinking ahead about a possible CLOCK_MONOTONIC_RAW_COARSE (which
> would be useful at least for Wine).  In that case you'd have four clock
> types non-raw and raw, each with either hres or coarse.
> 

Thanks for this, I was not aware of CLOCK_MONOTONIC_RAW_COARSE.
I tried to find, though, some details, but I could not find any. Could you
please provide some reference?

> Huw.
>
Huw Davies June 10, 2019, 11:37 a.m. UTC | #7
On Mon, Jun 10, 2019 at 12:07:45PM +0100, Vincenzo Frascino wrote:
> On 10/06/2019 11:31, Huw Davies wrote:
> > On Mon, Jun 10, 2019 at 11:17:48AM +0100, Vincenzo Frascino wrote:
> >> On 10/06/2019 10:27, Huw Davies wrote:
> >>> On Thu, May 30, 2019 at 03:15:13PM +0100, Vincenzo Frascino wrote:
> >>>> --- /dev/null
> >>>> +++ b/include/vdso/datapage.h
> >>>> @@ -0,0 +1,91 @@
> >>>> +/* SPDX-License-Identifier: GPL-2.0 */
> >>>> +#ifndef __VDSO_DATAPAGE_H
> >>>> +#define __VDSO_DATAPAGE_H
> >>>> +
> >>>> +#ifdef __KERNEL__
> >>>> +
> >>>> +#ifndef __ASSEMBLY__
> >>>> +
> >>>> +#include <linux/bits.h>
> >>>> +#include <linux/time.h>
> >>>> +#include <linux/types.h>
> >>>> +
> >>>> +#define VDSO_BASES	(CLOCK_TAI + 1)
> >>>> +#define VDSO_HRES	(BIT(CLOCK_REALTIME)		| \
> >>>> +			 BIT(CLOCK_MONOTONIC)		| \
> >>>> +			 BIT(CLOCK_BOOTTIME)		| \
> >>>> +			 BIT(CLOCK_TAI))
> >>>> +#define VDSO_COARSE	(BIT(CLOCK_REALTIME_COARSE)	| \
> >>>> +			 BIT(CLOCK_MONOTONIC_COARSE))
> >>>> +#define VDSO_RAW	(BIT(CLOCK_MONOTONIC_RAW))
> >>>> +
> >>>> +#define CS_HRES_COARSE	0
> >>>> +#define CS_RAW		1
> >>>
> >>> CS_HRES_COARSE seems like a confusing name choice to me.  What you
> >>> really mean is not RAW.
> >>>
> >>> How about CS_ADJ to indicate that its updated by adjtime?
> >>> CS_XTIME might be another option.
> >>>
> >>
> >> I divided the timers in 3 sets (HRES, COARSE, RAW), CS_HRES_COARSE refers to the
> >> first two and CS_RAW to the third. I will ad a comment to explain the logic in
> >> the next iteration.
> > 
> > I'm thinking ahead about a possible CLOCK_MONOTONIC_RAW_COARSE (which
> > would be useful at least for Wine).  In that case you'd have four clock
> > types non-raw and raw, each with either hres or coarse.
> > 
> 
> Thanks for this, I was not aware of CLOCK_MONOTONIC_RAW_COARSE.
> I tried to find, though, some details, but I could not find any. Could you
> please provide some reference?

It doesn't exist yet ;-)  However it doesn't seem crazy that such a
clock should exist.  I was really using it to illustrate that
raw / non-raw is orthogonal to hres / coarse.

That being said, this really doesn't matter that much.

Huw.
Huw Davies June 10, 2019, 5:47 p.m. UTC | #8
On Tue, Jun 04, 2019 at 01:05:40PM +0100, Vincenzo Frascino wrote:
> On 31/05/2019 09:16, Arnd Bergmann wrote:
> > On Thu, May 30, 2019 at 4:15 PM Vincenzo Frascino
> > <vincenzo.frascino@arm.com> wrote:
> > 
> >> + * vdso_data will be accessed by 64 bit and compat code at the same time
> >> + * so we should be careful before modifying this structure.
> >> + */
> >> +struct vdso_data {
> >> +       u32                     seq;
> >> +
> >> +       s32                     clock_mode;
> >> +       u64                     cycle_last;
> >> +       u64                     mask;
> >> +       u32                     mult;
> >> +       u32                     shift;
> >> +
> >> +       struct vdso_timestamp   basetime[VDSO_BASES];
> >> +
> >> +       s32                     tz_minuteswest;
> >> +       s32                     tz_dsttime;
> >> +       u32                     hrtimer_res;
> >> +};
> > 
> > The structure contains four padding bytes at the end, which is
> > something we try to avoid, at least if this ends up being used as
> > an ABI. Maybe add "u32 __unused" at the end?
> > 
> 
> Agreed, I will fix this in v7.

Note that this is also necessary to ensure that CLOCK_MONOTONIC_RAW
works in the 32-bit vDSO on x86_64 kernels.

Huw.
diff mbox series

Patch

diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h
new file mode 100644
index 000000000000..bb7087eec9bd
--- /dev/null
+++ b/include/vdso/datapage.h
@@ -0,0 +1,91 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __VDSO_DATAPAGE_H
+#define __VDSO_DATAPAGE_H
+
+#ifdef __KERNEL__
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bits.h>
+#include <linux/time.h>
+#include <linux/types.h>
+
+#define VDSO_BASES	(CLOCK_TAI + 1)
+#define VDSO_HRES	(BIT(CLOCK_REALTIME)		| \
+			 BIT(CLOCK_MONOTONIC)		| \
+			 BIT(CLOCK_BOOTTIME)		| \
+			 BIT(CLOCK_TAI))
+#define VDSO_COARSE	(BIT(CLOCK_REALTIME_COARSE)	| \
+			 BIT(CLOCK_MONOTONIC_COARSE))
+#define VDSO_RAW	(BIT(CLOCK_MONOTONIC_RAW))
+
+#define CS_HRES_COARSE	0
+#define CS_RAW		1
+#define CS_BASES	(CS_RAW + 1)
+
+/**
+ * struct vdso_timestamp - basetime per clock_id
+ * @sec: seconds
+ * @nsec: nanoseconds
+ *
+ * There is one vdso_timestamp object in vvar for each vDSO-accelerated
+ * clock_id. For high-resolution clocks, this encodes the time
+ * corresponding to vdso_data.cycle_last. For coarse clocks this encodes
+ * the actual time.
+ *
+ * To be noticed that for highres clocks nsec is left-shifted by
+ * vdso_data.cs[x].shift.
+ */
+struct vdso_timestamp {
+	u64	sec;
+	u64	nsec;
+};
+
+/**
+ * struct vdso_data - vdso datapage representation
+ * @seq: timebase sequence counter
+ * @clock_mode: clock mode
+ * @cycle_last: timebase at clocksource init
+ * @mask: clocksource mask
+ * @mult: clocksource multiplier
+ * @shift: clocksource shift
+ * @basetime[clock_id]: basetime per clock_id
+ * @tz_minuteswest: minutes west of Greenwich
+ * @tz_dsttime: type of DST correction
+ * @hrtimer_res: hrtimer resolution
+ *
+ * vdso_data will be accessed by 64 bit and compat code at the same time
+ * so we should be careful before modifying this structure.
+ */
+struct vdso_data {
+	u32			seq;
+
+	s32			clock_mode;
+	u64			cycle_last;
+	u64			mask;
+	u32			mult;
+	u32			shift;
+
+	struct vdso_timestamp	basetime[VDSO_BASES];
+
+	s32			tz_minuteswest;
+	s32			tz_dsttime;
+	u32			hrtimer_res;
+};
+
+/*
+ * We use the hidden visibility to prevent the compiler from generating a GOT
+ * relocation. Not only is going through a GOT useless (the entry couldn't and
+ * must not be overridden by another library), it does not even work: the linker
+ * cannot generate an absolute address to the data page.
+ *
+ * With the hidden visibility, the compiler simply generates a PC-relative
+ * relocation, and this is what we need.
+ */
+extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden")));
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* __VDSO_DATAPAGE_H */