diff mbox series

[v4,1/8] common: assembly entry point type/size annotations

Message ID 8ed43968-311e-263b-4dd7-9f8a49a394dc@suse.com (mailing list archive)
State New, archived
Headers show
Series annotate entry points with type and size | expand

Commit Message

Jan Beulich Aug. 4, 2023, 6:26 a.m. UTC
Recent gas versions generate minimalistic Dwarf debug info for items
annotated as functions and having their sizes specified [1]. Furthermore
generating live patches wants items properly annotated. "Borrow" Arm's
END() and (remotely) derive other annotation infrastructure from
Linux's, for all architectures to use.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

[1] https://sourceware.org/git?p=binutils-gdb.git;a=commitdiff;h=591cc9fbbfd6d51131c0f1d4a92e7893edcc7a28
---
v3: New, generalized from earlier x86-only version. LAST() (now
    LASTARG()) moved to macros.h.
---
TBD: What to set CODE_ALIGN to by default? Or should we require arch-es
     to define that in all cases?

TBD: {CODE,DATA}_ALIGN are byte granular, such that a value of 0 can be
     specified (in case this has some special meaning on an arch;
     conceivably it could mean to use some kind of arch default). We may
     not strictly need that, and hence we could also make these
     power-of-2 values (using .p2align).

Note that we can't use ALIGN() (in place of SYM_ALIGN()) as long as we
still have ALIGN.

Note further that the "algn" parameter of FUNC() etc. is intended to accept
either no argument or a single one. If we wanted to also make the fill value
customizable per call site, the constructs would need re-doing to some
degree.

Comments

Julien Grall Sept. 14, 2023, 9:06 p.m. UTC | #1
Hi Jan,

On 04/08/2023 07:26, Jan Beulich wrote:
> Recent gas versions generate minimalistic Dwarf debug info for items
> annotated as functions and having their sizes specified [1]. Furthermore
> generating live patches wants items properly annotated. "Borrow" Arm's
> END() and (remotely) derive other annotation infrastructure from
> Linux'es, for all architectures to use.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> [1] https://sourceware.org/git?p=binutils-gdb.git;a=commitdiff;h=591cc9fbbfd6d51131c0f1d4a92e7893edcc7a28
> ---
> v3: New, generalized from earlier x86-only version. LAST() (now
>      LASTARG()) moved to macros.h.
> ---
> TBD: What to set CODE_ALIGN to by default? Or should we requires arch-es
>       to define that in all cases?

The code alignment is very specific to an architecture. So I think it 
would be better if there is no default.

Otherwise, it will be more difficult for a developer to figure out that 
CODE_ALIGN may need an update.

> 
> TBD: {CODE,DATA}_ALIGN are byte granular, such that a value of 0 can be
>       specified (in case this has some special meaning on an arch;
>       conceivably it could mean to use some kind of arch default). We may
>       not strictly need that, and hence we could also make these power-of
>       -2 values (using .p2align).

I don't have a strong opinion on this one.

> 
> Note that we can't use ALIGN() (in place of SYM_ALIGN()) as long as we
> still have ALIGN.
> 
> Note further that FUNC()'s etc "algn" parameter is intended to allow for
> only no or a single argument. If we wanted to also make the fill value
> customizable per call site, the constructs would need re-doing to some
> degree.
> 
> --- /dev/null
> +++ b/xen/include/xen/linkage.h
> @@ -0,0 +1,56 @@
> +#ifndef __LINKAGE_H__
> +#define __LINKAGE_H__
> +
> +#ifdef __ASSEMBLY__
> +
> +#include <xen/macros.h>
> +
> +#ifndef CODE_ALIGN
> +# define CODE_ALIGN ??
> +#endif
> +#ifndef CODE_FILL
> +# define CODE_FILL ~0
> +#endif

What's the value to allow the architecture to override CODE_FILL and ...

> +
> +#ifndef DATA_ALIGN
> +# define DATA_ALIGN 0
> +#endif
> +#ifndef DATA_FILL
> +# define DATA_FILL ~0
> +#endif

... DATA_FILL?

> +
> +#define SYM_ALIGN(algn...) .balign algn

I find the name 'algn' confusing (not even referring to the missing 
'i'). Why not name it 'args'?

> +
> +#define SYM_L_GLOBAL(name) .globl name
> +#define SYM_L_WEAK(name)   .weak name
> +#define SYM_L_LOCAL(name)  /* nothing */
> +
> +#define SYM_T_FUNC         STT_FUNC
> +#define SYM_T_DATA         STT_OBJECT
> +#define SYM_T_NONE         STT_NOTYPE

SYM_* will be used only in SYM() below. So why not use STT_* directly?

> +
> +#define SYM(name, typ, linkage, algn...)          \
> +        .type name, SYM_T_ ## typ;                \
> +        SYM_L_ ## linkage(name);                  \
> +        SYM_ALIGN(algn);                          \
> +        name:
> +
> +#define END(name) .size name, . - name
> +
> +#define FUNC(name, algn...) \
> +        SYM(name, FUNC, GLOBAL, LASTARG(CODE_ALIGN, ## algn), CODE_FILL)
> +#define LABEL(name, algn...) \
> +        SYM(name, NONE, GLOBAL, LASTARG(CODE_ALIGN, ## algn), CODE_FILL)
> +#define DATA(name, algn...) \
> +        SYM(name, DATA, GLOBAL, LASTARG(DATA_ALIGN, ## algn), DATA_FILL)

I think the alignment should be explicit for DATA. Otherwise, at least 
on Arm, we would default to 0 which could lead to unaligned access if 
not careful.

> +
> +#define FUNC_LOCAL(name, algn...) \
> +        SYM(name, FUNC, LOCAL, LASTARG(CODE_ALIGN, ## algn), CODE_FILL)
> +#define LABEL_LOCAL(name, algn...) \
> +        SYM(name, NONE, LOCAL, LASTARG(CODE_ALIGN, ## algn), CODE_FILL)
> +#define DATA_LOCAL(name, algn...) \
> +        SYM(name, DATA, LOCAL, LASTARG(DATA_ALIGN, ## algn), DATA_FILL)

Same here.

> +
> +#endif /*  __ASSEMBLY__ */
> +
> +#endif /* __LINKAGE_H__ */
> --- a/xen/include/xen/macros.h
> +++ b/xen/include/xen/macros.h
> @@ -15,6 +15,15 @@
>   #define count_args(args...) \
>       count_args_(., ## args, 8, 7, 6, 5, 4, 3, 2, 1, 0)
>   
> +#define ARG1_(x, y...) (x)
> +#define ARG2_(x, y...) ARG1_(y)
> +#define ARG3_(x, y...) ARG2_(y)
> +#define ARG4_(x, y...) ARG3_(y)
> +
> +#define ARG__(nr) ARG ## nr ## _
> +#define ARG_(nr)  ARG__(nr)
> +#define LASTARG(x, y...) ARG_(count_args(x, ## y))(x, ## y)
> +
>   /* Indirect macros required for expanded argument pasting. */
>   #define PASTE_(a, b) a ## b
>   #define PASTE(a, b) PASTE_(a, b)
> 

Cheers,
Jan Beulich Sept. 18, 2023, 10:24 a.m. UTC | #2
On 14.09.2023 23:06, Julien Grall wrote:
> On 04/08/2023 07:26, Jan Beulich wrote:
>> TBD: What to set CODE_ALIGN to by default? Or should we requires arch-es
>>       to define that in all cases?
> 
> The code alignment is very specific to an architecture. So I think it 
> would be better if there are no default.
> 
> Otherwise, it will be more difficult for a developper to figure out that 
> CODE_ALIGN may need an update.

Okay, I've dropped the fallback then.

>> --- /dev/null
>> +++ b/xen/include/xen/linkage.h
>> @@ -0,0 +1,56 @@
>> +#ifndef __LINKAGE_H__
>> +#define __LINKAGE_H__
>> +
>> +#ifdef __ASSEMBLY__
>> +
>> +#include <xen/macros.h>
>> +
>> +#ifndef CODE_ALIGN
>> +# define CODE_ALIGN ??
>> +#endif
>> +#ifndef CODE_FILL
>> +# define CODE_FILL ~0
>> +#endif
> 
> What's the value to allow the architecture to override CODE_FILL and ...

What is put between functions may be relevant to control. Without fall-
through to a subsequent label, I think the intention is to use "int3" (0xcc)
filler bytes, for example. (With fall-through to the subsequent label, NOPs
would need using in any event.)

>> +
>> +#ifndef DATA_ALIGN
>> +# define DATA_ALIGN 0
>> +#endif
>> +#ifndef DATA_FILL
>> +# define DATA_FILL ~0
>> +#endif
> 
> ... DATA_FILL?

For data the need is probably less strict; still I could see one arch
preferring zero filling while another might prefer all-ones filling.

>> +
>> +#define SYM_ALIGN(algn...) .balign algn
> 
> I find the name 'algn' confusing (not even referring to the missing 
> 'i'). Why not naming it 'args'?

I can name it "args", sure. It's just that "algn" is in line with the
naming further down (where "args" isn't reasonable to use as substitution).

>> +#define SYM_L_GLOBAL(name) .globl name
>> +#define SYM_L_WEAK(name)   .weak name
>> +#define SYM_L_LOCAL(name)  /* nothing */
>> +
>> +#define SYM_T_FUNC         STT_FUNC
>> +#define SYM_T_DATA         STT_OBJECT
>> +#define SYM_T_NONE         STT_NOTYPE
> 
> SYM_* will be used only in SYM() below. So why not using STT_* directly?

For one this is how the Linux original has it. And then to me DATA and
NONE are neater to have at the use sites than the ELF-specific terms
OBJECT and NOTYPE. But I'm willing to reconsider provided arguments
towards the two given reasons not being overly relevant for us.

>> +
>> +#define SYM(name, typ, linkage, algn...)          \
>> +        .type name, SYM_T_ ## typ;                \
>> +        SYM_L_ ## linkage(name);                  \
>> +        SYM_ALIGN(algn);                          \
>> +        name:
>> +
>> +#define END(name) .size name, . - name
>> +
>> +#define FUNC(name, algn...) \
>> +        SYM(name, FUNC, GLOBAL, LASTARG(CODE_ALIGN, ## algn), CODE_FILL)
>> +#define LABEL(name, algn...) \
>> +        SYM(name, NONE, GLOBAL, LASTARG(CODE_ALIGN, ## algn), CODE_FILL)
>> +#define DATA(name, algn...) \
>> +        SYM(name, DATA, GLOBAL, LASTARG(DATA_ALIGN, ## algn), DATA_FILL)
> 
> I think the alignment should be explicit for DATA. Otherwise, at least 
> on Arm, we would default to 0 which could lead to unaligned access if 
> not careful.

I disagree. Even for byte-granular data (like strings) it may be desirable
to have some default alignment, without every use site needing to repeat
that specific value.

Jan
Julien Grall Sept. 18, 2023, 10:34 a.m. UTC | #3
Hi,

On 18/09/2023 11:24, Jan Beulich wrote:
> On 14.09.2023 23:06, Julien Grall wrote:
>> On 04/08/2023 07:26, Jan Beulich wrote:
>>> TBD: What to set CODE_ALIGN to by default? Or should we requires arch-es
>>>        to define that in all cases?
>>
>> The code alignment is very specific to an architecture. So I think it
>> would be better if there are no default.
>>
>> Otherwise, it will be more difficult for a developper to figure out that
>> CODE_ALIGN may need an update.
> 
> Okay, I've dropped the fallback then.
> 
>>> --- /dev/null
>>> +++ b/xen/include/xen/linkage.h
>>> @@ -0,0 +1,56 @@
>>> +#ifndef __LINKAGE_H__
>>> +#define __LINKAGE_H__
>>> +
>>> +#ifdef __ASSEMBLY__
>>> +
>>> +#include <xen/macros.h>
>>> +
>>> +#ifndef CODE_ALIGN
>>> +# define CODE_ALIGN ??
>>> +#endif
>>> +#ifndef CODE_FILL
>>> +# define CODE_FILL ~0
>>> +#endif
>>
>> What's the value to allow the architecture to override CODE_FILL and ...
> 
> What is put between functions may be relevant to control. Without fall-
> through to a subsequent label, I think the intention is to use "int3" (0xcc)
> filler bytes, for example. (With fall-through to the subsequent label, NOPs
> would need using in any event.)

I guess for x86 it makes sense. For Arm, the filler is unlikely to be 
used as the instruction size is always fixed.

> 
>>> +
>>> +#ifndef DATA_ALIGN
>>> +# define DATA_ALIGN 0
>>> +#endif
>>> +#ifndef DATA_FILL
>>> +# define DATA_FILL ~0
>>> +#endif
>>
>> ... DATA_FILL?
> 
> For data the need is probably less strict; still I could see one arch to
> prefer zero filling while another might better like all-ones-filling.

It is unclear to me why an architecture would prefer one over the other. 
Can you provide a bit more details?

> 
>>> +
>>> +#define SYM_ALIGN(algn...) .balign algn
>>
>> I find the name 'algn' confusing (not even referring to the missing
>> 'i'). Why not naming it 'args'?
> 
> I can name it "args", sure. It's just that "algn" is in line with the
> naming further down (where "args" isn't reasonable to use as substitution).

If you want to be consistent then, I think it would be best to use 
'align'. I think it should be fine as we don't seem to use '.align'.

> 
>>> +#define SYM_L_GLOBAL(name) .globl name
>>> +#define SYM_L_WEAK(name)   .weak name
>>> +#define SYM_L_LOCAL(name)  /* nothing */
>>> +
>>> +#define SYM_T_FUNC         STT_FUNC
>>> +#define SYM_T_DATA         STT_OBJECT
>>> +#define SYM_T_NONE         STT_NOTYPE
>>
>> SYM_* will be used only in SYM() below. So why not using STT_* directly?
> 
> For one this is how the Linux original has it. And then to me DATA and
> NONE are neater to have at the use sites than the ELF-specific terms
> OBJECT and NOTYPE. But I'm willing to reconsider provided arguments
> towards the two given reasons not being overly relevant for us.
> 
>>> +
>>> +#define SYM(name, typ, linkage, algn...)          \
>>> +        .type name, SYM_T_ ## typ;                \
>>> +        SYM_L_ ## linkage(name);                  \
>>> +        SYM_ALIGN(algn);                          \
>>> +        name:
>>> +
>>> +#define END(name) .size name, . - name
>>> +
>>> +#define FUNC(name, algn...) \
>>> +        SYM(name, FUNC, GLOBAL, LASTARG(CODE_ALIGN, ## algn), CODE_FILL)
>>> +#define LABEL(name, algn...) \
>>> +        SYM(name, NONE, GLOBAL, LASTARG(CODE_ALIGN, ## algn), CODE_FILL)
>>> +#define DATA(name, algn...) \
>>> +        SYM(name, DATA, GLOBAL, LASTARG(DATA_ALIGN, ## algn), DATA_FILL)
>>
>> I think the alignment should be explicit for DATA. Otherwise, at least
>> on Arm, we would default to 0 which could lead to unaligned access if
>> not careful.
> 
> I disagree. Even for byte-granular data (like strings) it may be desirable
> to have some default alignment, without every use site needing to repeat
> that specific value. 

I understand that some cases may want to use a default alignment. But my 
concern is the developer may not realize that alignment is necessary. So 
by making it mandatory, it would at least prompt the developer to think 
whether this is needed.

For the string case, we could introduce a different macro.

Cheers,
Jan Beulich Sept. 18, 2023, 10:51 a.m. UTC | #4
On 18.09.2023 12:34, Julien Grall wrote:
> Hi,
> 
> On 18/09/2023 11:24, Jan Beulich wrote:
>> On 14.09.2023 23:06, Julien Grall wrote:
>>> On 04/08/2023 07:26, Jan Beulich wrote:
>>>> TBD: What to set CODE_ALIGN to by default? Or should we requires arch-es
>>>>        to define that in all cases?
>>>
>>> The code alignment is very specific to an architecture. So I think it
>>> would be better if there are no default.
>>>
>>> Otherwise, it will be more difficult for a developper to figure out that
>>> CODE_ALIGN may need an update.
>>
>> Okay, I've dropped the fallback then.
>>
>>>> --- /dev/null
>>>> +++ b/xen/include/xen/linkage.h
>>>> @@ -0,0 +1,56 @@
>>>> +#ifndef __LINKAGE_H__
>>>> +#define __LINKAGE_H__
>>>> +
>>>> +#ifdef __ASSEMBLY__
>>>> +
>>>> +#include <xen/macros.h>
>>>> +
>>>> +#ifndef CODE_ALIGN
>>>> +# define CODE_ALIGN ??
>>>> +#endif
>>>> +#ifndef CODE_FILL
>>>> +# define CODE_FILL ~0
>>>> +#endif
>>>
>>> What's the value to allow the architecture to override CODE_FILL and ...
>>
>> What is put between functions may be relevant to control. Without fall-
>> through to a subsequent label, I think the intention is to use "int3" (0xcc)
>> filler bytes, for example. (With fall-through to the subsequent label, NOPs
>> would need using in any event.)
> 
> I guess for x86 it makes sense. For Arm, the filler is unlikely to be 
> used as the instruction size is always fixed.
> 
>>
>>>> +
>>>> +#ifndef DATA_ALIGN
>>>> +# define DATA_ALIGN 0
>>>> +#endif
>>>> +#ifndef DATA_FILL
>>>> +# define DATA_FILL ~0
>>>> +#endif
>>>
>>> ... DATA_FILL?
>>
>> For data the need is probably less strict; still I could see one arch to
>> prefer zero filling while another might better like all-ones-filling.
> 
> It is unclear to me why an architecture would prefer one over the other. 
> Can you provide a bit more details?
> 
>>
>>>> +
>>>> +#define SYM_ALIGN(algn...) .balign algn
>>>
>>> I find the name 'algn' confusing (not even referring to the missing
>>> 'i'). Why not naming it 'args'?
>>
>> I can name it "args", sure. It's just that "algn" is in line with the
>> naming further down (where "args" isn't reasonable to use as substitution).
> 
> If you want to be consistent then, I think it would be best to use 
> 'align'. I think it should be fine as we don't seem to use '.align'.

I think I had a conflict from this somewhere, but that may have been very
early when I hadn't switched to .balign yet. I'll see if renaming works
out.

>>>> +#define SYM(name, typ, linkage, algn...)          \
>>>> +        .type name, SYM_T_ ## typ;                \
>>>> +        SYM_L_ ## linkage(name);                  \
>>>> +        SYM_ALIGN(algn);                          \
>>>> +        name:
>>>> +
>>>> +#define END(name) .size name, . - name
>>>> +
>>>> +#define FUNC(name, algn...) \
>>>> +        SYM(name, FUNC, GLOBAL, LASTARG(CODE_ALIGN, ## algn), CODE_FILL)
>>>> +#define LABEL(name, algn...) \
>>>> +        SYM(name, NONE, GLOBAL, LASTARG(CODE_ALIGN, ## algn), CODE_FILL)
>>>> +#define DATA(name, algn...) \
>>>> +        SYM(name, DATA, GLOBAL, LASTARG(DATA_ALIGN, ## algn), DATA_FILL)
>>>
>>> I think the alignment should be explicit for DATA. Otherwise, at least
>>> on Arm, we would default to 0 which could lead to unaligned access if
>>> not careful.
>>
>> I disagree. Even for byte-granular data (like strings) it may be desirable
>> to have some default alignment, without every use site needing to repeat
>> that specific value. 
> 
> I understand that some cases may want to use a default alignment. But my 
> concern is the developer may not realize that alignment is necessary. So 
> by making it mandatory, it would at least prompt the developper to think 
> whether this is needed.

That would force people to specify a value every time, even when none
would be needed. Anyway, if others think your way, I can certainly change.
But then I need to know whether others perhaps think alignment on functions
(and maybe even labels) should also be explicit in all cases.

> For the string case, we could introduce a different macro.

Hmm, yet one more special thing then (for people to remember to use under
certain circumstances).

Jan
diff mbox series

Patch

--- /dev/null
+++ b/xen/include/xen/linkage.h
@@ -0,0 +1,56 @@ 
+#ifndef __LINKAGE_H__
+#define __LINKAGE_H__
+
+#ifdef __ASSEMBLY__
+
+#include <xen/macros.h>
+
+#ifndef CODE_ALIGN
+# define CODE_ALIGN ??
+#endif
+#ifndef CODE_FILL
+# define CODE_FILL ~0
+#endif
+
+#ifndef DATA_ALIGN
+# define DATA_ALIGN 0
+#endif
+#ifndef DATA_FILL
+# define DATA_FILL ~0
+#endif
+
+#define SYM_ALIGN(algn...) .balign algn
+
+#define SYM_L_GLOBAL(name) .globl name
+#define SYM_L_WEAK(name)   .weak name
+#define SYM_L_LOCAL(name)  /* nothing */
+
+#define SYM_T_FUNC         STT_FUNC
+#define SYM_T_DATA         STT_OBJECT
+#define SYM_T_NONE         STT_NOTYPE
+
+#define SYM(name, typ, linkage, algn...)          \
+        .type name, SYM_T_ ## typ;                \
+        SYM_L_ ## linkage(name);                  \
+        SYM_ALIGN(algn);                          \
+        name:
+
+#define END(name) .size name, . - name
+
+#define FUNC(name, algn...) \
+        SYM(name, FUNC, GLOBAL, LASTARG(CODE_ALIGN, ## algn), CODE_FILL)
+#define LABEL(name, algn...) \
+        SYM(name, NONE, GLOBAL, LASTARG(CODE_ALIGN, ## algn), CODE_FILL)
+#define DATA(name, algn...) \
+        SYM(name, DATA, GLOBAL, LASTARG(DATA_ALIGN, ## algn), DATA_FILL)
+
+#define FUNC_LOCAL(name, algn...) \
+        SYM(name, FUNC, LOCAL, LASTARG(CODE_ALIGN, ## algn), CODE_FILL)
+#define LABEL_LOCAL(name, algn...) \
+        SYM(name, NONE, LOCAL, LASTARG(CODE_ALIGN, ## algn), CODE_FILL)
+#define DATA_LOCAL(name, algn...) \
+        SYM(name, DATA, LOCAL, LASTARG(DATA_ALIGN, ## algn), DATA_FILL)
+
+#endif /*  __ASSEMBLY__ */
+
+#endif /* __LINKAGE_H__ */
--- a/xen/include/xen/macros.h
+++ b/xen/include/xen/macros.h
@@ -15,6 +15,15 @@ 
 #define count_args(args...) \
     count_args_(., ## args, 8, 7, 6, 5, 4, 3, 2, 1, 0)
 
+#define ARG1_(x, y...) (x)
+#define ARG2_(x, y...) ARG1_(y)
+#define ARG3_(x, y...) ARG2_(y)
+#define ARG4_(x, y...) ARG3_(y)
+
+#define ARG__(nr) ARG ## nr ## _
+#define ARG_(nr)  ARG__(nr)
+#define LASTARG(x, y...) ARG_(count_args(x, ## y))(x, ## y)
+
 /* Indirect macros required for expanded argument pasting. */
 #define PASTE_(a, b) a ## b
 #define PASTE(a, b) PASTE_(a, b)