diff mbox series

[dwarves,1/5] dwarves: help dwarf loader spot functions with optimized-out parameters

Message ID 1674567931-26458-2-git-send-email-alan.maguire@oracle.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series dwarves: support encoding of optimized-out parameters, removal of inconsistent static functions | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch
bpf/vmtest-bpf-PR fail merge-conflict
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-10 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-11 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-13 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-14 fail Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-15 success Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-16 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-17 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-18 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-21 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
bpf/vmtest-bpf-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-36 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-16

Commit Message

Alan Maguire Jan. 24, 2023, 1:45 p.m. UTC
Compilation generates DWARF at several stages, and often the
later DWARF representations more accurately represent optimizations
that have occurred during compilation.

In particular, parameter representations can be spotted by their
abstract origin references to the original parameter, but they
often have more accurate location information.  In most cases,
the parameter locations will match calling conventions, and be
registers for the first 6 parameters on x86_64, first 8 on ARM64
etc.  However, if a parameter is not in a register when it should be,
it is likely passed via the stack, or the compiler has used a
constant representation instead.

This change adds a field to parameters and their associated
ftype to note if a parameter has been optimized out.  Having
this information allows us to skip such functions, as their
presence in CUs makes BTF encoding impossible.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
---
 dwarf_loader.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 dwarves.h      |  4 +++-
 2 files changed, 77 insertions(+), 3 deletions(-)

Comments

Jiri Olsa Jan. 25, 2023, 4:53 p.m. UTC | #1
On Tue, Jan 24, 2023 at 01:45:27PM +0000, Alan Maguire wrote:

SNIP

>  
>  	return parm;
> @@ -1450,7 +1504,7 @@ static struct tag *die__create_new_parameter(Dwarf_Die *die,
>  					     struct cu *cu, struct conf_load *conf,
>  					     int param_idx)
>  {
> -	struct parameter *parm = parameter__new(die, cu, conf);
> +	struct parameter *parm = parameter__new(die, cu, conf, param_idx);
>  
>  	if (parm == NULL)
>  		return NULL;
> @@ -2209,6 +2263,10 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
>  			}
>  			pos->name = tag__parameter(dtype->tag)->name;
>  			pos->tag.type = dtype->tag->type;
> +			if (pos->optimized) {
> +				tag__parameter(dtype->tag)->optimized = pos->optimized;
> +				type->optimized_parms = 1;
> +			}
>  			continue;
>  		}
>  
> @@ -2219,6 +2277,20 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
>  		}
>  		pos->tag.type = dtype->small_id;
>  	}
> +	/* if parameters were optimized out, set flag for the ftype this
> +	 * function tag referred to via abstract origin.
> +	 */
> +	if (type->optimized_parms) {
> +		struct dwarf_tag *dtype = type->tag.priv;
> +		struct dwarf_tag *dftype;
> +
> +		dftype = dwarf_cu__find_tag_by_ref(dcu, &dtype->abstract_origin);
> +		if (dftype && dftype->tag) {
> +			struct ftype *ftype = tag__ftype(dftype->tag);
> +
> +			ftype->optimized_parms = 1;

nit, could be:
   tag__ftype(dftype->tag)->optimized_parms = 1; 

as you did above

jirka

> +		}
> +	}
>  }
Eduard Zingerman Jan. 25, 2023, 5:47 p.m. UTC | #2
On Tue, 2023-01-24 at 13:45 +0000, Alan Maguire wrote:
> Compilation generates DWARF at several stages, and often the
> later DWARF representations more accurately represent optimizations
> that have occurred during compilation.
> 
> In particular, parameter representations can be spotted by their
> abstract origin references to the original parameter, but they
> often have more accurate location information.  In most cases,
> the parameter locations will match calling conventions, and be
> registers for the first 6 parameters on x86_64, first 8 on ARM64
> etc.  If the parameter is not a register when it should be however,
> it is likely passed via the stack or the compiler has used a
> constant representation instead.
> 
> This change adds a field to parameters and their associated
> ftype to note if a parameter has been optimized out.  Having
> this information allows us to skip such functions, as their
> presence in CUs makes BTF encoding impossible.
> 
> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> ---
>  dwarf_loader.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  dwarves.h      |  4 +++-
>  2 files changed, 77 insertions(+), 3 deletions(-)
> 
> diff --git a/dwarf_loader.c b/dwarf_loader.c
> index 5a74035..0220f1d 100644
> --- a/dwarf_loader.c
> +++ b/dwarf_loader.c
> @@ -992,13 +992,67 @@ static struct class_member *class_member__new(Dwarf_Die *die, struct cu *cu,
>  	return member;
>  }
>  
> -static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu, struct conf_load *conf)
> +/* How many function parameters are passed via registers?  Used below in
> + * determining if an argument has been optimized out or if it is simply
> + * an argument > NR_REGISTER_PARAMS.  Setting NR_REGISTER_PARAMS to 0
> + * allows unsupported architectures to skip tagging optimized-out
> + * values.
> + */
> +#if defined(__x86_64__)
> +#define NR_REGISTER_PARAMS      6
> +#elif defined(__s390__)
> +#define NR_REGISTER_PARAMS	5
> +#elif defined(__aarch64__)
> +#define NR_REGISTER_PARAMS      8
> +#elif defined(__mips__)
> +#define NR_REGISTER_PARAMS	8
> +#elif defined(__powerpc__)
> +#define NR_REGISTER_PARAMS	8
> +#elif defined(__sparc__)
> +#define NR_REGISTER_PARAMS	6
> +#elif defined(__riscv) && __riscv_xlen == 64
> +#define NR_REGISTER_PARAMS	8
> +#elif defined(__arc__)
> +#define NR_REGISTER_PARAMS	8
> +#else
> +#define NR_REGISTER_PARAMS      0
> +#endif
> +
> +static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu,
> +					struct conf_load *conf, int param_idx)
>  {
>  	struct parameter *parm = tag__alloc(cu, sizeof(*parm));
>  
>  	if (parm != NULL) {
> +		struct location loc;
> +
>  		tag__init(&parm->tag, cu, die);
>  		parm->name = attr_string(die, DW_AT_name, conf);
> +
> +		/* Parameters which use DW_AT_abstract_origin to point at
> +		 * the original parameter definition (with no name in the DIE)
> +		 * are the result of later DWARF generation during compilation
> +		 * so often better take into account if arguments were
> +		 * optimized out.
> +		 *
> +		 * By checking that locations for parameters that are expected
> +		 * to be passed as registers are actually passed as registers,
> +		 * we can spot optimized-out parameters.
> +		 */
> +		if (param_idx < NR_REGISTER_PARAMS && !parm->name &&
> +		    attr_location(die, &loc.expr, &loc.exprlen) == 0 &&
> +		    loc.exprlen != 0) {
> +			Dwarf_Op *expr = loc.expr;
> +
> +			switch (expr->atom) {
> +			case DW_OP_reg1 ... DW_OP_reg31:
> +			case DW_OP_breg0 ... DW_OP_breg31:
> +				break;
> +			default:
> +				parm->optimized = true;
> +				break;
> +			}
> +		}

Hi Alan,

I looked through the DWARF standard and found two relevant entries:

> 4.1.4
> 
> If no location attribute is present in a variable entry representing
> the definition of a variable (...), or if the location attribute is
> present but has an empty location description (...), the variable is
> assumed to exist in the source code but not in the executable program
> (but see number 10, below).

This paragraph implies that the presence or absence of a parameter name
is irrelevant, but I don't have any examples where a parameter name is
present for a removed parameter.

> 4.1.10
> 
> A DW_AT_const_value attribute for an entry describing a variable or formal
> parameter whose value is constant and not represented by an object in the
> address space of the program, or an entry describing a named constant. (Note
> that such an entry does not have a location attribute.)

For this paragraph I have an example:

    $ cat test.c
    __attribute__((noinline))
    static int f(int x, int y) {
        return x + y;
    }
    
    int main(int argc, char *argv[]) {
        return f(1, 2) + f(1, 3);
    }
    
    $ gcc --version | head -n1
    gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0
    $ gcc -O2 -g -c test.c -o test.o
    
The objdump shows that constant propagation removed the first
parameter of the function `f`:

    $ llvm-objdump -d test.o 
    
    test.o:	file format elf64-x86-64
    
    Disassembly of section .text:
    
    0000000000000000 <f.constprop.0>:
           0: 8d 47 01                     	leal	0x1(%rdi), %eax
           3: c3                           	retq
    
    Disassembly of section .text.startup:
    
    0000000000000000 <main>:
           0: f3 0f 1e fa                  	endbr64
           4: bf 02 00 00 00               	movl	$0x2, %edi
           9: e8 00 00 00 00               	callq	0xe <main+0xe>
           e: bf 03 00 00 00               	movl	$0x3, %edi
          13: 89 c2                        	movl	%eax, %edx
          15: e8 00 00 00 00               	callq	0x1a <main+0x1a>
          1a: 01 d0                        	addl	%edx, %eax
          1c: c3                           	retq
    
However, the information about this parameter is still present in the DWARF:

    $ llvm-dwarfdump test.o
    ...
    0x000000c1:   DW_TAG_subprogram
                    DW_AT_name	("f")
                    DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
                    DW_AT_decl_line	(2)
                    DW_AT_decl_column	(0x0c)
                    DW_AT_prototyped	(true)
                    DW_AT_type	(0x000000a9 "int")
                    DW_AT_inline	(DW_INL_inlined)
                    DW_AT_sibling	(0x000000e1)
    
    0x000000d0:     DW_TAG_formal_parameter
                      DW_AT_name	("x")
                      DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
                      DW_AT_decl_line	(2)
                      DW_AT_decl_column	(0x12)
                      DW_AT_type	(0x000000a9 "int")
    
    0x000000d8:     DW_TAG_formal_parameter
                      DW_AT_name	("y")
                      DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
                      DW_AT_decl_line	(2)
                      DW_AT_decl_column	(0x19)
                      DW_AT_type	(0x000000a9 "int")
    
    0x000000e0:     NULL
    
    0x000000e1:   DW_TAG_subprogram
                    DW_AT_abstract_origin	(0x000000c1 "f")
                    DW_AT_low_pc	(0x0000000000000000)
                    DW_AT_high_pc	(0x0000000000000004)
                    DW_AT_frame_base	(DW_OP_call_frame_cfa)
                    DW_AT_call_all_calls	(true)
    
    0x000000f8:     DW_TAG_formal_parameter
                      DW_AT_abstract_origin	(0x000000d8 "y")
                      DW_AT_location	(DW_OP_reg5 RDI)
    
    0x000000ff:     DW_TAG_formal_parameter
                      DW_AT_abstract_origin	(0x000000d0 "x")
                      DW_AT_const_value	(0x01)
    
    0x00000105:     NULL
    
When I ask pahole with this patch-set applied to generate BTF I see
the following output:

    $ pahole --verbose --btf_encode_detached=test.btf test.o
    btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
    Found 0 per-CPU variables!
    Found 2 functions!
    File test.o:
    [1] INT int size=4 nr_bits=32 encoding=SIGNED
    [2] PTR (anon) type_id=3
    [3] PTR (anon) type_id=4
    [4] INT char size=1 nr_bits=8 encoding=SIGNED
    [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
    [6] FUNC main type_id=5
    matched function 'f' with 'f.constprop.0'
    added local function 'f'
    matched function 'f' with 'f.constprop.0'
    [7] FUNC_PROTO (anon) return=1 args=(1 x, 1 y)
    [8] FUNC f type_id=7
    
This means that function `f` was not skipped.
A trivial modification overcomes this:

		if (param_idx < NR_REGISTER_PARAMS && !parm->name) {
			if (attr_location(die, &loc.expr, &loc.exprlen) == 0 &&
			    loc.exprlen != 0) {
				Dwarf_Op *expr = loc.expr;

				switch (expr->atom) {
				case DW_OP_reg1 ... DW_OP_reg31:
				case DW_OP_breg0 ... DW_OP_breg31:
					break;
				default:
					parm->optimized = true;
					break;
				}
			} else if (dwarf_attr(die, DW_AT_const_value, &attr) != NULL) {
					parm->optimized = true;
			}

With it, pahole seems to work as intended (if I understand the intention correctly):

    $ pahole --verbose --btf_encode_detached=test.btf test.o
    btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
    Found 0 per-CPU variables!
    Found 2 functions!
    File test.o:
    [1] INT int size=4 nr_bits=32 encoding=SIGNED
    [2] PTR (anon) type_id=3
    [3] PTR (anon) type_id=4
    [4] INT char size=1 nr_bits=8 encoding=SIGNED
    [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
    [6] FUNC main type_id=5
    matched function 'f' with 'f.constprop.0', has optimized-out parameters
    added local function 'f', optimized-out params
    matched function 'f' with 'f.constprop.0', has optimized-out parameters
    skipping addition of 'f' due to optimized-out parameters

wdyt?

Thanks,
Eduard

>  
>  	return parm;
> @@ -1450,7 +1504,7 @@ static struct tag *die__create_new_parameter(Dwarf_Die *die,
>  					     struct cu *cu, struct conf_load *conf,
>  					     int param_idx)
>  {
> -	struct parameter *parm = parameter__new(die, cu, conf);
> +	struct parameter *parm = parameter__new(die, cu, conf, param_idx);
>  
>  	if (parm == NULL)
>  		return NULL;
> @@ -2209,6 +2263,10 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
>  			}
>  			pos->name = tag__parameter(dtype->tag)->name;
>  			pos->tag.type = dtype->tag->type;
> +			if (pos->optimized) {
> +				tag__parameter(dtype->tag)->optimized = pos->optimized;
> +				type->optimized_parms = 1;
> +			}
>  			continue;
>  		}
>  
> @@ -2219,6 +2277,20 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
>  		}
>  		pos->tag.type = dtype->small_id;
>  	}
> +	/* if parameters were optimized out, set flag for the ftype this
> +	 * function tag referred to via abstract origin.
> +	 */
> +	if (type->optimized_parms) {
> +		struct dwarf_tag *dtype = type->tag.priv;
> +		struct dwarf_tag *dftype;
> +
> +		dftype = dwarf_cu__find_tag_by_ref(dcu, &dtype->abstract_origin);
> +		if (dftype && dftype->tag) {
> +			struct ftype *ftype = tag__ftype(dftype->tag);
> +
> +			ftype->optimized_parms = 1;
> +		}
> +	}
>  }
>  
>  static void lexblock__recode_dwarf_types(struct lexblock *tag, struct cu *cu)
> diff --git a/dwarves.h b/dwarves.h
> index 589588e..1ad1b3b 100644
> --- a/dwarves.h
> +++ b/dwarves.h
> @@ -808,6 +808,7 @@ size_t lexblock__fprintf(const struct lexblock *lexblock, const struct cu *cu,
>  struct parameter {
>  	struct tag tag;
>  	const char *name;
> +	bool optimized;
>  };
>  
>  static inline struct parameter *tag__parameter(const struct tag *tag)
> @@ -827,7 +828,8 @@ struct ftype {
>  	struct tag	 tag;
>  	struct list_head parms;
>  	uint16_t	 nr_parms;
> -	uint8_t		 unspec_parms; /* just one bit is needed */
> +	uint8_t		 unspec_parms:1; /* just one bit is needed */
> +	uint8_t		 optimized_parms:1;
>  };
>  
>  static inline struct ftype *tag__ftype(const struct tag *tag)
Alan Maguire Jan. 25, 2023, 6:28 p.m. UTC | #3
On 25/01/2023 17:47, Eduard Zingerman wrote:
> On Tue, 2023-01-24 at 13:45 +0000, Alan Maguire wrote:
>> Compilation generates DWARF at several stages, and often the
>> later DWARF representations more accurately represent optimizations
>> that have occurred during compilation.
>>
>> In particular, parameter representations can be spotted by their
>> abstract origin references to the original parameter, but they
>> often have more accurate location information.  In most cases,
>> the parameter locations will match calling conventions, and be
>> registers for the first 6 parameters on x86_64, first 8 on ARM64
>> etc.  If the parameter is not a register when it should be however,
>> it is likely passed via the stack or the compiler has used a
>> constant representation instead.
>>
>> This change adds a field to parameters and their associated
>> ftype to note if a parameter has been optimized out.  Having
>> this information allows us to skip such functions, as their
>> presence in CUs makes BTF encoding impossible.
>>
>> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
>> ---
>>  dwarf_loader.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>>  dwarves.h      |  4 +++-
>>  2 files changed, 77 insertions(+), 3 deletions(-)
>>
>> diff --git a/dwarf_loader.c b/dwarf_loader.c
>> index 5a74035..0220f1d 100644
>> --- a/dwarf_loader.c
>> +++ b/dwarf_loader.c
>> @@ -992,13 +992,67 @@ static struct class_member *class_member__new(Dwarf_Die *die, struct cu *cu,
>>  	return member;
>>  }
>>  
>> -static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu, struct conf_load *conf)
>> +/* How many function parameters are passed via registers?  Used below in
>> + * determining if an argument has been optimized out or if it is simply
>> + * an argument > NR_REGISTER_PARAMS.  Setting NR_REGISTER_PARAMS to 0
>> + * allows unsupported architectures to skip tagging optimized-out
>> + * values.
>> + */
>> +#if defined(__x86_64__)
>> +#define NR_REGISTER_PARAMS      6
>> +#elif defined(__s390__)
>> +#define NR_REGISTER_PARAMS	5
>> +#elif defined(__aarch64__)
>> +#define NR_REGISTER_PARAMS      8
>> +#elif defined(__mips__)
>> +#define NR_REGISTER_PARAMS	8
>> +#elif defined(__powerpc__)
>> +#define NR_REGISTER_PARAMS	8
>> +#elif defined(__sparc__)
>> +#define NR_REGISTER_PARAMS	6
>> +#elif defined(__riscv) && __riscv_xlen == 64
>> +#define NR_REGISTER_PARAMS	8
>> +#elif defined(__arc__)
>> +#define NR_REGISTER_PARAMS	8
>> +#else
>> +#define NR_REGISTER_PARAMS      0
>> +#endif
>> +
>> +static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu,
>> +					struct conf_load *conf, int param_idx)
>>  {
>>  	struct parameter *parm = tag__alloc(cu, sizeof(*parm));
>>  
>>  	if (parm != NULL) {
>> +		struct location loc;
>> +
>>  		tag__init(&parm->tag, cu, die);
>>  		parm->name = attr_string(die, DW_AT_name, conf);
>> +
>> +		/* Parameters which use DW_AT_abstract_origin to point at
>> +		 * the original parameter definition (with no name in the DIE)
>> +		 * are the result of later DWARF generation during compilation
>> +		 * so often better take into account if arguments were
>> +		 * optimized out.
>> +		 *
>> +		 * By checking that locations for parameters that are expected
>> +		 * to be passed as registers are actually passed as registers,
>> +		 * we can spot optimized-out parameters.
>> +		 */
>> +		if (param_idx < NR_REGISTER_PARAMS && !parm->name &&
>> +		    attr_location(die, &loc.expr, &loc.exprlen) == 0 &&
>> +		    loc.exprlen != 0) {
>> +			Dwarf_Op *expr = loc.expr;
>> +
>> +			switch (expr->atom) {
>> +			case DW_OP_reg1 ... DW_OP_reg31:
>> +			case DW_OP_breg0 ... DW_OP_breg31:
>> +				break;
>> +			default:
>> +				parm->optimized = true;
>> +				break;
>> +			}
>> +		}
> 
> Hi Alan,
> 
> I looked through the DWARF standard and found two relevant entries:
> 
>> 4.1.4
>>
>> If no location attribute is present in a variable entry representing
>> the definition of a variable (...), or if the location attribute is
>> present but has an empty location description (...), the variable is
>> assumed to exist in the source code but not in the executable program
>> (but see number 10, below).
> 
> This paragraph implies that parameter name presence or absence is
> irrelevant, but I don't have any examples when parameter name is
> present for a removed parameter.
> 
>> 4.1.10
>>
>> A DW_AT_const_value attribute for an entry describing a variable or formal
>> parameter whose value is constant and not represented by an object in the
>> address space of the program, or an entry describing a named constant. (Note
>> that such an entry does not have a location attribute.)
> 
> For this paragraph I have an example:
> 
>     $ cat test.c
>     __attribute__((noinline))
>     static int f(int x, int y) {
>         return x + y;
>     }
>     
>     int main(int argc, char *argv[]) {
>         return f(1, 2) + f(1, 3);
>     }
>     
>     $ gcc --version | head -n1
>     gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0
>     $ gcc -O2 -g -c test.c -o test.o
>     
> The objdump shows that constant propagation removed the first
> parameter of the function `f`:
> 
>     $ llvm-objdump -d test.o 
>     
>     test.o:	file format elf64-x86-64
>     
>     Disassembly of section .text:
>     
>     0000000000000000 <f.constprop.0>:
>            0: 8d 47 01                     	leal	0x1(%rdi), %eax
>            3: c3                           	retq
>     
>     Disassembly of section .text.startup:
>     
>     0000000000000000 <main>:
>            0: f3 0f 1e fa                  	endbr64
>            4: bf 02 00 00 00               	movl	$0x2, %edi
>            9: e8 00 00 00 00               	callq	0xe <main+0xe>
>            e: bf 03 00 00 00               	movl	$0x3, %edi
>           13: 89 c2                        	movl	%eax, %edx
>           15: e8 00 00 00 00               	callq	0x1a <main+0x1a>
>           1a: 01 d0                        	addl	%edx, %eax
>           1c: c3                           	retq
>     
> However, the information about this parameter is still present in the DWARF:
> 
>     $ llvm-dwarfdump test.o
>     ...
>     0x000000c1:   DW_TAG_subprogram
>                     DW_AT_name	("f")
>                     DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>                     DW_AT_decl_line	(2)
>                     DW_AT_decl_column	(0x0c)
>                     DW_AT_prototyped	(true)
>                     DW_AT_type	(0x000000a9 "int")
>                     DW_AT_inline	(DW_INL_inlined)
>                     DW_AT_sibling	(0x000000e1)
>     
>     0x000000d0:     DW_TAG_formal_parameter
>                       DW_AT_name	("x")
>                       DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>                       DW_AT_decl_line	(2)
>                       DW_AT_decl_column	(0x12)
>                       DW_AT_type	(0x000000a9 "int")
>     
>     0x000000d8:     DW_TAG_formal_parameter
>                       DW_AT_name	("y")
>                       DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>                       DW_AT_decl_line	(2)
>                       DW_AT_decl_column	(0x19)
>                       DW_AT_type	(0x000000a9 "int")
>     
>     0x000000e0:     NULL
>     
>     0x000000e1:   DW_TAG_subprogram
>                     DW_AT_abstract_origin	(0x000000c1 "f")
>                     DW_AT_low_pc	(0x0000000000000000)
>                     DW_AT_high_pc	(0x0000000000000004)
>                     DW_AT_frame_base	(DW_OP_call_frame_cfa)
>                     DW_AT_call_all_calls	(true)
>     
>     0x000000f8:     DW_TAG_formal_parameter
>                       DW_AT_abstract_origin	(0x000000d8 "y")
>                       DW_AT_location	(DW_OP_reg5 RDI)
>     
>     0x000000ff:     DW_TAG_formal_parameter
>                       DW_AT_abstract_origin	(0x000000d0 "x")
>                       DW_AT_const_value	(0x01)
>     
>     0x00000105:     NULL
>     
> When I ask pahole with this patch-set applied to generate BTF I see
> the following output:
> 
>     $ pahole --verbose --btf_encode_detached=test.btf test.o
>     btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
>     Found 0 per-CPU variables!
>     Found 2 functions!
>     File test.o:
>     [1] INT int size=4 nr_bits=32 encoding=SIGNED
>     [2] PTR (anon) type_id=3
>     [3] PTR (anon) type_id=4
>     [4] INT char size=1 nr_bits=8 encoding=SIGNED
>     [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
>     [6] FUNC main type_id=5
>     matched function 'f' with 'f.constprop.0'
>     added local function 'f'
>     matched function 'f' with 'f.constprop.0'
>     [7] FUNC_PROTO (anon) return=1 args=(1 x, 1 y)
>     [8] FUNC f type_id=7
>     
> Meaning that function `f` had not been skipped.
> A trivial modification overcomes this:
> 
> 		if (param_idx < NR_REGISTER_PARAMS && !parm->name) {
> 			if (attr_location(die, &loc.expr, &loc.exprlen) == 0 &&
> 			    loc.exprlen != 0) {
> 				Dwarf_Op *expr = loc.expr;
> 
> 				switch (expr->atom) {
> 				case DW_OP_reg1 ... DW_OP_reg31:
> 				case DW_OP_breg0 ... DW_OP_breg31:
> 					break;
> 				default:
> 					parm->optimized = true;
> 					break;
> 				}
> 			} else if (dwarf_attr(die, DW_AT_const_value, &attr) != NULL) {
> 					parm->optimized = true;
> 			}
> 
> With it pahole seem to work as intended (if I understand the intention correctly):
> 
>     $ pahole --verbose --btf_encode_detached=test.btf test.o
>     btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
>     Found 0 per-CPU variables!
>     Found 2 functions!
>     File test.o:
>     [1] INT int size=4 nr_bits=32 encoding=SIGNED
>     [2] PTR (anon) type_id=3
>     [3] PTR (anon) type_id=4
>     [4] INT char size=1 nr_bits=8 encoding=SIGNED
>     [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
>     [6] FUNC main type_id=5
>     matched function 'f' with 'f.constprop.0', has optimized-out parameters
>     added local function 'f', optimized-out params
>     matched function 'f' with 'f.constprop.0', has optimized-out parameters
>     skipping addition of 'f' due to optimized-out parameters
> 
> wdyt?
> 

This is great, thanks Eduard! I can add an additional patch
for the else clause code above, attributing that to you in v2 if
you like?

Alan

> Thanks,
> Eduard
> 
>>  
>>  	return parm;
>> @@ -1450,7 +1504,7 @@ static struct tag *die__create_new_parameter(Dwarf_Die *die,
>>  					     struct cu *cu, struct conf_load *conf,
>>  					     int param_idx)
>>  {
>> -	struct parameter *parm = parameter__new(die, cu, conf);
>> +	struct parameter *parm = parameter__new(die, cu, conf, param_idx);
>>  
>>  	if (parm == NULL)
>>  		return NULL;
>> @@ -2209,6 +2263,10 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
>>  			}
>>  			pos->name = tag__parameter(dtype->tag)->name;
>>  			pos->tag.type = dtype->tag->type;
>> +			if (pos->optimized) {
>> +				tag__parameter(dtype->tag)->optimized = pos->optimized;
>> +				type->optimized_parms = 1;
>> +			}
>>  			continue;
>>  		}
>>  
>> @@ -2219,6 +2277,20 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
>>  		}
>>  		pos->tag.type = dtype->small_id;
>>  	}
>> +	/* if parameters were optimized out, set flag for the ftype this
>> +	 * function tag referred to via abstract origin.
>> +	 */
>> +	if (type->optimized_parms) {
>> +		struct dwarf_tag *dtype = type->tag.priv;
>> +		struct dwarf_tag *dftype;
>> +
>> +		dftype = dwarf_cu__find_tag_by_ref(dcu, &dtype->abstract_origin);
>> +		if (dftype && dftype->tag) {
>> +			struct ftype *ftype = tag__ftype(dftype->tag);
>> +
>> +			ftype->optimized_parms = 1;
>> +		}
>> +	}
>>  }
>>  
>>  static void lexblock__recode_dwarf_types(struct lexblock *tag, struct cu *cu)
>> diff --git a/dwarves.h b/dwarves.h
>> index 589588e..1ad1b3b 100644
>> --- a/dwarves.h
>> +++ b/dwarves.h
>> @@ -808,6 +808,7 @@ size_t lexblock__fprintf(const struct lexblock *lexblock, const struct cu *cu,
>>  struct parameter {
>>  	struct tag tag;
>>  	const char *name;
>> +	bool optimized;
>>  };
>>  
>>  static inline struct parameter *tag__parameter(const struct tag *tag)
>> @@ -827,7 +828,8 @@ struct ftype {
>>  	struct tag	 tag;
>>  	struct list_head parms;
>>  	uint16_t	 nr_parms;
>> -	uint8_t		 unspec_parms; /* just one bit is needed */
>> +	uint8_t		 unspec_parms:1; /* just one bit is needed */
>> +	uint8_t		 optimized_parms:1;
>>  };
>>  
>>  static inline struct ftype *tag__ftype(const struct tag *tag)
>
Eduard Zingerman Jan. 25, 2023, 9:34 p.m. UTC | #4
On Wed, 2023-01-25 at 18:28 +0000, Alan Maguire wrote:
> On 25/01/2023 17:47, Eduard Zingerman wrote:
> > On Tue, 2023-01-24 at 13:45 +0000, Alan Maguire wrote:
> > > Compilation generates DWARF at several stages, and often the
> > > later DWARF representations more accurately represent optimizations
> > > that have occurred during compilation.
> > > 
> > > In particular, parameter representations can be spotted by their
> > > abstract origin references to the original parameter, but they
> > > often have more accurate location information.  In most cases,
> > > the parameter locations will match calling conventions, and be
> > > registers for the first 6 parameters on x86_64, first 8 on ARM64
> > > etc.  If the parameter is not a register when it should be however,
> > > it is likely passed via the stack or the compiler has used a
> > > constant representation instead.
> > > 
> > > This change adds a field to parameters and their associated
> > > ftype to note if a parameter has been optimized out.  Having
> > > this information allows us to skip such functions, as their
> > > presence in CUs makes BTF encoding impossible.
> > > 
> > > Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> > > ---
> > >  dwarf_loader.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> > >  dwarves.h      |  4 +++-
> > >  2 files changed, 77 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/dwarf_loader.c b/dwarf_loader.c
> > > index 5a74035..0220f1d 100644
> > > --- a/dwarf_loader.c
> > > +++ b/dwarf_loader.c
> > > @@ -992,13 +992,67 @@ static struct class_member *class_member__new(Dwarf_Die *die, struct cu *cu,
> > >  	return member;
> > >  }
> > >  
> > > -static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu, struct conf_load *conf)
> > > +/* How many function parameters are passed via registers?  Used below in
> > > + * determining if an argument has been optimized out or if it is simply
> > > + * an argument > NR_REGISTER_PARAMS.  Setting NR_REGISTER_PARAMS to 0
> > > + * allows unsupported architectures to skip tagging optimized-out
> > > + * values.
> > > + */
> > > +#if defined(__x86_64__)
> > > +#define NR_REGISTER_PARAMS      6
> > > +#elif defined(__s390__)
> > > +#define NR_REGISTER_PARAMS	5
> > > +#elif defined(__aarch64__)
> > > +#define NR_REGISTER_PARAMS      8
> > > +#elif defined(__mips__)
> > > +#define NR_REGISTER_PARAMS	8
> > > +#elif defined(__powerpc__)
> > > +#define NR_REGISTER_PARAMS	8
> > > +#elif defined(__sparc__)
> > > +#define NR_REGISTER_PARAMS	6
> > > +#elif defined(__riscv) && __riscv_xlen == 64
> > > +#define NR_REGISTER_PARAMS	8
> > > +#elif defined(__arc__)
> > > +#define NR_REGISTER_PARAMS	8
> > > +#else
> > > +#define NR_REGISTER_PARAMS      0
> > > +#endif
> > > +
> > > +static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu,
> > > +					struct conf_load *conf, int param_idx)
> > >  {
> > >  	struct parameter *parm = tag__alloc(cu, sizeof(*parm));
> > >  
> > >  	if (parm != NULL) {
> > > +		struct location loc;
> > > +
> > >  		tag__init(&parm->tag, cu, die);
> > >  		parm->name = attr_string(die, DW_AT_name, conf);
> > > +
> > > +		/* Parameters which use DW_AT_abstract_origin to point at
> > > +		 * the original parameter definition (with no name in the DIE)
> > > +		 * are the result of later DWARF generation during compilation
> > > +		 * so often better take into account if arguments were
> > > +		 * optimized out.
> > > +		 *
> > > +		 * By checking that locations for parameters that are expected
> > > +		 * to be passed as registers are actually passed as registers,
> > > +		 * we can spot optimized-out parameters.
> > > +		 */
> > > +		if (param_idx < NR_REGISTER_PARAMS && !parm->name &&
> > > +		    attr_location(die, &loc.expr, &loc.exprlen) == 0 &&
> > > +		    loc.exprlen != 0) {
> > > +			Dwarf_Op *expr = loc.expr;
> > > +
> > > +			switch (expr->atom) {
> > > +			case DW_OP_reg1 ... DW_OP_reg31:
> > > +			case DW_OP_breg0 ... DW_OP_breg31:
> > > +				break;
> > > +			default:
> > > +				parm->optimized = true;
> > > +				break;
> > > +			}
> > > +		}
> > 
> > Hi Alan,
> > 
> > I looked through the DWARF standard and found two relevant entries:
> > 
> > > 4.1.4
> > > 
> > > If no location attribute is present in a variable entry representing
> > > the definition of a variable (...), or if the location attribute is
> > > present but has an empty location description (...), the variable is
> > > assumed to exist in the source code but not in the executable program
> > > (but see number 10, below).
> > 
> > This paragraph implies that parameter name presence or absence is
> > irrelevant, but I don't have any examples when parameter name is
> > present for a removed parameter.
> > 
> > > 4.1.10
> > > 
> > > A DW_AT_const_value attribute for an entry describing a variable or formal
> > > parameter whose value is constant and not represented by an object in the
> > > address space of the program, or an entry describing a named constant. (Note
> > > that such an entry does not have a location attribute.)
> > 
> > For this paragraph I have an example:
> > 
> >     $ cat test.c
> >     __attribute__((noinline))
> >     static int f(int x, int y) {
> >         return x + y;
> >     }
> >     
> >     int main(int argc, char *argv[]) {
> >         return f(1, 2) + f(1, 3);
> >     }
> >     
> >     $ gcc --version | head -n1
> >     gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0
> >     $ gcc -O2 -g -c test.c -o test.o
> >     
> > The objdump shows that constant propagation removed the first
> > parameter of the function `f`:
> > 
> >     $ llvm-objdump -d test.o 
> >     
> >     test.o:	file format elf64-x86-64
> >     
> >     Disassembly of section .text:
> >     
> >     0000000000000000 <f.constprop.0>:
> >            0: 8d 47 01                     	leal	0x1(%rdi), %eax
> >            3: c3                           	retq
> >     
> >     Disassembly of section .text.startup:
> >     
> >     0000000000000000 <main>:
> >            0: f3 0f 1e fa                  	endbr64
> >            4: bf 02 00 00 00               	movl	$0x2, %edi
> >            9: e8 00 00 00 00               	callq	0xe <main+0xe>
> >            e: bf 03 00 00 00               	movl	$0x3, %edi
> >           13: 89 c2                        	movl	%eax, %edx
> >           15: e8 00 00 00 00               	callq	0x1a <main+0x1a>
> >           1a: 01 d0                        	addl	%edx, %eax
> >           1c: c3                           	retq
> >     
> > However, the information about this parameter is still present in the DWARF:
> > 
> >     $ llvm-dwarfdump test.o
> >     ...
> >     0x000000c1:   DW_TAG_subprogram
> >                     DW_AT_name	("f")
> >                     DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
> >                     DW_AT_decl_line	(2)
> >                     DW_AT_decl_column	(0x0c)
> >                     DW_AT_prototyped	(true)
> >                     DW_AT_type	(0x000000a9 "int")
> >                     DW_AT_inline	(DW_INL_inlined)
> >                     DW_AT_sibling	(0x000000e1)
> >     
> >     0x000000d0:     DW_TAG_formal_parameter
> >                       DW_AT_name	("x")
> >                       DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
> >                       DW_AT_decl_line	(2)
> >                       DW_AT_decl_column	(0x12)
> >                       DW_AT_type	(0x000000a9 "int")
> >     
> >     0x000000d8:     DW_TAG_formal_parameter
> >                       DW_AT_name	("y")
> >                       DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
> >                       DW_AT_decl_line	(2)
> >                       DW_AT_decl_column	(0x19)
> >                       DW_AT_type	(0x000000a9 "int")
> >     
> >     0x000000e0:     NULL
> >     
> >     0x000000e1:   DW_TAG_subprogram
> >                     DW_AT_abstract_origin	(0x000000c1 "f")
> >                     DW_AT_low_pc	(0x0000000000000000)
> >                     DW_AT_high_pc	(0x0000000000000004)
> >                     DW_AT_frame_base	(DW_OP_call_frame_cfa)
> >                     DW_AT_call_all_calls	(true)
> >     
> >     0x000000f8:     DW_TAG_formal_parameter
> >                       DW_AT_abstract_origin	(0x000000d8 "y")
> >                       DW_AT_location	(DW_OP_reg5 RDI)
> >     
> >     0x000000ff:     DW_TAG_formal_parameter
> >                       DW_AT_abstract_origin	(0x000000d0 "x")
> >                       DW_AT_const_value	(0x01)
> >     
> >     0x00000105:     NULL
> >     
> > When I ask pahole with this patch-set applied to generate BTF I see
> > the following output:
> > 
> >     $ pahole --verbose --btf_encode_detached=test.btf test.o
> >     btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
> >     Found 0 per-CPU variables!
> >     Found 2 functions!
> >     File test.o:
> >     [1] INT int size=4 nr_bits=32 encoding=SIGNED
> >     [2] PTR (anon) type_id=3
> >     [3] PTR (anon) type_id=4
> >     [4] INT char size=1 nr_bits=8 encoding=SIGNED
> >     [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
> >     [6] FUNC main type_id=5
> >     matched function 'f' with 'f.constprop.0'
> >     added local function 'f'
> >     matched function 'f' with 'f.constprop.0'
> >     [7] FUNC_PROTO (anon) return=1 args=(1 x, 1 y)
> >     [8] FUNC f type_id=7
> >     
> > Meaning that function `f` had not been skipped.
> > A trivial modification overcomes this:
> > 
> > 		if (param_idx < NR_REGISTER_PARAMS && !parm->name) {
> > 			if (attr_location(die, &loc.expr, &loc.exprlen) == 0 &&
> > 			    loc.exprlen != 0) {
> > 				Dwarf_Op *expr = loc.expr;
> > 
> > 				switch (expr->atom) {
> > 				case DW_OP_reg1 ... DW_OP_reg31:
> > 				case DW_OP_breg0 ... DW_OP_breg31:
> > 					break;
> > 				default:
> > 					parm->optimized = true;
> > 					break;
> > 				}
> > 			} else if (dwarf_attr(die, DW_AT_const_value, &attr) != NULL) {
> > 					parm->optimized = true;
> > 			}
> > 
> > With it pahole seem to work as intended (if I understand the intention correctly):
> > 
> >     $ pahole --verbose --btf_encode_detached=test.btf test.o
> >     btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
> >     Found 0 per-CPU variables!
> >     Found 2 functions!
> >     File test.o:
> >     [1] INT int size=4 nr_bits=32 encoding=SIGNED
> >     [2] PTR (anon) type_id=3
> >     [3] PTR (anon) type_id=4
> >     [4] INT char size=1 nr_bits=8 encoding=SIGNED
> >     [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
> >     [6] FUNC main type_id=5
> >     matched function 'f' with 'f.constprop.0', has optimized-out parameters
> >     added local function 'f', optimized-out params
> >     matched function 'f' with 'f.constprop.0', has optimized-out parameters
> >     skipping addition of 'f' due to optimized-out parameters
> > 
> > wdyt?
> > 
> 
> This is great, thanks Eduard! I can add an additional patch
> for the else clause code above, attributing that to you in v2 if
> you like?
> 
> Alan
> 

More on this topic: I tried the same example with clang, and the
DWARF generated by clang differs significantly.

    $ cat test.c
    __attribute__((noinline))
    static int f(int x, int y) {
        return x + y;
    }
    
    int main(int argc, char *argv[]) {
        return f(1, 2) + f(1, 3);
    }
    
    $ clang --version | head -n1
    clang version 16.0.0 (https://github.com/llvm/llvm-project.git 50d4a1f70e111cd41b1a94d95fd06b5691aa2643)
    
    $ clang -O2 -g -c test.c -o test.o

llvm-objdump shows that the first parameter is still optimized out:

    $ llvm-objdump -d test.o 
    
    test.o:	file format elf64-x86-64
    
    Disassembly of section .text:
    
    0000000000000000 <main>:
           0: 53                           	pushq	%rbx
           1: bf 02 00 00 00               	movl	$0x2, %edi
           6: e8 15 00 00 00               	callq	0x20 <f>
           b: 89 c3                        	movl	%eax, %ebx
           d: bf 03 00 00 00               	movl	$0x3, %edi
          12: e8 09 00 00 00               	callq	0x20 <f>
          17: 01 d8                        	addl	%ebx, %eax
          19: 5b                           	popq	%rbx
          1a: c3                           	retq
          1b: 0f 1f 44 00 00               	nopl	(%rax,%rax)
    
    0000000000000020 <f>:
          20: 8d 47 01                     	leal	0x1(%rdi), %eax
          23: c3                           	retq

And here is the DWARF; note that the formal parameter `x` has both
`DW_AT_name` and `DW_AT_const_value` attributes:

    $ llvm-dwarfdump test.o
    ...
    0x00000061:   DW_TAG_subprogram
                    DW_AT_low_pc	(0x0000000000000020)
                    DW_AT_high_pc	(0x0000000000000024)
                    DW_AT_frame_base	(DW_OP_reg7 RSP)
                    DW_AT_call_all_calls	(true)
                    DW_AT_name	("f")
                    DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
                    DW_AT_decl_line	(2)
                    DW_AT_prototyped	(true)
                    DW_AT_calling_convention	(DW_CC_nocall)
                    DW_AT_type	(0x00000085 "int")
    
    0x00000071:     DW_TAG_formal_parameter
                      DW_AT_const_value	(1)
                      DW_AT_name	("x")
                      DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
                      DW_AT_decl_line	(2)
                      DW_AT_type	(0x00000085 "int")
    
    0x0000007a:     DW_TAG_formal_parameter
                      DW_AT_location	(DW_OP_reg5 RDI)
                      DW_AT_name	("y")
                      DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
                      DW_AT_decl_line	(2)
                      DW_AT_type	(0x00000085 "int")
    
    0x00000084:     NULL
    ...

Given this DWARF layout, pahole does not recognize `x` as optimized out:

    $ pahole --verbose --btf_encode_detached=test.btf test.o
    btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
    Found 0 per-CPU variables!
    Found 2 functions!
    File test.o:
    [1] INT int size=4 nr_bits=32 encoding=SIGNED
    [2] PTR (anon) type_id=3
    [3] PTR (anon) type_id=4
    [4] INT char size=1 nr_bits=8 encoding=SIGNED
    [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
    [6] FUNC main type_id=5
    [7] FUNC_PROTO (anon) return=1 args=(1 x, 1 y)
    [8] FUNC f type_id=7

The way I read paragraph 4.1.4 mentioned before, the `DW_AT_name`
attribute should not be used to identify whether a parameter is
optimized out. Unfortunately, a trivial modification of the condition
in `parameter__new()` to remove the `!parm->name` check is not
sufficient. For some reason parameters `x` and `y` are not visited in
`ftype__recode_dwarf_types()`, and thus the `optimized_parms` field is
not set.

Thanks,
Eduard



> > Thanks,
> > Eduard
> > 
> > >  
> > >  	return parm;
> > > @@ -1450,7 +1504,7 @@ static struct tag *die__create_new_parameter(Dwarf_Die *die,
> > >  					     struct cu *cu, struct conf_load *conf,
> > >  					     int param_idx)
> > >  {
> > > -	struct parameter *parm = parameter__new(die, cu, conf);
> > > +	struct parameter *parm = parameter__new(die, cu, conf, param_idx);
> > >  
> > >  	if (parm == NULL)
> > >  		return NULL;
> > > @@ -2209,6 +2263,10 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
> > >  			}
> > >  			pos->name = tag__parameter(dtype->tag)->name;
> > >  			pos->tag.type = dtype->tag->type;
> > > +			if (pos->optimized) {
> > > +				tag__parameter(dtype->tag)->optimized = pos->optimized;
> > > +				type->optimized_parms = 1;
> > > +			}
> > >  			continue;
> > >  		}
> > >  
> > > @@ -2219,6 +2277,20 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
> > >  		}
> > >  		pos->tag.type = dtype->small_id;
> > >  	}
> > > +	/* if parameters were optimized out, set flag for the ftype this
> > > +	 * function tag referred to via abstract origin.
> > > +	 */
> > > +	if (type->optimized_parms) {
> > > +		struct dwarf_tag *dtype = type->tag.priv;
> > > +		struct dwarf_tag *dftype;
> > > +
> > > +		dftype = dwarf_cu__find_tag_by_ref(dcu, &dtype->abstract_origin);
> > > +		if (dftype && dftype->tag) {
> > > +			struct ftype *ftype = tag__ftype(dftype->tag);
> > > +
> > > +			ftype->optimized_parms = 1;
> > > +		}
> > > +	}
> > >  }
> > >  
> > >  static void lexblock__recode_dwarf_types(struct lexblock *tag, struct cu *cu)
> > > diff --git a/dwarves.h b/dwarves.h
> > > index 589588e..1ad1b3b 100644
> > > --- a/dwarves.h
> > > +++ b/dwarves.h
> > > @@ -808,6 +808,7 @@ size_t lexblock__fprintf(const struct lexblock *lexblock, const struct cu *cu,
> > >  struct parameter {
> > >  	struct tag tag;
> > >  	const char *name;
> > > +	bool optimized;
> > >  };
> > >  
> > >  static inline struct parameter *tag__parameter(const struct tag *tag)
> > > @@ -827,7 +828,8 @@ struct ftype {
> > >  	struct tag	 tag;
> > >  	struct list_head parms;
> > >  	uint16_t	 nr_parms;
> > > -	uint8_t		 unspec_parms; /* just one bit is needed */
> > > +	uint8_t		 unspec_parms:1; /* just one bit is needed */
> > > +	uint8_t		 optimized_parms:1;
> > >  };
> > >  
> > >  static inline struct ftype *tag__ftype(const struct tag *tag)
> >
Alan Maguire Jan. 25, 2023, 10:52 p.m. UTC | #5
On 25/01/2023 21:34, Eduard Zingerman wrote:
> On Wed, 2023-01-25 at 18:28 +0000, Alan Maguire wrote:
>> On 25/01/2023 17:47, Eduard Zingerman wrote:
>>> On Tue, 2023-01-24 at 13:45 +0000, Alan Maguire wrote:
>>>> Compilation generates DWARF at several stages, and often the
>>>> later DWARF representations more accurately represent optimizations
>>>> that have occurred during compilation.
>>>>
>>>> In particular, parameter representations can be spotted by their
>>>> abstract origin references to the original parameter, but they
>>>> often have more accurate location information.  In most cases,
>>>> the parameter locations will match calling conventions, and be
>>>> registers for the first 6 parameters on x86_64, first 8 on ARM64
>>>> etc.  If the parameter is not a register when it should be however,
>>>> it is likely passed via the stack or the compiler has used a
>>>> constant representation instead.
>>>>
>>>> This change adds a field to parameters and their associated
>>>> ftype to note if a parameter has been optimized out.  Having
>>>> this information allows us to skip such functions, as their
>>>> presence in CUs makes BTF encoding impossible.
>>>>
>>>> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
>>>> ---
>>>>  dwarf_loader.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>>>>  dwarves.h      |  4 +++-
>>>>  2 files changed, 77 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/dwarf_loader.c b/dwarf_loader.c
>>>> index 5a74035..0220f1d 100644
>>>> --- a/dwarf_loader.c
>>>> +++ b/dwarf_loader.c
>>>> @@ -992,13 +992,67 @@ static struct class_member *class_member__new(Dwarf_Die *die, struct cu *cu,
>>>>  	return member;
>>>>  }
>>>>  
>>>> -static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu, struct conf_load *conf)
>>>> +/* How many function parameters are passed via registers?  Used below in
>>>> + * determining if an argument has been optimized out or if it is simply
>>>> + * an argument > NR_REGISTER_PARAMS.  Setting NR_REGISTER_PARAMS to 0
>>>> + * allows unsupported architectures to skip tagging optimized-out
>>>> + * values.
>>>> + */
>>>> +#if defined(__x86_64__)
>>>> +#define NR_REGISTER_PARAMS      6
>>>> +#elif defined(__s390__)
>>>> +#define NR_REGISTER_PARAMS	5
>>>> +#elif defined(__aarch64__)
>>>> +#define NR_REGISTER_PARAMS      8
>>>> +#elif defined(__mips__)
>>>> +#define NR_REGISTER_PARAMS	8
>>>> +#elif defined(__powerpc__)
>>>> +#define NR_REGISTER_PARAMS	8
>>>> +#elif defined(__sparc__)
>>>> +#define NR_REGISTER_PARAMS	6
>>>> +#elif defined(__riscv) && __riscv_xlen == 64
>>>> +#define NR_REGISTER_PARAMS	8
>>>> +#elif defined(__arc__)
>>>> +#define NR_REGISTER_PARAMS	8
>>>> +#else
>>>> +#define NR_REGISTER_PARAMS      0
>>>> +#endif
>>>> +
>>>> +static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu,
>>>> +					struct conf_load *conf, int param_idx)
>>>>  {
>>>>  	struct parameter *parm = tag__alloc(cu, sizeof(*parm));
>>>>  
>>>>  	if (parm != NULL) {
>>>> +		struct location loc;
>>>> +
>>>>  		tag__init(&parm->tag, cu, die);
>>>>  		parm->name = attr_string(die, DW_AT_name, conf);
>>>> +
>>>> +		/* Parameters which use DW_AT_abstract_origin to point at
>>>> +		 * the original parameter definition (with no name in the DIE)
>>>> +		 * are the result of later DWARF generation during compilation
>>>> +		 * so often better take into account if arguments were
>>>> +		 * optimized out.
>>>> +		 *
>>>> +		 * By checking that locations for parameters that are expected
>>>> +		 * to be passed as registers are actually passed as registers,
>>>> +		 * we can spot optimized-out parameters.
>>>> +		 */
>>>> +		if (param_idx < NR_REGISTER_PARAMS && !parm->name &&
>>>> +		    attr_location(die, &loc.expr, &loc.exprlen) == 0 &&
>>>> +		    loc.exprlen != 0) {
>>>> +			Dwarf_Op *expr = loc.expr;
>>>> +
>>>> +			switch (expr->atom) {
>>>> +			case DW_OP_reg1 ... DW_OP_reg31:
>>>> +			case DW_OP_breg0 ... DW_OP_breg31:
>>>> +				break;
>>>> +			default:
>>>> +				parm->optimized = true;
>>>> +				break;
>>>> +			}
>>>> +		}
>>>
>>> Hi Alan,
>>>
>>> I looked through the DWARF standard and found two relevant entries:
>>>
>>>> 4.1.4
>>>>
>>>> If no location attribute is present in a variable entry representing
>>>> the definition of a variable (...), or if the location attribute is
>>>> present but has an empty location description (...), the variable is
>>>> assumed to exist in the source code but not in the executable program
>>>> (but see number 10, below).
>>>
>>> This paragraph implies that parameter name presence or absence is
>>> irrelevant, but I don't have any examples when parameter name is
>>> present for a removed parameter.
>>>
>>>> 4.1.10
>>>>
>>>> A DW_AT_const_value attribute for an entry describing a variable or formal
>>>> parameter whose value is constant and not represented by an object in the
>>>> address space of the program, or an entry describing a named constant. (Note
>>>> that such an entry does not have a location attribute.)
>>>
>>> For this paragraph I have an example:
>>>
>>>     $ cat test.c
>>>     __attribute__((noinline))
>>>     static int f(int x, int y) {
>>>         return x + y;
>>>     }
>>>     
>>>     int main(int argc, char *argv[]) {
>>>         return f(1, 2) + f(1, 3);
>>>     }
>>>     
>>>     $ gcc --version | head -n1
>>>     gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0
>>>     $ gcc -O2 -g -c test.c -o test.o
>>>     
>>> The objdump shows that constant propagation removed the first
>>> parameter of the function `f`:
>>>
>>>     $ llvm-objdump -d test.o 
>>>     
>>>     test.o:	file format elf64-x86-64
>>>     
>>>     Disassembly of section .text:
>>>     
>>>     0000000000000000 <f.constprop.0>:
>>>            0: 8d 47 01                     	leal	0x1(%rdi), %eax
>>>            3: c3                           	retq
>>>     
>>>     Disassembly of section .text.startup:
>>>     
>>>     0000000000000000 <main>:
>>>            0: f3 0f 1e fa                  	endbr64
>>>            4: bf 02 00 00 00               	movl	$0x2, %edi
>>>            9: e8 00 00 00 00               	callq	0xe <main+0xe>
>>>            e: bf 03 00 00 00               	movl	$0x3, %edi
>>>           13: 89 c2                        	movl	%eax, %edx
>>>           15: e8 00 00 00 00               	callq	0x1a <main+0x1a>
>>>           1a: 01 d0                        	addl	%edx, %eax
>>>           1c: c3                           	retq
>>>     
>>> However, the information about this parameter is still present in the DWARF:
>>>
>>>     $ llvm-dwarfdump test.o
>>>     ...
>>>     0x000000c1:   DW_TAG_subprogram
>>>                     DW_AT_name	("f")
>>>                     DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>>>                     DW_AT_decl_line	(2)
>>>                     DW_AT_decl_column	(0x0c)
>>>                     DW_AT_prototyped	(true)
>>>                     DW_AT_type	(0x000000a9 "int")
>>>                     DW_AT_inline	(DW_INL_inlined)
>>>                     DW_AT_sibling	(0x000000e1)
>>>     
>>>     0x000000d0:     DW_TAG_formal_parameter
>>>                       DW_AT_name	("x")
>>>                       DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>>>                       DW_AT_decl_line	(2)
>>>                       DW_AT_decl_column	(0x12)
>>>                       DW_AT_type	(0x000000a9 "int")
>>>     
>>>     0x000000d8:     DW_TAG_formal_parameter
>>>                       DW_AT_name	("y")
>>>                       DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>>>                       DW_AT_decl_line	(2)
>>>                       DW_AT_decl_column	(0x19)
>>>                       DW_AT_type	(0x000000a9 "int")
>>>     
>>>     0x000000e0:     NULL
>>>     
>>>     0x000000e1:   DW_TAG_subprogram
>>>                     DW_AT_abstract_origin	(0x000000c1 "f")
>>>                     DW_AT_low_pc	(0x0000000000000000)
>>>                     DW_AT_high_pc	(0x0000000000000004)
>>>                     DW_AT_frame_base	(DW_OP_call_frame_cfa)
>>>                     DW_AT_call_all_calls	(true)
>>>     
>>>     0x000000f8:     DW_TAG_formal_parameter
>>>                       DW_AT_abstract_origin	(0x000000d8 "y")
>>>                       DW_AT_location	(DW_OP_reg5 RDI)
>>>     
>>>     0x000000ff:     DW_TAG_formal_parameter
>>>                       DW_AT_abstract_origin	(0x000000d0 "x")
>>>                       DW_AT_const_value	(0x01)
>>>     
>>>     0x00000105:     NULL
>>>     
>>> When I ask pahole with this patch-set applied to generate BTF I see
>>> the following output:
>>>
>>>     $ pahole --verbose --btf_encode_detached=test.btf test.o
>>>     btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
>>>     Found 0 per-CPU variables!
>>>     Found 2 functions!
>>>     File test.o:
>>>     [1] INT int size=4 nr_bits=32 encoding=SIGNED
>>>     [2] PTR (anon) type_id=3
>>>     [3] PTR (anon) type_id=4
>>>     [4] INT char size=1 nr_bits=8 encoding=SIGNED
>>>     [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
>>>     [6] FUNC main type_id=5
>>>     matched function 'f' with 'f.constprop.0'
>>>     added local function 'f'
>>>     matched function 'f' with 'f.constprop.0'
>>>     [7] FUNC_PROTO (anon) return=1 args=(1 x, 1 y)
>>>     [8] FUNC f type_id=7
>>>     
>>> Meaning that function `f` had not been skipped.
>>> A trivial modification overcomes this:
>>>
>>> 		if (param_idx < NR_REGISTER_PARAMS && !parm->name) {
>>> 			if (attr_location(die, &loc.expr, &loc.exprlen) == 0 &&
>>> 			    loc.exprlen != 0) {
>>> 				Dwarf_Op *expr = loc.expr;
>>>
>>> 				switch (expr->atom) {
>>> 				case DW_OP_reg1 ... DW_OP_reg31:
>>> 				case DW_OP_breg0 ... DW_OP_breg31:
>>> 					break;
>>> 				default:
>>> 					parm->optimized = true;
>>> 					break;
>>> 				}
>>> 			} else if (dwarf_attr(die, DW_AT_const_value, &attr) != NULL) {
>>> 					parm->optimized = true;
>>> 			}
>>>
>>> With it pahole seem to work as intended (if I understand the intention correctly):
>>>
>>>     $ pahole --verbose --btf_encode_detached=test.btf test.o
>>>     btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
>>>     Found 0 per-CPU variables!
>>>     Found 2 functions!
>>>     File test.o:
>>>     [1] INT int size=4 nr_bits=32 encoding=SIGNED
>>>     [2] PTR (anon) type_id=3
>>>     [3] PTR (anon) type_id=4
>>>     [4] INT char size=1 nr_bits=8 encoding=SIGNED
>>>     [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
>>>     [6] FUNC main type_id=5
>>>     matched function 'f' with 'f.constprop.0', has optimized-out parameters
>>>     added local function 'f', optimized-out params
>>>     matched function 'f' with 'f.constprop.0', has optimized-out parameters
>>>     skipping addition of 'f' due to optimized-out parameters
>>>
>>> wdyt?
>>>
>>
>> This is great, thanks Eduard! I can add an additional patch
>> for the else clause code above, attributing that to you in v2 if
>> you like?
>>
>> Alan
>>
> 
> More on this topic. I tried the same example but with clang,
> DWARF generated by clang differs significantly.
> 
>     $ cat test.c
>     __attribute__((noinline))
>     static int f(int x, int y) {
>         return x + y;
>     }
>     
>     int main(int argc, char *argv[]) {
>         return f(1, 2) + f(1, 3);
>     }
>     
>     $ clang --version | head -n1
>     clang version 16.0.0 (https://github.com/llvm/llvm-project.git 50d4a1f70e111cd41b1a94d95fd06b5691aa2643)
>     
>     $ clang -O2 -g -c test.c -o test.o
> 
> llvm-objdump shows that the first parameter is still optimized out:
> 
>     $ llvm-objdump -d test.o 
>     
>     test.o:	file format elf64-x86-64
>     
>     Disassembly of section .text:
>     
>     0000000000000000 <main>:
>            0: 53                           	pushq	%rbx
>            1: bf 02 00 00 00               	movl	$0x2, %edi
>            6: e8 15 00 00 00               	callq	0x20 <f>
>            b: 89 c3                        	movl	%eax, %ebx
>            d: bf 03 00 00 00               	movl	$0x3, %edi
>           12: e8 09 00 00 00               	callq	0x20 <f>
>           17: 01 d8                        	addl	%ebx, %eax
>           19: 5b                           	popq	%rbx
>           1a: c3                           	retq
>           1b: 0f 1f 44 00 00               	nopl	(%rax,%rax)
>     
>     0000000000000020 <f>:
>           20: 8d 47 01                     	leal	0x1(%rdi), %eax
>           23: c3                           	retq
> 
> And here is the DWARF, note that formal parameter has both
> `DW_AT_name` and `DW_AT_const_value` attributes:
> 
>     $ llvm-dwarfdump test.o
>     ...
>     0x00000061:   DW_TAG_subprogram
>                     DW_AT_low_pc	(0x0000000000000020)
>                     DW_AT_high_pc	(0x0000000000000024)
>                     DW_AT_frame_base	(DW_OP_reg7 RSP)
>                     DW_AT_call_all_calls	(true)
>                     DW_AT_name	("f")
>                     DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>                     DW_AT_decl_line	(2)
>                     DW_AT_prototyped	(true)
>                     DW_AT_calling_convention	(DW_CC_nocall)
>                     DW_AT_type	(0x00000085 "int")
>     
>     0x00000071:     DW_TAG_formal_parameter
>                       DW_AT_const_value	(1)
>                       DW_AT_name	("x")
>                       DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>                       DW_AT_decl_line	(2)
>                       DW_AT_type	(0x00000085 "int")
>     
>     0x0000007a:     DW_TAG_formal_parameter
>                       DW_AT_location	(DW_OP_reg5 RDI)
>                       DW_AT_name	("y")
>                       DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>                       DW_AT_decl_line	(2)
>                       DW_AT_type	(0x00000085 "int")
>     
>     0x00000084:     NULL
>     ...
> 
> Given this DWARF layout pahole does not recognize `x` as optimized out:
> 
>     $ pahole --verbose --btf_encode_detached=test.btf test.o
>     btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
>     Found 0 per-CPU variables!
>     Found 2 functions!
>     File test.o:
>     [1] INT int size=4 nr_bits=32 encoding=SIGNED
>     [2] PTR (anon) type_id=3
>     [3] PTR (anon) type_id=4
>     [4] INT char size=1 nr_bits=8 encoding=SIGNED
>     [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
>     [6] FUNC main type_id=5
>     [7] FUNC_PROTO (anon) return=1 args=(1 x, 1 y)
>     [8] FUNC f type_id=7
> 
> The way I read paragraph 4.1.4 mentioned before the tag `DW_AT_name`
> should not be used to identify whether parameter is optimized out.
> Unfortunately trivial modification of the condition in the
> `parameter__new()` to remove the `!parm->name` check is not
> sufficient. For some reason parameters `x` and `y` are not visited in
> `ftype__recode_dwarf_types()` and thus `optimized_parms` field is not set.
> 

Thanks for this - I tried it, and we spot the optimization once we update
die__create_new_parameter() as follows:

diff --git a/dwarf_loader.c b/dwarf_loader.c
index f96b6ff..605ad45 100644
--- a/dwarf_loader.c
+++ b/dwarf_loader.c
@@ -1529,6 +1530,8 @@ static struct tag *die__create_new_parameter(Dwarf_Die *di
 
        if (ftype != NULL) {
                ftype__add_parameter(ftype, parm);
+               if (parm->optimized)
+                       ftype->optimized_parms = 1;
                if (param_idx >= 0) {
                        if (add_child_llvm_annotations(die, param_idx, conf, &(t
                                return NULL;


With that change, I see:

$ pahole --verbose --btf_encode_detached=test.btf test.o
btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
Found 0 per-CPU variables!
Found 2 functions!
File test.o:
[1] INT int size=4 nr_bits=32 encoding=SIGNED
[2] PTR (anon) type_id=3
[3] PTR (anon) type_id=4
[4] INT char size=1 nr_bits=8 encoding=SIGNED
[5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
[6] FUNC main type_id=5
added local function 'f', optimized-out params
skipping addition of 'f' due to optimized-out parameters

Thanks!

Alan

> Thanks,
> Eduard
> 
> 
> 
>>> Thanks,
>>> Eduard
>>>
>>>>  
>>>>  	return parm;
>>>> @@ -1450,7 +1504,7 @@ static struct tag *die__create_new_parameter(Dwarf_Die *die,
>>>>  					     struct cu *cu, struct conf_load *conf,
>>>>  					     int param_idx)
>>>>  {
>>>> -	struct parameter *parm = parameter__new(die, cu, conf);
>>>> +	struct parameter *parm = parameter__new(die, cu, conf, param_idx);
>>>>  
>>>>  	if (parm == NULL)
>>>>  		return NULL;
>>>> @@ -2209,6 +2263,10 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
>>>>  			}
>>>>  			pos->name = tag__parameter(dtype->tag)->name;
>>>>  			pos->tag.type = dtype->tag->type;
>>>> +			if (pos->optimized) {
>>>> +				tag__parameter(dtype->tag)->optimized = pos->optimized;
>>>> +				type->optimized_parms = 1;
>>>> +			}
>>>>  			continue;
>>>>  		}
>>>>  
>>>> @@ -2219,6 +2277,20 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
>>>>  		}
>>>>  		pos->tag.type = dtype->small_id;
>>>>  	}
>>>> +	/* if parameters were optimized out, set flag for the ftype this
>>>> +	 * function tag referred to via abstract origin.
>>>> +	 */
>>>> +	if (type->optimized_parms) {
>>>> +		struct dwarf_tag *dtype = type->tag.priv;
>>>> +		struct dwarf_tag *dftype;
>>>> +
>>>> +		dftype = dwarf_cu__find_tag_by_ref(dcu, &dtype->abstract_origin);
>>>> +		if (dftype && dftype->tag) {
>>>> +			struct ftype *ftype = tag__ftype(dftype->tag);
>>>> +
>>>> +			ftype->optimized_parms = 1;
>>>> +		}
>>>> +	}
>>>>  }
>>>>  
>>>>  static void lexblock__recode_dwarf_types(struct lexblock *tag, struct cu *cu)
>>>> diff --git a/dwarves.h b/dwarves.h
>>>> index 589588e..1ad1b3b 100644
>>>> --- a/dwarves.h
>>>> +++ b/dwarves.h
>>>> @@ -808,6 +808,7 @@ size_t lexblock__fprintf(const struct lexblock *lexblock, const struct cu *cu,
>>>>  struct parameter {
>>>>  	struct tag tag;
>>>>  	const char *name;
>>>> +	bool optimized;
>>>>  };
>>>>  
>>>>  static inline struct parameter *tag__parameter(const struct tag *tag)
>>>> @@ -827,7 +828,8 @@ struct ftype {
>>>>  	struct tag	 tag;
>>>>  	struct list_head parms;
>>>>  	uint16_t	 nr_parms;
>>>> -	uint8_t		 unspec_parms; /* just one bit is needed */
>>>> +	uint8_t		 unspec_parms:1; /* just one bit is needed */
>>>> +	uint8_t		 optimized_parms:1;
>>>>  };
>>>>  
>>>>  static inline struct ftype *tag__ftype(const struct tag *tag)
>>>
>
Eduard Zingerman Jan. 25, 2023, 11:42 p.m. UTC | #6
On Wed, 2023-01-25 at 22:52 +0000, Alan Maguire wrote:
[...]
> 
> Thanks for this - I tried it, and we spot the optimization once we update
> die__create_new_parameter() as follows:
> 
> diff --git a/dwarf_loader.c b/dwarf_loader.c
> index f96b6ff..605ad45 100644
> --- a/dwarf_loader.c
> +++ b/dwarf_loader.c
> @@ -1529,6 +1530,8 @@ static struct tag *die__create_new_parameter(Dwarf_Die *di
>  
>         if (ftype != NULL) {
>                 ftype__add_parameter(ftype, parm);
> +               if (parm->optimized)
> +                       ftype->optimized_parms = 1;
>                 if (param_idx >= 0) {
>                         if (add_child_llvm_annotations(die, param_idx, conf, &(t
>                                 return NULL;
> 

Great, looks good.

> With that change, I see:
> 
> $ pahole --verbose --btf_encode_detached=test.btf test.o
> btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
> Found 0 per-CPU variables!
> Found 2 functions!
> File test.o:
> [1] INT int size=4 nr_bits=32 encoding=SIGNED
> [2] PTR (anon) type_id=3
> [3] PTR (anon) type_id=4
> [4] INT char size=1 nr_bits=8 encoding=SIGNED
> [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
> [6] FUNC main type_id=5
> added local function 'f', optimized-out params
> skipping addition of 'f' due to optimized-out parameters

Sorry, I have one more silly program.

I talked to Yonghong today and we discussed whether the compiler can change
the type of a function parameter as a result of some optimization.
Consider the following example:

    $ cat test.c
    struct st {
      int a;
      int b;
    };
    
    __attribute__((noinline))
    static int f(struct st *s) {
      return s->a + s->b;
    }
    
    int main(int argc, char *argv[]) {
      struct st s = {
        .a = (long)argv[0],
        .b = (long)argv[1]
      };
      return f(&s);
    }

When compiled by `clang` with -O3 the prototype of `f` is changed from
`int f(struct st *)` to `int f(int, int)`:

    $ clang -O3 -g -c test.c -o test.o && llvm-objdump -d test.o
    ...
    0000000000000000 <main>:
           0: 8b 3e                        	movl	(%rsi), %edi
           2: 8b 76 08                     	movl	0x8(%rsi), %esi
           5: eb 09                        	jmp	0x10 <f>
           7: 66 0f 1f 84 00 00 00 00 00   	nopw	(%rax,%rax)
    
    0000000000000010 <f>:
          10: 8d 04 37                     	leal	(%rdi,%rsi), %eax
          13: c3                           	retq
    
But generated DWARF hides this information:

    $ llvm-dwarfdump test.o
    ...
    0x0000005c:   DW_TAG_subprogram
                    DW_AT_low_pc	(0x0000000000000010)
                    DW_AT_high_pc	(0x0000000000000014)
                    DW_AT_frame_base	(DW_OP_reg7 RSP)
                    DW_AT_call_all_calls	(true)
                    DW_AT_name	("f")
                    DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
                    DW_AT_decl_line	(7)
                    DW_AT_prototyped	(true)
                    DW_AT_type	(0x00000074 "int")
    
    0x0000006b:     DW_TAG_formal_parameter
                      DW_AT_name	("s")
                      DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
                      DW_AT_decl_line	(7)
                      DW_AT_type	(0x0000009e "st *")
    
    0x00000073:     NULL
    ...

Is this important?
(gcc does not do this for the particular example, but I don't know if
 it could be tricked to under some conditions).

Thanks,
Eduard

[...]
Eduard Zingerman Jan. 26, 2023, 12:20 a.m. UTC | #7
On Thu, 2023-01-26 at 01:42 +0200, Eduard Zingerman wrote:
> On Wed, 2023-01-25 at 22:52 +0000, Alan Maguire wrote:
> [...]
> > 
> > Thanks for this - I tried it, and we spot the optimization once we update
> > die__create_new_parameter() as follows:
> > 
> > diff --git a/dwarf_loader.c b/dwarf_loader.c
> > index f96b6ff..605ad45 100644
> > --- a/dwarf_loader.c
> > +++ b/dwarf_loader.c
> > @@ -1529,6 +1530,8 @@ static struct tag *die__create_new_parameter(Dwarf_Die *di
> >  
> >         if (ftype != NULL) {
> >                 ftype__add_parameter(ftype, parm);
> > +               if (parm->optimized)
> > +                       ftype->optimized_parms = 1;
> >                 if (param_idx >= 0) {
> >                         if (add_child_llvm_annotations(die, param_idx, conf, &(t
> >                                 return NULL;
> > 
> 
> Great, looks good.
> 
> > With that change, I see:
> > 
> > $ pahole --verbose --btf_encode_detached=test.btf test.o
> > btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
> > Found 0 per-CPU variables!
> > Found 2 functions!
> > File test.o:
> > [1] INT int size=4 nr_bits=32 encoding=SIGNED
> > [2] PTR (anon) type_id=3
> > [3] PTR (anon) type_id=4
> > [4] INT char size=1 nr_bits=8 encoding=SIGNED
> > [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
> > [6] FUNC main type_id=5
> > added local function 'f', optimized-out params
> > skipping addition of 'f' due to optimized-out parameters
> 
> Sorry, I have one more silly program.
> 
> I talked to Yonghong today and we discussed if compiler can change a
> type of a function parameter as a result of some optimization.
> Consider the following example:
> 
>     $ cat test.c
>     struct st {
>       int a;
>       int b;
>     };
>     
>     __attribute__((noinline))
>     static int f(struct st *s) {
>       return s->a + s->b;
>     }
>     
>     int main(int argc, char *argv[]) {
>       struct st s = {
>         .a = (long)argv[0],
>         .b = (long)argv[1]
>       };
>       return f(&s);
>     }
> 
> When compiled by `clang` with -O3 the prototype of `f` is changed from
> `int f(struct st *)` to `int f(int, int)`:
> 
>     $ clang -O3 -g -c test.c -o test.o && llvm-objdump -d test.o
>     ...
>     0000000000000000 <main>:
>            0: 8b 3e                        	movl	(%rsi), %edi
>            2: 8b 76 08                     	movl	0x8(%rsi), %esi
>            5: eb 09                        	jmp	0x10 <f>
>            7: 66 0f 1f 84 00 00 00 00 00   	nopw	(%rax,%rax)
>     
>     0000000000000010 <f>:
>           10: 8d 04 37                     	leal	(%rdi,%rsi), %eax
>           13: c3                           	retq
>     
> But generated DWARF hides this information:

Actually, I was wrong. The information is there: it is conveyed by the
absence of the `DW_AT_location` attribute (just as 4.1.4 says).
So I think that the condition for optimized parameters detection has
to be adjusted one more time:

			has_location = attr_location(die, &loc.expr, &loc.exprlen) == 0;
			has_const_value = dwarf_attr(die, DW_AT_const_value, &attr) != NULL;

			if (has_location && loc.exprlen != 0) {
				Dwarf_Op *expr = loc.expr;

				switch (expr->atom) {
				case DW_OP_reg1 ... DW_OP_reg31:
				case DW_OP_breg0 ... DW_OP_breg31:
					break;
				default:
					parm->optimized = true;
					break;
				}
			} else if (!has_location || has_const_value) {
				parm->optimized = true;
			}

(But again, the parameter is marked as optimized but the function is
 not skipped in the final BTF, so either I applied our last change
 incorrectly or something additional should be done).
 
wdyt?

>     $ llvm-dwarfdump test.o
>     ...
>     0x0000005c:   DW_TAG_subprogram
>                     DW_AT_low_pc	(0x0000000000000010)
>                     DW_AT_high_pc	(0x0000000000000014)
>                     DW_AT_frame_base	(DW_OP_reg7 RSP)
>                     DW_AT_call_all_calls	(true)
>                     DW_AT_name	("f")
>                     DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>                     DW_AT_decl_line	(7)
>                     DW_AT_prototyped	(true)
>                     DW_AT_type	(0x00000074 "int")
>     
>     0x0000006b:     DW_TAG_formal_parameter
>                       DW_AT_name	("s")
>                       DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>                       DW_AT_decl_line	(7)
>                       DW_AT_type	(0x0000009e "st *")
>     
>     0x00000073:     NULL
>     ...
> 
> Is this important?
> (gcc does not do this for the particular example, but I don't know if
>  it could be tricked to under some conditions).
> 
> Thanks,
> Eduard
> 
> [...]
Alan Maguire Jan. 26, 2023, 2:02 p.m. UTC | #8
On 26/01/2023 00:20, Eduard Zingerman wrote:
> On Thu, 2023-01-26 at 01:42 +0200, Eduard Zingerman wrote:
>> On Wed, 2023-01-25 at 22:52 +0000, Alan Maguire wrote:
>> [...]
>>>
>>> Thanks for this - I tried it, and we spot the optimization once we update
>>> die__create_new_parameter() as follows:
>>>
>>> diff --git a/dwarf_loader.c b/dwarf_loader.c
>>> index f96b6ff..605ad45 100644
>>> --- a/dwarf_loader.c
>>> +++ b/dwarf_loader.c
>>> @@ -1529,6 +1530,8 @@ static struct tag *die__create_new_parameter(Dwarf_Die *di
>>>  
>>>         if (ftype != NULL) {
>>>                 ftype__add_parameter(ftype, parm);
>>> +               if (parm->optimized)
>>> +                       ftype->optimized_parms = 1;
>>>                 if (param_idx >= 0) {
>>>                         if (add_child_llvm_annotations(die, param_idx, conf, &(t
>>>                                 return NULL;
>>>
>>
>> Great, looks good.
>>
>>> With that change, I see:
>>>
>>> $ pahole --verbose --btf_encode_detached=test.btf test.o
>>> btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
>>> Found 0 per-CPU variables!
>>> Found 2 functions!
>>> File test.o:
>>> [1] INT int size=4 nr_bits=32 encoding=SIGNED
>>> [2] PTR (anon) type_id=3
>>> [3] PTR (anon) type_id=4
>>> [4] INT char size=1 nr_bits=8 encoding=SIGNED
>>> [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
>>> [6] FUNC main type_id=5
>>> added local function 'f', optimized-out params
>>> skipping addition of 'f' due to optimized-out parameters
>>
>> Sorry, I have one more silly program.
>>
>> I talked to Yonghong today and we discussed if compiler can change a
>> type of a function parameter as a result of some optimization.
>> Consider the following example:
>>
>>     $ cat test.c
>>     struct st {
>>       int a;
>>       int b;
>>     };
>>     
>>     __attribute__((noinline))
>>     static int f(struct st *s) {
>>       return s->a + s->b;
>>     }
>>     
>>     int main(int argc, char *argv[]) {
>>       struct st s = {
>>         .a = (long)argv[0],
>>         .b = (long)argv[1]
>>       };
>>       return f(&s);
>>     }
>>
>> When compiled by `clang` with -O3 the prototype of `f` is changed from
>> `int f(struct st *)` to `int f(int, int)`:
>>
>>     $ clang -O3 -g -c test.c -o test.o && llvm-objdump -d test.o
>>     ...
>>     0000000000000000 <main>:
>>            0: 8b 3e                        	movl	(%rsi), %edi
>>            2: 8b 76 08                     	movl	0x8(%rsi), %esi
>>            5: eb 09                        	jmp	0x10 <f>
>>            7: 66 0f 1f 84 00 00 00 00 00   	nopw	(%rax,%rax)
>>     
>>     0000000000000010 <f>:
>>           10: 8d 04 37                     	leal	(%rdi,%rsi), %eax
>>           13: c3                           	retq
>>     
>> But generated DWARF hides this information:
> 
> Actually, I'm not correct. The information is present because
> `DW_AT_location` attribute is not present (just as 4.1.4 says).
> So I think that the condition for optimized parameters detection has
> to be adjusted one more time:
> 
> 			has_location = attr_location(die, &loc.expr, &loc.exprlen) == 0;
> 			has_const_value = dwarf_attr(die, DW_AT_const_value, &attr) != NULL;
> 
> 			if (has_location && loc.exprlen != 0) {
> 				Dwarf_Op *expr = loc.expr;
> 
> 				switch (expr->atom) {
> 				case DW_OP_reg1 ... DW_OP_reg31:
> 				case DW_OP_breg0 ... DW_OP_breg31:
> 					break;
> 				default:
> 					parm->optimized = true;
> 					break;
> 				}
> 			} else if (!has_location || has_const_value) {
> 				parm->optimized = true;
> 			}
> 
> (But again, the parameter is marked as optimized but the function is
>  not skipped in the final BTF, so either I applied our last change
>  incorrectly or something additional should be done).
>  
> wdyt?

I've been digging into this a bit, and the issue here is that for 
gcc-generated DWARF at least, location info is often in the abstract 
origin parameter references, so we have to combine observations across
abstract origin reference and original parameter to determine for sure
if the parameter really is missing location information. In the
case of this program there are no abstract origin references, so
it's a bit more straightforward, but we have to handle both cases
I think.

I'll try and polish up a v2 series that incorporates this shortly;
in testing it, it works on this case as desired I think:

LLVM_OBJCOPY=objcopy pahole --verbose -J ~/src/isra2/test2.o
btf_encoder__new: '/home/alan/src/isra2/test2.o' doesn't have '.data..percpu' section
Found 0 per-CPU variables!
Found 13 functions!
File /home/alan/src/isra2/test2.o:
[1] INT long size=8 nr_bits=64 encoding=SIGNED
[2] INT int size=4 nr_bits=32 encoding=SIGNED
[3] PTR (anon) type_id=4
[4] PTR (anon) type_id=5
[5] INT char size=1 nr_bits=8 encoding=SIGNED
[6] STRUCT st size=8
	a type_id=2 bits_offset=0
	b type_id=2 bits_offset=32
[7] PTR (anon) type_id=6
[8] FUNC_PROTO (anon) return=2 args=(2 argc, 3 argv)
[9] FUNC main type_id=8
added local function 'f', optimized-out params
skipping addition of 'f' due to optimized-out parameters

Thanks!

Alan

> 
>>     $ llvm-dwarfdump test.o
>>     ...
>>     0x0000005c:   DW_TAG_subprogram
>>                     DW_AT_low_pc	(0x0000000000000010)
>>                     DW_AT_high_pc	(0x0000000000000014)
>>                     DW_AT_frame_base	(DW_OP_reg7 RSP)
>>                     DW_AT_call_all_calls	(true)
>>                     DW_AT_name	("f")
>>                     DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>>                     DW_AT_decl_line	(7)
>>                     DW_AT_prototyped	(true)
>>                     DW_AT_type	(0x00000074 "int")
>>     
>>     0x0000006b:     DW_TAG_formal_parameter
>>                       DW_AT_name	("s")
>>                       DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>>                       DW_AT_decl_line	(7)
>>                       DW_AT_type	(0x0000009e "st *")
>>     
>>     0x00000073:     NULL
>>     ...
>>
>> Is this important?
>> (gcc does not do this for the particular example, but I don't know if
>>  it could be tricked to under some conditions).
>>
>> Thanks,
>> Eduard
>>
>> [...]
>
Eduard Zingerman Jan. 26, 2023, 3:02 p.m. UTC | #9
On Thu, 2023-01-26 at 14:02 +0000, Alan Maguire wrote:
> On 26/01/2023 00:20, Eduard Zingerman wrote:
> > On Thu, 2023-01-26 at 01:42 +0200, Eduard Zingerman wrote:
> > > On Wed, 2023-01-25 at 22:52 +0000, Alan Maguire wrote:
> > > [...]
> > > > 
> > > > Thanks for this - I tried it, and we spot the optimization once we update
> > > > die__create_new_parameter() as follows:
> > > > 
> > > > diff --git a/dwarf_loader.c b/dwarf_loader.c
> > > > index f96b6ff..605ad45 100644
> > > > --- a/dwarf_loader.c
> > > > +++ b/dwarf_loader.c
> > > > @@ -1529,6 +1530,8 @@ static struct tag *die__create_new_parameter(Dwarf_Die *di
> > > >  
> > > >         if (ftype != NULL) {
> > > >                 ftype__add_parameter(ftype, parm);
> > > > +               if (parm->optimized)
> > > > +                       ftype->optimized_parms = 1;
> > > >                 if (param_idx >= 0) {
> > > >                         if (add_child_llvm_annotations(die, param_idx, conf, &(t
> > > >                                 return NULL;
> > > > 
> > > 
> > > Great, looks good.
> > > 
> > > > With that change, I see:
> > > > 
> > > > $ pahole --verbose --btf_encode_detached=test.btf test.o
> > > > btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
> > > > Found 0 per-CPU variables!
> > > > Found 2 functions!
> > > > File test.o:
> > > > [1] INT int size=4 nr_bits=32 encoding=SIGNED
> > > > [2] PTR (anon) type_id=3
> > > > [3] PTR (anon) type_id=4
> > > > [4] INT char size=1 nr_bits=8 encoding=SIGNED
> > > > [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
> > > > [6] FUNC main type_id=5
> > > > added local function 'f', optimized-out params
> > > > skipping addition of 'f' due to optimized-out parameters
> > > 
> > > Sorry, I have one more silly program.
> > > 
> > > I talked to Yonghong today and we discussed if compiler can change a
> > > type of a function parameter as a result of some optimization.
> > > Consider the following example:
> > > 
> > >     $ cat test.c
> > >     struct st {
> > >       int a;
> > >       int b;
> > >     };
> > >     
> > >     __attribute__((noinline))
> > >     static int f(struct st *s) {
> > >       return s->a + s->b;
> > >     }
> > >     
> > >     int main(int argc, char *argv[]) {
> > >       struct st s = {
> > >         .a = (long)argv[0],
> > >         .b = (long)argv[1]
> > >       };
> > >       return f(&s);
> > >     }
> > > 
> > > When compiled by `clang` with -O3 the prototype of `f` is changed from
> > > `int f(struct st *)` to `int f(int, int)`:
> > > 
> > >     $ clang -O3 -g -c test.c -o test.o && llvm-objdump -d test.o
> > >     ...
> > >     0000000000000000 <main>:
> > >            0: 8b 3e                        	movl	(%rsi), %edi
> > >            2: 8b 76 08                     	movl	0x8(%rsi), %esi
> > >            5: eb 09                        	jmp	0x10 <f>
> > >            7: 66 0f 1f 84 00 00 00 00 00   	nopw	(%rax,%rax)
> > >     
> > >     0000000000000010 <f>:
> > >           10: 8d 04 37                     	leal	(%rdi,%rsi), %eax
> > >           13: c3                           	retq
> > >     
> > > But generated DWARF hides this information:
> > 
> > Actually, I'm not correct. The information is present because
> > `DW_AT_location` attribute is not present (just as 4.1.4 says).
> > So I think that the condition for optimized parameters detection has
> > to be adjusted one more time:
> > 
> > 			has_location = attr_location(die, &loc.expr, &loc.exprlen) == 0;
> > 			has_const_value = dwarf_attr(die, DW_AT_const_value, &attr) != NULL;
> > 
> > 			if (has_location && loc.exprlen != 0) {
> > 				Dwarf_Op *expr = loc.expr;
> > 
> > 				switch (expr->atom) {
> > 				case DW_OP_reg1 ... DW_OP_reg31:
> > 				case DW_OP_breg0 ... DW_OP_breg31:
> > 					break;
> > 				default:
> > 					parm->optimized = true;
> > 					break;
> > 				}
> > 			} else if (!has_location || has_const_value) {
> > 				parm->optimized = true;
> > 			}
> > 
> > (But again, the parameter is marked as optimized but the function is
> >  not skipped in the final BTF, so either I applied our last change
> >  incorrectly or something additional should be done).
> >  
> > wdyt?
> 
> I've been digging into this a bit, and the issue here is that for 
> gcc-generated DWARF at least, location info is often in the abstract 
> origin parameter references, so we have to combine observations across
> abstract origin reference and original parameter to determine for sure
> if the parameter really is missing location information. In the
> case of this program there are no abstract origin references, so
> it's a bit more straightforward, but we have to handle both cases
> I think.

Is it safe to ignore DW_TAG_subprogram entries that have a
DW_AT_abstract_origin, and thus avoid the combine logic?
The way I read the standard, it looks like DW_AT_abstract_origin is only
present for instances that undergo some optimization.

> 
> I'll try and polish up a v2 series that incorporates this shortly;
> in testing it, it works on this case as desired I think:
> 
> LLVM_OBJCOPY=objcopy pahole --verbose -J ~/src/isra2/test2.o
> btf_encoder__new: '/home/alan/src/isra2/test2.o' doesn't have '.data..percpu' section
> Found 0 per-CPU variables!
> Found 13 functions!
> File /home/alan/src/isra2/test2.o:
> [1] INT long size=8 nr_bits=64 encoding=SIGNED
> [2] INT int size=4 nr_bits=32 encoding=SIGNED
> [3] PTR (anon) type_id=4
> [4] PTR (anon) type_id=5
> [5] INT char size=1 nr_bits=8 encoding=SIGNED
> [6] STRUCT st size=8
> 	a type_id=2 bits_offset=0
> 	b type_id=2 bits_offset=32
> [7] PTR (anon) type_id=6
> [8] FUNC_PROTO (anon) return=2 args=(2 argc, 3 argv)
> [9] FUNC main type_id=8
> added local function 'f', optimized-out params
> skipping addition of 'f' due to optimized-out parameters
> 
> Thanks!
> 
> Alan
> 
> > 
> > >     $ llvm-dwarfdump test.o
> > >     ...
> > >     0x0000005c:   DW_TAG_subprogram
> > >                     DW_AT_low_pc	(0x0000000000000010)
> > >                     DW_AT_high_pc	(0x0000000000000014)
> > >                     DW_AT_frame_base	(DW_OP_reg7 RSP)
> > >                     DW_AT_call_all_calls	(true)
> > >                     DW_AT_name	("f")
> > >                     DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
> > >                     DW_AT_decl_line	(7)
> > >                     DW_AT_prototyped	(true)
> > >                     DW_AT_type	(0x00000074 "int")
> > >     
> > >     0x0000006b:     DW_TAG_formal_parameter
> > >                       DW_AT_name	("s")
> > >                       DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
> > >                       DW_AT_decl_line	(7)
> > >                       DW_AT_type	(0x0000009e "st *")
> > >     
> > >     0x00000073:     NULL
> > >     ...
> > > 
> > > Is this important?
> > > (gcc does not do this for the particular example, but I don't know if
> > >  it could be tricked to under some conditions).
> > > 
> > > Thanks,
> > > Eduard
> > > 
> > > [...]
> >
diff mbox series

Patch

diff --git a/dwarf_loader.c b/dwarf_loader.c
index 5a74035..0220f1d 100644
--- a/dwarf_loader.c
+++ b/dwarf_loader.c
@@ -992,13 +992,67 @@  static struct class_member *class_member__new(Dwarf_Die *die, struct cu *cu,
 	return member;
 }
 
-static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu, struct conf_load *conf)
+/* How many function parameters are passed via registers?  Used below in
+ * determining if an argument has been optimized out or if it is simply
+ * an argument > NR_REGISTER_PARAMS.  Setting NR_REGISTER_PARAMS to 0
+ * allows unsupported architectures to skip tagging optimized-out
+ * values.
+ */
+#if defined(__x86_64__)
+#define NR_REGISTER_PARAMS      6
+#elif defined(__s390__)
+#define NR_REGISTER_PARAMS	5
+#elif defined(__aarch64__)
+#define NR_REGISTER_PARAMS      8
+#elif defined(__mips__)
+#define NR_REGISTER_PARAMS	8
+#elif defined(__powerpc__)
+#define NR_REGISTER_PARAMS	8
+#elif defined(__sparc__)
+#define NR_REGISTER_PARAMS	6
+#elif defined(__riscv) && __riscv_xlen == 64
+#define NR_REGISTER_PARAMS	8
+#elif defined(__arc__)
+#define NR_REGISTER_PARAMS	8
+#else
+#define NR_REGISTER_PARAMS      0
+#endif
+
+static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu,
+					struct conf_load *conf, int param_idx)
 {
 	struct parameter *parm = tag__alloc(cu, sizeof(*parm));
 
 	if (parm != NULL) {
+		struct location loc;
+
 		tag__init(&parm->tag, cu, die);
 		parm->name = attr_string(die, DW_AT_name, conf);
+
+		/* Parameters which use DW_AT_abstract_origin to point at
+		 * the original parameter definition (with no name in the DIE)
+		 * are the result of later DWARF generation during compilation
+		 * so often better take into account if arguments were
+		 * optimized out.
+		 *
+		 * By checking that locations for parameters that are expected
+		 * to be passed as registers are actually passed as registers,
+		 * we can spot optimized-out parameters.
+		 */
+		if (param_idx < NR_REGISTER_PARAMS && !parm->name &&
+		    attr_location(die, &loc.expr, &loc.exprlen) == 0 &&
+		    loc.exprlen != 0) {
+			Dwarf_Op *expr = loc.expr;
+
+			switch (expr->atom) {
+			case DW_OP_reg1 ... DW_OP_reg31:
+			case DW_OP_breg0 ... DW_OP_breg31:
+				break;
+			default:
+				parm->optimized = true;
+				break;
+			}
+		}
 	}
 
 	return parm;
@@ -1450,7 +1504,7 @@  static struct tag *die__create_new_parameter(Dwarf_Die *die,
 					     struct cu *cu, struct conf_load *conf,
 					     int param_idx)
 {
-	struct parameter *parm = parameter__new(die, cu, conf);
+	struct parameter *parm = parameter__new(die, cu, conf, param_idx);
 
 	if (parm == NULL)
 		return NULL;
@@ -2209,6 +2263,10 @@  static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
 			}
 			pos->name = tag__parameter(dtype->tag)->name;
 			pos->tag.type = dtype->tag->type;
+			if (pos->optimized) {
+				tag__parameter(dtype->tag)->optimized = pos->optimized;
+				type->optimized_parms = 1;
+			}
 			continue;
 		}
 
@@ -2219,6 +2277,20 @@  static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
 		}
 		pos->tag.type = dtype->small_id;
 	}
+	/* if parameters were optimized out, set flag for the ftype this
+	 * function tag referred to via abstract origin.
+	 */
+	if (type->optimized_parms) {
+		struct dwarf_tag *dtype = type->tag.priv;
+		struct dwarf_tag *dftype;
+
+		dftype = dwarf_cu__find_tag_by_ref(dcu, &dtype->abstract_origin);
+		if (dftype && dftype->tag) {
+			struct ftype *ftype = tag__ftype(dftype->tag);
+
+			ftype->optimized_parms = 1;
+		}
+	}
 }
 
 static void lexblock__recode_dwarf_types(struct lexblock *tag, struct cu *cu)
diff --git a/dwarves.h b/dwarves.h
index 589588e..1ad1b3b 100644
--- a/dwarves.h
+++ b/dwarves.h
@@ -808,6 +808,7 @@  size_t lexblock__fprintf(const struct lexblock *lexblock, const struct cu *cu,
 struct parameter {
 	struct tag tag;
 	const char *name;
+	bool optimized;
 };
 
 static inline struct parameter *tag__parameter(const struct tag *tag)
@@ -827,7 +828,8 @@  struct ftype {
 	struct tag	 tag;
 	struct list_head parms;
 	uint16_t	 nr_parms;
-	uint8_t		 unspec_parms; /* just one bit is needed */
+	uint8_t		 unspec_parms:1; /* just one bit is needed */
+	uint8_t		 optimized_parms:1;
 };
 
 static inline struct ftype *tag__ftype(const struct tag *tag)