diff mbox series

[03/23] perf dwarf-aux: Handle type transfer for memory access

Message ID 20240319055115.4063940-4-namhyung@kernel.org (mailing list archive)
State New
Headers show
Series Remaining bits of data type profiling (v7) | expand

Commit Message

Namhyung Kim March 19, 2024, 5:50 a.m. UTC
We want to track type states as instructions are executed.  Each
instruction can access compound types like struct or union and load/
store its members to a different location.

The die_deref_ptr_type() function finds the type of a memory access made
through a pointer variable.  If the pointer points to a compound type like
a struct, the target memory is a member of that struct.  The access will
happen with an offset indicating which member it refers to.  Let's follow
the DWARF info to figure out the type of the pointer target.

For example, say we have the following code.

  struct foo {
    int a;
    int b;
  };

  struct foo *p = malloc(sizeof(*p));
  p->b = 0;

The last pointer access should produce x86 asm like below:

  mov  0x0, 4(%rbx)

And we know %rbx register has a pointer to struct foo.  Then offset 4
should return the debug info of member 'b'.

Also, variables of compound types can be accessed directly without a
pointer.  The die_get_member_type() function handles such a case.

Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/dwarf-aux.c | 110 ++++++++++++++++++++++++++++++++++++
 tools/perf/util/dwarf-aux.h |   6 ++
 2 files changed, 116 insertions(+)

Comments

Arnaldo Carvalho de Melo March 19, 2024, 1:55 p.m. UTC | #1
On Mon, Mar 18, 2024 at 10:50:55PM -0700, Namhyung Kim wrote:
> We want to track type states as instructions are executed.  Each
> instruction can access compound types like struct or union and load/
> store its members to a different location.
> 
> The die_deref_ptr_type() is to find a type of memory access with a
> pointer variable.  If it points to a compound type like struct, the
> target memory is a member in the struct.  The access will happen
> with an offset indicating which member it refers.  Let's follow the
> DWARF info to figure out the type of the pointer target.
> 
> For example, say we have the following code.
> 
>   struct foo {
>     int a;
>     int b;
>   };
> 
>   struct foo *p = malloc(sizeof(*p));
>   p->b = 0;
> 
> The last pointer access should produce x86 asm like below:
> 
>   mov  0x0, 4(%rbx)
> 
> And we know %rbx register has a pointer to struct foo.  Then offset 4
> should return the debug info of member 'b'.
> 
> Also variables of compound types can be accessed directly without a
> pointer.  The die_get_member_type() is to handle a such case.
> 
> Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  tools/perf/util/dwarf-aux.c | 110 ++++++++++++++++++++++++++++++++++++
>  tools/perf/util/dwarf-aux.h |   6 ++
>  2 files changed, 116 insertions(+)
> 
> diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
> index 785aa7a3d725..cd9364d296b6 100644
> --- a/tools/perf/util/dwarf-aux.c
> +++ b/tools/perf/util/dwarf-aux.c
> @@ -1838,3 +1838,113 @@ int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes)
>  	*scopes = data.scopes;
>  	return data.nr;
>  }
> +
> +static int __die_find_member_offset_cb(Dwarf_Die *die_mem, void *arg)
> +{
> +	Dwarf_Die type_die;
> +	Dwarf_Word size, loc;
> +	Dwarf_Word offset = (long)arg;
> +	int tag = dwarf_tag(die_mem);
> +
> +	if (tag != DW_TAG_member)
> +		return DIE_FIND_CB_SIBLING;
> +
> +	/* Unions might not have location */
> +	if (die_get_data_member_location(die_mem, &loc) < 0)
> +		loc = 0;
> +
> +	if (offset == loc)
> +		return DIE_FIND_CB_END;
> +
> +	die_get_real_type(die_mem, &type_die);

Don't we have to check that the type_die was initialized? From what I
can see there is the possibility it isn't:

  Dwarf_Die type_die;
  die_get_real_type(die_mem, &type_die);
        do {
                vr_die = __die_get_real_type(vr_die, die_mem);
        } while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef);
        
        return vr_die;

    static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)

        do {
                vr_die = die_get_type(vr_die, die_mem);
                if (!vr_die)
                        break;

   Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)

        Dwarf_Attribute attr;

        if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) &&
            dwarf_formref_die(&attr, die_mem))
                return die_mem;
        else
                return NULL;
  } 

> +	if (dwarf_aggregate_size(&type_die, &size) < 0)
> +		size = 0;
> +
> +	if (loc < offset && offset < (loc + size))
> +		return DIE_FIND_CB_END;
> +
> +	return DIE_FIND_CB_SIBLING;
> +}
> +
> +/**
> + * die_get_member_type - Return type info of struct member
> + * @type_die: a type DIE
> + * @offset: offset in the type
> + * @die_mem: a buffer to save the resulting DIE
> + *
> + * This function returns a type of a member in @type_die where it's located at
> + * @offset if it's a struct.  For now, it just returns the first matching
> + * member in a union.  For other types, it'd return the given type directly
> + * if it's within the size of the type or NULL otherwise.
> + */
> +Dwarf_Die *die_get_member_type(Dwarf_Die *type_die, int offset,
> +			       Dwarf_Die *die_mem)
> +{
> +	Dwarf_Die *member;
> +	Dwarf_Die mb_type;
> +	int tag;
> +
> +	tag = dwarf_tag(type_die);
> +	/* If it's not a compound type, return the type directly */
> +	if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
> +		Dwarf_Word size;
> +
> +		if (dwarf_aggregate_size(type_die, &size) < 0)
> +			size = 0;
> +
> +		if ((unsigned)offset >= size)
> +			return NULL;
> +
> +		*die_mem = *type_die;
> +		return die_mem;
> +	}
> +
> +	mb_type = *type_die;
> +	/* TODO: Handle union types better? */
> +	while (tag == DW_TAG_structure_type || tag == DW_TAG_union_type) {
> +		member = die_find_child(&mb_type, __die_find_member_offset_cb,
> +					(void *)(long)offset, die_mem);
> +		if (member == NULL)
> +			return NULL;
> +
> +		if (die_get_real_type(member, &mb_type) == NULL)
> +			return NULL;
> +
> +		tag = dwarf_tag(&mb_type);
> +
> +		if (tag == DW_TAG_structure_type || tag == DW_TAG_union_type) {
> +			Dwarf_Word loc;
> +
> +			/* Update offset for the start of the member struct */
> +			if (die_get_data_member_location(member, &loc) == 0)
> +				offset -= loc;
> +		}
> +	}
> +	*die_mem = mb_type;
> +	return die_mem;
> +}
> +
> +/**
> + * die_deref_ptr_type - Return type info for pointer access
> + * @ptr_die: a pointer type DIE
> + * @offset: access offset for the pointer
> + * @die_mem: a buffer to save the resulting DIE
> + *
> + * This function follows the pointer in @ptr_die with given @offset
> + * and saves the resulting type in @die_mem.  If the pointer points
> + * a struct type, actual member at the offset would be returned.
> + */
> +Dwarf_Die *die_deref_ptr_type(Dwarf_Die *ptr_die, int offset,
> +			      Dwarf_Die *die_mem)
> +{
> +	Dwarf_Die type_die;
> +
> +	if (dwarf_tag(ptr_die) != DW_TAG_pointer_type)
> +		return NULL;
> +
> +	if (die_get_real_type(ptr_die, &type_die) == NULL)
> +		return NULL;
> +
> +	return die_get_member_type(&type_die, offset, die_mem);
> +}
> diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
> index cd171b06fd4c..16c916311bc0 100644
> --- a/tools/perf/util/dwarf-aux.h
> +++ b/tools/perf/util/dwarf-aux.h
> @@ -144,6 +144,12 @@ struct die_var_type {
>  	int offset;
>  };
>  
> +/* Return type info of a member at offset */
> +Dwarf_Die *die_get_member_type(Dwarf_Die *type_die, int offset, Dwarf_Die *die_mem);
> +
> +/* Return type info where the pointer and offset point to */
> +Dwarf_Die *die_deref_ptr_type(Dwarf_Die *ptr_die, int offset, Dwarf_Die *die_mem);
> +
>  #ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT
>  
>  /* Get byte offset range of given variable DIE */
> -- 
> 2.44.0.291.gc1ea87d7ee-goog
Namhyung Kim March 19, 2024, 5:41 p.m. UTC | #2
On Tue, Mar 19, 2024 at 6:55 AM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
>
> On Mon, Mar 18, 2024 at 10:50:55PM -0700, Namhyung Kim wrote:
> > We want to track type states as instructions are executed.  Each
> > instruction can access compound types like struct or union and load/
> > store its members to a different location.
> >
> > The die_deref_ptr_type() is to find a type of memory access with a
> > pointer variable.  If it points to a compound type like struct, the
> > target memory is a member in the struct.  The access will happen
> > with an offset indicating which member it refers.  Let's follow the
> > DWARF info to figure out the type of the pointer target.
> >
> > For example, say we have the following code.
> >
> >   struct foo {
> >     int a;
> >     int b;
> >   };
> >
> >   struct foo *p = malloc(sizeof(*p));
> >   p->b = 0;
> >
> > The last pointer access should produce x86 asm like below:
> >
> >   mov  0x0, 4(%rbx)
> >
> > And we know %rbx register has a pointer to struct foo.  Then offset 4
> > should return the debug info of member 'b'.
> >
> > Also variables of compound types can be accessed directly without a
> > pointer.  The die_get_member_type() is to handle a such case.
> >
> > Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> >  tools/perf/util/dwarf-aux.c | 110 ++++++++++++++++++++++++++++++++++++
> >  tools/perf/util/dwarf-aux.h |   6 ++
> >  2 files changed, 116 insertions(+)
> >
> > diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
> > index 785aa7a3d725..cd9364d296b6 100644
> > --- a/tools/perf/util/dwarf-aux.c
> > +++ b/tools/perf/util/dwarf-aux.c
> > @@ -1838,3 +1838,113 @@ int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes)
> >       *scopes = data.scopes;
> >       return data.nr;
> >  }
> > +
> > +static int __die_find_member_offset_cb(Dwarf_Die *die_mem, void *arg)
> > +{
> > +     Dwarf_Die type_die;
> > +     Dwarf_Word size, loc;
> > +     Dwarf_Word offset = (long)arg;
> > +     int tag = dwarf_tag(die_mem);
> > +
> > +     if (tag != DW_TAG_member)
> > +             return DIE_FIND_CB_SIBLING;
> > +
> > +     /* Unions might not have location */
> > +     if (die_get_data_member_location(die_mem, &loc) < 0)
> > +             loc = 0;
> > +
> > +     if (offset == loc)
> > +             return DIE_FIND_CB_END;
> > +
> > +     die_get_real_type(die_mem, &type_die);
>
> Don't we have to check that the type_die was initialized? From what I
> can see there is the possibility it isn't:

Right, it might return NULL if the entry doesn't have a valid
DW_AT_type.  I'll check the return value and skip.

Thanks,
Namhyung

>
>   Dwarf_Die type_die;
>   die_get_real_type(die_mem, &type_die);
>         do {
>                 vr_die = __die_get_real_type(vr_die, die_mem);
>         } while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef);
>
>         return vr_die;
>
>     static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
>
>         do {
>                 vr_die = die_get_type(vr_die, die_mem);
>                 if (!vr_die)
>                         break;
>
>    Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
>
>         Dwarf_Attribute attr;
>
>         if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) &&
>             dwarf_formref_die(&attr, die_mem))
>                 return die_mem;
>         else
>                 return NULL;
>   }
>
> > +     if (dwarf_aggregate_size(&type_die, &size) < 0)
> > +             size = 0;
> > +
> > +     if (loc < offset && offset < (loc + size))
> > +             return DIE_FIND_CB_END;
> > +
> > +     return DIE_FIND_CB_SIBLING;
> > +}
> > +
> > +/**
> > + * die_get_member_type - Return type info of struct member
> > + * @type_die: a type DIE
> > + * @offset: offset in the type
> > + * @die_mem: a buffer to save the resulting DIE
> > + *
> > + * This function returns a type of a member in @type_die where it's located at
> > + * @offset if it's a struct.  For now, it just returns the first matching
> > + * member in a union.  For other types, it'd return the given type directly
> > + * if it's within the size of the type or NULL otherwise.
> > + */
> > +Dwarf_Die *die_get_member_type(Dwarf_Die *type_die, int offset,
> > +                            Dwarf_Die *die_mem)
> > +{
> > +     Dwarf_Die *member;
> > +     Dwarf_Die mb_type;
> > +     int tag;
> > +
> > +     tag = dwarf_tag(type_die);
> > +     /* If it's not a compound type, return the type directly */
> > +     if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
> > +             Dwarf_Word size;
> > +
> > +             if (dwarf_aggregate_size(type_die, &size) < 0)
> > +                     size = 0;
> > +
> > +             if ((unsigned)offset >= size)
> > +                     return NULL;
> > +
> > +             *die_mem = *type_die;
> > +             return die_mem;
> > +     }
> > +
> > +     mb_type = *type_die;
> > +     /* TODO: Handle union types better? */
> > +     while (tag == DW_TAG_structure_type || tag == DW_TAG_union_type) {
> > +             member = die_find_child(&mb_type, __die_find_member_offset_cb,
> > +                                     (void *)(long)offset, die_mem);
> > +             if (member == NULL)
> > +                     return NULL;
> > +
> > +             if (die_get_real_type(member, &mb_type) == NULL)
> > +                     return NULL;
> > +
> > +             tag = dwarf_tag(&mb_type);
> > +
> > +             if (tag == DW_TAG_structure_type || tag == DW_TAG_union_type) {
> > +                     Dwarf_Word loc;
> > +
> > +                     /* Update offset for the start of the member struct */
> > +                     if (die_get_data_member_location(member, &loc) == 0)
> > +                             offset -= loc;
> > +             }
> > +     }
> > +     *die_mem = mb_type;
> > +     return die_mem;
> > +}
> > +
> > +/**
> > + * die_deref_ptr_type - Return type info for pointer access
> > + * @ptr_die: a pointer type DIE
> > + * @offset: access offset for the pointer
> > + * @die_mem: a buffer to save the resulting DIE
> > + *
> > + * This function follows the pointer in @ptr_die with given @offset
> > + * and saves the resulting type in @die_mem.  If the pointer points
> > + * a struct type, actual member at the offset would be returned.
> > + */
> > +Dwarf_Die *die_deref_ptr_type(Dwarf_Die *ptr_die, int offset,
> > +                           Dwarf_Die *die_mem)
> > +{
> > +     Dwarf_Die type_die;
> > +
> > +     if (dwarf_tag(ptr_die) != DW_TAG_pointer_type)
> > +             return NULL;
> > +
> > +     if (die_get_real_type(ptr_die, &type_die) == NULL)
> > +             return NULL;
> > +
> > +     return die_get_member_type(&type_die, offset, die_mem);
> > +}
> > diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
> > index cd171b06fd4c..16c916311bc0 100644
> > --- a/tools/perf/util/dwarf-aux.h
> > +++ b/tools/perf/util/dwarf-aux.h
> > @@ -144,6 +144,12 @@ struct die_var_type {
> >       int offset;
> >  };
> >
> > +/* Return type info of a member at offset */
> > +Dwarf_Die *die_get_member_type(Dwarf_Die *type_die, int offset, Dwarf_Die *die_mem);
> > +
> > +/* Return type info where the pointer and offset point to */
> > +Dwarf_Die *die_deref_ptr_type(Dwarf_Die *ptr_die, int offset, Dwarf_Die *die_mem);
> > +
> >  #ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT
> >
> >  /* Get byte offset range of given variable DIE */
> > --
> > 2.44.0.291.gc1ea87d7ee-goog
diff mbox series

Patch

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 785aa7a3d725..cd9364d296b6 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -1838,3 +1838,113 @@  int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes)
 	*scopes = data.scopes;
 	return data.nr;
 }
+
+/* die_find_child() callback: match the member covering the offset in @arg */
+static int __die_find_member_offset_cb(Dwarf_Die *die_mem, void *arg)
+{
+	Dwarf_Die type_die;
+	Dwarf_Word size, loc;
+	Dwarf_Word offset = (long)arg;
+
+	if (dwarf_tag(die_mem) != DW_TAG_member)
+		return DIE_FIND_CB_SIBLING;
+
+	/* Unions might not have location */
+	if (die_get_data_member_location(die_mem, &loc) < 0)
+		loc = 0;
+	if (offset == loc)
+		return DIE_FIND_CB_END;
+
+	/* No valid DW_AT_type leaves type_die unset, so skip this member */
+	if (die_get_real_type(die_mem, &type_die) == NULL)
+		return DIE_FIND_CB_SIBLING;
+
+	if (dwarf_aggregate_size(&type_die, &size) < 0)
+		size = 0;
+	if (loc < offset && offset < (loc + size))
+		return DIE_FIND_CB_END;
+
+	return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_get_member_type - Return type info of struct member
+ * @type_die: a type DIE
+ * @offset: offset in the type
+ * @die_mem: a buffer to save the resulting DIE
+ *
+ * For a struct or union @type_die, this descends through nested members to
+ * the one located at @offset and returns its type; for a union it just takes
+ * the first matching member.  For any other type, it returns @type_die itself
+ * if @offset is within the size of the type.  Returns NULL on failure.
+ */
+Dwarf_Die *die_get_member_type(Dwarf_Die *type_die, int offset,
+			       Dwarf_Die *die_mem)
+{
+	Dwarf_Die *member;
+	Dwarf_Die mb_type;
+	int tag;
+
+	tag = dwarf_tag(type_die);
+	/* If it's not a compound type, return the type directly */
+	if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
+		Dwarf_Word size;
+
+		if (dwarf_aggregate_size(type_die, &size) < 0)
+			size = 0;
+
+		if ((unsigned)offset >= size)
+			return NULL;
+
+		*die_mem = *type_die;
+		return die_mem;
+	}
+
+	mb_type = *type_die;
+	/* TODO: Handle union types better? */
+	while (tag == DW_TAG_structure_type || tag == DW_TAG_union_type) {
+		member = die_find_child(&mb_type, __die_find_member_offset_cb,
+					(void *)(long)offset, die_mem);
+		if (member == NULL)
+			return NULL;
+
+		if (die_get_real_type(member, &mb_type) == NULL)
+			return NULL;
+
+		tag = dwarf_tag(&mb_type);
+
+		if (tag == DW_TAG_structure_type || tag == DW_TAG_union_type) {
+			Dwarf_Word loc;
+
+			/* Update offset for the start of the member struct */
+			if (die_get_data_member_location(member, &loc) == 0)
+				offset -= loc;
+		}
+	}
+	*die_mem = mb_type;
+	return die_mem;
+}
+
+/**
+ * die_deref_ptr_type - Return type info for pointer access
+ * @ptr_die: a pointer type DIE
+ * @offset: access offset for the pointer
+ * @die_mem: a buffer to save the resulting DIE
+ *
+ * This function follows the pointer in @ptr_die with given @offset and saves
+ * the resulting type in @die_mem.  If it points to a struct type, the member
+ * at the offset is returned.  Returns NULL if @ptr_die is not a pointer type.
+ */
+Dwarf_Die *die_deref_ptr_type(Dwarf_Die *ptr_die, int offset,
+			      Dwarf_Die *die_mem)
+{
+	Dwarf_Die type_die;
+
+	if (dwarf_tag(ptr_die) != DW_TAG_pointer_type)
+		return NULL;
+
+	if (die_get_real_type(ptr_die, &type_die) == NULL)
+		return NULL;
+
+	return die_get_member_type(&type_die, offset, die_mem);
+}
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index cd171b06fd4c..16c916311bc0 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -144,6 +144,12 @@  struct die_var_type {
 	int offset;
 };
 
+/* Return type info of a member at offset */
+Dwarf_Die *die_get_member_type(Dwarf_Die *type_die, int offset, Dwarf_Die *die_mem);
+
+/* Return type info where the pointer and offset point to */
+Dwarf_Die *die_deref_ptr_type(Dwarf_Die *ptr_die, int offset, Dwarf_Die *die_mem);
+
 #ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT
 
 /* Get byte offset range of given variable DIE */