diff mbox series

[V7,4/4] Adjust objdump start/end range per map pgoff parameter

Message ID d730ad217936d952008edf703ba4c34393d89dad.1725573309.git.scclevenger@os.amperecomputing.com (mailing list archive)
State New, archived
Headers show
Series arm-cs-trace-disasm.py/perf must accommodate non-zero DSO text offset | expand

Commit Message

Steve Clevenger Sept. 5, 2024, 10:28 p.m. UTC
Extract map_pgoff parameter from the dictionary, and adjust start/end
range passed to objdump based on the value.

A zero start_addr is filtered to prevent output of dso address range
check failures. This script repeatedly sees a zero value passed
in for
      start_addr = cpu_data[str(cpu) + 'addr']

These zero values are not a new problem. The start_addr/stop_addr warning
clutters the instruction trace output, hence this change.

Signed-off-by: Steve Clevenger <scclevenger@os.amperecomputing.com>
---
 tools/perf/scripts/python/arm-cs-trace-disasm.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

Comments

Leo Yan Sept. 6, 2024, 11:27 a.m. UTC | #1
On 9/5/24 23:28, Steve Clevenger wrote:
> 
> Extract map_pgoff parameter from the dictionary, and adjust start/end
> range passed to objdump based on the value.
> 
> A zero start_addr is filtered to prevent output of dso address range
> check failures. This script repeatedly sees a zero value passed
> in for
>       start_addr = cpu_data[str(cpu) + 'addr']
> 
> These zero values are not a new problem. The start_addr/stop_addr warning
> clutters the instruction trace output, hence this change.
> 
> Signed-off-by: Steve Clevenger <scclevenger@os.amperecomputing.com>
> ---
>   tools/perf/scripts/python/arm-cs-trace-disasm.py | 14 +++++++++-----
>   1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py
> index 7aff02d84ffb..a867e0db02b8 100755
> --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
> +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
> @@ -187,6 +187,7 @@ def process_event(param_dict):
>          dso_start = get_optional(param_dict, "dso_map_start")
>          dso_end = get_optional(param_dict, "dso_map_end")
>          symbol = get_optional(param_dict, "symbol")
> +       map_pgoff = get_optional(param_dict, "map_pgoff")


I am concerned the two sentences below are inconsistence: one uses
'start_addr + map_pgoff' and the other uses 'start_addr + int(map_pgoff)'.

Here about below code?

            map_pgoff_str = get_optional(param_dict, "map_pgoff")
	   if map_pgoff_str == "":
		map_pgoff = 0
            else:
		map_pgoff = int(map_pgoff_str)

With above change, 'map_pgoff' is an int type. As a result, the changes
below can simply add 'map_pgoff'.

With these changes, LGTM:

Reviewed-by: Leo Yan <leo.yan@arm.com>


>          cpu = sample["cpu"]
>          ip = sample["ip"]
> @@ -243,9 +244,11 @@ def process_event(param_dict):
>          # Record for previous sample packet
>          cpu_data[str(cpu) + 'addr'] = addr
> 
> -       # Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4
> -       if (start_addr == 0 and stop_addr == 4):
> -               print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu)
> +       # Filter out zero start_address. Optionally identify CS_ETM_TRACE_ON packet
> +       # if start_addr=0 and stop_addr=4.
> +       if (start_addr == 0):
> +               if ((stop_addr == 4) and (options.verbose == True)):
> +                       print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu)
>                  return
> 
>          if (start_addr < int(dso_start) or start_addr > int(dso_end)):
> @@ -262,13 +265,14 @@ def process_event(param_dict):
>                  # vm_start to zero.
>                  if (dso == "[kernel.kallsyms]" or dso_start == 0x400000):
>                          dso_vm_start = 0
> +                       map_pgoff = 0
>                  else:
>                          dso_vm_start = int(dso_start)
> 
>                  dso_fname = get_dso_file_path(dso, dso_bid)
>                  if path.exists(dso_fname):
> -                       print_disam(dso_fname, dso_vm_start, start_addr, stop_addr)
> +                       print_disam(dso_fname, dso_vm_start, start_addr + map_pgoff, stop_addr + map_pgoff)
>                  else:
> -                       print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr))
> +                       print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr + int(map_pgoff), stop_addr + int(map_pgoff)))

> 
>          print_srccode(comm, param_dict, sample, symbol, dso)
> --
> 2.44.0
>
Steve Clevenger Sept. 6, 2024, 5:27 p.m. UTC | #2
On 9/6/2024 4:27 AM, Leo Yan wrote:
> 
> 
> On 9/5/24 23:28, Steve Clevenger wrote:
>>
>> Extract map_pgoff parameter from the dictionary, and adjust start/end
>> range passed to objdump based on the value.
>>
>> A zero start_addr is filtered to prevent output of dso address range
>> check failures. This script repeatedly sees a zero value passed
>> in for
>>       start_addr = cpu_data[str(cpu) + 'addr']
>>
>> These zero values are not a new problem. The start_addr/stop_addr warning
>> clutters the instruction trace output, hence this change.
>>
>> Signed-off-by: Steve Clevenger <scclevenger@os.amperecomputing.com>
>> ---
>>   tools/perf/scripts/python/arm-cs-trace-disasm.py | 14 +++++++++-----
>>   1 file changed, 9 insertions(+), 5 deletions(-)
>>
>> diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/
>> perf/scripts/python/arm-cs-trace-disasm.py
>> index 7aff02d84ffb..a867e0db02b8 100755
>> --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
>> +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
>> @@ -187,6 +187,7 @@ def process_event(param_dict):
>>          dso_start = get_optional(param_dict, "dso_map_start")
>>          dso_end = get_optional(param_dict, "dso_map_end")
>>          symbol = get_optional(param_dict, "symbol")
>> +       map_pgoff = get_optional(param_dict, "map_pgoff")
> 
> 
> I am concerned the two sentences below are inconsistence: one uses
> 'start_addr + map_pgoff' and the other uses 'start_addr + int(map_pgoff)'.
> 

Valid point. It's working fine as is, but how is it even working? I look
at print_disam/read_disasm, and no type conversion is done in either
call. The dso_start parameter for these calls is actually dso_vm_start
which is the dso_start integer conversion.

> Here about below code?
> 
>            map_pgoff_str = get_optional(param_dict, "map_pgoff")
>        if map_pgoff_str == "":
>         map_pgoff = 0
>            else:
>         map_pgoff = int(map_pgoff_str)
> 
> With above change, 'map_pgoff' is an int type. As a result, the changes
> below can simply add 'map_pgoff'.

I propose:
	map_pgoff_str = get_optional(param_dict, "map_pgoff"
	if (map_pgoff_str.isdigit()):
	    map_pgoff = int(map_pgoff)
	else:
	    map_pgoff = 0

The int() conversions in the print() statement can then be removed.

Steve

> With these changes, LGTM:
> 
> Reviewed-by: Leo Yan <leo.yan@arm.com>
> 
> 
>>          cpu = sample["cpu"]
>>          ip = sample["ip"]
>> @@ -243,9 +244,11 @@ def process_event(param_dict):
>>          # Record for previous sample packet
>>          cpu_data[str(cpu) + 'addr'] = addr
>>
>> -       # Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4
>> -       if (start_addr == 0 and stop_addr == 4):
>> -               print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu)
>> +       # Filter out zero start_address. Optionally identify
>> CS_ETM_TRACE_ON packet
>> +       # if start_addr=0 and stop_addr=4.
>> +       if (start_addr == 0):
>> +               if ((stop_addr == 4) and (options.verbose == True)):
>> +                       print("CPU%d: CS_ETM_TRACE_ON packet is
>> inserted" % cpu)
>>                  return
>>
>>          if (start_addr < int(dso_start) or start_addr > int(dso_end)):
>> @@ -262,13 +265,14 @@ def process_event(param_dict):
>>                  # vm_start to zero.
>>                  if (dso == "[kernel.kallsyms]" or dso_start ==
>> 0x400000):
>>                          dso_vm_start = 0
>> +                       map_pgoff = 0
>>                  else:
>>                          dso_vm_start = int(dso_start)
>>
>>                  dso_fname = get_dso_file_path(dso, dso_bid)
>>                  if path.exists(dso_fname):
>> -                       print_disam(dso_fname, dso_vm_start,
>> start_addr, stop_addr)
>> +                       print_disam(dso_fname, dso_vm_start,
>> start_addr + map_pgoff, stop_addr + map_pgoff)
>>                  else:
>> -                       print("Failed to find dso %s for address range
>> [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr))
>> +                       print("Failed to find dso %s for address range
>> [ 0x%x .. 0x%x ]" % (dso, start_addr + int(map_pgoff), stop_addr +
>> int(map_pgoff)))
> 
>>
>>          print_srccode(comm, param_dict, sample, symbol, dso)
>> -- 
>> 2.44.0
>>
Steve Clevenger Sept. 6, 2024, 11:20 p.m. UTC | #3
On 9/6/2024 10:27 AM, Steve Clevenger wrote:
> 
> 
> On 9/6/2024 4:27 AM, Leo Yan wrote:
>>
>>
>> On 9/5/24 23:28, Steve Clevenger wrote:
>>>
>>> Extract map_pgoff parameter from the dictionary, and adjust start/end
>>> range passed to objdump based on the value.
>>>
>>> A zero start_addr is filtered to prevent output of dso address range
>>> check failures. This script repeatedly sees a zero value passed
>>> in for
>>>       start_addr = cpu_data[str(cpu) + 'addr']
>>>
>>> These zero values are not a new problem. The start_addr/stop_addr warning
>>> clutters the instruction trace output, hence this change.
>>>
>>> Signed-off-by: Steve Clevenger <scclevenger@os.amperecomputing.com>
>>> ---
>>>   tools/perf/scripts/python/arm-cs-trace-disasm.py | 14 +++++++++-----
>>>   1 file changed, 9 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/
>>> perf/scripts/python/arm-cs-trace-disasm.py
>>> index 7aff02d84ffb..a867e0db02b8 100755
>>> --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
>>> +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
>>> @@ -187,6 +187,7 @@ def process_event(param_dict):
>>>          dso_start = get_optional(param_dict, "dso_map_start")
>>>          dso_end = get_optional(param_dict, "dso_map_end")
>>>          symbol = get_optional(param_dict, "symbol")
>>> +       map_pgoff = get_optional(param_dict, "map_pgoff")
>>
>>
>> I am concerned the two sentences below are inconsistence: one uses
>> 'start_addr + map_pgoff' and the other uses 'start_addr + int(map_pgoff)'.
>>
> 
> Valid point. It's working fine as is, but how is it even working? I look
> at print_disam/read_disasm, and no type conversion is done in either
> call. The dso_start parameter for these calls is actually dso_vm_start
> which is the dso_start integer conversion.

Python thinks the map_pgoff object is already an integer. For example,
	map_pgoff = get_optional(param_dict, "map_pgoff")
	print("%d" % map_pgoff.isdigit())

AttributeError: 'int' object has no attribute 'isdigit'
Fatal Python error: handler_call die: problem in Python trace event handler

Converting map_pgoff to a string works in the print statement.

print("%d" % str(map_pgoff).isdigit())
1

I'm not sure, but it's possible get_optional() during some call had
converted it to '[unknown]' which would cause a problem.

I can explicitly force to integer.

Steve

> 
>> Here about below code?
>>
>>            map_pgoff_str = get_optional(param_dict, "map_pgoff")
>>        if map_pgoff_str == "":
>>         map_pgoff = 0
>>            else:
>>         map_pgoff = int(map_pgoff_str)
>>
>> With above change, 'map_pgoff' is an int type. As a result, the changes
>> below can simply add 'map_pgoff'.
> 
> I propose:
> 	map_pgoff_str = get_optional(param_dict, "map_pgoff"
> 	if (map_pgoff_str.isdigit()):
> 	    map_pgoff = int(map_pgoff)
> 	else:
> 	    map_pgoff = 0
> 
> The int() conversions in the print() statement can then be removed.
> 
> Steve
> 
>> With these changes, LGTM:
>>
>> Reviewed-by: Leo Yan <leo.yan@arm.com>
>>
>>
>>>          cpu = sample["cpu"]
>>>          ip = sample["ip"]
>>> @@ -243,9 +244,11 @@ def process_event(param_dict):
>>>          # Record for previous sample packet
>>>          cpu_data[str(cpu) + 'addr'] = addr
>>>
>>> -       # Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4
>>> -       if (start_addr == 0 and stop_addr == 4):
>>> -               print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu)
>>> +       # Filter out zero start_address. Optionally identify
>>> CS_ETM_TRACE_ON packet
>>> +       # if start_addr=0 and stop_addr=4.
>>> +       if (start_addr == 0):
>>> +               if ((stop_addr == 4) and (options.verbose == True)):
>>> +                       print("CPU%d: CS_ETM_TRACE_ON packet is
>>> inserted" % cpu)
>>>                  return
>>>
>>>          if (start_addr < int(dso_start) or start_addr > int(dso_end)):
>>> @@ -262,13 +265,14 @@ def process_event(param_dict):
>>>                  # vm_start to zero.
>>>                  if (dso == "[kernel.kallsyms]" or dso_start ==
>>> 0x400000):
>>>                          dso_vm_start = 0
>>> +                       map_pgoff = 0
>>>                  else:
>>>                          dso_vm_start = int(dso_start)
>>>
>>>                  dso_fname = get_dso_file_path(dso, dso_bid)
>>>                  if path.exists(dso_fname):
>>> -                       print_disam(dso_fname, dso_vm_start,
>>> start_addr, stop_addr)
>>> +                       print_disam(dso_fname, dso_vm_start,
>>> start_addr + map_pgoff, stop_addr + map_pgoff)
>>>                  else:
>>> -                       print("Failed to find dso %s for address range
>>> [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr))
>>> +                       print("Failed to find dso %s for address range
>>> [ 0x%x .. 0x%x ]" % (dso, start_addr + int(map_pgoff), stop_addr +
>>> int(map_pgoff)))
>>
>>>
>>>          print_srccode(comm, param_dict, sample, symbol, dso)
>>> -- 
>>> 2.44.0
>>>
> 
> _______________________________________________
> CoreSight mailing list -- coresight@lists.linaro.org
> To unsubscribe send an email to coresight-leave@lists.linaro.org
Leo Yan Sept. 9, 2024, 8:33 p.m. UTC | #4
On 9/7/2024 12:20 AM, Steve Clevenger wrote:

[...]

>>>> diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/
>>>> perf/scripts/python/arm-cs-trace-disasm.py
>>>> index 7aff02d84ffb..a867e0db02b8 100755
>>>> --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
>>>> +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
>>>> @@ -187,6 +187,7 @@ def process_event(param_dict):
>>>>          dso_start = get_optional(param_dict, "dso_map_start")
>>>>          dso_end = get_optional(param_dict, "dso_map_end")
>>>>          symbol = get_optional(param_dict, "symbol")
>>>> +       map_pgoff = get_optional(param_dict, "map_pgoff")
>>>
>>>
>>> I am concerned the two sentences below are inconsistence: one uses
>>> 'start_addr + map_pgoff' and the other uses 'start_addr + int(map_pgoff)'.
>>>
>>
>> Valid point. It's working fine as is, but how is it even working? I look
>> at print_disam/read_disasm, and no type conversion is done in either
>> call. The dso_start parameter for these calls is actually dso_vm_start
>> which is the dso_start integer conversion.

I agreed with you. After reading code, I have same conclusion that we don't
need to type conversion to int type.

> Python thinks the map_pgoff object is already an integer. For example,
>         map_pgoff = get_optional(param_dict, "map_pgoff")
>         print("%d" % map_pgoff.isdigit())
> 
> AttributeError: 'int' object has no attribute 'isdigit'
> Fatal Python error: handler_call die: problem in Python trace event handler
> 
> Converting map_pgoff to a string works in the print statement.
> 
> print("%d" % str(map_pgoff).isdigit())
> 1
> 
> I'm not sure, but it's possible get_optional() during some call had
> converted it to '[unknown]' which would cause a problem.
> 
> I can explicitly force to integer.

For backward compatibility, we can use below code:

   map_pgoff = get_optional(param_dict, "map_pgoff")
   if (map_pgoff == '[unknown]'):
       map_pgoff = 0

Then in the later flow, we should can always use "map_pgoff" as an int type.

I struggled for James reported issue.

The variables “map_pgoff,” “dso_start,” and “dso_end” are set together in the
Python engine. All of them should be of type int if the DSO is found, or all
should be ‘[unknown]’ if the DSO is missing. We have checked the types for
“dso_start” and “dso_end”, and if either is ‘[unknown]’ the flow will directly
bail out. Thus, in theory, “map_pgoff” will not cause trouble if it is an
‘[unknown]’ string.

One possibility is that James has applied your patches but has not built perf.
So, the field “map_pgoff” is not passed from the Python engine, which will
cause the reported error. I think the proposed above change can effectively
avoid the error.

Thanks,
Leo
Steve Clevenger Sept. 9, 2024, 8:56 p.m. UTC | #5
On 9/9/2024 1:33 PM, Leo Yan wrote:
> On 9/7/2024 12:20 AM, Steve Clevenger wrote:
> 
> [...]
> 
>>>>> diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/
>>>>> perf/scripts/python/arm-cs-trace-disasm.py
>>>>> index 7aff02d84ffb..a867e0db02b8 100755
>>>>> --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
>>>>> +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
>>>>> @@ -187,6 +187,7 @@ def process_event(param_dict):
>>>>>          dso_start = get_optional(param_dict, "dso_map_start")
>>>>>          dso_end = get_optional(param_dict, "dso_map_end")
>>>>>          symbol = get_optional(param_dict, "symbol")
>>>>> +       map_pgoff = get_optional(param_dict, "map_pgoff")
>>>>
>>>>
>>>> I am concerned the two sentences below are inconsistence: one uses
>>>> 'start_addr + map_pgoff' and the other uses 'start_addr + int(map_pgoff)'.
>>>>
>>>
>>> Valid point. It's working fine as is, but how is it even working? I look
>>> at print_disam/read_disasm, and no type conversion is done in either
>>> call. The dso_start parameter for these calls is actually dso_vm_start
>>> which is the dso_start integer conversion.
> 
> I agreed with you. After reading code, I have same conclusion that we don't
> need to type conversion to int type.
> 
>> Python thinks the map_pgoff object is already an integer. For example,
>>         map_pgoff = get_optional(param_dict, "map_pgoff")
>>         print("%d" % map_pgoff.isdigit())
>>
>> AttributeError: 'int' object has no attribute 'isdigit'
>> Fatal Python error: handler_call die: problem in Python trace event handler
>>
>> Converting map_pgoff to a string works in the print statement.
>>
>> print("%d" % str(map_pgoff).isdigit())
>> 1
>>
>> I'm not sure, but it's possible get_optional() during some call had
>> converted it to '[unknown]' which would cause a problem.
>>
>> I can explicitly force to integer.
> 
> For backward compatibility, we can use below code:
> 
>    map_pgoff = get_optional(param_dict, "map_pgoff")
>    if (map_pgoff == '[unknown]'):
>        map_pgoff = 0
> 
> Then in the later flow, we should can always use "map_pgoff" as an int type.
> 
> I struggled for James reported issue.
> 
> The variables “map_pgoff,” “dso_start,” and “dso_end” are set together in the
> Python engine. All of them should be of type int if the DSO is found, or all
> should be ‘[unknown]’ if the DSO is missing. We have checked the types for
> “dso_start” and “dso_end”, and if either is ‘[unknown]’ the flow will directly
> bail out. Thus, in theory, “map_pgoff” will not cause trouble if it is an
> ‘[unknown]’ string.
> 
> One possibility is that James has applied your patches but has not built perf.
> So, the field “map_pgoff” is not passed from the Python engine, which will
> cause the reported error. I think the proposed above change can effectively
> avoid the error.
> 
> Thanks,
> Leo

I added that exact'[unknown]' check in last week. There's no need to
explicitly convert to an integer although Python doesn't complain when
about integer to integer conversions. It's probably just a no op. I'll
commit the change later today.

Steve
diff mbox series

Patch

diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py
index 7aff02d84ffb..a867e0db02b8 100755
--- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
+++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
@@ -187,6 +187,7 @@  def process_event(param_dict):
 	dso_start = get_optional(param_dict, "dso_map_start")
 	dso_end = get_optional(param_dict, "dso_map_end")
 	symbol = get_optional(param_dict, "symbol")
+	map_pgoff = get_optional(param_dict, "map_pgoff")
 
 	cpu = sample["cpu"]
 	ip = sample["ip"]
@@ -243,9 +244,11 @@  def process_event(param_dict):
 	# Record for previous sample packet
 	cpu_data[str(cpu) + 'addr'] = addr
 
-	# Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4
-	if (start_addr == 0 and stop_addr == 4):
-		print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu)
+	# Filter out zero start_address. Optionally identify CS_ETM_TRACE_ON packet
+	# if start_addr=0 and stop_addr=4.
+	if (start_addr == 0):
+		if ((stop_addr == 4) and (options.verbose == True)):
+			print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu)
 		return
 
 	if (start_addr < int(dso_start) or start_addr > int(dso_end)):
@@ -262,13 +265,14 @@  def process_event(param_dict):
 		# vm_start to zero.
 		if (dso == "[kernel.kallsyms]" or dso_start == 0x400000):
 			dso_vm_start = 0
+			map_pgoff = 0
 		else:
 			dso_vm_start = int(dso_start)
 
 		dso_fname = get_dso_file_path(dso, dso_bid)
 		if path.exists(dso_fname):
-			print_disam(dso_fname, dso_vm_start, start_addr, stop_addr)
+			print_disam(dso_fname, dso_vm_start, start_addr + map_pgoff, stop_addr + map_pgoff)
 		else:
-			print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr))
+			print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr + int(map_pgoff), stop_addr + int(map_pgoff)))
 
 	print_srccode(comm, param_dict, sample, symbol, dso)