diff mbox series

target/hppa: Speed up hppa_is_pa20()

Message ID Z2-nWcZ5l6oklIZW@p100 (mailing list archive)
State New
Headers show
Series target/hppa: Speed up hppa_is_pa20() | expand

Commit Message

Helge Deller Dec. 28, 2024, 7:23 a.m. UTC
Although the hppa_is_pa20() helper is costly due to string comparisms in
object_dynamic_cast(), it is called quite often during memory lookups
and at each start of a block of instruction translations.
Speed hppa_is_pa20() up by calling object_dynamic_cast() only once at
CPU creation and store the result in the is_pa20 of struct CPUArchState.

Signed-off-by: Helge Deller <deller@gmx.de>

Comments

BALATON Zoltan Dec. 28, 2024, 11:16 a.m. UTC | #1
On Sat, 28 Dec 2024, Helge Deller wrote:
> Although the hppa_is_pa20() helper is costly due to string comparisms in
> object_dynamic_cast(), it is called quite often during memory lookups
> and at each start of a block of instruction translations.
> Speed hppa_is_pa20() up by calling object_dynamic_cast() only once at
> CPU creation and store the result in the is_pa20 of struct CPUArchState.
>
> Signed-off-by: Helge Deller <deller@gmx.de>
>
> diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
> index a31dc32a9f..08ac1ec068 100644
> --- a/hw/hppa/machine.c
> +++ b/hw/hppa/machine.c
> @@ -281,6 +281,7 @@ static TranslateFn *machine_HP_common_init_cpus(MachineState *machine)
>     /* Create CPUs.  */
>     for (unsigned int i = 0; i < smp_cpus; i++) {
>         cpu[i] = HPPA_CPU(cpu_create(machine->cpu_type));
> +        cpu[i]->env.is_pa20 = object_dynamic_cast(OBJECT(cpu[i]), TYPE_HPPA64_CPU);
>     }
>
>     /*
> diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
> index e45ba50a59..c37a701f44 100644
> --- a/target/hppa/cpu.h
> +++ b/target/hppa/cpu.h
> @@ -208,6 +208,7 @@ typedef struct CPUArchState {
>     uint64_t fr[32];
>     uint64_t sr[8];          /* stored shifted into place for gva */
>
> +    bool is_pa20;
>     uint32_t psw;            /* All psw bits except the following:  */
>     uint32_t psw_xb;         /* X and B, in their normal positions */
>     target_ulong psw_n;      /* boolean */
> @@ -294,7 +295,7 @@ struct HPPACPUClass {
>
> static inline bool hppa_is_pa20(CPUHPPAState *env)
> {
> -    return object_dynamic_cast(OBJECT(env_cpu(env)), TYPE_HPPA64_CPU) != NULL;
> +    return env->is_pa20;
> }

Now this function name is longer than what it extends to so maybe it would 
be simpler to drop the inline function and use env->is_pa20 directly 
where it's needed? Is there a reason to keep the function?

Regards,
BALATON Zoltan

>
> static inline int HPPA_BTLB_ENTRIES(CPUHPPAState *env)
>
>
Richard Henderson Dec. 28, 2024, 3:48 p.m. UTC | #2
On 12/27/24 23:23, Helge Deller wrote:
> Although the hppa_is_pa20() helper is costly due to string comparisms in
> object_dynamic_cast(), it is called quite often during memory lookups
> and at each start of a block of instruction translations.
> Speed hppa_is_pa20() up by calling object_dynamic_cast() only once at
> CPU creation and store the result in the is_pa20 of struct CPUArchState.
> 
> Signed-off-by: Helge Deller <deller@gmx.de>
> 
> diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
> index a31dc32a9f..08ac1ec068 100644
> --- a/hw/hppa/machine.c
> +++ b/hw/hppa/machine.c
> @@ -281,6 +281,7 @@ static TranslateFn *machine_HP_common_init_cpus(MachineState *machine)
>       /* Create CPUs.  */
>       for (unsigned int i = 0; i < smp_cpus; i++) {
>           cpu[i] = HPPA_CPU(cpu_create(machine->cpu_type));
> +        cpu[i]->env.is_pa20 = object_dynamic_cast(OBJECT(cpu[i]), TYPE_HPPA64_CPU);
>       }

This belongs in hppa_cpu_initfn, since it's internal to the workings of the cpu.
Otherwise,

> diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
> index e45ba50a59..c37a701f44 100644
> --- a/target/hppa/cpu.h
> +++ b/target/hppa/cpu.h
> @@ -208,6 +208,7 @@ typedef struct CPUArchState {
>       uint64_t fr[32];
>       uint64_t sr[8];          /* stored shifted into place for gva */
>   
> +    bool is_pa20;

This placement will interact badly with your reset function.
Probably better to put it at the end of ArchCPU.


r~
Helge Deller Dec. 28, 2024, 9:12 p.m. UTC | #3
On 12/28/24 12:16, BALATON Zoltan wrote:
> On Sat, 28 Dec 2024, Helge Deller wrote:
>> Although the hppa_is_pa20() helper is costly due to string comparisms in
>> object_dynamic_cast(), it is called quite often during memory lookups
>> and at each start of a block of instruction translations.
>> Speed hppa_is_pa20() up by calling object_dynamic_cast() only once at
>> CPU creation and store the result in the is_pa20 of struct CPUArchState.
>>
>> Signed-off-by: Helge Deller <deller@gmx.de>
>>
>> diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
>> index a31dc32a9f..08ac1ec068 100644
>> --- a/hw/hppa/machine.c
>> +++ b/hw/hppa/machine.c
>> @@ -281,6 +281,7 @@ static TranslateFn *machine_HP_common_init_cpus(MachineState *machine)
>>     /* Create CPUs.  */
>>     for (unsigned int i = 0; i < smp_cpus; i++) {
>>         cpu[i] = HPPA_CPU(cpu_create(machine->cpu_type));
>> +        cpu[i]->env.is_pa20 = object_dynamic_cast(OBJECT(cpu[i]), TYPE_HPPA64_CPU);
>>     }
>>
>>     /*
>> diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
>> index e45ba50a59..c37a701f44 100644
>> --- a/target/hppa/cpu.h
>> +++ b/target/hppa/cpu.h
>> @@ -208,6 +208,7 @@ typedef struct CPUArchState {
>>     uint64_t fr[32];
>>     uint64_t sr[8];          /* stored shifted into place for gva */
>>
>> +    bool is_pa20;
>>     uint32_t psw;            /* All psw bits except the following:  */
>>     uint32_t psw_xb;         /* X and B, in their normal positions */
>>     target_ulong psw_n;      /* boolean */
>> @@ -294,7 +295,7 @@ struct HPPACPUClass {
>>
>> static inline bool hppa_is_pa20(CPUHPPAState *env)
>> {
>> -    return object_dynamic_cast(OBJECT(env_cpu(env)), TYPE_HPPA64_CPU) != NULL;
>> +    return env->is_pa20;
>> }
>
> Now this function name is longer than what it extends to so maybe it
> would be simpler to drop the inline function and use env->is_pa20
> directly where it's needed?

Yes, that's a possible cleanup which can be done afterwards.

> Is there a reason to keep the function?

Personally I like it more than the "env->is_pa20".
Richard, any opinion from your side? Should I send a such a replacement patch?

Helge
Richard Henderson Dec. 28, 2024, 10:55 p.m. UTC | #4
On 12/28/24 13:12, Helge Deller wrote:
> On 12/28/24 12:16, BALATON Zoltan wrote:
>>> static inline bool hppa_is_pa20(CPUHPPAState *env)
>>> {
>>> -    return object_dynamic_cast(OBJECT(env_cpu(env)), TYPE_HPPA64_CPU) != NULL;
>>> +    return env->is_pa20;
>>> }
>>
>> Now this function name is longer than what it extends to so maybe it
>> would be simpler to drop the inline function and use env->is_pa20
>> directly where it's needed?
> 
> Yes, that's a possible cleanup which can be done afterwards.
> 
>> Is there a reason to keep the function?
> 
> Personally I like it more than the "env->is_pa20".
> Richard, any opinion from your side? Should I send a such a replacement patch?

I like keeping the accessor function.


r~
diff mbox series

Patch

diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
index a31dc32a9f..08ac1ec068 100644
--- a/hw/hppa/machine.c
+++ b/hw/hppa/machine.c
@@ -281,6 +281,7 @@  static TranslateFn *machine_HP_common_init_cpus(MachineState *machine)
     /* Create CPUs.  */
     for (unsigned int i = 0; i < smp_cpus; i++) {
         cpu[i] = HPPA_CPU(cpu_create(machine->cpu_type));
+        cpu[i]->env.is_pa20 = object_dynamic_cast(OBJECT(cpu[i]), TYPE_HPPA64_CPU);
     }
 
     /*
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
index e45ba50a59..c37a701f44 100644
--- a/target/hppa/cpu.h
+++ b/target/hppa/cpu.h
@@ -208,6 +208,7 @@  typedef struct CPUArchState {
     uint64_t fr[32];
     uint64_t sr[8];          /* stored shifted into place for gva */
 
+    bool is_pa20;
     uint32_t psw;            /* All psw bits except the following:  */
     uint32_t psw_xb;         /* X and B, in their normal positions */
     target_ulong psw_n;      /* boolean */
@@ -294,7 +295,7 @@  struct HPPACPUClass {
 
 static inline bool hppa_is_pa20(CPUHPPAState *env)
 {
-    return object_dynamic_cast(OBJECT(env_cpu(env)), TYPE_HPPA64_CPU) != NULL;
+    return env->is_pa20;
 }
 
 static inline int HPPA_BTLB_ENTRIES(CPUHPPAState *env)