diff mbox

[v9,14/14] virt: arm: support hip04 gic

Message ID CAD6h2NQZGBa5U0-tmj=wtsFpcqnV2tHx+_vESrx8+3r7dGpHjQ@mail.gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Haojian Zhuang May 20, 2014, 2:16 p.m. UTC
On 20 May 2014 22:01, Christoffer Dall <christoffer.dall@linaro.org> wrote:
> On Tue, May 20, 2014 at 09:52:53PM +0800, Haojian Zhuang wrote:
>> On 20 May 2014 21:44, Christoffer Dall <christoffer.dall@linaro.org> wrote:
>> > On Tue, May 20, 2014 at 09:10:27PM +0800, Haojian Zhuang wrote:
>
> [...]
>
>> >> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
>> >> index 47b2983..4c0c1e9 100644
>> >> --- a/virt/kvm/arm/vgic.c
>> >> +++ b/virt/kvm/arm/vgic.c
>> >> @@ -76,6 +76,8 @@
>> >>  #define IMPLEMENTER_ARM              0x43b
>> >>  #define GICC_ARCH_VERSION_V2 0x2
>> >>
>> >> +#define vgic_nr_lr(vcpu)     (vcpu->hw_cfg & HWCFG_NR_LR_MASK)
>> >> +
>> >>  /* Physical address of vgic virtual cpu interface */
>> >>  static phys_addr_t vgic_vcpu_base;
>> >>
>> >> @@ -97,7 +99,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
>> >>  static void vgic_update_state(struct kvm *kvm);
>> >>  static void vgic_kick_vcpus(struct kvm *kvm);
>> >>  static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
>> >> -static u32 vgic_nr_lr;
>> >> +static u32 vgic_hw_cfg;
>> >>
>> >>  static unsigned int vgic_maint_irq;
>> >>
>> >> @@ -624,9 +626,9 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
>> >>       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>> >>       int vcpu_id = vcpu->vcpu_id;
>> >>       int i, irq, source_cpu;
>> >> -     u32 *lr;
>> >> +     u32 *lr, nr_lr = vgic_nr_lr(vgic_cpu);
>> >
>> > This is static for any system post-boot, right?  Can't we set this
>> > global variable once like we did before instead of having to define
>> > these extra variables and do the bit manipulation all over the place?
>> >
>> > -Christoffer
>>
>> I tried to define a global gich_apr variable before. But Marc didn't agree on
>> that. He suggested to use vgic_cpu_nr_lr to save both GICH_APR offset
>> and nr_lr.
>>
>> Adding gich_apr variable should be the simpler implementation.
>>
> You're talking about storing this information on the vgic_cpu struct,
> which is accessed on every world-switch patch.  There, you don't want
> two memory accesses.
>

It's the implementation of gich_apr in arm32.

We needn't add or change anything in struct vgic_cpu. And both the
assembly code and the code could be much easier.

> Here, on the other hand, you're in host kernel land, and you can do your
> bit-shuffling once, and always access a single static variable like we
> did before, which will simplify the C-code.
>

No bit-shuffling in gich_apr implementation. Is it right?

Regards
Haojian

Comments

Christoffer Dall May 20, 2014, 3:05 p.m. UTC | #1
On Tue, May 20, 2014 at 10:16:22PM +0800, Haojian Zhuang wrote:
> On 20 May 2014 22:01, Christoffer Dall <christoffer.dall@linaro.org> wrote:
> > On Tue, May 20, 2014 at 09:52:53PM +0800, Haojian Zhuang wrote:
> >> On 20 May 2014 21:44, Christoffer Dall <christoffer.dall@linaro.org> wrote:
> >> > On Tue, May 20, 2014 at 09:10:27PM +0800, Haojian Zhuang wrote:
> >
> > [...]
> >
> >> >> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> >> >> index 47b2983..4c0c1e9 100644
> >> >> --- a/virt/kvm/arm/vgic.c
> >> >> +++ b/virt/kvm/arm/vgic.c
> >> >> @@ -76,6 +76,8 @@
> >> >>  #define IMPLEMENTER_ARM              0x43b
> >> >>  #define GICC_ARCH_VERSION_V2 0x2
> >> >>
> >> >> +#define vgic_nr_lr(vcpu)     (vcpu->hw_cfg & HWCFG_NR_LR_MASK)
> >> >> +
> >> >>  /* Physical address of vgic virtual cpu interface */
> >> >>  static phys_addr_t vgic_vcpu_base;
> >> >>
> >> >> @@ -97,7 +99,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
> >> >>  static void vgic_update_state(struct kvm *kvm);
> >> >>  static void vgic_kick_vcpus(struct kvm *kvm);
> >> >>  static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
> >> >> -static u32 vgic_nr_lr;
> >> >> +static u32 vgic_hw_cfg;
> >> >>
> >> >>  static unsigned int vgic_maint_irq;
> >> >>
> >> >> @@ -624,9 +626,9 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
> >> >>       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> >> >>       int vcpu_id = vcpu->vcpu_id;
> >> >>       int i, irq, source_cpu;
> >> >> -     u32 *lr;
> >> >> +     u32 *lr, nr_lr = vgic_nr_lr(vgic_cpu);
> >> >
> >> > This is static for any system post-boot, right?  Can't we set this
> >> > global variable once like we did before instead of having to define
> >> > these extra variables and do the bit manipulation all over the place?
> >> >
> >> > -Christoffer
> >>
> >> I tried to define a global gich_apr variable before. But Marc didn't agree on
> >> that. He suggested to use vgic_cpu_nr_lr to save both GICH_APR offset
> >> and nr_lr.
> >>
> >> Adding gich_apr variable should be the simpler implementation.
> >>
> > You're talking about storing this information on the vgic_cpu struct,
> > which is accessed on every world-switch patch.  There, you don't want
> > two memory accesses.
> >
> diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
> index 76af9302..b27e43f 100644
> --- a/arch/arm/kvm/interrupts_head.S
> +++ b/arch/arm/kvm/interrupts_head.S
> @@ -419,7 +419,9 @@ vcpu        .req    r0              @ vcpu pointer
> always in r0
>         ldr     r7, [r2, #GICH_EISR1]
>         ldr     r8, [r2, #GICH_ELRSR0]
>         ldr     r9, [r2, #GICH_ELRSR1]
> -       ldr     r10, [r2, #GICH_APR]
> +       ldr     r10, =gich_apr
> +       ldr     r10, [r10]
> +       ldr     r10, [r2, r10]
> 
>         str     r3, [r11, #VGIC_CPU_HCR]
>         str     r4, [r11, #VGIC_CPU_VMCR]
> @@ -435,7 +437,11 @@ vcpu       .req    r0              @ vcpu pointer
> always in r0
>         str     r5, [r2, #GICH_HCR]
> 
>         /* Save list registers */
> -       add     r2, r2, #GICH_LR0
> +       ldr     r10, =gich_apr
> +       ldr     r10, [r10]
> +       /* the offset between GICH_APR & GICH_LR0 is 0x10 */
> +       add     r10, r10, #0x10
> +       add     r2, r2, r10
>         add     r3, r11, #VGIC_CPU_LR
>         ldr     r4, [r11, #VGIC_CPU_NR_LR]
>  1:     ldr     r6, [r2], #4
> @@ -469,10 +475,16 @@ vcpu      .req    r0              @ vcpu pointer
> always in r0
> 
>         str     r3, [r2, #GICH_HCR]
>         str     r4, [r2, #GICH_VMCR]
> -       str     r8, [r2, #GICH_APR]
> +       ldr     r6, =gich_apr
> +       ldr     r6, [r6]
> +       str     r8, [r2, r6]
> 
>         /* Restore list registers */
> -       add     r2, r2, #GICH_LR0
> +       ldr     r6, =gich_apr
> +       ldr     r6, [r6]
> +       /* the offset between GICH_APR & GICH_LR0 is 0x10 */
> +       add     r6, r6, #0x10
> +       add     r2, r2, r6
>         add     r3, r11, #VGIC_CPU_LR
>         ldr     r4, [r11, #VGIC_CPU_NR_LR]
>  1:     ldr     r6, [r3], #4
> @@ -618,3 +630,7 @@ vcpu        .req    r0              @ vcpu pointer
> always in r0
>  .macro load_vcpu
>         mrc     p15, 4, vcpu, c13, c0, 2        @ HTPIDR
>  .endm
> +
> +       .global gich_apr
> +gich_apr:
> +       .long   GICH_APR
> 
> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> index 47b2983..6bf31db 100644
> --- a/virt/kvm/arm/vgic.c
> +++ b/virt/kvm/arm/vgic.c
> @@ -1470,17 +1470,30 @@ static struct notifier_block vgic_cpu_nb = {
>         .notifier_call = vgic_cpu_notify,
>  };
> 
> +static const struct of_device_id of_vgic_ids[] = {
> +       {
> +               .compatible = "arm,cortex-a15-gic",
> +               .data = (void *)GICH_APR,
> +       }, {
> +               .compatible = "hisilicon,hip04-gic",
> +               .data = (void *)HIP04_GICH_APR,
> +       }, {
> +       },
> +};
> +
>  int kvm_vgic_hyp_init(void)
>  {
>         int ret;
>         struct resource vctrl_res;
>         struct resource vcpu_res;
> +       const struct of_device_id *match;
> 
> -       vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
> +       vgic_node = of_find_matching_node_and_match(NULL, of_vgic_ids, &match);
>         if (!vgic_node) {
>                 kvm_err("error: no compatible vgic node in DT\n");
>                 return -ENODEV;
>         }
> +       gich_apr = (unsigned int)match->data;
> 
>         vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0);
>         if (!vgic_maint_irq) {
> 
> It's the implementation of gich_apr in arm32.
> 
> We needn't add or change anything in struct vgic_cpu. And both the
> assembly code and the code could be much easier.
> 

But we do end up with an extra memory access from EL2 in the critical
path, and I believe Marc's concern here is that if we cross a cache
line, this might really hurt performance.

> > Here, on the other hand, you're in host kernel land, and you can do your
> > bit-shuffling once, and always access a single static variable like we
> > did before, which will simplify the C-code.
> >
> 
> No bit-shuffling in gich_apr implementation. Is it right?
> 

I would like to see us avoid allocating that extra nr_lr variable in
every function mucking with list registers in the C-file.

I would need to look at the data structure size and profile the
world-switch code to properly evaluate if it's worth packing the values
in a single field, so I'll let Marc comment on this one.

-Christoffer
Haojian Zhuang May 20, 2014, 3:39 p.m. UTC | #2
On 20 May 2014 23:05, Christoffer Dall <christoffer.dall@linaro.org> wrote:
> On Tue, May 20, 2014 at 10:16:22PM +0800, Haojian Zhuang wrote:
>> On 20 May 2014 22:01, Christoffer Dall <christoffer.dall@linaro.org> wrote:
>> It's the implementation of gich_apr in arm32.
>>
>> We needn't add or change anything in struct vgic_cpu. And both the
>> assembly code and the code could be much easier.
>>
>
> But we do end up with an extra memory access from EL2 in the critical
> path, and I believe Marc's concern here is that if we cross a cache
> line, this might really hurt performance.
>

Sorry. Do we may cross a cache line or a TLB entry?

I think that you're concerning to cross TLB entries. The reason is in
 below.

1. If the problem is on crossing cache line, it's caused by too much
instructions. Either the packing nr_lr or the gich_apr adds some
instructions. The packing nr_lr needs a little more instructions.

2. ldr instruction is a pseudo instruction. So it's parsed into operation
on PC register. Now I put gich_apr in interrupts_head.S, it results
in gich_apr variable before __kvm_hyp_code_start. It may cross the
TLB entries.
How about to declare gich_apr after __kvm_cpu_return in interrupts.S?
Since save_vgic_state & restore_vgic_state is only used once, declaring
gich_apr just after the code could avoid crossing TLB entry.

Regards
Haojian
Christoffer Dall May 21, 2014, 9:02 a.m. UTC | #3
On Tue, May 20, 2014 at 11:39:12PM +0800, Haojian Zhuang wrote:
> On 20 May 2014 23:05, Christoffer Dall <christoffer.dall@linaro.org> wrote:
> > On Tue, May 20, 2014 at 10:16:22PM +0800, Haojian Zhuang wrote:
> >> On 20 May 2014 22:01, Christoffer Dall <christoffer.dall@linaro.org> wrote:
> >> It's the implementation of gich_apr in arm32.
> >>
> >> We needn't add or change anything in struct vgic_cpu. And both the
> >> assembly code and the code could be much easier.
> >>
> >
> > But we do end up with an extra memory access from EL2 in the critical
> > path, and I believe Marc's concern here is that if we cross a cache
> > line, this might really hurt performance.
> >
> 
> Sorry. Do we may cross a cache line or a TLB entry?
> 
> I think that you're concerning to cross TLB entries. The reason is in
>  below.
> 
> 1. If the problem is on crossing cache line, it's caused by too much
> instructions. Either the packing nr_lr or the gich_apr adds some
> instructions. The packing nr_lr needs a little more instructions.

I don't see why this argument is valid.  If you have a separate
instruction and data cache, you may be loading from a different cache
line when placing the static value close to your instructions.  If you
add a variable to the vcpu struct, all of the fields may no longer fit
in a single data cache line and you may cause the memory subsystem to
have to fetch another cache line.  I believe the latter is Marc's
concern, and I suspect he would be equally concerned about the former.

I'm not too concerned about a TLB entry here, that works at a 4K
granularity and with the proper alignment of the struct and hyp code,
that shouldn't be a concern.  Without it, of course, there's a risk of
requiring another TLB entry as well.


> 
> 2. ldr instruction is a pseudo instruction. So it's parsed into operation
> on PC register.

Eh, it just means that it does a load relative from the PC address, and
if the offset is too far to be encoded in the immediate field, then it
does an indirect load through a literal pool, if I understand what you
are referring to.  In any case, there will be at least one actual ldr
instruction issued on the PE.

> Now I put gich_apr in interrupts_head.S, it results
> in gich_apr variable before __kvm_hyp_code_start. It may cross the
> TLB entries.
> How about to declare gich_apr after __kvm_cpu_return in interrupts.S?
> Since save_vgic_state & restore_vgic_state is only used once, declaring
> gich_apr just after the code could avoid crossing TLB entry.
> 

Again, all the fields in the vcpu struct are quite likely to be aligned
within a single data cache line, I don't believe that's the case if you
stick some data in between the the hyp code.

-Christoffer
Haojian Zhuang May 21, 2014, 9:47 a.m. UTC | #4
On 21 May 2014 17:02, Christoffer Dall <christoffer.dall@linaro.org> wrote:
> On Tue, May 20, 2014 at 11:39:12PM +0800, Haojian Zhuang wrote:
>> On 20 May 2014 23:05, Christoffer Dall <christoffer.dall@linaro.org> wrote:
>> > On Tue, May 20, 2014 at 10:16:22PM +0800, Haojian Zhuang wrote:
>> >> On 20 May 2014 22:01, Christoffer Dall <christoffer.dall@linaro.org> wrote:
>> >> It's the implementation of gich_apr in arm32.
>> >>
>> >> We needn't add or change anything in struct vgic_cpu. And both the
>> >> assembly code and the code could be much easier.
>> >>
>> >
>> > But we do end up with an extra memory access from EL2 in the critical
>> > path, and I believe Marc's concern here is that if we cross a cache
>> > line, this might really hurt performance.
>> >
>>
>> Sorry. Do we may cross a cache line or a TLB entry?
>>
>> I think that you're concerning to cross TLB entries. The reason is in
>>  below.
>>
>> 1. If the problem is on crossing cache line, it's caused by too much
>> instructions. Either the packing nr_lr or the gich_apr adds some
>> instructions. The packing nr_lr needs a little more instructions.
>
> I don't see why this argument is valid.  If you have a separate

I want to make it clear what I missing.

> instruction and data cache, you may be loading from a different cache
> line when placing the static value close to your instructions.  If you
> add a variable to the vcpu struct, all of the fields may no longer fit
> in a single data cache line and you may cause the memory subsystem to
> have to fetch another cache line.  I believe the latter is Marc's

Yes, I forgot new gich_apr is the only variable in the assembly code.
So the gich_apr will be load from a different cache line.

Then let's come back to packing hw_cfg.

Now the high word is used to store the offset of GICH_APR. The
unpacking operation is too complex to calculate the register offset,
especially in arm64 implementation.

How about changing the packing mechanism?

1. Add the definition of enconding in arm-gic.h.

#define HIP04_GIC             (1 << 16)
#define HIP04_GICH_APR   0x70
#define HIP04_GICH_LR0    0x80

2. The code in save_vgic_state could be changed in below.

  ldr     r9, [r2, #GICH_ELRSR1]
+ldr     r10, [r3, #VGIC_CPU_HW_CFG]
+tst     r10, #HIP04_GIC
+ldreq  r10, [r2, #GICH_APR]
+ldrne  r10, [r2, #HIP04_GICH_APR]

Although I used the condition checking at here, the code could
be easier.

I think that the executing time on "ldr" and "ldreq" should be same,
because CPCS should be ready

Then calculation is avoid. Only three instructions are appended
for both GICH_APR & GICH_LR0. The implementation in arm64
should be same & simple.

How do you think so?

Regards
Haojian
Christoffer Dall May 21, 2014, 9:55 a.m. UTC | #5
On Wed, May 21, 2014 at 05:47:00PM +0800, Haojian Zhuang wrote:
> On 21 May 2014 17:02, Christoffer Dall <christoffer.dall@linaro.org> wrote:
> > On Tue, May 20, 2014 at 11:39:12PM +0800, Haojian Zhuang wrote:
> >> On 20 May 2014 23:05, Christoffer Dall <christoffer.dall@linaro.org> wrote:
> >> > On Tue, May 20, 2014 at 10:16:22PM +0800, Haojian Zhuang wrote:
> >> >> On 20 May 2014 22:01, Christoffer Dall <christoffer.dall@linaro.org> wrote:
> >> >> It's the implementation of gich_apr in arm32.
> >> >>
> >> >> We needn't add or change anything in struct vgic_cpu. And both the
> >> >> assembly code and the code could be much easier.
> >> >>
> >> >
> >> > But we do end up with an extra memory access from EL2 in the critical
> >> > path, and I believe Marc's concern here is that if we cross a cache
> >> > line, this might really hurt performance.
> >> >
> >>
> >> Sorry. Do we may cross a cache line or a TLB entry?
> >>
> >> I think that you're concerning to cross TLB entries. The reason is in
> >>  below.
> >>
> >> 1. If the problem is on crossing cache line, it's caused by too much
> >> instructions. Either the packing nr_lr or the gich_apr adds some
> >> instructions. The packing nr_lr needs a little more instructions.
> >
> > I don't see why this argument is valid.  If you have a separate
> 
> I want to make it clear what I missing.
> 
> > instruction and data cache, you may be loading from a different cache
> > line when placing the static value close to your instructions.  If you
> > add a variable to the vcpu struct, all of the fields may no longer fit
> > in a single data cache line and you may cause the memory subsystem to
> > have to fetch another cache line.  I believe the latter is Marc's
> 
> Yes, I forgot new gich_apr is the only variable in the assembly code.
> So the gich_apr will be load from a different cache line.
> 
> Then let's come back to packing hw_cfg.
> 
> Now the high word is used to store the offset of GICH_APR. The
> unpacking operation is too complex to calculate the register offset,
> especially in arm64 implementation.
> 
> How about changing the packing mechanism?
> 
> 1. Add the definition of enconding in arm-gic.h.
> 
> #define HIP04_GIC             (1 << 16)
> #define HIP04_GICH_APR   0x70
> #define HIP04_GICH_LR0    0x80
> 
> 2. The code in save_vgic_state could be changed in below.
> 
>   ldr     r9, [r2, #GICH_ELRSR1]
> +ldr     r10, [r3, #VGIC_CPU_HW_CFG]
> +tst     r10, #HIP04_GIC
> +ldreq  r10, [r2, #GICH_APR]
> +ldrne  r10, [r2, #HIP04_GICH_APR]
> 
> Although I used the condition checking at here, the code could
> be easier.
> 
> I think that the executing time on "ldr" and "ldreq" should be same,
> because CPCS should be ready
> 
> Then calculation is avoid. Only three instructions are appended
> for both GICH_APR & GICH_LR0. The implementation in arm64
> should be same & simple.
> 
I think you misunderstood my point.

Keep the assembly code as is, store the APR and the NR_LR in the HW_CFG
always, on all systems, and don't use any conditionals in the assembly
code (code is difficult to read, instruction prefetching and speculative
execution becomes difficult, etc.).

Only change something in the C-code.  Set a static variable there during
vgic_hyp_init and get rid of all the local variable declarations that
dereference the vgic_vcpu struct.

-Christoffer
diff mbox

Patch

diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 76af9302..b27e43f 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -419,7 +419,9 @@  vcpu        .req    r0              @ vcpu pointer
always in r0
        ldr     r7, [r2, #GICH_EISR1]
        ldr     r8, [r2, #GICH_ELRSR0]
        ldr     r9, [r2, #GICH_ELRSR1]
-       ldr     r10, [r2, #GICH_APR]
+       ldr     r10, =gich_apr
+       ldr     r10, [r10]
+       ldr     r10, [r2, r10]

        str     r3, [r11, #VGIC_CPU_HCR]
        str     r4, [r11, #VGIC_CPU_VMCR]
@@ -435,7 +437,11 @@  vcpu       .req    r0              @ vcpu pointer
always in r0
        str     r5, [r2, #GICH_HCR]

        /* Save list registers */
-       add     r2, r2, #GICH_LR0
+       ldr     r10, =gich_apr
+       ldr     r10, [r10]
+       /* the offset between GICH_APR & GICH_LR0 is 0x10 */
+       add     r10, r10, #0x10
+       add     r2, r2, r10
        add     r3, r11, #VGIC_CPU_LR
        ldr     r4, [r11, #VGIC_CPU_NR_LR]
 1:     ldr     r6, [r2], #4
@@ -469,10 +475,16 @@  vcpu      .req    r0              @ vcpu pointer
always in r0

        str     r3, [r2, #GICH_HCR]
        str     r4, [r2, #GICH_VMCR]
-       str     r8, [r2, #GICH_APR]
+       ldr     r6, =gich_apr
+       ldr     r6, [r6]
+       str     r8, [r2, r6]

        /* Restore list registers */
-       add     r2, r2, #GICH_LR0
+       ldr     r6, =gich_apr
+       ldr     r6, [r6]
+       /* the offset between GICH_APR & GICH_LR0 is 0x10 */
+       add     r6, r6, #0x10
+       add     r2, r2, r6
        add     r3, r11, #VGIC_CPU_LR
        ldr     r4, [r11, #VGIC_CPU_NR_LR]
 1:     ldr     r6, [r3], #4
@@ -618,3 +630,7 @@  vcpu        .req    r0              @ vcpu pointer
always in r0
 .macro load_vcpu
        mrc     p15, 4, vcpu, c13, c0, 2        @ HTPIDR
 .endm
+
+       .global gich_apr
+gich_apr:
+       .long   GICH_APR

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 47b2983..6bf31db 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1470,17 +1470,30 @@  static struct notifier_block vgic_cpu_nb = {
        .notifier_call = vgic_cpu_notify,
 };

+static const struct of_device_id of_vgic_ids[] = {
+       {
+               .compatible = "arm,cortex-a15-gic",
+               .data = (void *)GICH_APR,
+       }, {
+               .compatible = "hisilicon,hip04-gic",
+               .data = (void *)HIP04_GICH_APR,
+       }, {
+       },
+};
+
 int kvm_vgic_hyp_init(void)
 {
        int ret;
        struct resource vctrl_res;
        struct resource vcpu_res;
+       const struct of_device_id *match;

-       vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
+       vgic_node = of_find_matching_node_and_match(NULL, of_vgic_ids, &match);
        if (!vgic_node) {
                kvm_err("error: no compatible vgic node in DT\n");
                return -ENODEV;
        }
+       gich_apr = (unsigned int)match->data;

        vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0);
        if (!vgic_maint_irq) {