diff mbox

[RFC,v2,08/12] spapr: Only setup HTP if necessary.

Message ID 034945c1bbc8d4a97e5f568d4d463af2c9a24080.1487829585.git.sam.bobroff@au1.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Sam Bobroff Feb. 23, 2017, 6 a.m. UTC
If QEMU is using KVM, and KVM is capable of running in radix mode,
guests can be run in real-mode without allocating a HPT (because KVM
will use a minimal RPT). So in this case, we avoid creating the HPT
at reset time and later (during CAS) create it if it is necessary.

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
---
v2:

* This patch has been mostly rewritten to move the late HPT allocation to CAS.
This allows a guest to start in radix mode (when it's in real mode) and then
change to hash, even if it is a legacy guest and will not call
h_register_process_table().
* Added an exported function to spapr.c to perform HPT allocation and adjust
the vrma if necessary. This makes it possible to allocate the HPT from
h_client_architecture_support() in spapr_hcall.c.

 hw/ppc/spapr.c         | 24 +++++++++++++++---------
 hw/ppc/spapr_hcall.c   | 10 ++++++++++
 include/hw/ppc/spapr.h |  1 +
 3 files changed, 26 insertions(+), 9 deletions(-)

Comments

David Gibson Feb. 28, 2017, 12:28 a.m. UTC | #1
s/HTP/HPT/ in subject line.


On Thu, Feb 23, 2017 at 05:00:01PM +1100, Sam Bobroff wrote:
> If QEMU is using KVM, and KVM is capable of running in radix mode,
> guests can be run in real-mode without allocating a HPT (because KVM
> will use a minimal RPT). So in this case, we avoid creating the HPT
> at reset time and later (during CAS) create it if it is necessary.
> 
> Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>

So, IIRC, we discussed previously that the logical way to do things
was to, by default, delay HPT allocation until CAS time, and just do
it at reset time for the case that needs it: hash host with KVM.

Did you hit a problem with that approach, or is there still work to be
done here?

> ---
> v2:
> 
> * This patch has been mostly rewritten to move the late HPT allocation to CAS.
> This allows a guest to start in radix mode (when it's in real mode) and then
> change to hash, even if it is a legacy guest and will not call
> h_register_process_table().
> * Added an exported function to spapr.c to perform HPT allocation and adjust
> the vrma if necessary. This makes it possible to allocate the HPT from
> h_client_architecture_support() in spapr_hcall.c.
> 
>  hw/ppc/spapr.c         | 24 +++++++++++++++---------
>  hw/ppc/spapr_hcall.c   | 10 ++++++++++
>  include/hw/ppc/spapr.h |  1 +
>  3 files changed, 26 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index ca3812555f..dfee0f685f 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1123,6 +1123,17 @@ static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
>      }
>  }
>  
> +void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr)
> +{
> +    spapr_reallocate_hpt(spapr,
> +                     spapr_hpt_shift_for_ramsize(MACHINE(qdev_get_machine())->maxram_size),
> +                     &error_fatal);
> +    if (spapr->vrma_adjust) {
> +        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
> +                                          spapr->htab_shift);
> +    }
> +}
> +
>  static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
>  {
>      bool matched = false;
> @@ -1151,15 +1162,10 @@ static void ppc_spapr_reset(void)
>      /* Check for unknown sysbus devices */
>      foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL);
>  
> -    /* Allocate and/or reset the hash page table */
> -    spapr_reallocate_hpt(spapr,
> -                         spapr_hpt_shift_for_ramsize(machine->maxram_size),
> -                         &error_fatal);
> -
> -    /* Update the RMA size if necessary */
> -    if (spapr->vrma_adjust) {
> -        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
> -                                          spapr->htab_shift);
> +    /* If using KVM with radix mode available, VCPUs can be started
> +     * without a HPT because KVM will start them in radix mode. */
> +    if (!(kvm_enabled() && kvmppc_has_cap_mmu_radix())) {
> +        spapr_setup_hpt_and_vrma(spapr);
>      }
>  
>      qemu_devices_reset();
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 42d20e0b92..cea34073aa 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -1002,6 +1002,16 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
>      ov5_updates = spapr_ovec_new();
>      spapr->cas_reboot = spapr_ovec_diff(ov5_updates,
>                                          ov5_cas_old, spapr->ov5_cas);
> +    if (kvm_enabled()) {
> +        if (kvmppc_has_cap_mmu_radix()) {
> +            /* If the HPT hasn't yet been set up (see
> +             * ppc_spapr_reset()), and it's needed, do it now: */

I think it's a bit fragile to have here it explicitly mirror the logic
which determines whether the HPT is allocated early.  I'd prefer to
explicitly test here whether we have allocated an HPT - adding a flag,
if we have to.

> +            if (!spapr_ovec_test(ov5_updates, OV5_MMU_RADIX)) {
> +                /* legacy hash or new hash: */
> +                spapr_setup_hpt_and_vrma(spapr);
> +            }
> +        }
> +    }
>  
>      if (!spapr->cas_reboot) {
>          spapr->cas_reboot =
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index f9b17d860a..a30cbc485c 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -590,6 +590,7 @@ void spapr_dt_events(sPAPRMachineState *sm, void *fdt);
>  int spapr_h_cas_compose_response(sPAPRMachineState *sm,
>                                   target_ulong addr, target_ulong size,
>                                   sPAPROptionVector *ov5_updates);
> +void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr);
>  sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn);
>  void spapr_tce_table_enable(sPAPRTCETable *tcet,
>                              uint32_t page_shift, uint64_t bus_offset,
Suraj Jitindar Singh Feb. 28, 2017, 2:25 a.m. UTC | #2
On Tue, 2017-02-28 at 11:28 +1100, David Gibson wrote:
> s/HTP/HPT/ in subject line.
> 
> 
> On Thu, Feb 23, 2017 at 05:00:01PM +1100, Sam Bobroff wrote:
> > 
> > If QEMU is using KVM, and KVM is capable of running in radix mode,
> > guests can be run in real-mode without allocating a HPT (because
> > KVM
> > will use a minimal RPT). So in this case, we avoid creating the HPT
> > at reset time and later (during CAS) create it if it is necessary.
> > 
> > Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
> So, IIRC, we discussed previously that the logical way to do things
> was to, by default, delay HPT allocation until CAS time, and just do
> it at reset time for the case that needs it: hash host with KVM.
> 
> Did you hit a problem with that approach, or is there still work to
> be
> done here?

So what we're doing is assuming radix. Allocate hpt if hash host,
otherwise delay til CAS time and allocate only if guest chose hash.

> 
> > 
> > ---
> > v2:
> > 
> > * This patch has been mostly rewritten to move the late HPT
> > allocation to CAS.
> > This allows a guest to start in radix mode (when it's in real mode)
> > and then
> > change to hash, even if it is a legacy guest and will not call
> > h_register_process_table().
> > * Added an exported function to spapr.c to perform HPT allocation
> > and adjust
> > the vrma if necessary. This makes it possible to allocate the HPT
> > from
> > h_client_architecture_support() in spapr_hcall.c.
> > 
> >  hw/ppc/spapr.c         | 24 +++++++++++++++---------
> >  hw/ppc/spapr_hcall.c   | 10 ++++++++++
> >  include/hw/ppc/spapr.h |  1 +
> >  3 files changed, 26 insertions(+), 9 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index ca3812555f..dfee0f685f 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -1123,6 +1123,17 @@ static void
> > spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
> >      }
> >  }
> >  
> > +void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr)
> > +{
> > +    spapr_reallocate_hpt(spapr,
> > +                     spapr_hpt_shift_for_ramsize(MACHINE(qdev_get_
> > machine())->maxram_size),
> > +                     &error_fatal);
> > +    if (spapr->vrma_adjust) {
> > +        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
> > +                                          spapr->htab_shift);
> > +    }
> > +}
> > +
> >  static void find_unknown_sysbus_device(SysBusDevice *sbdev, void
> > *opaque)
> >  {
> >      bool matched = false;
> > @@ -1151,15 +1162,10 @@ static void ppc_spapr_reset(void)
> >      /* Check for unknown sysbus devices */
> >      foreach_dynamic_sysbus_device(find_unknown_sysbus_device,
> > NULL);
> >  
> > -    /* Allocate and/or reset the hash page table */
> > -    spapr_reallocate_hpt(spapr,
> > -                         spapr_hpt_shift_for_ramsize(machine-
> > >maxram_size),
> > -                         &error_fatal);
> > -
> > -    /* Update the RMA size if necessary */
> > -    if (spapr->vrma_adjust) {
> > -        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
> > -                                          spapr->htab_shift);
> > +    /* If using KVM with radix mode available, VCPUs can be
> > started
> > +     * without a HPT because KVM will start them in radix mode. */
> > +    if (!(kvm_enabled() && kvmppc_has_cap_mmu_radix())) {
> > +        spapr_setup_hpt_and_vrma(spapr);
> >      }
> >  
> >      qemu_devices_reset();
> > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> > index 42d20e0b92..cea34073aa 100644
> > --- a/hw/ppc/spapr_hcall.c
> > +++ b/hw/ppc/spapr_hcall.c
> > @@ -1002,6 +1002,16 @@ static target_ulong
> > h_client_architecture_support(PowerPCCPU *cpu,
> >      ov5_updates = spapr_ovec_new();
> >      spapr->cas_reboot = spapr_ovec_diff(ov5_updates,
> >                                          ov5_cas_old, spapr-
> > >ov5_cas);
> > +    if (kvm_enabled()) {
> > +        if (kvmppc_has_cap_mmu_radix()) {
> > +            /* If the HPT hasn't yet been set up (see
> > +             * ppc_spapr_reset()), and it's needed, do it now: */
> I think it's a bit fragile to have here it explicitly mirror the
> logic
> which determines whether the HPT is allocated early.  I'd prefer to
> explicitly test here whether we have allocated an HPT - adding a
> flag,
> if we have to.

We can use the MSB of patb_entry as that flag.

patb_entry & GUEST_RADIX == GUEST_RADIX -> radix, so assume a hpt
hasn't been allocated.

When we do allocate a hpt we know we're not radix, so set
patb_entry &= ~GUEST_RADIX;

Where GUEST_RADIX is the msb in patb_entry which indicates that a guest
is radix.

Essentially patb_entry & GUEST_RADIX cleared mean hash with hpt
allocated, patb_entry & GUEST_RADIX set means radix so assume an hpt
hasn't been allocated. On the hpt allocation path we clear GUEST_RADIX
in patb_entry and when we set GUEST_RADIX we free the hpt.

> 
> > 
> > +            if (!spapr_ovec_test(ov5_updates, OV5_MMU_RADIX)) {
> > +                /* legacy hash or new hash: */
> > +                spapr_setup_hpt_and_vrma(spapr);
> > +            }
> > +        }
> > +    }
> >  
> >      if (!spapr->cas_reboot) {
> >          spapr->cas_reboot =
> > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > index f9b17d860a..a30cbc485c 100644
> > --- a/include/hw/ppc/spapr.h
> > +++ b/include/hw/ppc/spapr.h
> > @@ -590,6 +590,7 @@ void spapr_dt_events(sPAPRMachineState *sm,
> > void *fdt);
> >  int spapr_h_cas_compose_response(sPAPRMachineState *sm,
> >                                   target_ulong addr, target_ulong
> > size,
> >                                   sPAPROptionVector *ov5_updates);
> > +void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr);
> >  sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t
> > liobn);
> >  void spapr_tce_table_enable(sPAPRTCETable *tcet,
> >                              uint32_t page_shift, uint64_t
> > bus_offset,
David Gibson Feb. 28, 2017, 3:19 a.m. UTC | #3
On Tue, Feb 28, 2017 at 01:25:17PM +1100, Suraj Jitindar Singh wrote:
> On Tue, 2017-02-28 at 11:28 +1100, David Gibson wrote:
> > s/HTP/HPT/ in subject line.
> > 
> > 
> > On Thu, Feb 23, 2017 at 05:00:01PM +1100, Sam Bobroff wrote:
> > > 
> > > If QEMU is using KVM, and KVM is capable of running in radix mode,
> > > guests can be run in real-mode without allocating a HPT (because
> > > KVM
> > > will use a minimal RPT). So in this case, we avoid creating the HPT
> > > at reset time and later (during CAS) create it if it is necessary.
> > > 
> > > Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
> > So, IIRC, we discussed previously that the logical way to do things
> > was to, by default, delay HPT allocation until CAS time, and just do
> > it at reset time for the case that needs it: hash host with KVM.
> > 
> > Did you hit a problem with that approach, or is there still work to
> > be
> > done here?
> 
> So what we're doing is assuming radix. Allocate hpt if hash host,
> otherwise delay til CAS time and allocate only if guest chose hash.
> 
> > 
> > > 
> > > ---
> > > v2:
> > > 
> > > * This patch has been mostly rewritten to move the late HPT
> > > allocation to CAS.
> > > This allows a guest to start in radix mode (when it's in real mode)
> > > and then
> > > change to hash, even if it is a legacy guest and will not call
> > > h_register_process_table().
> > > * Added an exported function to spapr.c to perform HPT allocation
> > > and adjust
> > > the vrma if necessary. This makes it possible to allocate the HPT
> > > from
> > > h_client_architecture_support() in spapr_hcall.c.
> > > 
> > >  hw/ppc/spapr.c         | 24 +++++++++++++++---------
> > >  hw/ppc/spapr_hcall.c   | 10 ++++++++++
> > >  include/hw/ppc/spapr.h |  1 +
> > >  3 files changed, 26 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index ca3812555f..dfee0f685f 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -1123,6 +1123,17 @@ static void
> > > spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
> > >      }
> > >  }
> > >  
> > > +void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr)
> > > +{
> > > +    spapr_reallocate_hpt(spapr,
> > > +                     spapr_hpt_shift_for_ramsize(MACHINE(qdev_get_
> > > machine())->maxram_size),
> > > +                     &error_fatal);
> > > +    if (spapr->vrma_adjust) {
> > > +        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
> > > +                                          spapr->htab_shift);
> > > +    }
> > > +}
> > > +
> > >  static void find_unknown_sysbus_device(SysBusDevice *sbdev, void
> > > *opaque)
> > >  {
> > >      bool matched = false;
> > > @@ -1151,15 +1162,10 @@ static void ppc_spapr_reset(void)
> > >      /* Check for unknown sysbus devices */
> > >      foreach_dynamic_sysbus_device(find_unknown_sysbus_device,
> > > NULL);
> > >  
> > > -    /* Allocate and/or reset the hash page table */
> > > -    spapr_reallocate_hpt(spapr,
> > > -                         spapr_hpt_shift_for_ramsize(machine-
> > > >maxram_size),
> > > -                         &error_fatal);
> > > -
> > > -    /* Update the RMA size if necessary */
> > > -    if (spapr->vrma_adjust) {
> > > -        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
> > > -                                          spapr->htab_shift);
> > > +    /* If using KVM with radix mode available, VCPUs can be
> > > started
> > > +     * without a HPT because KVM will start them in radix mode. */
> > > +    if (!(kvm_enabled() && kvmppc_has_cap_mmu_radix())) {
> > > +        spapr_setup_hpt_and_vrma(spapr);
> > >      }
> > >  
> > >      qemu_devices_reset();
> > > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> > > index 42d20e0b92..cea34073aa 100644
> > > --- a/hw/ppc/spapr_hcall.c
> > > +++ b/hw/ppc/spapr_hcall.c
> > > @@ -1002,6 +1002,16 @@ static target_ulong
> > > h_client_architecture_support(PowerPCCPU *cpu,
> > >      ov5_updates = spapr_ovec_new();
> > >      spapr->cas_reboot = spapr_ovec_diff(ov5_updates,
> > >                                          ov5_cas_old, spapr-
> > > >ov5_cas);
> > > +    if (kvm_enabled()) {
> > > +        if (kvmppc_has_cap_mmu_radix()) {
> > > +            /* If the HPT hasn't yet been set up (see
> > > +             * ppc_spapr_reset()), and it's needed, do it now: */
> > I think it's a bit fragile to have here it explicitly mirror the
> > logic
> > which determines whether the HPT is allocated early.  I'd prefer to
> > explicitly test here whether we have allocated an HPT - adding a
> > flag,
> > if we have to.
> 
> We can use the MSB of patb_entry as that flag.

Uh.. only for POWER9..

> patb_entry & GUEST_RADIX == GUEST_RADIX -> radix, so assume a hpt
> hasn't been allocated.
> 
> When we do allocate a hpt we know we're not radix, so set
> patb_entry &= ~GUEST_RADIX;
> 
> Where GUEST_RADIX is the msb in patb_entry which indicates that a guest
> is radix.
> 
> Essentially patb_entry & GUEST_RADIX cleared mean hash with hpt
> allocated, patb_entry & GUEST_RADIX set means radix so assume an hpt
> hasn't been allocated. On the hpt allocation path we clear GUEST_RADIX
> in patb_entry and when we set GUEST_RADIX we free the hpt.
> 
> > 
> > > 
> > > +            if (!spapr_ovec_test(ov5_updates, OV5_MMU_RADIX)) {
> > > +                /* legacy hash or new hash: */
> > > +                spapr_setup_hpt_and_vrma(spapr);
> > > +            }
> > > +        }
> > > +    }
> > >  
> > >      if (!spapr->cas_reboot) {
> > >          spapr->cas_reboot =
> > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > > index f9b17d860a..a30cbc485c 100644
> > > --- a/include/hw/ppc/spapr.h
> > > +++ b/include/hw/ppc/spapr.h
> > > @@ -590,6 +590,7 @@ void spapr_dt_events(sPAPRMachineState *sm,
> > > void *fdt);
> > >  int spapr_h_cas_compose_response(sPAPRMachineState *sm,
> > >                                   target_ulong addr, target_ulong
> > > size,
> > >                                   sPAPROptionVector *ov5_updates);
> > > +void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr);
> > >  sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t
> > > liobn);
> > >  void spapr_tce_table_enable(sPAPRTCETable *tcet,
> > >                              uint32_t page_shift, uint64_t
> > > bus_offset,
>
Suraj Jitindar Singh March 1, 2017, 5:17 a.m. UTC | #4
On Tue, 2017-02-28 at 14:19 +1100, David Gibson wrote:
> On Tue, Feb 28, 2017 at 01:25:17PM +1100, Suraj Jitindar Singh wrote:
> > 
> > On Tue, 2017-02-28 at 11:28 +1100, David Gibson wrote:
> > > 
> > > s/HTP/HPT/ in subject line.
> > > 
> > > 
> > > On Thu, Feb 23, 2017 at 05:00:01PM +1100, Sam Bobroff wrote:
> > > > 
> > > > 
> > > > If QEMU is using KVM, and KVM is capable of running in radix
> > > > mode,
> > > > guests can be run in real-mode without allocating a HPT
> > > > (because
> > > > KVM
> > > > will use a minimal RPT). So in this case, we avoid creating the
> > > > HPT
> > > > at reset time and later (during CAS) create it if it is
> > > > necessary.
> > > > 
> > > > Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
> > > So, IIRC, we discussed previously that the logical way to do
> > > things
> > > was to, by default, delay HPT allocation until CAS time, and just
> > > do
> > > it at reset time for the case that needs it: hash host with KVM.
> > > 
> > > Did you hit a problem with that approach, or is there still work
> > > to
> > > be
> > > done here?
> > So what we're doing is assuming radix. Allocate hpt if hash host,
> > otherwise delay til CAS time and allocate only if guest chose hash.
> > 
> > > 
> > > 
> > > > 
> > > > 
> > > > ---
> > > > v2:
> > > > 
> > > > * This patch has been mostly rewritten to move the late HPT
> > > > allocation to CAS.
> > > > This allows a guest to start in radix mode (when it's in real
> > > > mode)
> > > > and then
> > > > change to hash, even if it is a legacy guest and will not call
> > > > h_register_process_table().
> > > > * Added an exported function to spapr.c to perform HPT
> > > > allocation
> > > > and adjust
> > > > the vrma if necessary. This makes it possible to allocate the
> > > > HPT
> > > > from
> > > > h_client_architecture_support() in spapr_hcall.c.
> > > > 
> > > >  hw/ppc/spapr.c         | 24 +++++++++++++++---------
> > > >  hw/ppc/spapr_hcall.c   | 10 ++++++++++
> > > >  include/hw/ppc/spapr.h |  1 +
> > > >  3 files changed, 26 insertions(+), 9 deletions(-)
> > > > 
> > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > > index ca3812555f..dfee0f685f 100644
> > > > --- a/hw/ppc/spapr.c
> > > > +++ b/hw/ppc/spapr.c
> > > > @@ -1123,6 +1123,17 @@ static void
> > > > spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
> > > >      }
> > > >  }
> > > >  
> > > > +void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr)
> > > > +{
> > > > +    spapr_reallocate_hpt(spapr,
> > > > +                     spapr_hpt_shift_for_ramsize(MACHINE(qdev_
> > > > get_
> > > > machine())->maxram_size),
> > > > +                     &error_fatal);
> > > > +    if (spapr->vrma_adjust) {
> > > > +        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
> > > > +                                          spapr->htab_shift);
> > > > +    }
> > > > +}
> > > > +
> > > >  static void find_unknown_sysbus_device(SysBusDevice *sbdev,
> > > > void
> > > > *opaque)
> > > >  {
> > > >      bool matched = false;
> > > > @@ -1151,15 +1162,10 @@ static void ppc_spapr_reset(void)
> > > >      /* Check for unknown sysbus devices */
> > > >      foreach_dynamic_sysbus_device(find_unknown_sysbus_device,
> > > > NULL);
> > > >  
> > > > -    /* Allocate and/or reset the hash page table */
> > > > -    spapr_reallocate_hpt(spapr,
> > > > -                         spapr_hpt_shift_for_ramsize(machine-
> > > > > 
> > > > > maxram_size),
> > > > -                         &error_fatal);
> > > > -
> > > > -    /* Update the RMA size if necessary */
> > > > -    if (spapr->vrma_adjust) {
> > > > -        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
> > > > -                                          spapr->htab_shift);
> > > > +    /* If using KVM with radix mode available, VCPUs can be
> > > > started
> > > > +     * without a HPT because KVM will start them in radix
> > > > mode. */
> > > > +    if (!(kvm_enabled() && kvmppc_has_cap_mmu_radix())) {
> > > > +        spapr_setup_hpt_and_vrma(spapr);
> > > >      }
> > > >  
> > > >      qemu_devices_reset();
> > > > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> > > > index 42d20e0b92..cea34073aa 100644
> > > > --- a/hw/ppc/spapr_hcall.c
> > > > +++ b/hw/ppc/spapr_hcall.c
> > > > @@ -1002,6 +1002,16 @@ static target_ulong
> > > > h_client_architecture_support(PowerPCCPU *cpu,
> > > >      ov5_updates = spapr_ovec_new();
> > > >      spapr->cas_reboot = spapr_ovec_diff(ov5_updates,
> > > >                                          ov5_cas_old, spapr-
> > > > > 
> > > > > ov5_cas);
> > > > +    if (kvm_enabled()) {
> > > > +        if (kvmppc_has_cap_mmu_radix()) {
> > > > +            /* If the HPT hasn't yet been set up (see
> > > > +             * ppc_spapr_reset()), and it's needed, do it now:
> > > > */
> > > I think it's a bit fragile to have here it explicitly mirror the
> > > logic
> > > which determines whether the HPT is allocated early.  I'd prefer
> > > to
> > > explicitly test here whether we have allocated an HPT - adding a
> > > flag,
> > > if we have to.
> > We can use the MSB of patb_entry as that flag.
> Uh.. only for POWER9..

Well on <POWER9, patb_entry will always be zero, which we're taking to
mean a hpt has been allocated.

We could always just check spapr->htab == NULL...

> 
> > 
> > patb_entry & GUEST_RADIX == GUEST_RADIX -> radix, so assume a hpt
> > hasn't been allocated.
> > 
> > When we do allocate a hpt we know we're not radix, so set
> > patb_entry &= ~GUEST_RADIX;
> > 
> > Where GUEST_RADIX is the msb in patb_entry which indicates that a
> > guest
> > is radix.
> > 
> > Essentially patb_entry & GUEST_RADIX cleared mean hash with hpt
> > allocated, patb_entry & GUEST_RADIX set means radix so assume an
> > hpt
> > hasn't been allocated. On the hpt allocation path we clear
> > GUEST_RADIX
> > in patb_entry and when we set GUEST_RADIX we free the hpt.
> > 
> > > 
> > > 
> > > > 
> > > > 
> > > > +            if (!spapr_ovec_test(ov5_updates, OV5_MMU_RADIX))
> > > > {
> > > > +                /* legacy hash or new hash: */
> > > > +                spapr_setup_hpt_and_vrma(spapr);
> > > > +            }
> > > > +        }
> > > > +    }
> > > >  
> > > >      if (!spapr->cas_reboot) {
> > > >          spapr->cas_reboot =
> > > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > > > index f9b17d860a..a30cbc485c 100644
> > > > --- a/include/hw/ppc/spapr.h
> > > > +++ b/include/hw/ppc/spapr.h
> > > > @@ -590,6 +590,7 @@ void spapr_dt_events(sPAPRMachineState *sm,
> > > > void *fdt);
> > > >  int spapr_h_cas_compose_response(sPAPRMachineState *sm,
> > > >                                   target_ulong addr,
> > > > target_ulong
> > > > size,
> > > >                                   sPAPROptionVector
> > > > *ov5_updates);
> > > > +void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr);
> > > >  sPAPRTCETable *spapr_tce_new_table(DeviceState *owner,
> > > > uint32_t
> > > > liobn);
> > > >  void spapr_tce_table_enable(sPAPRTCETable *tcet,
> > > >                              uint32_t page_shift, uint64_t
> > > > bus_offset,
David Gibson March 3, 2017, 5:04 a.m. UTC | #5
On Wed, Mar 01, 2017 at 04:17:43PM +1100, Suraj Jitindar Singh wrote:
> On Tue, 2017-02-28 at 14:19 +1100, David Gibson wrote:
> > On Tue, Feb 28, 2017 at 01:25:17PM +1100, Suraj Jitindar Singh wrote:
> > > 
> > > On Tue, 2017-02-28 at 11:28 +1100, David Gibson wrote:
> > > > 
> > > > s/HTP/HPT/ in subject line.
> > > > 
> > > > 
> > > > On Thu, Feb 23, 2017 at 05:00:01PM +1100, Sam Bobroff wrote:
> > > > > 
> > > > > 
> > > > > If QEMU is using KVM, and KVM is capable of running in radix
> > > > > mode,
> > > > > guests can be run in real-mode without allocating a HPT
> > > > > (because
> > > > > KVM
> > > > > will use a minimal RPT). So in this case, we avoid creating the
> > > > > HPT
> > > > > at reset time and later (during CAS) create it if it is
> > > > > necessary.
> > > > > 
> > > > > Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
> > > > So, IIRC, we discussed previously that the logical way to do
> > > > things
> > > > was to, by default, delay HPT allocation until CAS time, and just
> > > > do
> > > > it at reset time for the case that needs it: hash host with KVM.
> > > > 
> > > > Did you hit a problem with that approach, or is there still work
> > > > to
> > > > be
> > > > done here?
> > > So what we're doing is assuming radix. Allocate hpt if hash host,
> > > otherwise delay til CAS time and allocate only if guest chose hash.
> > > 
> > > > 
> > > > 
> > > > > 
> > > > > 
> > > > > ---
> > > > > v2:
> > > > > 
> > > > > * This patch has been mostly rewritten to move the late HPT
> > > > > allocation to CAS.
> > > > > This allows a guest to start in radix mode (when it's in real
> > > > > mode)
> > > > > and then
> > > > > change to hash, even if it is a legacy guest and will not call
> > > > > h_register_process_table().
> > > > > * Added an exported function to spapr.c to perform HPT
> > > > > allocation
> > > > > and adjust
> > > > > the vrma if necessary. This makes it possible to allocate the
> > > > > HPT
> > > > > from
> > > > > h_client_architecture_support() in spapr_hcall.c.
> > > > > 
> > > > >  hw/ppc/spapr.c         | 24 +++++++++++++++---------
> > > > >  hw/ppc/spapr_hcall.c   | 10 ++++++++++
> > > > >  include/hw/ppc/spapr.h |  1 +
> > > > >  3 files changed, 26 insertions(+), 9 deletions(-)
> > > > > 
> > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > > > index ca3812555f..dfee0f685f 100644
> > > > > --- a/hw/ppc/spapr.c
> > > > > +++ b/hw/ppc/spapr.c
> > > > > @@ -1123,6 +1123,17 @@ static void
> > > > > spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
> > > > >      }
> > > > >  }
> > > > >  
> > > > > +void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr)
> > > > > +{
> > > > > +    spapr_reallocate_hpt(spapr,
> > > > > +                     spapr_hpt_shift_for_ramsize(MACHINE(qdev_
> > > > > get_
> > > > > machine())->maxram_size),
> > > > > +                     &error_fatal);
> > > > > +    if (spapr->vrma_adjust) {
> > > > > +        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
> > > > > +                                          spapr->htab_shift);
> > > > > +    }
> > > > > +}
> > > > > +
> > > > >  static void find_unknown_sysbus_device(SysBusDevice *sbdev,
> > > > > void
> > > > > *opaque)
> > > > >  {
> > > > >      bool matched = false;
> > > > > @@ -1151,15 +1162,10 @@ static void ppc_spapr_reset(void)
> > > > >      /* Check for unknown sysbus devices */
> > > > >      foreach_dynamic_sysbus_device(find_unknown_sysbus_device,
> > > > > NULL);
> > > > >  
> > > > > -    /* Allocate and/or reset the hash page table */
> > > > > -    spapr_reallocate_hpt(spapr,
> > > > > -                         spapr_hpt_shift_for_ramsize(machine-
> > > > > > 
> > > > > > maxram_size),
> > > > > -                         &error_fatal);
> > > > > -
> > > > > -    /* Update the RMA size if necessary */
> > > > > -    if (spapr->vrma_adjust) {
> > > > > -        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
> > > > > -                                          spapr->htab_shift);
> > > > > +    /* If using KVM with radix mode available, VCPUs can be
> > > > > started
> > > > > +     * without a HPT because KVM will start them in radix
> > > > > mode. */
> > > > > +    if (!(kvm_enabled() && kvmppc_has_cap_mmu_radix())) {
> > > > > +        spapr_setup_hpt_and_vrma(spapr);
> > > > >      }
> > > > >  
> > > > >      qemu_devices_reset();
> > > > > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> > > > > index 42d20e0b92..cea34073aa 100644
> > > > > --- a/hw/ppc/spapr_hcall.c
> > > > > +++ b/hw/ppc/spapr_hcall.c
> > > > > @@ -1002,6 +1002,16 @@ static target_ulong
> > > > > h_client_architecture_support(PowerPCCPU *cpu,
> > > > >      ov5_updates = spapr_ovec_new();
> > > > >      spapr->cas_reboot = spapr_ovec_diff(ov5_updates,
> > > > >                                          ov5_cas_old, spapr-
> > > > > > 
> > > > > > ov5_cas);
> > > > > +    if (kvm_enabled()) {
> > > > > +        if (kvmppc_has_cap_mmu_radix()) {
> > > > > +            /* If the HPT hasn't yet been set up (see
> > > > > +             * ppc_spapr_reset()), and it's needed, do it now:
> > > > > */
> > > > I think it's a bit fragile to have here it explicitly mirror the
> > > > logic
> > > > which determines whether the HPT is allocated early.  I'd prefer
> > > > to
> > > > explicitly test here whether we have allocated an HPT - adding a
> > > > flag,
> > > > if we have to.
> > > We can use the MSB of patb_entry as that flag.
> > Uh.. only for POWER9..
> 
> Well on <POWER9, patb_entry will always be zero, which we're taking to
> mean a hpt has been allocated.

Hrm, I suppose so.

> We could always just check spapr->htab == NULL...

Ah.. no.  Because it will be NULL when there is an HPT, but it's
inside KVM.

> 
> > 
> > > 
> > > patb_entry & GUEST_RADIX == GUEST_RADIX -> radix, so assume a hpt
> > > hasn't been allocated.
> > > 
> > > When we do allocate a hpt we know we're not radix, so set
> > > patb_entry &= ~GUEST_RADIX;
> > > 
> > > Where GUEST_RADIX is the msb in patb_entry which indicates that a
> > > guest
> > > is radix.
> > > 
> > > Essentially patb_entry & GUEST_RADIX cleared mean hash with hpt
> > > allocated, patb_entry & GUEST_RADIX set means radix so assume an
> > > hpt
> > > hasn't been allocated. On the hpt allocation path we clear
> > > GUEST_RADIX
> > > in patb_entry and when we set GUEST_RADIX we free the hpt.
> > > 
> > > > 
> > > > 
> > > > > 
> > > > > 
> > > > > +            if (!spapr_ovec_test(ov5_updates, OV5_MMU_RADIX))
> > > > > {
> > > > > +                /* legacy hash or new hash: */
> > > > > +                spapr_setup_hpt_and_vrma(spapr);
> > > > > +            }
> > > > > +        }
> > > > > +    }
> > > > >  
> > > > >      if (!spapr->cas_reboot) {
> > > > >          spapr->cas_reboot =
> > > > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > > > > index f9b17d860a..a30cbc485c 100644
> > > > > --- a/include/hw/ppc/spapr.h
> > > > > +++ b/include/hw/ppc/spapr.h
> > > > > @@ -590,6 +590,7 @@ void spapr_dt_events(sPAPRMachineState *sm,
> > > > > void *fdt);
> > > > >  int spapr_h_cas_compose_response(sPAPRMachineState *sm,
> > > > >                                   target_ulong addr,
> > > > > target_ulong
> > > > > size,
> > > > >                                   sPAPROptionVector
> > > > > *ov5_updates);
> > > > > +void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr);
> > > > >  sPAPRTCETable *spapr_tce_new_table(DeviceState *owner,
> > > > > uint32_t
> > > > > liobn);
> > > > >  void spapr_tce_table_enable(sPAPRTCETable *tcet,
> > > > >                              uint32_t page_shift, uint64_t
> > > > > bus_offset,
>
diff mbox

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index ca3812555f..dfee0f685f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1123,6 +1123,17 @@  static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
     }
 }
 
+void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr)
+{
+    spapr_reallocate_hpt(spapr,
+                     spapr_hpt_shift_for_ramsize(MACHINE(qdev_get_machine())->maxram_size),
+                     &error_fatal);
+    if (spapr->vrma_adjust) {
+        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
+                                          spapr->htab_shift);
+    }
+}
+
 static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
 {
     bool matched = false;
@@ -1151,15 +1162,10 @@  static void ppc_spapr_reset(void)
     /* Check for unknown sysbus devices */
     foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL);
 
-    /* Allocate and/or reset the hash page table */
-    spapr_reallocate_hpt(spapr,
-                         spapr_hpt_shift_for_ramsize(machine->maxram_size),
-                         &error_fatal);
-
-    /* Update the RMA size if necessary */
-    if (spapr->vrma_adjust) {
-        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
-                                          spapr->htab_shift);
+    /* If using KVM with radix mode available, VCPUs can be started
+     * without a HPT because KVM will start them in radix mode. */
+    if (!(kvm_enabled() && kvmppc_has_cap_mmu_radix())) {
+        spapr_setup_hpt_and_vrma(spapr);
     }
 
     qemu_devices_reset();
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 42d20e0b92..cea34073aa 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1002,6 +1002,16 @@  static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
     ov5_updates = spapr_ovec_new();
     spapr->cas_reboot = spapr_ovec_diff(ov5_updates,
                                         ov5_cas_old, spapr->ov5_cas);
+    if (kvm_enabled()) {
+        if (kvmppc_has_cap_mmu_radix()) {
+            /* If the HPT hasn't yet been set up (see
+             * ppc_spapr_reset()), and it's needed, do it now: */
+            if (!spapr_ovec_test(ov5_updates, OV5_MMU_RADIX)) {
+                /* legacy hash or new hash: */
+                spapr_setup_hpt_and_vrma(spapr);
+            }
+        }
+    }
 
     if (!spapr->cas_reboot) {
         spapr->cas_reboot =
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index f9b17d860a..a30cbc485c 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -590,6 +590,7 @@  void spapr_dt_events(sPAPRMachineState *sm, void *fdt);
 int spapr_h_cas_compose_response(sPAPRMachineState *sm,
                                  target_ulong addr, target_ulong size,
                                  sPAPROptionVector *ov5_updates);
+void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr);
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn);
 void spapr_tce_table_enable(sPAPRTCETable *tcet,
                             uint32_t page_shift, uint64_t bus_offset,