diff mbox

[qemu,v15,07/17] spapr_iommu: Migrate full state

Message ID 1459762426-18440-8-git-send-email-aik@ozlabs.ru (mailing list archive)
State New, archived
Headers show

Commit Message

Alexey Kardashevskiy April 4, 2016, 9:33 a.m. UTC
The source guest could have reallocated the default TCE table, in which
case a bigger or smaller table is migrated. This adds reallocation in
post_load() if the default table size differs between source and destination.

This adds @bus_offset, @page_shift, @enabled to the migration stream.
These cannot change without dynamic DMA windows so no change in
behavior is expected now.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
Changes:
v15:
* squashed "migrate full state" into this
* added missing tcet->mig_nb_table initialization in spapr_tce_table_pre_save()
* instead of bumping the version, moved the extra parameters to a subsection

v14:
* new to the series
---
 hw/ppc/spapr_iommu.c   | 67 ++++++++++++++++++++++++++++++++++++++++++++++++--
 include/hw/ppc/spapr.h |  2 ++
 trace-events           |  2 ++
 3 files changed, 69 insertions(+), 2 deletions(-)

Comments

David Gibson April 5, 2016, 5:58 a.m. UTC | #1
On Mon, Apr 04, 2016 at 07:33:36PM +1000, Alexey Kardashevskiy wrote:
> The source guest could have reallocated the default TCE table and
> migrate bigger/smaller table. This adds reallocation in post_load()
> if the default table size is different on source and destination.
> 
> This adds @bus_offset, @page_shift, @enabled to the migration stream.
> These cannot change without dynamic DMA windows so no change in
> behavior is expected now.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

The mig_table stuff is kind of ugly, but I don't know of any better
way to do it with our current migration infrastructure.

> ---
> Changes:
> v15:
> * squashed "migrate full state" into this
> * added missing tcet->mig_nb_table initialization in spapr_tce_table_pre_save()
> * instead of bumping the version, moved extra parameters to subsection
> 
> v14:
> * new to the series
> ---
>  hw/ppc/spapr_iommu.c   | 67 ++++++++++++++++++++++++++++++++++++++++++++++++--
>  include/hw/ppc/spapr.h |  2 ++
>  trace-events           |  2 ++
>  3 files changed, 69 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> index 9bcd3f6..52b1e0d 100644
> --- a/hw/ppc/spapr_iommu.c
> +++ b/hw/ppc/spapr_iommu.c
> @@ -137,33 +137,96 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>      return ret;
>  }
>  
> +static void spapr_tce_table_pre_save(void *opaque)
> +{
> +    sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
> +
> +    tcet->mig_table = tcet->table;
> +    tcet->mig_nb_table = tcet->nb_table;
> +
> +    trace_spapr_iommu_pre_save(tcet->liobn, tcet->mig_nb_table,
> +                               tcet->bus_offset, tcet->page_shift);
> +}
> +
> +static void spapr_tce_table_do_enable(sPAPRTCETable *tcet);
> +static void spapr_tce_table_do_disable(sPAPRTCETable *tcet);
> +
>  static int spapr_tce_table_post_load(void *opaque, int version_id)
>  {
>      sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
> +    uint32_t old_nb_table = tcet->nb_table;
>  
>      if (tcet->vdev) {
>          spapr_vio_set_bypass(tcet->vdev, tcet->bypass);
>      }
>  
> +    if (tcet->enabled) {
> +        if (tcet->nb_table != tcet->mig_nb_table) {
> +            if (tcet->nb_table) {
> +                spapr_tce_table_do_disable(tcet);
> +            }
> +            tcet->nb_table = tcet->mig_nb_table;
> +            spapr_tce_table_do_enable(tcet);
> +        }
> +
> +        memcpy(tcet->table, tcet->mig_table,
> +               tcet->nb_table * sizeof(tcet->table[0]));
> +
> +        free(tcet->mig_table);
> +        tcet->mig_table = NULL;
> +    } else if (tcet->table) {
> +        /* Destination guest has a default table but source does not -> free */
> +        spapr_tce_table_do_disable(tcet);
> +    }
> +
> +    trace_spapr_iommu_post_load(tcet->liobn, old_nb_table, tcet->nb_table,
> +                                tcet->bus_offset, tcet->page_shift);
> +
>      return 0;
>  }
>  
> +static bool spapr_tce_table_ex_needed(void *opaque)
> +{
> +    sPAPRTCETable *tcet = opaque;
> +
> +    return tcet->bus_offset || tcet->page_shift != 0xC;
> +}
> +
> +static const VMStateDescription vmstate_spapr_tce_table_ex = {
> +    .name = "spapr_iommu_ex",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = spapr_tce_table_ex_needed,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_BOOL(enabled, sPAPRTCETable),
> +        VMSTATE_UINT64(bus_offset, sPAPRTCETable),
> +        VMSTATE_UINT32(page_shift, sPAPRTCETable),
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
>  static const VMStateDescription vmstate_spapr_tce_table = {
>      .name = "spapr_iommu",
>      .version_id = 2,
>      .minimum_version_id = 2,
> +    .pre_save = spapr_tce_table_pre_save,
>      .post_load = spapr_tce_table_post_load,
>      .fields      = (VMStateField []) {
>          /* Sanity check */
>          VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable),
> -        VMSTATE_UINT32_EQUAL(nb_table, sPAPRTCETable),
>  
>          /* IOMMU state */
> +        VMSTATE_UINT32(mig_nb_table, sPAPRTCETable),
>          VMSTATE_BOOL(bypass, sPAPRTCETable),
> -        VMSTATE_VARRAY_UINT32(table, sPAPRTCETable, nb_table, 0, vmstate_info_uint64, uint64_t),
> +        VMSTATE_VARRAY_UINT32_ALLOC(mig_table, sPAPRTCETable, mig_nb_table, 0,
> +                                    vmstate_info_uint64, uint64_t),
>  
>          VMSTATE_END_OF_LIST()
>      },
> +    .subsections = (const VMStateDescription*[]) {
> +        &vmstate_spapr_tce_table_ex,
> +        NULL
> +    }
>  };
>  
>  static MemoryRegionIOMMUOps spapr_iommu_ops = {
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 75b0b55..c1ea49c 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -545,6 +545,8 @@ struct sPAPRTCETable {
>      uint64_t bus_offset;
>      uint32_t page_shift;
>      uint64_t *table;
> +    uint32_t mig_nb_table;
> +    uint64_t *mig_table;
>      bool bypass;
>      bool need_vfio;
>      int fd;
> diff --git a/trace-events b/trace-events
> index 62dcbba..4335b9b 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -1431,6 +1431,8 @@ spapr_iommu_pci_indirect(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t i
>  spapr_iommu_pci_stuff(uint64_t liobn, uint64_t ioba, uint64_t tce_value, uint64_t npages, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tcevalue=0x%"PRIx64" npages=%"PRId64" ret=%"PRId64
>  spapr_iommu_xlate(uint64_t liobn, uint64_t ioba, uint64_t tce, unsigned perm, unsigned pgsize) "liobn=%"PRIx64" 0x%"PRIx64" -> 0x%"PRIx64" perm=%u mask=%x"
>  spapr_iommu_new_table(uint64_t liobn, void *table, int fd) "liobn=%"PRIx64" table=%p fd=%d"
> +spapr_iommu_pre_save(uint64_t liobn, uint32_t nb, uint64_t offs, uint32_t ps) "liobn=%"PRIx64" %"PRIx32" bus_offset=%"PRIx64" ps=%"PRIu32
> +spapr_iommu_post_load(uint64_t liobn, uint32_t pre_nb, uint32_t post_nb, uint64_t offs, uint32_t ps) "liobn=%"PRIx64" %"PRIx32" => %"PRIx32" bus_offset=%"PRIx64" ps=%"PRIu32
>  
>  # hw/ppc/ppc.c
>  ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)"
diff mbox

Patch

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 9bcd3f6..52b1e0d 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -137,33 +137,96 @@  static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
     return ret;
 }
 
+static void spapr_tce_table_pre_save(void *opaque)
+{
+    sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
+
+    tcet->mig_table = tcet->table;
+    tcet->mig_nb_table = tcet->nb_table;
+
+    trace_spapr_iommu_pre_save(tcet->liobn, tcet->mig_nb_table,
+                               tcet->bus_offset, tcet->page_shift);
+}
+
+static void spapr_tce_table_do_enable(sPAPRTCETable *tcet);
+static void spapr_tce_table_do_disable(sPAPRTCETable *tcet);
+
 static int spapr_tce_table_post_load(void *opaque, int version_id)
 {
     sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
+    uint32_t old_nb_table = tcet->nb_table;
 
     if (tcet->vdev) {
         spapr_vio_set_bypass(tcet->vdev, tcet->bypass);
     }
 
+    if (tcet->enabled) {
+        if (tcet->nb_table != tcet->mig_nb_table) {
+            if (tcet->nb_table) {
+                spapr_tce_table_do_disable(tcet);
+            }
+            tcet->nb_table = tcet->mig_nb_table;
+            spapr_tce_table_do_enable(tcet);
+        }
+
+        memcpy(tcet->table, tcet->mig_table,
+               tcet->nb_table * sizeof(tcet->table[0]));
+
+        free(tcet->mig_table);
+        tcet->mig_table = NULL;
+    } else if (tcet->table) {
+        /* Destination guest has a default table but source does not -> free */
+        spapr_tce_table_do_disable(tcet);
+    }
+
+    trace_spapr_iommu_post_load(tcet->liobn, old_nb_table, tcet->nb_table,
+                                tcet->bus_offset, tcet->page_shift);
+
     return 0;
 }
 
+static bool spapr_tce_table_ex_needed(void *opaque)
+{
+    sPAPRTCETable *tcet = opaque;
+
+    return tcet->bus_offset || tcet->page_shift != 0xC;
+}
+
+static const VMStateDescription vmstate_spapr_tce_table_ex = {
+    .name = "spapr_iommu_ex",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = spapr_tce_table_ex_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(enabled, sPAPRTCETable),
+        VMSTATE_UINT64(bus_offset, sPAPRTCETable),
+        VMSTATE_UINT32(page_shift, sPAPRTCETable),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 static const VMStateDescription vmstate_spapr_tce_table = {
     .name = "spapr_iommu",
     .version_id = 2,
     .minimum_version_id = 2,
+    .pre_save = spapr_tce_table_pre_save,
     .post_load = spapr_tce_table_post_load,
     .fields      = (VMStateField []) {
         /* Sanity check */
         VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable),
-        VMSTATE_UINT32_EQUAL(nb_table, sPAPRTCETable),
 
         /* IOMMU state */
+        VMSTATE_UINT32(mig_nb_table, sPAPRTCETable),
         VMSTATE_BOOL(bypass, sPAPRTCETable),
-        VMSTATE_VARRAY_UINT32(table, sPAPRTCETable, nb_table, 0, vmstate_info_uint64, uint64_t),
+        VMSTATE_VARRAY_UINT32_ALLOC(mig_table, sPAPRTCETable, mig_nb_table, 0,
+                                    vmstate_info_uint64, uint64_t),
 
         VMSTATE_END_OF_LIST()
     },
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_spapr_tce_table_ex,
+        NULL
+    }
 };
 
 static MemoryRegionIOMMUOps spapr_iommu_ops = {
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 75b0b55..c1ea49c 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -545,6 +545,8 @@  struct sPAPRTCETable {
     uint64_t bus_offset;
     uint32_t page_shift;
     uint64_t *table;
+    uint32_t mig_nb_table;
+    uint64_t *mig_table;
     bool bypass;
     bool need_vfio;
     int fd;
diff --git a/trace-events b/trace-events
index 62dcbba..4335b9b 100644
--- a/trace-events
+++ b/trace-events
@@ -1431,6 +1431,8 @@  spapr_iommu_pci_indirect(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t i
 spapr_iommu_pci_stuff(uint64_t liobn, uint64_t ioba, uint64_t tce_value, uint64_t npages, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tcevalue=0x%"PRIx64" npages=%"PRId64" ret=%"PRId64
 spapr_iommu_xlate(uint64_t liobn, uint64_t ioba, uint64_t tce, unsigned perm, unsigned pgsize) "liobn=%"PRIx64" 0x%"PRIx64" -> 0x%"PRIx64" perm=%u mask=%x"
 spapr_iommu_new_table(uint64_t liobn, void *table, int fd) "liobn=%"PRIx64" table=%p fd=%d"
+spapr_iommu_pre_save(uint64_t liobn, uint32_t nb, uint64_t offs, uint32_t ps) "liobn=%"PRIx64" %"PRIx32" bus_offset=%"PRIx64" ps=%"PRIu32
+spapr_iommu_post_load(uint64_t liobn, uint32_t pre_nb, uint32_t post_nb, uint64_t offs, uint32_t ps) "liobn=%"PRIx64" %"PRIx32" => %"PRIx32" bus_offset=%"PRIx64" ps=%"PRIu32
 
 # hw/ppc/ppc.c
 ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)"