diff mbox series

vfio/migration: fix dirty pages lost bug for ram beyond 3G

Message ID 1583487689-9813-1-git-send-email-yan.y.zhao@intel.com (mailing list archive)
State New, archived
Headers show
Series vfio/migration: fix dirty pages lost bug for ram beyond 3G | expand

Commit Message

Yan Zhao March 6, 2020, 9:41 a.m. UTC
the start address passing to
cpu_physical_memory_set_dirty_range() and
cpu_physical_memory_set_dirty_lebitmap() is the address within the
ram block plus ram block offset.

it's safe to set this start address to gpa if total memory is less than
3G, as ram block offset for pc.ram is 0. But if memory size is beyond
3G, gpa is not equal to the offset in its ram block.

e.g.
for gpa 0x100000000, its offset is actually 0xc0000000.
and for gpa 0x42f500000, its offset should be 0x3EF500000.

This fix is based on Kirti's VFIO live migration patch set v8.
https://lists.gnu.org/archive/html/qemu-devel/2019-08/msg05542.html
(for PATCH v8 11/13
https://lists.gnu.org/archive/html/qemu-devel/2019-08/msg05553.html
and PATCH v8 12/13
https://lists.gnu.org/archive/html/qemu-devel/2019-08/msg05554.html
}

The idea behind it should also be applied to other VFIO live migraiton
patch series below:
https://lists.gnu.org/archive/html/qemu-devel/2019-11/msg01763.html
(Kirti v9)
https://lists.gnu.org/archive/html/qemu-devel/2019-02/msg04912.html
(Yan)
https://lists.gnu.org/archive/html/qemu-devel/2018-04/msg01138.html
(Yulei RFC v4)
https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg05568.html
(Yulei RFC)

Cc: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
---
 hw/vfio/common.c              | 5 ++++-
 hw/vfio/migration.c           | 8 +++++---
 include/hw/vfio/vfio-common.h | 3 ++-
 3 files changed, 11 insertions(+), 5 deletions(-)

Comments

Alex Williamson March 25, 2020, 6:19 p.m. UTC | #1
On Fri,  6 Mar 2020 17:41:29 +0800
Yan Zhao <yan.y.zhao@intel.com> wrote:

> the start address passing to
> cpu_physical_memory_set_dirty_range() and
> cpu_physical_memory_set_dirty_lebitmap() is the address within the
> ram block plus ram block offset.
> 
> it's safe to set this start address to gpa if total memory is less than
> 3G, as ram block offset for pc.ram is 0. But if memory size is beyond
> 3G, gpa is not equal to the offset in its ram block.
> 
> e.g.
> for gpa 0x100000000, its offset is actually 0xc0000000.
> and for gpa 0x42f500000, its offset should be 0x3EF500000.
> 
> This fix is based on Kirti's VFIO live migration patch set v8.
> https://lists.gnu.org/archive/html/qemu-devel/2019-08/msg05542.html
> (for PATCH v8 11/13
> https://lists.gnu.org/archive/html/qemu-devel/2019-08/msg05553.html
> and PATCH v8 12/13
> https://lists.gnu.org/archive/html/qemu-devel/2019-08/msg05554.html
> }
> 
> The idea behind it should also be applied to other VFIO live migraiton
> patch series below:
> https://lists.gnu.org/archive/html/qemu-devel/2019-11/msg01763.html
> (Kirti v9)
> https://lists.gnu.org/archive/html/qemu-devel/2019-02/msg04912.html
> (Yan)
> https://lists.gnu.org/archive/html/qemu-devel/2018-04/msg01138.html
> (Yulei RFC v4)
> https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg05568.html
> (Yulei RFC)
> 
> Cc: Kevin Tian <kevin.tian@intel.com>
> Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
> ---
>  hw/vfio/common.c              | 5 ++++-
>  hw/vfio/migration.c           | 8 +++++---
>  include/hw/vfio/vfio-common.h | 3 ++-
>  3 files changed, 11 insertions(+), 5 deletions(-)

Thanks for this, Yan.

Kirti, I never saw an acknowledgment of this, can you confirm you've
incorporated this into your latest?  I do see we're making use of
memory_region_get_ram_addr() now.  Thanks,

Alex

 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 6f36b02..ba1a8ef 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -799,6 +799,7 @@ static void vfio_listerner_log_sync(MemoryListener *listener,
>          MemoryRegionSection *section)
>  {
>      uint64_t start_addr, size, pfn_count;
> +    uint64_t block_start;
>      VFIOGroup *group;
>      VFIODevice *vbasedev;
>  
> @@ -819,11 +820,13 @@ static void vfio_listerner_log_sync(MemoryListener *listener,
>      start_addr = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>      size = int128_get64(section->size);
>      pfn_count = size >> TARGET_PAGE_BITS;
> +    block_start = TARGET_PAGE_ALIGN(section->offset_within_region +
> +                             memory_region_get_ram_addr(section->mr));
>  
>      QLIST_FOREACH(group, &vfio_group_list, next) {
>          QLIST_FOREACH(vbasedev, &group->device_list, next) {
>              vfio_get_dirty_page_list(vbasedev, start_addr >> TARGET_PAGE_BITS,
> -                                     pfn_count, TARGET_PAGE_SIZE);
> +                                     pfn_count, TARGET_PAGE_SIZE, block_start);
>          }
>      }
>  }
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 640bea1..a19b957 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -279,7 +279,8 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
>  void vfio_get_dirty_page_list(VFIODevice *vbasedev,
>                                uint64_t start_pfn,
>                                uint64_t pfn_count,
> -                              uint64_t page_size)
> +                              uint64_t page_size,
> +                              uint64_t block_start)
>  {
>      VFIOMigration *migration = vbasedev->migration;
>      VFIORegion *region = &migration->region;
> @@ -293,6 +294,7 @@ void vfio_get_dirty_page_list(VFIODevice *vbasedev,
>      while (total_pfns > 0) {
>          uint64_t bitmap_size, data_offset = 0;
>          uint64_t start = start_pfn + count;
> +        uint64_t block_start_seg = block_start + count * page_size;
>          void *buf = NULL;
>          bool buffer_mmaped = false;
>  
> @@ -341,7 +343,7 @@ void vfio_get_dirty_page_list(VFIODevice *vbasedev,
>              break;
>          } else if (copied_pfns == VFIO_DEVICE_DIRTY_PFNS_ALL) {
>              /* Mark all pages dirty for this range */
> -            cpu_physical_memory_set_dirty_range(start * page_size,
> +            cpu_physical_memory_set_dirty_range(block_start_seg,
>                                                  total_pfns * page_size,
>                                                  DIRTY_MEMORY_MIGRATION);
>              break;
> @@ -382,7 +384,7 @@ void vfio_get_dirty_page_list(VFIODevice *vbasedev,
>          }
>  
>          cpu_physical_memory_set_dirty_lebitmap((unsigned long *)buf,
> -                                               start * page_size,
> +                                               block_start_seg,
>                                                 copied_pfns);
>          count      += copied_pfns;
>          total_pfns -= copied_pfns;
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 41ff5eb..6d868fa 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -220,6 +220,7 @@ int vfio_spapr_remove_window(VFIOContainer *container,
>  int vfio_migration_probe(VFIODevice *vbasedev, Error **errp);
>  void vfio_migration_finalize(VFIODevice *vbasedev);
>  void vfio_get_dirty_page_list(VFIODevice *vbasedev, uint64_t start_pfn,
> -                               uint64_t pfn_count, uint64_t page_size);
> +                               uint64_t pfn_count, uint64_t page_size,
> +                               uint64_t block_start);
>  
>  #endif /* HW_VFIO_VFIO_COMMON_H */
Kirti Wankhede March 26, 2020, 8:11 p.m. UTC | #2
On 3/25/2020 11:49 PM, Alex Williamson wrote:
> On Fri,  6 Mar 2020 17:41:29 +0800
> Yan Zhao <yan.y.zhao@intel.com> wrote:
> 
>> the start address passing to
>> cpu_physical_memory_set_dirty_range() and
>> cpu_physical_memory_set_dirty_lebitmap() is the address within the
>> ram block plus ram block offset.
>>
>> it's safe to set this start address to gpa if total memory is less than
>> 3G, as ram block offset for pc.ram is 0. But if memory size is beyond
>> 3G, gpa is not equal to the offset in its ram block.
>>
>> e.g.
>> for gpa 0x100000000, its offset is actually 0xc0000000.
>> and for gpa 0x42f500000, its offset should be 0x3EF500000.
>>
>> This fix is based on Kirti's VFIO live migration patch set v8.
>> https://lists.gnu.org/archive/html/qemu-devel/2019-08/msg05542.html
>> (for PATCH v8 11/13
>> https://lists.gnu.org/archive/html/qemu-devel/2019-08/msg05553.html
>> and PATCH v8 12/13
>> https://lists.gnu.org/archive/html/qemu-devel/2019-08/msg05554.html
>> }
>>
>> The idea behind it should also be applied to other VFIO live migraiton
>> patch series below:
>> https://lists.gnu.org/archive/html/qemu-devel/2019-11/msg01763.html
>> (Kirti v9)
>> https://lists.gnu.org/archive/html/qemu-devel/2019-02/msg04912.html
>> (Yan)
>> https://lists.gnu.org/archive/html/qemu-devel/2018-04/msg01138.html
>> (Yulei RFC v4)
>> https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg05568.html
>> (Yulei RFC)
>>
>> Cc: Kevin Tian <kevin.tian@intel.com>
>> Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
>> ---
>>   hw/vfio/common.c              | 5 ++++-
>>   hw/vfio/migration.c           | 8 +++++---
>>   include/hw/vfio/vfio-common.h | 3 ++-
>>   3 files changed, 11 insertions(+), 5 deletions(-)
> 
> Thanks for this, Yan.
> 
> Kirti, I never saw an acknowledgment of this, can you confirm you've
> incorporated this into your latest?  I do see we're making use of
> memory_region_get_ram_addr() now.  Thanks,
> 

We do saw this in our testing and I already had it in QEMU patches 
during my testing.

Thanks,
Kirti


> Alex
> 
>   
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 6f36b02..ba1a8ef 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -799,6 +799,7 @@ static void vfio_listerner_log_sync(MemoryListener *listener,
>>           MemoryRegionSection *section)
>>   {
>>       uint64_t start_addr, size, pfn_count;
>> +    uint64_t block_start;
>>       VFIOGroup *group;
>>       VFIODevice *vbasedev;
>>   
>> @@ -819,11 +820,13 @@ static void vfio_listerner_log_sync(MemoryListener *listener,
>>       start_addr = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>>       size = int128_get64(section->size);
>>       pfn_count = size >> TARGET_PAGE_BITS;
>> +    block_start = TARGET_PAGE_ALIGN(section->offset_within_region +
>> +                             memory_region_get_ram_addr(section->mr));
>>   
>>       QLIST_FOREACH(group, &vfio_group_list, next) {
>>           QLIST_FOREACH(vbasedev, &group->device_list, next) {
>>               vfio_get_dirty_page_list(vbasedev, start_addr >> TARGET_PAGE_BITS,
>> -                                     pfn_count, TARGET_PAGE_SIZE);
>> +                                     pfn_count, TARGET_PAGE_SIZE, block_start);
>>           }
>>       }
>>   }
>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>> index 640bea1..a19b957 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
>> @@ -279,7 +279,8 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
>>   void vfio_get_dirty_page_list(VFIODevice *vbasedev,
>>                                 uint64_t start_pfn,
>>                                 uint64_t pfn_count,
>> -                              uint64_t page_size)
>> +                              uint64_t page_size,
>> +                              uint64_t block_start)
>>   {
>>       VFIOMigration *migration = vbasedev->migration;
>>       VFIORegion *region = &migration->region;
>> @@ -293,6 +294,7 @@ void vfio_get_dirty_page_list(VFIODevice *vbasedev,
>>       while (total_pfns > 0) {
>>           uint64_t bitmap_size, data_offset = 0;
>>           uint64_t start = start_pfn + count;
>> +        uint64_t block_start_seg = block_start + count * page_size;
>>           void *buf = NULL;
>>           bool buffer_mmaped = false;
>>   
>> @@ -341,7 +343,7 @@ void vfio_get_dirty_page_list(VFIODevice *vbasedev,
>>               break;
>>           } else if (copied_pfns == VFIO_DEVICE_DIRTY_PFNS_ALL) {
>>               /* Mark all pages dirty for this range */
>> -            cpu_physical_memory_set_dirty_range(start * page_size,
>> +            cpu_physical_memory_set_dirty_range(block_start_seg,
>>                                                   total_pfns * page_size,
>>                                                   DIRTY_MEMORY_MIGRATION);
>>               break;
>> @@ -382,7 +384,7 @@ void vfio_get_dirty_page_list(VFIODevice *vbasedev,
>>           }
>>   
>>           cpu_physical_memory_set_dirty_lebitmap((unsigned long *)buf,
>> -                                               start * page_size,
>> +                                               block_start_seg,
>>                                                  copied_pfns);
>>           count      += copied_pfns;
>>           total_pfns -= copied_pfns;
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 41ff5eb..6d868fa 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -220,6 +220,7 @@ int vfio_spapr_remove_window(VFIOContainer *container,
>>   int vfio_migration_probe(VFIODevice *vbasedev, Error **errp);
>>   void vfio_migration_finalize(VFIODevice *vbasedev);
>>   void vfio_get_dirty_page_list(VFIODevice *vbasedev, uint64_t start_pfn,
>> -                               uint64_t pfn_count, uint64_t page_size);
>> +                               uint64_t pfn_count, uint64_t page_size,
>> +                               uint64_t block_start);
>>   
>>   #endif /* HW_VFIO_VFIO_COMMON_H */
>
diff mbox series

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 6f36b02..ba1a8ef 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -799,6 +799,7 @@  static void vfio_listerner_log_sync(MemoryListener *listener,
         MemoryRegionSection *section)
 {
     uint64_t start_addr, size, pfn_count;
+    uint64_t block_start;
     VFIOGroup *group;
     VFIODevice *vbasedev;
 
@@ -819,11 +820,13 @@  static void vfio_listerner_log_sync(MemoryListener *listener,
     start_addr = TARGET_PAGE_ALIGN(section->offset_within_address_space);
     size = int128_get64(section->size);
     pfn_count = size >> TARGET_PAGE_BITS;
+    block_start = TARGET_PAGE_ALIGN(section->offset_within_region +
+                             memory_region_get_ram_addr(section->mr));
 
     QLIST_FOREACH(group, &vfio_group_list, next) {
         QLIST_FOREACH(vbasedev, &group->device_list, next) {
             vfio_get_dirty_page_list(vbasedev, start_addr >> TARGET_PAGE_BITS,
-                                     pfn_count, TARGET_PAGE_SIZE);
+                                     pfn_count, TARGET_PAGE_SIZE, block_start);
         }
     }
 }
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 640bea1..a19b957 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -279,7 +279,8 @@  static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
 void vfio_get_dirty_page_list(VFIODevice *vbasedev,
                               uint64_t start_pfn,
                               uint64_t pfn_count,
-                              uint64_t page_size)
+                              uint64_t page_size,
+                              uint64_t block_start)
 {
     VFIOMigration *migration = vbasedev->migration;
     VFIORegion *region = &migration->region;
@@ -293,6 +294,7 @@  void vfio_get_dirty_page_list(VFIODevice *vbasedev,
     while (total_pfns > 0) {
         uint64_t bitmap_size, data_offset = 0;
         uint64_t start = start_pfn + count;
+        uint64_t block_start_seg = block_start + count * page_size;
         void *buf = NULL;
         bool buffer_mmaped = false;
 
@@ -341,7 +343,7 @@  void vfio_get_dirty_page_list(VFIODevice *vbasedev,
             break;
         } else if (copied_pfns == VFIO_DEVICE_DIRTY_PFNS_ALL) {
             /* Mark all pages dirty for this range */
-            cpu_physical_memory_set_dirty_range(start * page_size,
+            cpu_physical_memory_set_dirty_range(block_start_seg,
                                                 total_pfns * page_size,
                                                 DIRTY_MEMORY_MIGRATION);
             break;
@@ -382,7 +384,7 @@  void vfio_get_dirty_page_list(VFIODevice *vbasedev,
         }
 
         cpu_physical_memory_set_dirty_lebitmap((unsigned long *)buf,
-                                               start * page_size,
+                                               block_start_seg,
                                                copied_pfns);
         count      += copied_pfns;
         total_pfns -= copied_pfns;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 41ff5eb..6d868fa 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -220,6 +220,7 @@  int vfio_spapr_remove_window(VFIOContainer *container,
 int vfio_migration_probe(VFIODevice *vbasedev, Error **errp);
 void vfio_migration_finalize(VFIODevice *vbasedev);
 void vfio_get_dirty_page_list(VFIODevice *vbasedev, uint64_t start_pfn,
-                               uint64_t pfn_count, uint64_t page_size);
+                               uint64_t pfn_count, uint64_t page_size,
+                               uint64_t block_start);
 
 #endif /* HW_VFIO_VFIO_COMMON_H */