diff mbox series

[RFCv2,5/8] vfio/iommufd: Implement VFIOIOMMUClass::query_dirty_bitmap support

Message ID 20240212135643.5858-6-joao.m.martins@oracle.com (mailing list archive)
State New, archived
Headers show
Series vfio/iommufd: IOMMUFD Dirty Tracking | expand

Commit Message

Joao Martins Feb. 12, 2024, 1:56 p.m. UTC
ioctl(iommufd, IOMMU_HWPT_GET_DIRTY_BITMAP, arg) is the UAPI
that fetches the bitmap that tells what was dirty in an IOVA
range.

A single bitmap is allocated and used across all the hwpts
sharing an IOAS which is then used in log_sync() to set Qemu
global bitmaps.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 backends/iommufd.c       | 24 ++++++++++++++++++++++++
 backends/trace-events    |  1 +
 hw/vfio/iommufd.c        | 30 ++++++++++++++++++++++++++++++
 include/sysemu/iommufd.h |  3 +++
 4 files changed, 58 insertions(+)

Comments

Avihai Horon Feb. 19, 2024, 9:30 a.m. UTC | #1
Hi Joao,

On 12/02/2024 15:56, Joao Martins wrote:
> External email: Use caution opening links or attachments
>
>
> ioctl(iommufd, IOMMU_HWPT_GET_DIRTY_BITMAP, arg) is the UAPI
> that fetches the bitmap that tells what was dirty in an IOVA
> range.
>
> A single bitmap is allocated and used across all the hwpts
> sharing an IOAS which is then used in log_sync() to set Qemu
> global bitmaps.
>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
>   backends/iommufd.c       | 24 ++++++++++++++++++++++++
>   backends/trace-events    |  1 +
>   hw/vfio/iommufd.c        | 30 ++++++++++++++++++++++++++++++
>   include/sysemu/iommufd.h |  3 +++
>   4 files changed, 58 insertions(+)
>
> diff --git a/backends/iommufd.c b/backends/iommufd.c
> index 954de61c2da0..dd676d493c37 100644
> --- a/backends/iommufd.c
> +++ b/backends/iommufd.c
> @@ -259,6 +259,30 @@ int iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id,
>       return !ret ? 0 : -errno;
>   }
>
> +int iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
> +                                     uint64_t iova, ram_addr_t size,
> +                                     uint64_t page_size, uint64_t *data)
> +{
> +    int ret;
> +    struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap = {
> +        .size = sizeof(get_dirty_bitmap),
> +        .hwpt_id = hwpt_id,
> +        .iova = iova, .length = size,
> +        .page_size = page_size, .data = (uintptr_t)data,

Member per line for readability?

> +    };
> +
> +    ret = ioctl(be->fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get_dirty_bitmap);
> +    trace_iommufd_backend_get_dirty_bitmap(be->fd, hwpt_id, iova, size,
> +                                           page_size, ret);
> +    if (ret) {
> +        error_report("IOMMU_HWPT_GET_DIRTY_BITMAP (iova: 0x%"PRIx64
> +                     " size: 0x%"PRIx64") failed: %s", iova,
> +                     size, strerror(errno));
> +    }
> +
> +    return !ret ? 0 : -errno;
> +}
> +
>   static const TypeInfo iommufd_backend_info = {
>       .name = TYPE_IOMMUFD_BACKEND,
>       .parent = TYPE_OBJECT,
> diff --git a/backends/trace-events b/backends/trace-events
> index feba2baca5f7..11a27cb114b6 100644
> --- a/backends/trace-events
> +++ b/backends/trace-events
> @@ -17,3 +17,4 @@ iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_
>   iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)"
>   iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
>   iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%d enable=%d (%d)"
> +iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%d iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)"

s/hwpt=%d/hwpt=%u

> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 361e659288fd..79b13bd262cc 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -25,6 +25,7 @@
>   #include "qemu/cutils.h"
>   #include "qemu/chardev_open.h"
>   #include "pci.h"
> +#include "exec/ram_addr.h"
>   #include "migration/migration.h"
>
>   static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
> @@ -142,6 +143,34 @@ err:
>       return ret;
>   }
>
> +static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
> +                                      VFIOBitmap *vbmap, uint64_t iova,
> +                                      uint64_t size)
> +{
> +    VFIOIOMMUFDContainer *container = container_of(bcontainer,
> +                                                   VFIOIOMMUFDContainer,
> +                                                   bcontainer);
> +    int ret;
> +    VFIOIOASHwpt *hwpt;
> +    unsigned long page_size;
> +
> +    if (!bcontainer->dirty_pages_supported) {

Do we need this check?
IIUC, if we got to iommufd_query_dirty_bitmap(), it means 
bcontainer->dirty_pages_supported is already true.

Thanks.

> +        return 0;
> +    }
> +
> +    page_size = qemu_real_host_page_size();
> +    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
> +        ret = iommufd_backend_get_dirty_bitmap(container->be, hwpt->hwpt_id,
> +                                               iova, size, page_size,
> +                                               vbmap->bitmap);
> +        if (ret) {
> +            break;
> +        }
> +    }
> +
> +    return ret;
> +}
> +
>   static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
>   {
>       long int ret = -ENOTTY;
> @@ -765,6 +794,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data)
>       vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
>       vioc->host_iommu_device_init = vfio_cdev_host_iommu_device_init;
>       vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking;
> +    vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap;
>   };
>
>   static const TypeInfo types[] = {
> diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
> index 562c189dd92c..ba19b7ea4c19 100644
> --- a/include/sysemu/iommufd.h
> +++ b/include/sysemu/iommufd.h
> @@ -55,5 +55,8 @@ int iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id,
>                                  void *data_ptr, uint32_t *out_hwpt);
>   int iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id,
>                                          bool start);
> +int iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
> +                                     uint64_t iova, ram_addr_t size,
> +                                     uint64_t page_size, uint64_t *data);
>
>   #endif
> --
> 2.39.3
>
Joao Martins Feb. 20, 2024, 10:59 a.m. UTC | #2
On 19/02/2024 09:30, Avihai Horon wrote:
> Hi Joao,
> 
> On 12/02/2024 15:56, Joao Martins wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> ioctl(iommufd, IOMMU_HWPT_GET_DIRTY_BITMAP, arg) is the UAPI
>> that fetches the bitmap that tells what was dirty in an IOVA
>> range.
>>
>> A single bitmap is allocated and used across all the hwpts
>> sharing an IOAS which is then used in log_sync() to set Qemu
>> global bitmaps.
>>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>>   backends/iommufd.c       | 24 ++++++++++++++++++++++++
>>   backends/trace-events    |  1 +
>>   hw/vfio/iommufd.c        | 30 ++++++++++++++++++++++++++++++
>>   include/sysemu/iommufd.h |  3 +++
>>   4 files changed, 58 insertions(+)
>>
>> diff --git a/backends/iommufd.c b/backends/iommufd.c
>> index 954de61c2da0..dd676d493c37 100644
>> --- a/backends/iommufd.c
>> +++ b/backends/iommufd.c
>> @@ -259,6 +259,30 @@ int iommufd_backend_set_dirty_tracking(IOMMUFDBackend
>> *be, uint32_t hwpt_id,
>>       return !ret ? 0 : -errno;
>>   }
>>
>> +int iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
>> +                                     uint64_t iova, ram_addr_t size,
>> +                                     uint64_t page_size, uint64_t *data)
>> +{
>> +    int ret;
>> +    struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap = {
>> +        .size = sizeof(get_dirty_bitmap),
>> +        .hwpt_id = hwpt_id,
>> +        .iova = iova, .length = size,
>> +        .page_size = page_size, .data = (uintptr_t)data,
> 
> Member per line for readability?
> 

Yeap

>> +    };
>> +
>> +    ret = ioctl(be->fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get_dirty_bitmap);
>> +    trace_iommufd_backend_get_dirty_bitmap(be->fd, hwpt_id, iova, size,
>> +                                           page_size, ret);
>> +    if (ret) {
>> +        error_report("IOMMU_HWPT_GET_DIRTY_BITMAP (iova: 0x%"PRIx64
>> +                     " size: 0x%"PRIx64") failed: %s", iova,
>> +                     size, strerror(errno));
>> +    }
>> +
>> +    return !ret ? 0 : -errno;
>> +}
>> +
>>   static const TypeInfo iommufd_backend_info = {
>>       .name = TYPE_IOMMUFD_BACKEND,
>>       .parent = TYPE_OBJECT,
>> diff --git a/backends/trace-events b/backends/trace-events
>> index feba2baca5f7..11a27cb114b6 100644
>> --- a/backends/trace-events
>> +++ b/backends/trace-events
>> @@ -17,3 +17,4 @@ iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id,
>> uint32_t pt_id, uint32_
>>   iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d
>> ioas=%d (%d)"
>>   iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d
>> id=%d (%d)"
>>   iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int
>> ret) " iommufd=%d hwpt=%d enable=%d (%d)"
>> +iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t
>> iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%d
>> iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)"
> 
> s/hwpt=%d/hwpt=%u
> 
/me nods

>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 361e659288fd..79b13bd262cc 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -25,6 +25,7 @@
>>   #include "qemu/cutils.h"
>>   #include "qemu/chardev_open.h"
>>   #include "pci.h"
>> +#include "exec/ram_addr.h"
>>   #include "migration/migration.h"
>>
>>   static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
>> @@ -142,6 +143,34 @@ err:
>>       return ret;
>>   }
>>
>> +static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
>> +                                      VFIOBitmap *vbmap, uint64_t iova,
>> +                                      uint64_t size)
>> +{
>> +    VFIOIOMMUFDContainer *container = container_of(bcontainer,
>> +                                                   VFIOIOMMUFDContainer,
>> +                                                   bcontainer);
>> +    int ret;
>> +    VFIOIOASHwpt *hwpt;
>> +    unsigned long page_size;
>> +
>> +    if (!bcontainer->dirty_pages_supported) {
> 
> Do we need this check?
> IIUC, if we got to iommufd_query_dirty_bitmap(), it means
> bcontainer->dirty_pages_supported is already true.
> 

Not quite. Look again at vfio_get_dirty_bitmap(); furthermore
vfio_container_query_dirty_bitmap() doesn't check for dirty_pages_supported.

I guess this check should be better placed into container-base class rather than
here.

> Thanks.
> 
>> +        return 0;
>> +    }
>> +
>> +    page_size = qemu_real_host_page_size();
>> +    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
>> +        ret = iommufd_backend_get_dirty_bitmap(container->be, hwpt->hwpt_id,
>> +                                               iova, size, page_size,
>> +                                               vbmap->bitmap);
>> +        if (ret) {
>> +            break;
>> +        }
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>>   static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
>>   {
>>       long int ret = -ENOTTY;
>> @@ -765,6 +794,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass
>> *klass, void *data)
>>       vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
>>       vioc->host_iommu_device_init = vfio_cdev_host_iommu_device_init;
>>       vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking;
>> +    vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap;
>>   };
>>
>>   static const TypeInfo types[] = {
>> diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
>> index 562c189dd92c..ba19b7ea4c19 100644
>> --- a/include/sysemu/iommufd.h
>> +++ b/include/sysemu/iommufd.h
>> @@ -55,5 +55,8 @@ int iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id,
>>                                  void *data_ptr, uint32_t *out_hwpt);
>>   int iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id,
>>                                          bool start);
>> +int iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
>> +                                     uint64_t iova, ram_addr_t size,
>> +                                     uint64_t page_size, uint64_t *data);
>>
>>   #endif
>> -- 
>> 2.39.3
>>
Avihai Horon Feb. 20, 2024, 12:52 p.m. UTC | #3
On 20/02/2024 12:59, Joao Martins wrote:
> External email: Use caution opening links or attachments
>
>
> On 19/02/2024 09:30, Avihai Horon wrote:
>> Hi Joao,
>>
>> On 12/02/2024 15:56, Joao Martins wrote:
>>> External email: Use caution opening links or attachments
>>>
>>>
>>> ioctl(iommufd, IOMMU_HWPT_GET_DIRTY_BITMAP, arg) is the UAPI
>>> that fetches the bitmap that tells what was dirty in an IOVA
>>> range.
>>>
>>> A single bitmap is allocated and used across all the hwpts
>>> sharing an IOAS which is then used in log_sync() to set Qemu
>>> global bitmaps.
>>>
>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>> ---
>>>    backends/iommufd.c       | 24 ++++++++++++++++++++++++
>>>    backends/trace-events    |  1 +
>>>    hw/vfio/iommufd.c        | 30 ++++++++++++++++++++++++++++++
>>>    include/sysemu/iommufd.h |  3 +++
>>>    4 files changed, 58 insertions(+)
>>>
>>> diff --git a/backends/iommufd.c b/backends/iommufd.c
>>> index 954de61c2da0..dd676d493c37 100644
>>> --- a/backends/iommufd.c
>>> +++ b/backends/iommufd.c
>>> @@ -259,6 +259,30 @@ int iommufd_backend_set_dirty_tracking(IOMMUFDBackend
>>> *be, uint32_t hwpt_id,
>>>        return !ret ? 0 : -errno;
>>>    }
>>>
>>> +int iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
>>> +                                     uint64_t iova, ram_addr_t size,
>>> +                                     uint64_t page_size, uint64_t *data)
>>> +{
>>> +    int ret;
>>> +    struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap = {
>>> +        .size = sizeof(get_dirty_bitmap),
>>> +        .hwpt_id = hwpt_id,
>>> +        .iova = iova, .length = size,
>>> +        .page_size = page_size, .data = (uintptr_t)data,
>> Member per line for readability?
>>
> Yeap
>
>>> +    };
>>> +
>>> +    ret = ioctl(be->fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get_dirty_bitmap);
>>> +    trace_iommufd_backend_get_dirty_bitmap(be->fd, hwpt_id, iova, size,
>>> +                                           page_size, ret);
>>> +    if (ret) {
>>> +        error_report("IOMMU_HWPT_GET_DIRTY_BITMAP (iova: 0x%"PRIx64
>>> +                     " size: 0x%"PRIx64") failed: %s", iova,
>>> +                     size, strerror(errno));
>>> +    }
>>> +
>>> +    return !ret ? 0 : -errno;
>>> +}
>>> +
>>>    static const TypeInfo iommufd_backend_info = {
>>>        .name = TYPE_IOMMUFD_BACKEND,
>>>        .parent = TYPE_OBJECT,
>>> diff --git a/backends/trace-events b/backends/trace-events
>>> index feba2baca5f7..11a27cb114b6 100644
>>> --- a/backends/trace-events
>>> +++ b/backends/trace-events
>>> @@ -17,3 +17,4 @@ iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id,
>>> uint32_t pt_id, uint32_
>>>    iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d
>>> ioas=%d (%d)"
>>>    iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d
>>> id=%d (%d)"
>>>    iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int
>>> ret) " iommufd=%d hwpt=%d enable=%d (%d)"
>>> +iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t
>>> iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%d
>>> iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)"
>> s/hwpt=%d/hwpt=%u
>>
> /me nods
>
>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>> index 361e659288fd..79b13bd262cc 100644
>>> --- a/hw/vfio/iommufd.c
>>> +++ b/hw/vfio/iommufd.c
>>> @@ -25,6 +25,7 @@
>>>    #include "qemu/cutils.h"
>>>    #include "qemu/chardev_open.h"
>>>    #include "pci.h"
>>> +#include "exec/ram_addr.h"
>>>    #include "migration/migration.h"
>>>
>>>    static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
>>> @@ -142,6 +143,34 @@ err:
>>>        return ret;
>>>    }
>>>
>>> +static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
>>> +                                      VFIOBitmap *vbmap, uint64_t iova,
>>> +                                      uint64_t size)
>>> +{
>>> +    VFIOIOMMUFDContainer *container = container_of(bcontainer,
>>> +                                                   VFIOIOMMUFDContainer,
>>> +                                                   bcontainer);
>>> +    int ret;
>>> +    VFIOIOASHwpt *hwpt;
>>> +    unsigned long page_size;
>>> +
>>> +    if (!bcontainer->dirty_pages_supported) {
>> Do we need this check?
>> IIUC, if we got to iommufd_query_dirty_bitmap(), it means
>> bcontainer->dirty_pages_supported is already true.
>>
> Not quite. Look again at vfio_get_dirty_bitmap(); furthermore
> vfio_container_query_dirty_bitmap() doesn't check for dirty_pages_supported.

vfio_get_dirty_bitmap() has this check at the beginning:

if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) {
     cpu_physical_memory_set_dirty_range(ram_addr, size,
                                         tcg_enabled() ? DIRTY_CLIENTS_ALL :
                                         DIRTY_CLIENTS_NOCODE);
     return 0;
}

So if bcontainer->dirty_pages_supported is false we will mark all dirty 
and return (and not call vfio_container_query_dirty_bitmap()).

Or am I missing something?

> I guess this check should be better placed into container-base class rather than
> here.
>
>> Thanks.
>>
>>> +        return 0;
>>> +    }
>>> +
>>> +    page_size = qemu_real_host_page_size();
>>> +    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
>>> +        ret = iommufd_backend_get_dirty_bitmap(container->be, hwpt->hwpt_id,
>>> +                                               iova, size, page_size,
>>> +                                               vbmap->bitmap);
>>> +        if (ret) {
>>> +            break;
>>> +        }
>>> +    }
>>> +
>>> +    return ret;
>>> +}
>>> +
>>>    static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
>>>    {
>>>        long int ret = -ENOTTY;
>>> @@ -765,6 +794,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass
>>> *klass, void *data)
>>>        vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
>>>        vioc->host_iommu_device_init = vfio_cdev_host_iommu_device_init;
>>>        vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking;
>>> +    vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap;
>>>    };
>>>
>>>    static const TypeInfo types[] = {
>>> diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
>>> index 562c189dd92c..ba19b7ea4c19 100644
>>> --- a/include/sysemu/iommufd.h
>>> +++ b/include/sysemu/iommufd.h
>>> @@ -55,5 +55,8 @@ int iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id,
>>>                                   void *data_ptr, uint32_t *out_hwpt);
>>>    int iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id,
>>>                                           bool start);
>>> +int iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
>>> +                                     uint64_t iova, ram_addr_t size,
>>> +                                     uint64_t page_size, uint64_t *data);
>>>
>>>    #endif
>>> --
>>> 2.39.3
>>>
Joao Martins Feb. 20, 2024, 1:17 p.m. UTC | #4
On 20/02/2024 12:52, Avihai Horon wrote:
> 
> On 20/02/2024 12:59, Joao Martins wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> On 19/02/2024 09:30, Avihai Horon wrote:
>>> Hi Joao,
>>>
>>> On 12/02/2024 15:56, Joao Martins wrote:
>>>> External email: Use caution opening links or attachments
>>>>
>>>>
>>>> ioctl(iommufd, IOMMU_HWPT_GET_DIRTY_BITMAP, arg) is the UAPI
>>>> that fetches the bitmap that tells what was dirty in an IOVA
>>>> range.
>>>>
>>>> A single bitmap is allocated and used across all the hwpts
>>>> sharing an IOAS which is then used in log_sync() to set Qemu
>>>> global bitmaps.
>>>>
>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>> ---
>>>>    backends/iommufd.c       | 24 ++++++++++++++++++++++++
>>>>    backends/trace-events    |  1 +
>>>>    hw/vfio/iommufd.c        | 30 ++++++++++++++++++++++++++++++
>>>>    include/sysemu/iommufd.h |  3 +++
>>>>    4 files changed, 58 insertions(+)
>>>>
>>>> diff --git a/backends/iommufd.c b/backends/iommufd.c
>>>> index 954de61c2da0..dd676d493c37 100644
>>>> --- a/backends/iommufd.c
>>>> +++ b/backends/iommufd.c
>>>> @@ -259,6 +259,30 @@ int iommufd_backend_set_dirty_tracking(IOMMUFDBackend
>>>> *be, uint32_t hwpt_id,
>>>>        return !ret ? 0 : -errno;
>>>>    }
>>>>
>>>> +int iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
>>>> +                                     uint64_t iova, ram_addr_t size,
>>>> +                                     uint64_t page_size, uint64_t *data)
>>>> +{
>>>> +    int ret;
>>>> +    struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap = {
>>>> +        .size = sizeof(get_dirty_bitmap),
>>>> +        .hwpt_id = hwpt_id,
>>>> +        .iova = iova, .length = size,
>>>> +        .page_size = page_size, .data = (uintptr_t)data,
>>> Member per line for readability?
>>>
>> Yeap
>>
>>>> +    };
>>>> +
>>>> +    ret = ioctl(be->fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get_dirty_bitmap);
>>>> +    trace_iommufd_backend_get_dirty_bitmap(be->fd, hwpt_id, iova, size,
>>>> +                                           page_size, ret);
>>>> +    if (ret) {
>>>> +        error_report("IOMMU_HWPT_GET_DIRTY_BITMAP (iova: 0x%"PRIx64
>>>> +                     " size: 0x%"PRIx64") failed: %s", iova,
>>>> +                     size, strerror(errno));
>>>> +    }
>>>> +
>>>> +    return !ret ? 0 : -errno;
>>>> +}
>>>> +
>>>>    static const TypeInfo iommufd_backend_info = {
>>>>        .name = TYPE_IOMMUFD_BACKEND,
>>>>        .parent = TYPE_OBJECT,
>>>> diff --git a/backends/trace-events b/backends/trace-events
>>>> index feba2baca5f7..11a27cb114b6 100644
>>>> --- a/backends/trace-events
>>>> +++ b/backends/trace-events
>>>> @@ -17,3 +17,4 @@ iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id,
>>>> uint32_t pt_id, uint32_
>>>>    iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d
>>>> ioas=%d (%d)"
>>>>    iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d
>>>> id=%d (%d)"
>>>>    iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int
>>>> ret) " iommufd=%d hwpt=%d enable=%d (%d)"
>>>> +iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t
>>>> iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%d
>>>> iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)"
>>> s/hwpt=%d/hwpt=%u
>>>
>> /me nods
>>
>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>> index 361e659288fd..79b13bd262cc 100644
>>>> --- a/hw/vfio/iommufd.c
>>>> +++ b/hw/vfio/iommufd.c
>>>> @@ -25,6 +25,7 @@
>>>>    #include "qemu/cutils.h"
>>>>    #include "qemu/chardev_open.h"
>>>>    #include "pci.h"
>>>> +#include "exec/ram_addr.h"
>>>>    #include "migration/migration.h"
>>>>
>>>>    static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr
>>>> iova,
>>>> @@ -142,6 +143,34 @@ err:
>>>>        return ret;
>>>>    }
>>>>
>>>> +static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
>>>> +                                      VFIOBitmap *vbmap, uint64_t iova,
>>>> +                                      uint64_t size)
>>>> +{
>>>> +    VFIOIOMMUFDContainer *container = container_of(bcontainer,
>>>> +                                                   VFIOIOMMUFDContainer,
>>>> +                                                   bcontainer);
>>>> +    int ret;
>>>> +    VFIOIOASHwpt *hwpt;
>>>> +    unsigned long page_size;
>>>> +
>>>> +    if (!bcontainer->dirty_pages_supported) {
>>> Do we need this check?
>>> IIUC, if we got to iommufd_query_dirty_bitmap(), it means
>>> bcontainer->dirty_pages_supported is already true.
>>>
>> Not quite. Look again at vfio_get_dirty_bitmap(); furthermore
>> vfio_container_query_dirty_bitmap() doesn't check for dirty_pages_supported.
> 
> vfio_get_dirty_bitmap() has this check at the beginning:
> 
> if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) {
>     cpu_physical_memory_set_dirty_range(ram_addr, size,
>                                         tcg_enabled() ? DIRTY_CLIENTS_ALL :
>                                         DIRTY_CLIENTS_NOCODE);
>     return 0;
> }
> 
> So if bcontainer->dirty_pages_supported is false we will mark all dirty and
> return (and not call vfio_container_query_dirty_bitmap()).
> 
> Or am I missing something?
> 

Nah, I just lacked coffee.

You're right, while I read that check there, I misread what
vfio_devices_all_device_dirty_tracking() returns. And if that returns false it
means we need IOMMU dirty tracking and it was already tested false, otherwise
device dirty tracking is used instead.

	Joao
diff mbox series

Patch

diff --git a/backends/iommufd.c b/backends/iommufd.c
index 954de61c2da0..dd676d493c37 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -259,6 +259,30 @@  int iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id,
     return !ret ? 0 : -errno;
 }
 
+int iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
+                                     uint64_t iova, ram_addr_t size,
+                                     uint64_t page_size, uint64_t *data)
+{
+    int ret;
+    struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap = {
+        .size = sizeof(get_dirty_bitmap),
+        .hwpt_id = hwpt_id,
+        .iova = iova, .length = size,
+        .page_size = page_size, .data = (uintptr_t)data,
+    };
+
+    ret = ioctl(be->fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get_dirty_bitmap);
+    trace_iommufd_backend_get_dirty_bitmap(be->fd, hwpt_id, iova, size,
+                                           page_size, ret);
+    if (ret) {
+        error_report("IOMMU_HWPT_GET_DIRTY_BITMAP (iova: 0x%"PRIx64
+                     " size: 0x%"PRIx64") failed: %s", iova,
+                     size, strerror(errno));
+    }
+
+    return !ret ? 0 : -errno;
+}
+
 static const TypeInfo iommufd_backend_info = {
     .name = TYPE_IOMMUFD_BACKEND,
     .parent = TYPE_OBJECT,
diff --git a/backends/trace-events b/backends/trace-events
index feba2baca5f7..11a27cb114b6 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -17,3 +17,4 @@  iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_
 iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)"
 iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
 iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%d enable=%d (%d)"
+iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%d iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)"
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 361e659288fd..79b13bd262cc 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -25,6 +25,7 @@ 
 #include "qemu/cutils.h"
 #include "qemu/chardev_open.h"
 #include "pci.h"
+#include "exec/ram_addr.h"
 #include "migration/migration.h"
 
 static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
@@ -142,6 +143,34 @@  err:
     return ret;
 }
 
+static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
+                                      VFIOBitmap *vbmap, uint64_t iova,
+                                      uint64_t size)
+{
+    VFIOIOMMUFDContainer *container = container_of(bcontainer,
+                                                   VFIOIOMMUFDContainer,
+                                                   bcontainer);
+    int ret;
+    VFIOIOASHwpt *hwpt;
+    unsigned long page_size;
+
+    if (!bcontainer->dirty_pages_supported) {
+        return 0;
+    }
+
+    page_size = qemu_real_host_page_size();
+    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
+        ret = iommufd_backend_get_dirty_bitmap(container->be, hwpt->hwpt_id,
+                                               iova, size, page_size,
+                                               vbmap->bitmap);
+        if (ret) {
+            break;
+        }
+    }
+
+    return ret;
+}
+
 static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
 {
     long int ret = -ENOTTY;
@@ -765,6 +794,7 @@  static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data)
     vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
     vioc->host_iommu_device_init = vfio_cdev_host_iommu_device_init;
     vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking;
+    vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap;
 };
 
 static const TypeInfo types[] = {
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 562c189dd92c..ba19b7ea4c19 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -55,5 +55,8 @@  int iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id,
                                void *data_ptr, uint32_t *out_hwpt);
 int iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id,
                                        bool start);
+int iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
+                                     uint64_t iova, ram_addr_t size,
+                                     uint64_t page_size, uint64_t *data);
 
 #endif