@@ -247,31 +247,42 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
/* Called with rcu_read_lock held. */
static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
+ ram_addr_t *ram_addr, uint32_t *flag,
Error **errp)
{
bool ret, mr_has_discard_manager;
+ uint32_t mr_flag = 0;
- ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only,
+ ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, &mr_flag,
&mr_has_discard_manager, errp);
- if (ret && mr_has_discard_manager) {
- /*
- * Malicious VMs might trigger discarding of IOMMU-mapped memory. The
- * pages will remain pinned inside vfio until unmapped, resulting in a
- * higher memory consumption than expected. If memory would get
- * populated again later, there would be an inconsistency between pages
- * pinned by vfio and pages seen by QEMU. This is the case until
- * unmapped from the IOMMU (e.g., during device reset).
- *
- * With malicious guests, we really only care about pinning more memory
- * than expected. RLIMIT_MEMLOCK set for the user/process can never be
- * exceeded and can be used to mitigate this problem.
- */
- warn_report_once("Using vfio with vIOMMUs and coordinated discarding of"
- " RAM (e.g., virtio-mem) works, however, malicious"
- " guests can trigger pinning of more memory than"
- " intended via an IOMMU. It's possible to mitigate "
- " by setting/adjusting RLIMIT_MEMLOCK.");
+ if (ret) {
+ if (flag) {
+            /*
+             * Translate the generic MRF_* bits into VFIO_MRF_* bits.
+             */
+            *flag |= (mr_flag & MRF_READONLY) ? VFIO_MRF_READONLY : 0;
+            *flag |= (mr_flag & MRF_RAMDEV) ? VFIO_MRF_RAMDEV : 0;
+ }
+
+ if (mr_has_discard_manager) {
+ /*
+ * Malicious VMs might trigger discarding of IOMMU-mapped memory. The
+ * pages will remain pinned inside vfio until unmapped, resulting in a
+ * higher memory consumption than expected. If memory would get
+ * populated again later, there would be an inconsistency between pages
+ * pinned by vfio and pages seen by QEMU. This is the case until
+ * unmapped from the IOMMU (e.g., during device reset).
+ *
+ * With malicious guests, we really only care about pinning more memory
+ * than expected. RLIMIT_MEMLOCK set for the user/process can never be
+ * exceeded and can be used to mitigate this problem.
+ */
+ warn_report_once("Using vfio with vIOMMUs and coordinated discarding of"
+ " RAM (e.g., virtio-mem) works, however, malicious"
+ " guests can trigger pinning of more memory than"
+ " intended via an IOMMU. It's possible to mitigate "
+ " by setting/adjusting RLIMIT_MEMLOCK.");
+ }
}
return ret;
}
@@ -298,9 +309,9 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
rcu_read_lock();
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
- bool read_only;
+ uint32_t flag = 0;
- if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, &local_err)) {
+ if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &flag, &local_err)) {
error_report_err(local_err);
goto out;
}
@@ -313,7 +324,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
*/
ret = vfio_container_dma_map(bcontainer, iova,
iotlb->addr_mask + 1, vaddr,
- read_only);
+ flag);
if (ret) {
error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%s)",
@@ -363,6 +374,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
int128_get64(section->size);
hwaddr start, next, iova;
void *vaddr;
+ uint32_t flag = 0;
int ret;
/*
@@ -377,8 +389,10 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
section->offset_within_address_space;
vaddr = memory_region_get_ram_ptr(section->mr) + start;
+        flag |= section->readonly ? VFIO_MRF_READONLY : 0;
+        flag |= section->ram_device ? VFIO_MRF_RAMDEV : 0;
ret = vfio_container_dma_map(bcontainer, iova, next - start,
- vaddr, section->readonly);
+ vaddr, flag);
if (ret) {
/* Rollback */
vfio_ram_discard_notify_discard(rdl, section);
@@ -563,6 +577,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
hwaddr iova, end;
Int128 llend, llsize;
void *vaddr;
+ uint32_t flag = 0;
int ret;
Error *err = NULL;
@@ -661,8 +676,10 @@ static void vfio_listener_region_add(MemoryListener *listener,
}
}
+    flag |= section->readonly ? VFIO_MRF_READONLY : 0;
+    flag |= section->ram_device ? VFIO_MRF_RAMDEV : 0;
ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize),
- vaddr, section->readonly);
+ vaddr, flag);
if (ret) {
error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%s)",
@@ -17,12 +17,12 @@
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly)
+ void *vaddr, uint32_t flag)
{
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
g_assert(vioc->dma_map);
- return vioc->dma_map(bcontainer, iova, size, vaddr, readonly);
+ return vioc->dma_map(bcontainer, iova, size, vaddr, flag);
}
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
@@ -176,7 +176,7 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
}
static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
+ ram_addr_t size, void *vaddr, uint32_t flag)
{
const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
bcontainer);
@@ -188,9 +188,11 @@ static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
.size = size,
};
- if (!readonly) {
+ if (!(flag & VFIO_MRF_READONLY)) {
map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
}
+    map.flags |= (flag & VFIO_MRF_RAMDEV) ?
+        VFIO_DMA_MAP_FLAG_MMIO : 0;
/*
* Try the mapping, if it fails with EBUSY, unmap the region and try
@@ -28,14 +28,14 @@
#include "exec/ram_addr.h"
static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
+ ram_addr_t size, void *vaddr, uint32_t flag)
{
const VFIOIOMMUFDContainer *container =
container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
return iommufd_backend_map_dma(container->be,
container->ioas_id,
- iova, size, vaddr, readonly);
+ iova, size, vaddr, flag & VFIO_MRF_READONLY);
}
static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
@@ -226,15 +226,15 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
}
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
- bool read_only;
+        uint32_t flag = 0;
- if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL,
+ if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &flag, NULL,
&local_err)) {
error_report_err(local_err);
return;
}
ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova,
- iotlb->addr_mask + 1, vaddr, read_only);
+ iotlb->addr_mask + 1, vaddr, flag & MRF_READONLY);
if (ret) {
error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", "
"0x%" HWADDR_PRIx ", %p) = %d (%m)",
@@ -104,10 +104,15 @@ struct MemoryRegionSection {
hwaddr offset_within_region;
hwaddr offset_within_address_space;
bool readonly;
+ bool ram_device;
bool nonvolatile;
bool unmergeable;
};
+/* memory region flag */
+#define MRF_READONLY 0x1
+#define MRF_RAMDEV 0x2
+
typedef struct IOMMUTLBEntry IOMMUTLBEntry;
/* See address_space_translate: bit 0 is read, bit 1 is write. */
@@ -742,7 +747,7 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
* Return: true on success, else false setting @errp with error.
*/
bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
+ ram_addr_t *ram_addr, uint32_t *flag,
bool *mr_has_discard_manager, Error **errp);
typedef struct CoalescedMemoryRange CoalescedMemoryRange;
@@ -43,6 +43,10 @@ enum {
VFIO_DEVICE_TYPE_AP = 3,
};
+/* vfio memory region flag */
+#define VFIO_MRF_READONLY 0x1
+#define VFIO_MRF_RAMDEV 0x2
+
typedef struct VFIOMmap {
MemoryRegion mem;
void *mmap;
@@ -73,7 +73,7 @@ typedef struct VFIORamDiscardListener {
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly);
+ void *vaddr, uint32_t flag);
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb);
@@ -113,7 +113,7 @@ struct VFIOIOMMUClass {
bool (*setup)(VFIOContainerBase *bcontainer, Error **errp);
int (*dma_map)(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly);
+ void *vaddr, uint32_t flag);
int (*dma_unmap)(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb);
@@ -1560,6 +1560,7 @@ struct vfio_iommu_type1_dma_map {
#define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */
#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */
#define VFIO_DMA_MAP_FLAG_VADDR (1 << 2)
+#define VFIO_DMA_MAP_FLAG_MMIO (1 << 3)
__u64 vaddr; /* Process virtual address */
__u64 iova; /* IO virtual address */
__u64 size; /* Size of mapping (bytes) */
@@ -223,6 +223,7 @@ struct FlatRange {
uint8_t dirty_log_mask;
bool romd_mode;
bool readonly;
+ bool ram_device;
bool nonvolatile;
bool unmergeable;
};
@@ -240,6 +241,7 @@ section_from_flat_range(FlatRange *fr, FlatView *fv)
.size = fr->addr.size,
.offset_within_address_space = int128_get64(fr->addr.start),
.readonly = fr->readonly,
+ .ram_device = fr->ram_device,
.nonvolatile = fr->nonvolatile,
.unmergeable = fr->unmergeable,
};
@@ -252,6 +254,7 @@ static bool flatrange_equal(FlatRange *a, FlatRange *b)
&& a->offset_in_region == b->offset_in_region
&& a->romd_mode == b->romd_mode
&& a->readonly == b->readonly
+ && a->ram_device == b->ram_device
&& a->nonvolatile == b->nonvolatile
&& a->unmergeable == b->unmergeable;
}
@@ -657,6 +660,7 @@ static void render_memory_region(FlatView *view,
fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr);
fr.romd_mode = mr->romd_mode;
fr.readonly = readonly;
+ fr.ram_device = mr->ram_device;
fr.nonvolatile = nonvolatile;
fr.unmergeable = unmergeable;
@@ -2184,7 +2188,7 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
/* Called with rcu_read_lock held. */
bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
+ ram_addr_t *ram_addr, uint32_t *flag,
bool *mr_has_discard_manager, Error **errp)
{
MemoryRegion *mr;
@@ -2246,8 +2250,9 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
*ram_addr = memory_region_get_ram_addr(mr) + xlat;
}
- if (read_only) {
- *read_only = !writable || mr->readonly;
+ if (flag) {
+        *flag |= (!writable || mr->readonly) ? MRF_READONLY : 0;
+        *flag |= mr->ram_device ? MRF_RAMDEV : 0;
}
return true;
On confidential VM platforms, for example, AMD-SEV, P2P doesn't work. The underlying reason is that the IOMMU driver sets the encryption bit on the IOMMU page table pte entry; that is reasonable if the pte maps an iova to system memory. However, if the pte maps an iova to a device's mmio bar space, setting the encryption bit on the pte would cause the IOMMU to translate the iova to an incorrect bus address, rather than the mmio bar address. To fix the issue, the key point is to let the IOMMU driver know whether the target physical address is system memory or device mmio. VFIO allocates a virtual address and maps it to the device's mmio bar; the member @ram_device of MemoryRegion indicates that the memory region is for mmio. The patch passes this info to the VFIO DMA API, so the IOMMU driver can do the correct thing. Signed-off-by: Wencheng Yang <east.moutain.yang@gmail.com> --- hw/vfio/common.c | 67 +++++++++++++++++---------- hw/vfio/container-base.c | 4 +- hw/vfio/container.c | 6 ++- hw/vfio/iommufd.c | 4 +- hw/virtio/vhost-vdpa.c | 6 +-- include/exec/memory.h | 7 ++- include/hw/vfio/vfio-common.h | 4 ++ include/hw/vfio/vfio-container-base.h | 4 +- linux-headers/linux/vfio.h | 1 + system/memory.c | 11 +++-- 10 files changed, 74 insertions(+), 40 deletions(-)