@@ -47,6 +47,76 @@ static void balloon_page(void *addr, int deflate)
#endif
}
+/*
+ * Apply madvise() to a run of contiguous guest pages.
+ *
+ * @base_pfn:   first guest page frame number of the run
+ * @page_shift: log2 of the page size used to express @base_pfn and @len
+ * @len:        number of pages in the run
+ * @deflate:    true  -> QEMU_MADV_WILLNEED (balloon deflate),
+ *              false -> QEMU_MADV_DONTNEED (balloon inflate)
+ *
+ * The guest-physical range is looked up in the system memory tree piece by
+ * piece.  When a lookup fails, the requested length is halved (aligned down
+ * to TARGET_PAGE_SIZE) and retried; if nothing can be found, a message is
+ * printed and one target page is skipped.
+ */
+static void do_balloon_bulk_pages(ram_addr_t base_pfn, int page_shift,
+                                  unsigned long len, bool deflate)
+{
+    ram_addr_t size, processed, chunk, base;
+
+    /* Shifting by a negative or >= type-width amount is undefined. */
+    if (page_shift < 0 || page_shift >= (int)(sizeof(ram_addr_t) * 8)) {
+        fprintf(stderr, "invalid balloon page shift %d, skip\n", page_shift);
+        return;
+    }
+
+    /* Widen before shifting so the page count is not truncated. */
+    size = (ram_addr_t)len << page_shift;
+    base = base_pfn << page_shift;
+
+    for (processed = 0; processed < size; processed += chunk) {
+        /*
+         * Start every iteration with an empty section so that a lookup
+         * result from a previous iteration is never reused.
+         */
+        MemoryRegionSection section = {.mr = NULL};
+
+        chunk = size - processed;
+        while (chunk >= TARGET_PAGE_SIZE) {
+            section = memory_region_find(get_system_memory(),
+                                         base + processed, chunk);
+            if (section.mr) {
+                break;
+            }
+            chunk = QEMU_ALIGN_DOWN(chunk / 2, TARGET_PAGE_SIZE);
+        }
+
+        if (section.mr &&
+            int128_nz(section.size) && memory_region_is_ram(section.mr)) {
+            void *addr = section.offset_within_region +
+                         memory_region_get_ram_ptr(section.mr);
+            uint64_t found = int128_get64(section.size);
+
+            /*
+             * memory_region_find() may return a section shorter than the
+             * requested length; never advise past the end of what it found.
+             */
+            if (found < chunk) {
+                chunk = found;
+            }
+            qemu_madvise(addr, chunk,
+                         deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED);
+        } else {
+            fprintf(stderr, "can't find the chunk, skip\n");
+            chunk = TARGET_PAGE_SIZE;
+        }
+
+        /* memory_region_find() took a reference on the region; drop it. */
+        if (section.mr) {
+            memory_region_unref(section.mr);
+        }
+    }
+}
+
+/*
+ * Walk a page bitmap and balloon every run of consecutive set bits.
+ *
+ * @base_pfn:   page frame number corresponding to bit 0 of @bitmap
+ * @bitmap:     bitmap in which each set bit marks a page to process
+ * @len:        length of @bitmap in bytes (len * 8 bits are scanned)
+ * @page_shift: passed through to do_balloon_bulk_pages()
+ * @deflate:    passed through to do_balloon_bulk_pages()
+ *
+ * Nothing is done when ballooning is inhibited, or when KVM is enabled
+ * without a synchronous MMU.  The body is compiled only on Linux hosts.
+ */
+static void balloon_bulk_pages(ram_addr_t base_pfn, unsigned long *bitmap,
+                               unsigned long len, int page_shift, bool deflate)
+{
+#if defined(__linux__)
+    unsigned long nbits = len * 8;
+    unsigned long pos = 0;
+
+    if (qemu_balloon_is_inhibited() ||
+        (kvm_enabled() && !kvm_has_sync_mmu())) {
+        return;
+    }
+
+    while (pos < nbits) {
+        unsigned long run_start, run_end, run_len;
+
+        run_start = find_next_bit(bitmap, nbits, pos);
+        if (run_start >= nbits) {
+            /* No more set bits. */
+            break;
+        }
+
+        /* The run ends at the next clear bit or at the end of the bitmap. */
+        run_end = find_next_zero_bit(bitmap, nbits, run_start + 1);
+        if (run_end > nbits) {
+            run_end = nbits;
+        }
+        run_len = run_end - run_start;
+
+        if (run_len) {
+            do_balloon_bulk_pages(base_pfn + run_start, page_shift,
+                                  run_len, deflate);
+        }
+        pos = run_start + run_len;
+    }
+#endif
+}
+
static const char *balloon_stat_names[] = {
[VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in",
[VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out",
@@ -78,6 +148,12 @@ static bool balloon_stats_supported(const VirtIOBalloon *s)
return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_STATS_VQ);
}
+/* Return true if the VIRTIO_BALLOON_F_PAGE_BITMAP feature is set on @s. */
+static bool balloon_page_bitmap_supported(const VirtIOBalloon *s)
+{
+    return virtio_vdev_has_feature(VIRTIO_DEVICE(s),
+                                   VIRTIO_BALLOON_F_PAGE_BITMAP);
+}
+
static bool balloon_stats_enabled(const VirtIOBalloon *s)
{
return s->stats_poll_interval > 0;
@@ -224,27 +300,66 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
return;
}
- while (iov_to_buf(elem->out_sg, elem->out_num, offset, &pfn, 4) == 4) {
- ram_addr_t pa;
- ram_addr_t addr;
- int p = virtio_ldl_p(vdev, &pfn);
-
- pa = (ram_addr_t) p << VIRTIO_BALLOON_PFN_SHIFT;
- offset += 4;
-
- /* FIXME: remove get_system_memory(), but how? */
- section = memory_region_find(get_system_memory(), pa, 1);
- if (!int128_nz(section.size) || !memory_region_is_ram(section.mr))
- continue;
-
- trace_virtio_balloon_handle_output(memory_region_name(section.mr),
- pa);
- /* Using memory_region_get_ram_ptr is bending the rules a bit, but
- should be OK because we only want a single page. */
- addr = section.offset_within_region;
- balloon_page(memory_region_get_ram_ptr(section.mr) + addr,
- !!(vq == s->dvq));
- memory_region_unref(section.mr);
+ if (balloon_page_bitmap_supported(s)) {
+ uint64_t base_pfn, tmp64, bmap_len;
+ uint32_t tmp32, page_shift, type;
+ unsigned long *bitmap;
+
+ iov_to_buf(elem->out_sg, elem->out_num, offset,
+ &tmp32, sizeof(uint32_t));
+ type = virtio_ldl_p(vdev, &tmp32);
+ offset += sizeof(uint32_t);
+ /* to suppress build warning*/
+ type = type;
+
+ iov_to_buf(elem->out_sg, elem->out_num, offset,
+ &tmp32, sizeof(uint32_t));
+ page_shift = virtio_ldl_p(vdev, &tmp32);
+ offset += sizeof(uint32_t);
+
+ iov_to_buf(elem->out_sg, elem->out_num, offset,
+ &tmp64, sizeof(uint64_t));
+ base_pfn = virtio_ldq_p(vdev, &tmp64);
+ offset += sizeof(uint64_t);
+
+ iov_to_buf(elem->out_sg, elem->out_num, offset,
+ &tmp64, sizeof(uint64_t));
+ bmap_len = virtio_ldq_p(vdev, &tmp64);
+ offset += sizeof(uint64_t);
+
+ bitmap = bitmap_new(bmap_len * 8);
+ iov_to_buf(elem->out_sg, elem->out_num, offset,
+ bitmap, bmap_len);
+ offset += bmap_len;
+
+ balloon_bulk_pages(base_pfn, bitmap, bmap_len,
+ page_shift, !!(vq == s->dvq));
+ g_free(bitmap);
+ } else {
+ while (iov_to_buf(elem->out_sg, elem->out_num, offset,
+ &pfn, 4) == 4) {
+ ram_addr_t pa;
+ ram_addr_t addr;
+ int p = virtio_ldl_p(vdev, &pfn);
+
+ pa = (ram_addr_t) p << VIRTIO_BALLOON_PFN_SHIFT;
+ offset += 4;
+
+ /* FIXME: remove get_system_memory(), but how? */
+ section = memory_region_find(get_system_memory(), pa, 1);
+ if (!int128_nz(section.size) ||
+ !memory_region_is_ram(section.mr))
+ continue;
+
+ trace_virtio_balloon_handle_output(memory_region_name(
+ section.mr), pa);
+ /* Using memory_region_get_ram_ptr is bending the rules a bit,
+ * but should be OK because we only want a single page. */
+ addr = section.offset_within_region;
+ balloon_page(memory_region_get_ram_ptr(section.mr) + addr,
+ !!(vq == s->dvq));
+ memory_region_unref(section.mr);
+ }
}
virtqueue_push(vq, elem, offset);
@@ -374,6 +489,7 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
f |= dev->host_features;
virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
+ virtio_add_feature(&f, VIRTIO_BALLOON_F_PAGE_BITMAP);
return f;
}
@@ -388,6 +504,7 @@ static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
{
VirtIOBalloon *dev = VIRTIO_BALLOON(opaque);
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+
ram_addr_t vm_ram_size = get_current_ram_size();
if (target > vm_ram_size) {
@@ -34,6 +34,7 @@
#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */
+#define VIRTIO_BALLOON_F_PAGE_BITMAP 3 /* Use page bitmap to send page info*/
/* Size of a PFN in the balloon interface. */
#define VIRTIO_BALLOON_PFN_SHIFT 12
The current virtio-balloon implementation is not very efficient. Below are the test results for the time spent inflating the balloon to 3GB on a 4GB idle guest: a. allocating pages (6.5%, 103ms) b. sending PFNs to host (68.3%, 787ms) c. address translation (6.1%, 96ms) d. madvise (19%, 300ms) It takes about 1577ms for the whole inflating process to complete. The test shows that the bottlenecks are stage b and stage d. If a bitmap is used to send the page info instead of the PFNs, the overhead spent on stage b can be reduced quite a lot. Furthermore, it's possible to do the address translation and the madvise on a bulk of pages, instead of the current page-by-page way, so the overhead of stage c and stage d can also be reduced a lot. This patch is the QEMU side implementation, which is intended to speed up the inflating & deflating process by adding a new feature to the virtio-balloon device. Now, inflating the balloon to 3GB on a 4GB idle guest takes only 210ms, which is about 8 times as fast as before. TODO: optimize stage a by allocating/freeing a chunk of pages instead of a single page at a time. v2 changes: change the interface Signed-off-by: Liang Li <liang.z.li@intel.com> --- hw/virtio/virtio-balloon.c | 159 ++++++++++++++++++++---- include/standard-headers/linux/virtio_balloon.h | 1 + 2 files changed, 139 insertions(+), 21 deletions(-)