@@ -42,6 +42,10 @@
#define OOM_VBALLOON_DEFAULT_PAGES 256
#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
+#define BALLOON_BMAP_SIZE (8 * PAGE_SIZE)
+#define PFNS_PER_BMAP (BALLOON_BMAP_SIZE * BITS_PER_BYTE)
+#define BALLOON_BMAP_COUNT 32
+
static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
module_param(oom_pages, int, S_IRUSR | S_IWUSR);
MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
@@ -67,6 +71,18 @@ struct virtio_balloon {
/* Number of balloon pages we've told the Host we're not using. */
unsigned int num_pages;
+ /* Pointer to the response header. */
+ void *resp_hdr;
+ /* Pointer to the start address of response data. */
+ unsigned long *resp_data;
+ /* Pointer offset of the response data. */
+ unsigned long resp_pos;
+ /* Bitmap and bitmap count used to tell the host the pages */
+ unsigned long *page_bitmap[BALLOON_BMAP_COUNT];
+ /* Number of split page bitmaps */
+ unsigned int nr_page_bmap;
+ /* Used to record the processed pfn range */
+ unsigned long min_pfn, max_pfn, start_pfn, end_pfn;
/*
* The pages we've told the Host we're not using are enqueued
* at vb_dev_info->pages list.
@@ -110,20 +126,227 @@ static void balloon_ack(struct virtqueue *vq)
wake_up(&vb->acked);
}
-static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
+static inline void init_bmap_pfn_range(struct virtio_balloon *vb)
{
- struct scatterlist sg;
+ vb->min_pfn = ULONG_MAX;
+ vb->max_pfn = 0;
+}
+
+static inline void update_bmap_pfn_range(struct virtio_balloon *vb,
+ struct page *page)
+{
+ unsigned long balloon_pfn = page_to_balloon_pfn(page);
+
+ vb->min_pfn = min(balloon_pfn, vb->min_pfn);
+ vb->max_pfn = max(balloon_pfn, vb->max_pfn);
+}
+
+static void extend_page_bitmap(struct virtio_balloon *vb)
+{
+ int i, bmap_count;
+ unsigned long bmap_len;
+
+ bmap_len = ALIGN(get_max_pfn(), BITS_PER_LONG) / BITS_PER_BYTE;
+ bmap_len = ALIGN(bmap_len, BALLOON_BMAP_SIZE);
+ bmap_count = min((int)(bmap_len / BALLOON_BMAP_SIZE),
+ BALLOON_BMAP_COUNT);
+
+ for (i = 1; i < bmap_count; i++) {
+ vb->page_bitmap[i] = kmalloc(BALLOON_BMAP_SIZE, GFP_KERNEL);
+ if (vb->page_bitmap[i])
+ vb->nr_page_bmap++;
+ else
+ break;
+ }
+}
+
+static void free_extended_page_bitmap(struct virtio_balloon *vb)
+{
+ int i, bmap_count = vb->nr_page_bmap;
+
+
+ for (i = 1; i < bmap_count; i++) {
+ kfree(vb->page_bitmap[i]);
+ vb->page_bitmap[i] = NULL;
+ vb->nr_page_bmap--;
+ }
+}
+
+static void kfree_page_bitmap(struct virtio_balloon *vb)
+{
+ int i;
+
+ for (i = 0; i < vb->nr_page_bmap; i++)
+ kfree(vb->page_bitmap[i]);
+}
+
+static void clear_page_bitmap(struct virtio_balloon *vb)
+{
+ int i;
+
+ for (i = 0; i < vb->nr_page_bmap; i++)
+ memset(vb->page_bitmap[i], 0, BALLOON_BMAP_SIZE);
+}
+
+static unsigned long do_set_resp_bitmap(struct virtio_balloon *vb,
+ unsigned long *bitmap, unsigned long base_pfn,
+ unsigned long pos, int nr_page)
+
+{
+ struct virtio_balloon_bmap_hdr *hdr;
+ unsigned long end, new_pos, new_end, nr_left, proccessed = 0;
+
+ new_pos = pos;
+ new_end = end = pos + nr_page;
+
+ if (pos % BITS_PER_LONG) {
+ unsigned long pos_s;
+
+ pos_s = rounddown(pos, BITS_PER_LONG);
+ hdr = (struct virtio_balloon_bmap_hdr *)(vb->resp_data
+ + vb->resp_pos);
+ hdr->head.start_pfn = base_pfn + pos_s;
+ hdr->head.page_shift = PAGE_SHIFT;
+ hdr->head.bmap_len = sizeof(unsigned long);
+ hdr->bmap[0] = cpu_to_virtio64(vb->vdev,
+ bitmap[pos_s / BITS_PER_LONG]);
+ vb->resp_pos += 2;
+ if (pos_s + BITS_PER_LONG >= end)
+ return roundup(end, BITS_PER_LONG) - pos;
+ new_pos = roundup(pos, BITS_PER_LONG);
+ }
+
+ if (end % BITS_PER_LONG) {
+ unsigned long pos_e;
+
+ pos_e = roundup(end, BITS_PER_LONG);
+ hdr = (struct virtio_balloon_bmap_hdr *)(vb->resp_data
+ + vb->resp_pos);
+ hdr->head.start_pfn = base_pfn + pos_e - BITS_PER_LONG;
+ hdr->head.page_shift = PAGE_SHIFT;
+ hdr->head.bmap_len = sizeof(unsigned long);
+ hdr->bmap[0] = bitmap[pos_e / BITS_PER_LONG - 1];
+ vb->resp_pos += 2;
+ if (new_pos + BITS_PER_LONG >= pos_e)
+ return pos_e - pos;
+ new_end = rounddown(end, BITS_PER_LONG);
+ }
+
+ nr_left = nr_page = new_end - new_pos;
+
+ while (proccessed < nr_page) {
+ int bulk, order;
+
+ order = get_order(nr_left << PAGE_SHIFT);
+ if ((1 << order) > nr_left)
+ order--;
+ hdr = (struct virtio_balloon_bmap_hdr *)(vb->resp_data
+ + vb->resp_pos);
+ hdr->head.start_pfn = base_pfn + new_pos + proccessed;
+ hdr->head.page_shift = order + PAGE_SHIFT;
+ hdr->head.bmap_len = 0;
+ bulk = 1 << order;
+ nr_left -= bulk;
+ proccessed += bulk;
+ vb->resp_pos++;
+ }
+
+ return roundup(end, BITS_PER_LONG) - pos;
+}
+
+static void send_resp_data(struct virtio_balloon *vb, struct virtqueue *vq,
+ bool busy_wait)
+{
+ struct scatterlist sg[2];
+ struct virtio_balloon_resp_hdr *hdr = vb->resp_hdr;
unsigned int len;
- sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
+ len = hdr->data_len = vb->resp_pos * sizeof(unsigned long);
+ sg_init_table(sg, 2);
+ sg_set_buf(&sg[0], hdr, sizeof(struct virtio_balloon_resp_hdr));
+ sg_set_buf(&sg[1], vb->resp_data, len);
+
+ if (virtqueue_add_outbuf(vq, sg, 2, vb, GFP_KERNEL) == 0) {
+ virtqueue_kick(vq);
+ if (busy_wait)
+ while (!virtqueue_get_buf(vq, &len)
+ && !virtqueue_is_broken(vq))
+ cpu_relax();
+ else
+ wait_event(vb->acked, virtqueue_get_buf(vq, &len));
+ vb->resp_pos = 0;
+ free_extended_page_bitmap(vb);
+ }
+}
- /* We should always be able to add one buffer to an empty queue. */
- virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
- virtqueue_kick(vq);
+static void set_bulk_pages(struct virtio_balloon *vb, struct virtqueue *vq,
+ unsigned long start_pfn, unsigned long *bitmap,
+ unsigned long len, bool busy_wait)
+{
+ unsigned long pos = 0, end = len * BITS_PER_BYTE;
+
+ while (pos < end) {
+ unsigned long one = find_next_bit(bitmap, end, pos);
+
+ if ((vb->resp_pos + 64) * sizeof(unsigned long) >
+ BALLOON_BMAP_SIZE)
+ send_resp_data(vb, vq, busy_wait);
+ if (one < end) {
+ unsigned long pages, zero;
+
+ zero = find_next_zero_bit(bitmap, end, one + 1);
+ if (zero >= end)
+ pages = end - one;
+ else
+ pages = zero - one;
+ if (pages) {
+ pages = do_set_resp_bitmap(vb, bitmap,
+ start_pfn, one, pages);
+ }
+ pos = one + pages;
+ } else
+ pos = one;
+ }
+}
- /* When host has read buffer, this completes via balloon_ack */
- wait_event(vb->acked, virtqueue_get_buf(vq, &len));
+static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
+{
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP)) {
+ int nr_pfn, nr_used_bmap, i;
+ unsigned long start_pfn, bmap_len;
+
+ start_pfn = vb->start_pfn;
+ nr_pfn = vb->end_pfn - start_pfn + 1;
+ nr_pfn = roundup(nr_pfn, BITS_PER_LONG);
+ nr_used_bmap = nr_pfn / PFNS_PER_BMAP;
+ if (nr_pfn % PFNS_PER_BMAP)
+ nr_used_bmap++;
+ bmap_len = nr_pfn / BITS_PER_BYTE;
+
+ for (i = 0; i < nr_used_bmap; i++) {
+ unsigned int bmap_size = BALLOON_BMAP_SIZE;
+
+ if (i + 1 == nr_used_bmap)
+ bmap_size = bmap_len - BALLOON_BMAP_SIZE * i;
+ set_bulk_pages(vb, vq, start_pfn + i * PFNS_PER_BMAP,
+ vb->page_bitmap[i], bmap_size, false);
+ }
+ if (vb->resp_pos > 0)
+ send_resp_data(vb, vq, false);
+ } else {
+ struct scatterlist sg;
+ unsigned int len;
+
+ sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
+ /* We should always be able to add one buffer to an
+ * empty queue
+ */
+ virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
+ virtqueue_kick(vq);
+ /* When host has read buffer, this completes via balloon_ack */
+ wait_event(vb->acked, virtqueue_get_buf(vq, &len));
+ }
}
static void set_page_pfns(struct virtio_balloon *vb,
@@ -138,13 +361,59 @@ static void set_page_pfns(struct virtio_balloon *vb,
page_to_balloon_pfn(page) + i);
}
-static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
+static void set_page_bitmap(struct virtio_balloon *vb,
+ struct list_head *pages, struct virtqueue *vq)
{
- struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
- unsigned num_allocated_pages;
+ unsigned long pfn, pfn_limit;
+ struct page *page;
+ bool found;
+ int bmap_idx;
+
+ vb->min_pfn = rounddown(vb->min_pfn, BITS_PER_LONG);
+ vb->max_pfn = roundup(vb->max_pfn, BITS_PER_LONG);
+ pfn_limit = PFNS_PER_BMAP * vb->nr_page_bmap;
+
+ for (pfn = vb->min_pfn; pfn < vb->max_pfn; pfn += pfn_limit) {
+ unsigned long end_pfn;
+
+ clear_page_bitmap(vb);
+ vb->start_pfn = pfn;
+ end_pfn = pfn;
+ found = false;
+ list_for_each_entry(page, pages, lru) {
+ unsigned long pos, balloon_pfn;
+
+ balloon_pfn = page_to_balloon_pfn(page);
+ if (balloon_pfn < pfn || balloon_pfn >= pfn + pfn_limit)
+ continue;
+ bmap_idx = (balloon_pfn - pfn) / PFNS_PER_BMAP;
+ pos = (balloon_pfn - pfn) % PFNS_PER_BMAP;
+ set_bit(pos, vb->page_bitmap[bmap_idx]);
+ if (balloon_pfn > end_pfn)
+ end_pfn = balloon_pfn;
+ found = true;
+ }
+ if (found) {
+ vb->end_pfn = end_pfn;
+ tell_host(vb, vq);
+ }
+ }
+}
- /* We can only do one array worth at a time. */
- num = min(num, ARRAY_SIZE(vb->pfns));
+static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num)
+{
+ struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
+ unsigned int num_allocated_pages;
+ bool use_bmap = virtio_has_feature(vb->vdev,
+ VIRTIO_BALLOON_F_PAGE_BITMAP);
+
+ if (use_bmap) {
+ if (vb->nr_page_bmap == 1)
+ extend_page_bitmap(vb);
+ init_bmap_pfn_range(vb);
+ } else
+ /* We can only do one array worth at a time. */
+ num = min(num, ARRAY_SIZE(vb->pfns));
mutex_lock(&vb->balloon_lock);
for (vb->num_pfns = 0; vb->num_pfns < num;
@@ -159,7 +428,10 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
msleep(200);
break;
}
- set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
+ if (use_bmap)
+ update_bmap_pfn_range(vb, page);
+ else
+ set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
if (!virtio_has_feature(vb->vdev,
VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
@@ -168,8 +440,13 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
num_allocated_pages = vb->num_pfns;
/* Did we get any? */
- if (vb->num_pfns != 0)
- tell_host(vb, vb->inflate_vq);
+ if (vb->num_pfns != 0) {
+ if (use_bmap)
+ set_page_bitmap(vb, &vb_dev_info->pages,
+ vb->inflate_vq);
+ else
+ tell_host(vb, vb->inflate_vq);
+ }
mutex_unlock(&vb->balloon_lock);
return num_allocated_pages;
@@ -189,15 +466,22 @@ static void release_pages_balloon(struct virtio_balloon *vb,
}
}
-static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
+static unsigned int leak_balloon(struct virtio_balloon *vb, size_t num)
{
- unsigned num_freed_pages;
+ unsigned int num_freed_pages;
struct page *page;
struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
LIST_HEAD(pages);
+ bool use_bmap = virtio_has_feature(vb->vdev,
+ VIRTIO_BALLOON_F_PAGE_BITMAP);
- /* We can only do one array worth at a time. */
- num = min(num, ARRAY_SIZE(vb->pfns));
+ if (use_bmap) {
+ if (vb->nr_page_bmap == 1)
+ extend_page_bitmap(vb);
+ init_bmap_pfn_range(vb);
+ } else
+ /* We can only do one array worth at a time. */
+ num = min(num, ARRAY_SIZE(vb->pfns));
mutex_lock(&vb->balloon_lock);
/* We can't release more pages than taken */
@@ -207,7 +491,10 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
page = balloon_page_dequeue(vb_dev_info);
if (!page)
break;
- set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
+ if (use_bmap)
+ update_bmap_pfn_range(vb, page);
+ else
+ set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
list_add(&page->lru, &pages);
vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
}
@@ -218,8 +505,12 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
* virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
* is true, we *have* to do it in this order
*/
- if (vb->num_pfns != 0)
- tell_host(vb, vb->deflate_vq);
+ if (vb->num_pfns != 0) {
+ if (use_bmap)
+ set_page_bitmap(vb, &pages, vb->deflate_vq);
+ else
+ tell_host(vb, vb->deflate_vq);
+ }
release_pages_balloon(vb, &pages);
mutex_unlock(&vb->balloon_lock);
return num_freed_pages;
@@ -431,6 +722,20 @@ static int init_vqs(struct virtio_balloon *vb)
}
#ifdef CONFIG_BALLOON_COMPACTION
+static void tell_host_one_page(struct virtio_balloon *vb,
+ struct virtqueue *vq, struct page *page)
+{
+ struct virtio_balloon_bmap_hdr *bmap_hdr;
+
+ bmap_hdr = (struct virtio_balloon_bmap_hdr *)(vb->resp_data
+ + vb->resp_pos);
+ bmap_hdr->head.start_pfn = page_to_pfn(page);
+ bmap_hdr->head.page_shift = PAGE_SHIFT;
+ bmap_hdr->head.bmap_len = 0;
+ vb->resp_pos++;
+ send_resp_data(vb, vq, false);
+}
+
/*
* virtballoon_migratepage - perform the balloon page migration on behalf of
* a compation thread. (called under page lock)
@@ -455,6 +760,8 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
struct virtio_balloon *vb = container_of(vb_dev_info,
struct virtio_balloon, vb_dev_info);
unsigned long flags;
+ bool use_bmap = virtio_has_feature(vb->vdev,
+ VIRTIO_BALLOON_F_PAGE_BITMAP);
/*
* In order to avoid lock contention while migrating pages concurrently
@@ -475,15 +782,23 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
vb_dev_info->isolated_pages--;
__count_vm_event(BALLOON_MIGRATE);
spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
- vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
- set_page_pfns(vb, vb->pfns, newpage);
- tell_host(vb, vb->inflate_vq);
+ if (use_bmap)
+ tell_host_one_page(vb, vb->inflate_vq, newpage);
+ else {
+ vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
+ set_page_pfns(vb, vb->pfns, newpage);
+ tell_host(vb, vb->inflate_vq);
+ }
/* balloon's page migration 2nd step -- deflate "page" */
balloon_page_delete(page);
- vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
- set_page_pfns(vb, vb->pfns, page);
- tell_host(vb, vb->deflate_vq);
+ if (use_bmap)
+ tell_host_one_page(vb, vb->deflate_vq, page);
+ else {
+ vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
+ set_page_pfns(vb, vb->pfns, page);
+ tell_host(vb, vb->deflate_vq);
+ }
mutex_unlock(&vb->balloon_lock);
@@ -533,6 +848,28 @@ static int virtballoon_probe(struct virtio_device *vdev)
spin_lock_init(&vb->stop_update_lock);
vb->stop_update = false;
vb->num_pages = 0;
+ vb->resp_hdr = kzalloc(sizeof(struct virtio_balloon_resp_hdr),
+ GFP_KERNEL);
+ /* Clear the feature bit if memory allocation fails */
+ if (!vb->resp_hdr)
+ __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_BITMAP);
+ else {
+ vb->page_bitmap[0] = kmalloc(BALLOON_BMAP_SIZE, GFP_KERNEL);
+ if (!vb->page_bitmap[0]) {
+ __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_BITMAP);
+ kfree(vb->resp_hdr);
+ } else {
+ vb->nr_page_bmap = 1;
+ vb->resp_data = kmalloc(BALLOON_BMAP_SIZE, GFP_KERNEL);
+ if (!vb->resp_data) {
+ __virtio_clear_bit(vdev,
+ VIRTIO_BALLOON_F_PAGE_BITMAP);
+ kfree(vb->page_bitmap[0]);
+ kfree(vb->resp_hdr);
+ }
+ }
+ }
+ vb->resp_pos = 0;
mutex_init(&vb->balloon_lock);
init_waitqueue_head(&vb->acked);
vb->vdev = vdev;
@@ -609,6 +946,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
remove_common(vb);
if (vb->vb_dev_info.inode)
iput(vb->vb_dev_info.inode);
+ kfree_page_bitmap(vb);
+ kfree(vb->resp_hdr);
kfree(vb);
}
@@ -647,6 +986,7 @@ static int virtballoon_restore(struct virtio_device *vdev)
VIRTIO_BALLOON_F_MUST_TELL_HOST,
VIRTIO_BALLOON_F_STATS_VQ,
VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
+ VIRTIO_BALLOON_F_PAGE_BITMAP,
};
static struct virtio_driver virtio_balloon_driver = {
The implementation of the current virtio-balloon is not very efficient, the time spends on different stages of inflating the balloon to 7GB of a 8GB idle guest: a. allocating pages (6.5%) b. sending PFNs to host (68.3%) c. address translation (6.1%) d. madvise (19%) It takes about 4126ms for the inflating process to complete. Debugging shows that the bottle neck are the stage b and stage d. If using a bitmap to send the page info instead of the PFNs, we can reduce the overhead in stage b quite a lot. Furthermore, we can do the address translation and call madvise() with a bulk of RAM pages, instead of the current page per page way, the overhead of stage c and stage d can also be reduced a lot. This patch is the kernel side implementation which is intended to speed up the inflating & deflating process by adding a new feature to the virtio-balloon device. With this new feature, inflating the balloon to 7GB of a 8GB idle guest only takes 590ms, the performance improvement is about 85%. TODO: optimize stage a by allocating/freeing a chunk of pages instead of a single page at a time. Signed-off-by: Liang Li <liang.z.li@intel.com> Suggested-by: Michael S. Tsirkin <mst@redhat.com> Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Cornelia Huck <cornelia.huck@de.ibm.com> Cc: Amit Shah <amit.shah@redhat.com> Cc: Dave Hansen <dave.hansen@intel.com> --- drivers/virtio/virtio_balloon.c | 398 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 369 insertions(+), 29 deletions(-)