@@ -51,6 +51,10 @@ MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
static struct vfsmount *balloon_mnt;
#endif
+/* Types of pages to chunk */
+#define PAGE_CHUNK_TYPE_BALLOON 0 /* Chunk of inflate/deflate pages */
+#define PAGE_CHNUK_UNUSED_PAGE 1 /* Chunk of unused pages */
+
/* The size of one page_bmap used to record inflated/deflated pages. */
#define VIRTIO_BALLOON_PAGE_BMAP_SIZE (8 * PAGE_SIZE)
/*
@@ -81,12 +85,25 @@ struct virtio_balloon_page_chunk {
unsigned long *page_bmap[VIRTIO_BALLOON_PAGE_BMAP_MAX_NUM];
};
+struct virtio_balloon_cmdq_unused_page {
+ struct virtio_balloon_cmdq_hdr hdr;
+ struct vring_desc *desc_table;
+ /* Number of added descriptors */
+ unsigned int num;
+};
+
+struct virtio_balloon_cmdq_stats {
+ struct virtio_balloon_cmdq_hdr hdr;
+ struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
+};
+
struct virtio_balloon {
struct virtio_device *vdev;
- struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
+ struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *cmd_vq;
/* The balloon servicing is delegated to a freezable workqueue. */
struct work_struct update_balloon_stats_work;
+ struct work_struct cmdq_handle_work;
struct work_struct update_balloon_size_work;
/* Prevent updating balloon when it is being canceled. */
@@ -115,8 +132,10 @@ struct virtio_balloon {
unsigned int num_pfns;
__virtio32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX];
- /* Memory statistics */
- struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
+ /* Cmdq msg buffer for memory statistics */
+ struct virtio_balloon_cmdq_stats cmdq_stats;
+ /* Cmdq msg buffer for reporting ununsed pages */
+ struct virtio_balloon_cmdq_unused_page cmdq_unused_page;
/* To register callback in oom notifier call chain */
struct notifier_block nb;
@@ -208,31 +227,77 @@ static void clear_page_bmap(struct virtio_balloon *vb,
VIRTIO_BALLOON_PAGE_BMAP_SIZE);
}
-static void send_page_chunks(struct virtio_balloon *vb, struct virtqueue *vq)
+static void send_page_chunks(struct virtio_balloon *vb, struct virtqueue *vq,
+ int type, bool busy_wait)
{
- unsigned int len, num;
- struct vring_desc *desc = vb->balloon_page_chunk.desc_table;
+ unsigned int len, *num, reset_num;
+ struct vring_desc *desc;
+
+ switch (type) {
+ case PAGE_CHUNK_TYPE_BALLOON:
+ desc = vb->balloon_page_chunk.desc_table;
+ num = &vb->balloon_page_chunk.chunk_num;
+ reset_num = 0;
+ break;
+ case PAGE_CHNUK_UNUSED_PAGE:
+ desc = vb->cmdq_unused_page.desc_table;
+ num = &vb->cmdq_unused_page.num;
+ /*
+ * The first desc is used for the cmdq_hdr, so chunks will be
+ * added from the second desc.
+ */
+ reset_num = 1;
+ break;
+ default:
+ dev_warn(&vb->vdev->dev, "%s: unknown page chunk type %d\n",
+ __func__, type);
+ return;
+ }
- num = vb->balloon_page_chunk.chunk_num;
- if (!virtqueue_indirect_desc_table_add(vq, desc, num)) {
+ if (!virtqueue_indirect_desc_table_add(vq, desc, *num)) {
virtqueue_kick(vq);
- wait_event(vb->acked, virtqueue_get_buf(vq, &len));
- vb->balloon_page_chunk.chunk_num = 0;
+ if (busy_wait)
+ while (!virtqueue_get_buf(vq, &len) &&
+ !virtqueue_is_broken(vq))
+ cpu_relax();
+ else
+ wait_event(vb->acked, virtqueue_get_buf(vq, &len));
+ /*
+ * Now, the descriptor have been delivered to the host. Reset
+ * the field in the structure that records the number of added
+ * descriptors, so that new added descriptor can be re-counted.
+ */
+ *num = reset_num;
}
}
/* Add a chunk to the buffer. */
static void add_one_chunk(struct virtio_balloon *vb, struct virtqueue *vq,
- u64 base_addr, u32 size)
+ int type, u64 base_addr, u32 size)
{
- unsigned int *num = &vb->balloon_page_chunk.chunk_num;
- struct vring_desc *desc = &vb->balloon_page_chunk.desc_table[*num];
+ unsigned int *num;
+ struct vring_desc *desc;
+
+ switch (type) {
+ case PAGE_CHUNK_TYPE_BALLOON:
+ num = &vb->balloon_page_chunk.chunk_num;
+ desc = &vb->balloon_page_chunk.desc_table[*num];
+ break;
+ case PAGE_CHNUK_UNUSED_PAGE:
+ num = &vb->cmdq_unused_page.num;
+ desc = &vb->cmdq_unused_page.desc_table[*num];
+ break;
+ default:
+ dev_warn(&vb->vdev->dev, "%s: chunk %d of unknown pages\n",
+ __func__, type);
+ return;
+ }
desc->addr = cpu_to_virtio64(vb->vdev, base_addr);
desc->len = cpu_to_virtio32(vb->vdev, size);
*num += 1;
if (*num == VIRTIO_BALLOON_MAX_PAGE_CHUNKS)
- send_page_chunks(vb, vq);
+ send_page_chunks(vb, vq, type, false);
}
static void convert_bmap_to_chunks(struct virtio_balloon *vb,
@@ -264,7 +329,8 @@ static void convert_bmap_to_chunks(struct virtio_balloon *vb,
chunk_base_addr = (pfn_start + next_one) <<
VIRTIO_BALLOON_PFN_SHIFT;
if (chunk_size) {
- add_one_chunk(vb, vq, chunk_base_addr, chunk_size);
+ add_one_chunk(vb, vq, PAGE_CHUNK_TYPE_BALLOON,
+ chunk_base_addr, chunk_size);
pos += next_zero + 1;
}
}
@@ -311,7 +377,7 @@ static void tell_host_from_page_bmap(struct virtio_balloon *vb,
pfn_num);
}
if (vb->balloon_page_chunk.chunk_num > 0)
- send_page_chunks(vb, vq);
+ send_page_chunks(vb, vq, PAGE_CHUNK_TYPE_BALLOON, false);
}
static void set_page_pfns(struct virtio_balloon *vb,
@@ -516,8 +582,8 @@ static inline void update_stat(struct virtio_balloon *vb, int idx,
u16 tag, u64 val)
{
BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
- vb->stats[idx].tag = cpu_to_virtio16(vb->vdev, tag);
- vb->stats[idx].val = cpu_to_virtio64(vb->vdev, val);
+ vb->cmdq_stats.stats[idx].tag = cpu_to_virtio16(vb->vdev, tag);
+ vb->cmdq_stats.stats[idx].val = cpu_to_virtio64(vb->vdev, val);
}
#define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT)
@@ -582,7 +648,8 @@ static void stats_handle_request(struct virtio_balloon *vb)
vq = vb->stats_vq;
if (!virtqueue_get_buf(vq, &len))
return;
- sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
+ sg_init_one(&sg, vb->cmdq_stats.stats,
+ sizeof(vb->cmdq_stats.stats[0]) * num_stats);
virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
virtqueue_kick(vq);
}
@@ -686,43 +753,216 @@ static void update_balloon_size_func(struct work_struct *work)
queue_work(system_freezable_wq, work);
}
+static void cmdq_handle_stats(struct virtio_balloon *vb)
+{
+ struct scatterlist sg;
+ unsigned int num_stats;
+
+ spin_lock(&vb->stop_update_lock);
+ if (!vb->stop_update) {
+ num_stats = update_balloon_stats(vb);
+ sg_init_one(&sg, &vb->cmdq_stats,
+ sizeof(struct virtio_balloon_cmdq_hdr) +
+ sizeof(struct virtio_balloon_stat) * num_stats);
+ virtqueue_add_outbuf(vb->cmd_vq, &sg, 1, vb, GFP_KERNEL);
+ virtqueue_kick(vb->cmd_vq);
+ }
+ spin_unlock(&vb->stop_update_lock);
+}
+
+/*
+ * The header part of the message buffer is given to the device to send a
+ * command to the driver.
+ */
+static void host_cmd_buf_add(struct virtio_balloon *vb,
+ struct virtio_balloon_cmdq_hdr *hdr)
+{
+ struct scatterlist sg;
+
+ hdr->flags = 0;
+ sg_init_one(&sg, hdr, VIRTIO_BALLOON_CMDQ_HDR_SIZE);
+
+ if (virtqueue_add_inbuf(vb->cmd_vq, &sg, 1, hdr, GFP_KERNEL) < 0) {
+ __virtio_clear_bit(vb->vdev,
+ VIRTIO_BALLOON_F_CMD_VQ);
+ dev_warn(&vb->vdev->dev, "%s: add miscq msg buf err\n",
+ __func__);
+ return;
+ }
+
+ virtqueue_kick(vb->cmd_vq);
+}
+
+static void cmdq_handle_unused_pages(struct virtio_balloon *vb)
+{
+ struct virtqueue *vq = vb->cmd_vq;
+ struct vring_desc *hdr_desc = &vb->cmdq_unused_page.desc_table[0];
+ unsigned long hdr_pa;
+ unsigned int order = 0, migratetype = 0;
+ struct zone *zone = NULL;
+ struct page *page = NULL;
+ u64 pfn;
+ int ret = 0;
+
+ /* Put the hdr to the first desc */
+ hdr_pa = virt_to_phys((void *)&vb->cmdq_unused_page.hdr);
+ hdr_desc->addr = cpu_to_virtio64(vb->vdev, hdr_pa);
+ hdr_desc->len = cpu_to_virtio32(vb->vdev,
+ sizeof(struct virtio_balloon_cmdq_hdr));
+ vb->cmdq_unused_page.num = 1;
+
+ for_each_populated_zone(zone) {
+ for (order = MAX_ORDER - 1; order > 0; order--) {
+ for (migratetype = 0; migratetype < MIGRATE_TYPES;
+ migratetype++) {
+ do {
+ ret = report_unused_page_block(zone,
+ order, migratetype, &page);
+ if (!ret) {
+ pfn = (u64)page_to_pfn(page);
+ add_one_chunk(vb, vq,
+ PAGE_CHNUK_UNUSED_PAGE,
+ pfn << VIRTIO_BALLOON_PFN_SHIFT,
+ (u64)(1 << order) *
+ VIRTIO_BALLOON_PAGES_PER_PAGE);
+ }
+ } while (!ret);
+ }
+ }
+ }
+
+ /* Set the cmd completion flag. */
+ vb->cmdq_unused_page.hdr.flags |=
+ cpu_to_le32(VIRTIO_BALLOON_CMDQ_F_COMPLETION);
+ send_page_chunks(vb, vq, PAGE_CHNUK_UNUSED_PAGE, true);
+}
+
+static void cmdq_handle(struct virtio_balloon *vb)
+{
+ struct virtqueue *vq;
+ struct virtio_balloon_cmdq_hdr *hdr;
+ unsigned int len;
+
+ vq = vb->cmd_vq;
+ while ((hdr = (struct virtio_balloon_cmdq_hdr *)
+ virtqueue_get_buf(vq, &len)) != NULL) {
+ switch (hdr->cmd) {
+ case VIRTIO_BALLOON_CMDQ_REPORT_STATS:
+ cmdq_handle_stats(vb);
+ break;
+ case VIRTIO_BALLOON_CMDQ_REPORT_UNUSED_PAGES:
+ cmdq_handle_unused_pages(vb);
+ break;
+ default:
+ dev_warn(&vb->vdev->dev, "%s: wrong cmd\n", __func__);
+ return;
+ }
+ /*
+ * Replenish all the command buffer to the device after a
+ * command is handled. This is for the convenience of the
+ * device to rewind the cmdq to get back all the command
+ * buffer after live migration.
+ */
+ host_cmd_buf_add(vb, &vb->cmdq_stats.hdr);
+ host_cmd_buf_add(vb, &vb->cmdq_unused_page.hdr);
+ }
+}
+
+static void cmdq_handle_work_func(struct work_struct *work)
+{
+ struct virtio_balloon *vb;
+
+ vb = container_of(work, struct virtio_balloon,
+ cmdq_handle_work);
+ cmdq_handle(vb);
+}
+
+static void cmdq_callback(struct virtqueue *vq)
+{
+ struct virtio_balloon *vb = vq->vdev->priv;
+
+ queue_work(system_freezable_wq, &vb->cmdq_handle_work);
+}
+
static int init_vqs(struct virtio_balloon *vb)
{
- struct virtqueue *vqs[3];
- vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
- static const char * const names[] = { "inflate", "deflate", "stats" };
- int err, nvqs;
+ struct virtqueue **vqs;
+ vq_callback_t **callbacks;
+ const char **names;
+ int err = -ENOMEM;
+ int nvqs;
+
+ /* Inflateq and deflateq are used unconditionally */
+ nvqs = 2;
+
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_CMD_VQ) ||
+ virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ))
+ nvqs++;
+
+ /* Allocate space for find_vqs parameters */
+ vqs = kcalloc(nvqs, sizeof(*vqs), GFP_KERNEL);
+ if (!vqs)
+ goto err_vq;
+ callbacks = kmalloc_array(nvqs, sizeof(*callbacks), GFP_KERNEL);
+ if (!callbacks)
+ goto err_callback;
+ names = kmalloc_array(nvqs, sizeof(*names), GFP_KERNEL);
+ if (!names)
+ goto err_names;
+
+ callbacks[0] = balloon_ack;
+ names[0] = "inflate";
+ callbacks[1] = balloon_ack;
+ names[1] = "deflate";
/*
- * We expect two virtqueues: inflate and deflate, and
- * optionally stat.
+ * The stats_vq is used only when cmdq is not supported (or disabled)
+ * by the device.
*/
- nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
- err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names,
- NULL);
- if (err)
- return err;
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_CMD_VQ)) {
+ callbacks[2] = cmdq_callback;
+ names[2] = "cmdq";
+ } else if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+ callbacks[2] = stats_request;
+ names[2] = "stats";
+ }
+ err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks,
+ names, NULL);
+ if (err)
+ goto err_find;
vb->inflate_vq = vqs[0];
vb->deflate_vq = vqs[1];
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_CMD_VQ)) {
+ vb->cmd_vq = vqs[2];
+ /* Prime the cmdq with the header buffer. */
+ host_cmd_buf_add(vb, &vb->cmdq_stats.hdr);
+ host_cmd_buf_add(vb, &vb->cmdq_unused_page.hdr);
+ } else if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
struct scatterlist sg;
- unsigned int num_stats;
- vb->stats_vq = vqs[2];
+ vb->stats_vq = vqs[2];
/*
* Prime this virtqueue with one buffer so the hypervisor can
* use it to signal us later (it can't be broken yet!).
*/
- num_stats = update_balloon_stats(vb);
-
- sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
+ sg_init_one(&sg, vb->cmdq_stats.stats,
+ sizeof(vb->cmdq_stats.stats));
if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL)
< 0)
BUG();
virtqueue_kick(vb->stats_vq);
}
- return 0;
+
+err_find:
+ kfree(names);
+err_names:
+ kfree(callbacks);
+err_callback:
+ kfree(vqs);
+err_vq:
+ return err;
}
#ifdef CONFIG_BALLOON_COMPACTION
@@ -730,7 +970,8 @@ static int init_vqs(struct virtio_balloon *vb)
static void tell_host_one_page(struct virtio_balloon *vb,
struct virtqueue *vq, struct page *page)
{
- add_one_chunk(vb, vq, page_to_pfn(page) << VIRTIO_BALLOON_PFN_SHIFT,
+ add_one_chunk(vb, vq, PAGE_CHUNK_TYPE_BALLOON,
+ page_to_pfn(page) << VIRTIO_BALLOON_PFN_SHIFT,
VIRTIO_BALLOON_PAGES_PER_PAGE);
}
@@ -865,6 +1106,40 @@ static int balloon_page_chunk_init(struct virtio_balloon *vb)
return -ENOMEM;
}
+/*
+ * Each type of command is handled one in-flight each time. So, we allocate
+ * one message buffer for each type of command. The header part of the message
+ * buffer will be offered to the device, so that the device can send a command
+ * using the corresponding command buffer to the driver later.
+ */
+static int cmdq_init(struct virtio_balloon *vb)
+{
+ vb->cmdq_unused_page.desc_table = alloc_indirect(vb->vdev,
+ VIRTIO_BALLOON_MAX_PAGE_CHUNKS,
+ GFP_KERNEL);
+ if (!vb->cmdq_unused_page.desc_table) {
+ dev_warn(&vb->vdev->dev, "%s: failed\n", __func__);
+ __virtio_clear_bit(vb->vdev,
+ VIRTIO_BALLOON_F_CMD_VQ);
+ return -ENOMEM;
+ }
+ vb->cmdq_unused_page.num = 0;
+
+ /*
+ * The header is initialized to let the device know which type of
+ * command buffer it receives. The device will later use a buffer
+ * according to the type of command that it needs to send.
+ */
+ vb->cmdq_stats.hdr.cmd = VIRTIO_BALLOON_CMDQ_REPORT_STATS;
+ vb->cmdq_stats.hdr.flags = 0;
+ vb->cmdq_unused_page.hdr.cmd = VIRTIO_BALLOON_CMDQ_REPORT_UNUSED_PAGES;
+ vb->cmdq_unused_page.hdr.flags = 0;
+
+ INIT_WORK(&vb->cmdq_handle_work, cmdq_handle_work_func);
+
+ return 0;
+}
+
static int virtballoon_validate(struct virtio_device *vdev)
{
struct virtio_balloon *vb = NULL;
@@ -883,6 +1158,11 @@ static int virtballoon_validate(struct virtio_device *vdev)
goto err_page_chunk;
}
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_CMD_VQ)) {
+ err = cmdq_init(vb);
+ if (err < 0)
+ goto err_vb;
+ }
return 0;
err_page_chunk:
@@ -902,7 +1182,10 @@ static int virtballoon_probe(struct virtio_device *vdev)
return -EINVAL;
}
- INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func);
+ if (!virtio_has_feature(vdev, VIRTIO_BALLOON_F_CMD_VQ) &&
+ virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ))
+ INIT_WORK(&vb->update_balloon_stats_work,
+ update_balloon_stats_func);
INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func);
spin_lock_init(&vb->stop_update_lock);
vb->stop_update = false;
@@ -980,6 +1263,7 @@ static void virtballoon_remove(struct virtio_device *vdev)
spin_unlock_irq(&vb->stop_update_lock);
cancel_work_sync(&vb->update_balloon_size_work);
cancel_work_sync(&vb->update_balloon_stats_work);
+ cancel_work_sync(&vb->cmdq_handle_work);
remove_common(vb);
free_page_bmap(vb);
@@ -1029,6 +1313,7 @@ static unsigned int features[] = {
VIRTIO_BALLOON_F_STATS_VQ,
VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
VIRTIO_BALLOON_F_PAGE_CHUNKS,
+ VIRTIO_BALLOON_F_CMD_VQ,
};
static struct virtio_driver virtio_balloon_driver = {
@@ -35,6 +35,7 @@
#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */
#define VIRTIO_BALLOON_F_PAGE_CHUNKS 3 /* Inflate/Deflate pages in chunks */
+#define VIRTIO_BALLOON_F_CMD_VQ 4 /* Command virtqueue */
/* Size of a PFN in the balloon interface. */
#define VIRTIO_BALLOON_PFN_SHIFT 12
@@ -83,4 +84,16 @@ struct virtio_balloon_stat {
__virtio64 val;
} __attribute__((packed));
+/* Use the memory of a vring_desc to place the cmdq header */
+#define VIRTIO_BALLOON_CMDQ_HDR_SIZE sizeof(struct vring_desc)
+
+struct virtio_balloon_cmdq_hdr {
+#define VIRTIO_BALLOON_CMDQ_REPORT_STATS 0
+#define VIRTIO_BALLOON_CMDQ_REPORT_UNUSED_PAGES 1
+ __le32 cmd;
+/* Flag to indicate the completion of handling a command */
+#define VIRTIO_BALLOON_CMDQ_F_COMPLETION 1
+ __le32 flags;
+};
+
#endif /* _LINUX_VIRTIO_BALLOON_H */
Add a new vq, cmdq, to handle requests between the device and driver. This patch implements two commands send from the device and handled in the driver. 1) cmd VIRTIO_BALLOON_CMDQ_REPORT_STATS: this command is used to report the guest memory statistics to the host. The stats_vq mechanism is not used when the cmdq mechanism is enabled. 2) cmd VIRTIO_BALLOON_CMDQ_REPORT_UNUSED_PAGES: this command is used to report the guest unused pages to the host. Signed-off-by: Wei Wang <wei.w.wang@intel.com> --- drivers/virtio/virtio_balloon.c | 363 ++++++++++++++++++++++++++++++++---- include/uapi/linux/virtio_balloon.h | 13 ++ 2 files changed, 337 insertions(+), 39 deletions(-)