Message ID | 1499863221-16206-9-git-send-email-wei.w.wang@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Jul 12, 2017 at 08:40:21PM +0800, Wei Wang wrote: > Add a new vq, cmdq, to handle requests between the device and driver. > > This patch implements two commands sent from the device and handled in > the driver. > 1) VIRTIO_BALLOON_CMDQ_REPORT_STATS: this command is used to report > the guest memory statistics to the host. The stats_vq mechanism is not > used when the cmdq mechanism is enabled. > 2) VIRTIO_BALLOON_CMDQ_REPORT_UNUSED_PAGES: this command is used to > report the guest unused pages to the host. > > Since now we have a vq to handle multiple commands, we need to keep only > one vq operation at a time. Here, we change the existing START_USE() > and END_USE() to lock on each vq operation. > > Signed-off-by: Wei Wang <wei.w.wang@intel.com> > Signed-off-by: Liang Li <liang.z.li@intel.com> > --- > drivers/virtio/virtio_balloon.c | 245 ++++++++++++++++++++++++++++++++++-- > drivers/virtio/virtio_ring.c | 25 +++- > include/linux/virtio.h | 2 + > include/uapi/linux/virtio_balloon.h | 10 ++ > 4 files changed, 265 insertions(+), 17 deletions(-) > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > index aa4e7ec..ae91fbf 100644 > --- a/drivers/virtio/virtio_balloon.c > +++ b/drivers/virtio/virtio_balloon.c > @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; > > struct virtio_balloon { > struct virtio_device *vdev; > - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; > + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *cmd_vq; > > /* The balloon servicing is delegated to a freezable workqueue. */ > struct work_struct update_balloon_stats_work; > struct work_struct update_balloon_size_work; > + struct work_struct cmdq_handle_work; > > /* Prevent updating balloon when it is being canceled. 
*/ > spinlock_t stop_update_lock; > @@ -90,6 +91,12 @@ struct virtio_balloon { > /* Memory statistics */ > struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; > > + /* Cmdq msg buffer for memory statistics */ > + struct virtio_balloon_cmdq_hdr cmdq_stats_hdr; > + > + /* Cmdq msg buffer for reporting ununsed pages */ > + struct virtio_balloon_cmdq_hdr cmdq_unused_page_hdr; > + > /* To register callback in oom notifier call chain */ > struct notifier_block nb; > }; > @@ -485,25 +492,214 @@ static void update_balloon_size_func(struct work_struct *work) > queue_work(system_freezable_wq, work); > } > > +static unsigned int cmdq_hdr_add(struct virtqueue *vq, > + struct virtio_balloon_cmdq_hdr *hdr, > + bool in) > +{ > + unsigned int id = VIRTQUEUE_DESC_ID_INIT; > + uint64_t hdr_pa = (uint64_t)virt_to_phys((void *)hdr); > + > + virtqueue_add_chain_desc(vq, hdr_pa, sizeof(*hdr), &id, &id, in); > + > + /* Deliver the hdr for the host to send commands. */ > + if (in) { > + hdr->flags = 0; > + virtqueue_add_chain(vq, id, 0, NULL, hdr, NULL); > + virtqueue_kick(vq); > + } > + > + return id; > +} > + > +static void cmdq_add_chain_desc(struct virtio_balloon *vb, > + struct virtio_balloon_cmdq_hdr *hdr, > + uint64_t addr, > + uint32_t len, > + unsigned int *head_id, > + unsigned int *prev_id) > +{ > +retry: > + if (*head_id == VIRTQUEUE_DESC_ID_INIT) { > + *head_id = cmdq_hdr_add(vb->cmd_vq, hdr, 0); > + *prev_id = *head_id; > + } > + > + virtqueue_add_chain_desc(vb->cmd_vq, addr, len, head_id, prev_id, 0); > + if (*head_id == *prev_id) { That's an ugly way to detect ring full. > + /* > + * The VQ was full and kicked to release some descs. Now we > + * will re-start to build the chain by using the hdr as the > + * first desc, so we need to detach the desc that was just > + * added, and re-start to add the hdr. 
> + */ > + virtqueue_detach_buf(vb->cmd_vq, *head_id, NULL); > + *head_id = VIRTQUEUE_DESC_ID_INIT; > + *prev_id = VIRTQUEUE_DESC_ID_INIT; > + goto retry; > + } > +} > + > +static void cmdq_handle_stats(struct virtio_balloon *vb) > +{ > + unsigned int num_stats, > + head_id = VIRTQUEUE_DESC_ID_INIT, > + prev_id = VIRTQUEUE_DESC_ID_INIT; > + uint64_t addr = (uint64_t)virt_to_phys((void *)vb->stats); > + uint32_t len; > + > + spin_lock(&vb->stop_update_lock); > + if (!vb->stop_update) { > + num_stats = update_balloon_stats(vb); > + len = sizeof(struct virtio_balloon_stat) * num_stats; > + cmdq_add_chain_desc(vb, &vb->cmdq_stats_hdr, addr, len, > + &head_id, &prev_id); > + virtqueue_add_chain(vb->cmd_vq, head_id, 0, NULL, vb, NULL); > + virtqueue_kick_sync(vb->cmd_vq); > + } > + spin_unlock(&vb->stop_update_lock); > +} > + > +static void cmdq_add_unused_page(struct virtio_balloon *vb, > + struct zone *zone, > + unsigned int order, > + unsigned int type, > + struct page *page, > + unsigned int *head_id, > + unsigned int *prev_id) > +{ > + uint64_t addr; > + uint32_t len; > + > + while (!report_unused_page_block(zone, order, type, &page)) { > + addr = (u64)page_to_pfn(page) << VIRTIO_BALLOON_PFN_SHIFT; > + len = (u64)(1 << order) << VIRTIO_BALLOON_PFN_SHIFT; > + cmdq_add_chain_desc(vb, &vb->cmdq_unused_page_hdr, addr, len, > + head_id, prev_id); > + } > +} > + > +static void cmdq_handle_unused_pages(struct virtio_balloon *vb) > +{ > + struct virtqueue *vq = vb->cmd_vq; > + unsigned int order = 0, type = 0, > + head_id = VIRTQUEUE_DESC_ID_INIT, > + prev_id = VIRTQUEUE_DESC_ID_INIT; > + struct zone *zone = NULL; > + struct page *page = NULL; > + > + for_each_populated_zone(zone) > + for_each_migratetype_order(order, type) > + cmdq_add_unused_page(vb, zone, order, type, page, > + &head_id, &prev_id); > + > + /* Set the cmd completion flag. 
*/ > + vb->cmdq_unused_page_hdr.flags |= > + cpu_to_le32(VIRTIO_BALLOON_CMDQ_F_COMPLETION); > + virtqueue_add_chain(vq, head_id, 0, NULL, vb, NULL); > + virtqueue_kick_sync(vb->cmd_vq); > +} > + > +static void cmdq_handle(struct virtio_balloon *vb) > +{ > + struct virtio_balloon_cmdq_hdr *hdr; > + unsigned int len; > + > + while ((hdr = (struct virtio_balloon_cmdq_hdr *) > + virtqueue_get_buf(vb->cmd_vq, &len)) != NULL) { > + switch (__le32_to_cpu(hdr->cmd)) { > + case VIRTIO_BALLOON_CMDQ_REPORT_STATS: > + cmdq_handle_stats(vb); > + break; > + case VIRTIO_BALLOON_CMDQ_REPORT_UNUSED_PAGES: > + cmdq_handle_unused_pages(vb); > + break; > + default: > + dev_warn(&vb->vdev->dev, "%s: wrong cmd\n", __func__); > + return; > + } > + /* > + * Replenish all the command buffer to the device after a > + * command is handled. This is for the convenience of the > + * device to rewind the cmdq to get back all the command > + * buffer after live migration. > + */ > + cmdq_hdr_add(vb->cmd_vq, &vb->cmdq_stats_hdr, 1); > + cmdq_hdr_add(vb->cmd_vq, &vb->cmdq_unused_page_hdr, 1); > + } > +} > + > +static void cmdq_handle_work_func(struct work_struct *work) > +{ > + struct virtio_balloon *vb; > + > + vb = container_of(work, struct virtio_balloon, > + cmdq_handle_work); > + cmdq_handle(vb); > +} > + > +static void cmdq_callback(struct virtqueue *vq) > +{ > + struct virtio_balloon *vb = vq->vdev->priv; > + > + queue_work(system_freezable_wq, &vb->cmdq_handle_work); > +} > + > static int init_vqs(struct virtio_balloon *vb) > { > - struct virtqueue *vqs[3]; > - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; > - static const char * const names[] = { "inflate", "deflate", "stats" }; > - int err, nvqs; > + struct virtqueue **vqs; > + vq_callback_t **callbacks; > + const char **names; > + int err = -ENOMEM; > + int nvqs; > + > + /* Inflateq and deflateq are used unconditionally */ > + nvqs = 2; > + > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_CMD_VQ) || > + 
virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) > + nvqs++; > + > + /* Allocate space for find_vqs parameters */ > + vqs = kcalloc(nvqs, sizeof(*vqs), GFP_KERNEL); > + if (!vqs) > + goto err_vq; > + callbacks = kmalloc_array(nvqs, sizeof(*callbacks), GFP_KERNEL); > + if (!callbacks) > + goto err_callback; > + names = kmalloc_array(nvqs, sizeof(*names), GFP_KERNEL); > + if (!names) > + goto err_names; > + > + callbacks[0] = balloon_ack; > + names[0] = "inflate"; > + callbacks[1] = balloon_ack; > + names[1] = "deflate"; > > /* > - * We expect two virtqueues: inflate and deflate, and > - * optionally stat. > + * The stats_vq is used only when cmdq is not supported (or disabled) > + * by the device. > */ > - nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; > - err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); > - if (err) > - return err; > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_CMD_VQ)) { > + callbacks[2] = cmdq_callback; > + names[2] = "cmdq"; > + } else if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { > + callbacks[2] = stats_request; > + names[2] = "stats"; > + } > > + err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, > + names, NULL, NULL); > + if (err) > + goto err_find; > vb->inflate_vq = vqs[0]; > vb->deflate_vq = vqs[1]; > - if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { > + > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_CMD_VQ)) { > + vb->cmd_vq = vqs[2]; > + /* Prime the cmdq with the header buffer. 
*/ > + cmdq_hdr_add(vb->cmd_vq, &vb->cmdq_stats_hdr, 1); > + cmdq_hdr_add(vb->cmd_vq, &vb->cmdq_unused_page_hdr, 1); > + } else if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { > struct scatterlist sg; > unsigned int num_stats; > vb->stats_vq = vqs[2]; > @@ -520,6 +716,16 @@ static int init_vqs(struct virtio_balloon *vb) > BUG(); > virtqueue_kick(vb->stats_vq); > } > + > +err_find: > + kfree(names); > +err_names: > + kfree(callbacks); > +err_callback: > + kfree(vqs); > +err_vq: > + return err; > + > return 0; > } > > @@ -640,7 +846,18 @@ static int virtballoon_probe(struct virtio_device *vdev) > goto out; > } > > - INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func); > + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_CMD_VQ)) { > + vb->cmdq_stats_hdr.cmd = > + cpu_to_le32(VIRTIO_BALLOON_CMDQ_REPORT_STATS); > + vb->cmdq_stats_hdr.flags = 0; > + vb->cmdq_unused_page_hdr.cmd = > + cpu_to_le32(VIRTIO_BALLOON_CMDQ_REPORT_UNUSED_PAGES); > + vb->cmdq_unused_page_hdr.flags = 0; > + INIT_WORK(&vb->cmdq_handle_work, cmdq_handle_work_func); > + } else if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { > + INIT_WORK(&vb->update_balloon_stats_work, > + update_balloon_stats_func); > + } > INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func); > spin_lock_init(&vb->stop_update_lock); > vb->stop_update = false; > @@ -722,6 +939,7 @@ static void virtballoon_remove(struct virtio_device *vdev) > spin_unlock_irq(&vb->stop_update_lock); > cancel_work_sync(&vb->update_balloon_size_work); > cancel_work_sync(&vb->update_balloon_stats_work); > + cancel_work_sync(&vb->cmdq_handle_work); > > xb_empty(&vb->page_xb); > remove_common(vb); > @@ -776,6 +994,7 @@ static unsigned int features[] = { > VIRTIO_BALLOON_F_STATS_VQ, > VIRTIO_BALLOON_F_DEFLATE_ON_OOM, > VIRTIO_BALLOON_F_SG, > + VIRTIO_BALLOON_F_CMD_VQ, > }; > > static struct virtio_driver virtio_balloon_driver = { > diff --git a/drivers/virtio/virtio_ring.c 
b/drivers/virtio/virtio_ring.c > index b9d7e10..793de12 100644 > --- a/drivers/virtio/virtio_ring.c > +++ b/drivers/virtio/virtio_ring.c > @@ -52,8 +52,13 @@ > "%s:"fmt, (_vq)->vq.name, ##args); \ > (_vq)->broken = true; \ > } while (0) > -#define START_USE(vq) > -#define END_USE(vq) > +#define START_USE(_vq) \ > + do { \ > + while ((_vq)->in_use) \ > + cpu_relax(); \ > + (_vq)->in_use = __LINE__; \ > + } while (0) > +#define END_USE(_vq) ((_vq)->in_use = 0) > #endif > > struct vring_desc_state { > @@ -101,9 +106,9 @@ struct vring_virtqueue { > size_t queue_size_in_bytes; > dma_addr_t queue_dma_addr; > > -#ifdef DEBUG > /* They're supposed to lock for us. */ > unsigned int in_use; > +#ifdef DEBUG > > /* Figure out if their kicks are too delayed. */ > bool last_add_time_valid; > @@ -845,6 +850,18 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head, > } > } > > +void virtqueue_detach_buf(struct virtqueue *_vq, unsigned int head, void **ctx) > +{ > + struct vring_virtqueue *vq = to_vvq(_vq); > + > + START_USE(vq); > + > + detach_buf(vq, head, ctx); > + > + END_USE(vq); > +} > +EXPORT_SYMBOL_GPL(virtqueue_detach_buf); > + > static inline bool more_used(const struct vring_virtqueue *vq) > { > return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx); > @@ -1158,8 +1175,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, > vq->avail_idx_shadow = 0; > vq->num_added = 0; > list_add_tail(&vq->vq.list, &vdev->vqs); > + vq->in_use = 0; > #ifdef DEBUG > - vq->in_use = false; > vq->last_add_time_valid = false; > #endif > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h > index 9f27101..9df480b 100644 > --- a/include/linux/virtio.h > +++ b/include/linux/virtio.h > @@ -88,6 +88,8 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); > void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len, > void **ctx); > > +void virtqueue_detach_buf(struct virtqueue *_vq, unsigned int head, void 
**ctx); > + > void virtqueue_disable_cb(struct virtqueue *vq); > > bool virtqueue_enable_cb(struct virtqueue *vq); > diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h > index 37780a7..b38c370 100644 > --- a/include/uapi/linux/virtio_balloon.h > +++ b/include/uapi/linux/virtio_balloon.h > @@ -35,6 +35,7 @@ > #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ > #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ > #define VIRTIO_BALLOON_F_SG 3 /* Use sg instead of PFN lists */ > +#define VIRTIO_BALLOON_F_CMD_VQ 4 /* Command virtqueue */ > > /* Size of a PFN in the balloon interface. */ > #define VIRTIO_BALLOON_PFN_SHIFT 12 > @@ -83,4 +84,13 @@ struct virtio_balloon_stat { > __virtio64 val; > } __attribute__((packed)); > > +struct virtio_balloon_cmdq_hdr { > +#define VIRTIO_BALLOON_CMDQ_REPORT_STATS 0 > +#define VIRTIO_BALLOON_CMDQ_REPORT_UNUSED_PAGES 1 > + __le32 cmd; > +/* Flag to indicate the completion of handling a command */ > +#define VIRTIO_BALLOON_CMDQ_F_COMPLETION 1 > + __le32 flags; > +}; > + > #endif /* _LINUX_VIRTIO_BALLOON_H */ > -- > 2.7.4
On 07/13/2017 08:22 AM, Michael S. Tsirkin wrote: > On Wed, Jul 12, 2017 at 08:40:21PM +0800, Wei Wang wrote: >> Add a new vq, cmdq, to handle requests between the device and driver. >> >> This patch implements two commands sent from the device and handled in >> the driver. >> 1) VIRTIO_BALLOON_CMDQ_REPORT_STATS: this command is used to report >> the guest memory statistics to the host. The stats_vq mechanism is not >> used when the cmdq mechanism is enabled. >> 2) VIRTIO_BALLOON_CMDQ_REPORT_UNUSED_PAGES: this command is used to >> report the guest unused pages to the host. >> >> Since now we have a vq to handle multiple commands, we need to keep only >> one vq operation at a time. Here, we change the existing START_USE() >> and END_USE() to lock on each vq operation. >> >> Signed-off-by: Wei Wang <wei.w.wang@intel.com> >> Signed-off-by: Liang Li <liang.z.li@intel.com> >> --- >> drivers/virtio/virtio_balloon.c | 245 ++++++++++++++++++++++++++++++++++-- >> drivers/virtio/virtio_ring.c | 25 +++- >> include/linux/virtio.h | 2 + >> include/uapi/linux/virtio_balloon.h | 10 ++ >> 4 files changed, 265 insertions(+), 17 deletions(-) >> >> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c >> index aa4e7ec..ae91fbf 100644 >> --- a/drivers/virtio/virtio_balloon.c >> +++ b/drivers/virtio/virtio_balloon.c >> @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; >> >> struct virtio_balloon { >> struct virtio_device *vdev; >> - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; >> + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *cmd_vq; >> >> /* The balloon servicing is delegated to a freezable workqueue. */ >> struct work_struct update_balloon_stats_work; >> struct work_struct update_balloon_size_work; >> + struct work_struct cmdq_handle_work; >> >> /* Prevent updating balloon when it is being canceled. 
*/ >> spinlock_t stop_update_lock; >> @@ -90,6 +91,12 @@ struct virtio_balloon { >> /* Memory statistics */ >> struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; >> >> + /* Cmdq msg buffer for memory statistics */ >> + struct virtio_balloon_cmdq_hdr cmdq_stats_hdr; >> + >> + /* Cmdq msg buffer for reporting ununsed pages */ >> + struct virtio_balloon_cmdq_hdr cmdq_unused_page_hdr; >> + >> /* To register callback in oom notifier call chain */ >> struct notifier_block nb; >> }; >> @@ -485,25 +492,214 @@ static void update_balloon_size_func(struct work_struct *work) >> queue_work(system_freezable_wq, work); >> } >> >> +static unsigned int cmdq_hdr_add(struct virtqueue *vq, >> + struct virtio_balloon_cmdq_hdr *hdr, >> + bool in) >> +{ >> + unsigned int id = VIRTQUEUE_DESC_ID_INIT; >> + uint64_t hdr_pa = (uint64_t)virt_to_phys((void *)hdr); >> + >> + virtqueue_add_chain_desc(vq, hdr_pa, sizeof(*hdr), &id, &id, in); >> + >> + /* Deliver the hdr for the host to send commands. */ >> + if (in) { >> + hdr->flags = 0; >> + virtqueue_add_chain(vq, id, 0, NULL, hdr, NULL); >> + virtqueue_kick(vq); >> + } >> + >> + return id; >> +} >> + >> +static void cmdq_add_chain_desc(struct virtio_balloon *vb, >> + struct virtio_balloon_cmdq_hdr *hdr, >> + uint64_t addr, >> + uint32_t len, >> + unsigned int *head_id, >> + unsigned int *prev_id) >> +{ >> +retry: >> + if (*head_id == VIRTQUEUE_DESC_ID_INIT) { >> + *head_id = cmdq_hdr_add(vb->cmd_vq, hdr, 0); >> + *prev_id = *head_id; >> + } >> + >> + virtqueue_add_chain_desc(vb->cmd_vq, addr, len, head_id, prev_id, 0); >> + if (*head_id == *prev_id) { > That's an ugly way to detect ring full. It's actually not detecting ring full. I will call it tail_id, instead of prev_id. So, *head_id == *tail_id is the case that the first desc was just added by virtqueue_add_chain_desc(). Best, Wei
On Thu, Jul 13, 2017 at 04:46:29PM +0800, Wei Wang wrote: > On 07/13/2017 08:22 AM, Michael S. Tsirkin wrote: > > On Wed, Jul 12, 2017 at 08:40:21PM +0800, Wei Wang wrote: > > > Add a new vq, cmdq, to handle requests between the device and driver. > > > > > > This patch implements two commands sent from the device and handled in > > > the driver. > > > 1) VIRTIO_BALLOON_CMDQ_REPORT_STATS: this command is used to report > > > the guest memory statistics to the host. The stats_vq mechanism is not > > > used when the cmdq mechanism is enabled. > > > 2) VIRTIO_BALLOON_CMDQ_REPORT_UNUSED_PAGES: this command is used to > > > report the guest unused pages to the host. > > > > > > Since now we have a vq to handle multiple commands, we need to keep only > > > one vq operation at a time. Here, we change the existing START_USE() > > > and END_USE() to lock on each vq operation. > > > > > > Signed-off-by: Wei Wang <wei.w.wang@intel.com> > > > Signed-off-by: Liang Li <liang.z.li@intel.com> > > > --- > > > drivers/virtio/virtio_balloon.c | 245 ++++++++++++++++++++++++++++++++++-- > > > drivers/virtio/virtio_ring.c | 25 +++- > > > include/linux/virtio.h | 2 + > > > include/uapi/linux/virtio_balloon.h | 10 ++ > > > 4 files changed, 265 insertions(+), 17 deletions(-) > > > > > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > > > index aa4e7ec..ae91fbf 100644 > > > --- a/drivers/virtio/virtio_balloon.c > > > +++ b/drivers/virtio/virtio_balloon.c > > > @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; > > > struct virtio_balloon { > > > struct virtio_device *vdev; > > > - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; > > > + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *cmd_vq; > > > /* The balloon servicing is delegated to a freezable workqueue. 
*/ > > > struct work_struct update_balloon_stats_work; > > > struct work_struct update_balloon_size_work; > > > + struct work_struct cmdq_handle_work; > > > /* Prevent updating balloon when it is being canceled. */ > > > spinlock_t stop_update_lock; > > > @@ -90,6 +91,12 @@ struct virtio_balloon { > > > /* Memory statistics */ > > > struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; > > > + /* Cmdq msg buffer for memory statistics */ > > > + struct virtio_balloon_cmdq_hdr cmdq_stats_hdr; > > > + > > > + /* Cmdq msg buffer for reporting ununsed pages */ typo above btw > > > + struct virtio_balloon_cmdq_hdr cmdq_unused_page_hdr; > > > + > > > /* To register callback in oom notifier call chain */ > > > struct notifier_block nb; > > > }; > > > @@ -485,25 +492,214 @@ static void update_balloon_size_func(struct work_struct *work) > > > queue_work(system_freezable_wq, work); > > > } > > > +static unsigned int cmdq_hdr_add(struct virtqueue *vq, > > > + struct virtio_balloon_cmdq_hdr *hdr, > > > + bool in) > > > +{ > > > + unsigned int id = VIRTQUEUE_DESC_ID_INIT; > > > + uint64_t hdr_pa = (uint64_t)virt_to_phys((void *)hdr); > > > + > > > + virtqueue_add_chain_desc(vq, hdr_pa, sizeof(*hdr), &id, &id, in); > > > + > > > + /* Deliver the hdr for the host to send commands. 
*/ > > > + if (in) { > > > + hdr->flags = 0; > > > + virtqueue_add_chain(vq, id, 0, NULL, hdr, NULL); > > > + virtqueue_kick(vq); > > > + } > > > + > > > + return id; > > > +} > > > + > > > +static void cmdq_add_chain_desc(struct virtio_balloon *vb, > > > + struct virtio_balloon_cmdq_hdr *hdr, > > > + uint64_t addr, > > > + uint32_t len, > > > + unsigned int *head_id, > > > + unsigned int *prev_id) > > > +{ > > > +retry: > > > + if (*head_id == VIRTQUEUE_DESC_ID_INIT) { > > > + *head_id = cmdq_hdr_add(vb->cmd_vq, hdr, 0); > > > + *prev_id = *head_id; > > > + } > > > + > > > + virtqueue_add_chain_desc(vb->cmd_vq, addr, len, head_id, prev_id, 0); > > > + if (*head_id == *prev_id) { > > That's an ugly way to detect ring full. > > It's actually not detecting ring full. I will call it tail_id, instead of > prev_id. > So, *head_id == *tail_id is the case that the first desc was just added by > virtqueue_add_chain_desc(). > > Best, > Wei Oh, so it's adding a header before each list. Ugh. I don't think we should stay with this API. It's just too tricky to use. If we had an API that fails when it can't add descriptors (you can reserve space for the last descriptor), the balloon would know whether it's adding the first descriptor in a chain and could just use a boolean that tells it whether that is the case.
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index aa4e7ec..ae91fbf 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; struct virtio_balloon { struct virtio_device *vdev; - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *cmd_vq; /* The balloon servicing is delegated to a freezable workqueue. */ struct work_struct update_balloon_stats_work; struct work_struct update_balloon_size_work; + struct work_struct cmdq_handle_work; /* Prevent updating balloon when it is being canceled. */ spinlock_t stop_update_lock; @@ -90,6 +91,12 @@ struct virtio_balloon { /* Memory statistics */ struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; + /* Cmdq msg buffer for memory statistics */ + struct virtio_balloon_cmdq_hdr cmdq_stats_hdr; + + /* Cmdq msg buffer for reporting ununsed pages */ + struct virtio_balloon_cmdq_hdr cmdq_unused_page_hdr; + /* To register callback in oom notifier call chain */ struct notifier_block nb; }; @@ -485,25 +492,214 @@ static void update_balloon_size_func(struct work_struct *work) queue_work(system_freezable_wq, work); } +static unsigned int cmdq_hdr_add(struct virtqueue *vq, + struct virtio_balloon_cmdq_hdr *hdr, + bool in) +{ + unsigned int id = VIRTQUEUE_DESC_ID_INIT; + uint64_t hdr_pa = (uint64_t)virt_to_phys((void *)hdr); + + virtqueue_add_chain_desc(vq, hdr_pa, sizeof(*hdr), &id, &id, in); + + /* Deliver the hdr for the host to send commands. 
*/ + if (in) { + hdr->flags = 0; + virtqueue_add_chain(vq, id, 0, NULL, hdr, NULL); + virtqueue_kick(vq); + } + + return id; +} + +static void cmdq_add_chain_desc(struct virtio_balloon *vb, + struct virtio_balloon_cmdq_hdr *hdr, + uint64_t addr, + uint32_t len, + unsigned int *head_id, + unsigned int *prev_id) +{ +retry: + if (*head_id == VIRTQUEUE_DESC_ID_INIT) { + *head_id = cmdq_hdr_add(vb->cmd_vq, hdr, 0); + *prev_id = *head_id; + } + + virtqueue_add_chain_desc(vb->cmd_vq, addr, len, head_id, prev_id, 0); + if (*head_id == *prev_id) { + /* + * The VQ was full and kicked to release some descs. Now we + * will re-start to build the chain by using the hdr as the + * first desc, so we need to detach the desc that was just + * added, and re-start to add the hdr. + */ + virtqueue_detach_buf(vb->cmd_vq, *head_id, NULL); + *head_id = VIRTQUEUE_DESC_ID_INIT; + *prev_id = VIRTQUEUE_DESC_ID_INIT; + goto retry; + } +} + +static void cmdq_handle_stats(struct virtio_balloon *vb) +{ + unsigned int num_stats, + head_id = VIRTQUEUE_DESC_ID_INIT, + prev_id = VIRTQUEUE_DESC_ID_INIT; + uint64_t addr = (uint64_t)virt_to_phys((void *)vb->stats); + uint32_t len; + + spin_lock(&vb->stop_update_lock); + if (!vb->stop_update) { + num_stats = update_balloon_stats(vb); + len = sizeof(struct virtio_balloon_stat) * num_stats; + cmdq_add_chain_desc(vb, &vb->cmdq_stats_hdr, addr, len, + &head_id, &prev_id); + virtqueue_add_chain(vb->cmd_vq, head_id, 0, NULL, vb, NULL); + virtqueue_kick_sync(vb->cmd_vq); + } + spin_unlock(&vb->stop_update_lock); +} + +static void cmdq_add_unused_page(struct virtio_balloon *vb, + struct zone *zone, + unsigned int order, + unsigned int type, + struct page *page, + unsigned int *head_id, + unsigned int *prev_id) +{ + uint64_t addr; + uint32_t len; + + while (!report_unused_page_block(zone, order, type, &page)) { + addr = (u64)page_to_pfn(page) << VIRTIO_BALLOON_PFN_SHIFT; + len = (u64)(1 << order) << VIRTIO_BALLOON_PFN_SHIFT; + cmdq_add_chain_desc(vb, 
&vb->cmdq_unused_page_hdr, addr, len, + head_id, prev_id); + } +} + +static void cmdq_handle_unused_pages(struct virtio_balloon *vb) +{ + struct virtqueue *vq = vb->cmd_vq; + unsigned int order = 0, type = 0, + head_id = VIRTQUEUE_DESC_ID_INIT, + prev_id = VIRTQUEUE_DESC_ID_INIT; + struct zone *zone = NULL; + struct page *page = NULL; + + for_each_populated_zone(zone) + for_each_migratetype_order(order, type) + cmdq_add_unused_page(vb, zone, order, type, page, + &head_id, &prev_id); + + /* Set the cmd completion flag. */ + vb->cmdq_unused_page_hdr.flags |= + cpu_to_le32(VIRTIO_BALLOON_CMDQ_F_COMPLETION); + virtqueue_add_chain(vq, head_id, 0, NULL, vb, NULL); + virtqueue_kick_sync(vb->cmd_vq); +} + +static void cmdq_handle(struct virtio_balloon *vb) +{ + struct virtio_balloon_cmdq_hdr *hdr; + unsigned int len; + + while ((hdr = (struct virtio_balloon_cmdq_hdr *) + virtqueue_get_buf(vb->cmd_vq, &len)) != NULL) { + switch (__le32_to_cpu(hdr->cmd)) { + case VIRTIO_BALLOON_CMDQ_REPORT_STATS: + cmdq_handle_stats(vb); + break; + case VIRTIO_BALLOON_CMDQ_REPORT_UNUSED_PAGES: + cmdq_handle_unused_pages(vb); + break; + default: + dev_warn(&vb->vdev->dev, "%s: wrong cmd\n", __func__); + return; + } + /* + * Replenish all the command buffer to the device after a + * command is handled. This is for the convenience of the + * device to rewind the cmdq to get back all the command + * buffer after live migration. 
+ */ + cmdq_hdr_add(vb->cmd_vq, &vb->cmdq_stats_hdr, 1); + cmdq_hdr_add(vb->cmd_vq, &vb->cmdq_unused_page_hdr, 1); + } +} + +static void cmdq_handle_work_func(struct work_struct *work) +{ + struct virtio_balloon *vb; + + vb = container_of(work, struct virtio_balloon, + cmdq_handle_work); + cmdq_handle(vb); +} + +static void cmdq_callback(struct virtqueue *vq) +{ + struct virtio_balloon *vb = vq->vdev->priv; + + queue_work(system_freezable_wq, &vb->cmdq_handle_work); +} + static int init_vqs(struct virtio_balloon *vb) { - struct virtqueue *vqs[3]; - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; - static const char * const names[] = { "inflate", "deflate", "stats" }; - int err, nvqs; + struct virtqueue **vqs; + vq_callback_t **callbacks; + const char **names; + int err = -ENOMEM; + int nvqs; + + /* Inflateq and deflateq are used unconditionally */ + nvqs = 2; + + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_CMD_VQ) || + virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) + nvqs++; + + /* Allocate space for find_vqs parameters */ + vqs = kcalloc(nvqs, sizeof(*vqs), GFP_KERNEL); + if (!vqs) + goto err_vq; + callbacks = kmalloc_array(nvqs, sizeof(*callbacks), GFP_KERNEL); + if (!callbacks) + goto err_callback; + names = kmalloc_array(nvqs, sizeof(*names), GFP_KERNEL); + if (!names) + goto err_names; + + callbacks[0] = balloon_ack; + names[0] = "inflate"; + callbacks[1] = balloon_ack; + names[1] = "deflate"; /* - * We expect two virtqueues: inflate and deflate, and - * optionally stat. + * The stats_vq is used only when cmdq is not supported (or disabled) + * by the device. */ - nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 
3 : 2; - err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); - if (err) - return err; + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_CMD_VQ)) { + callbacks[2] = cmdq_callback; + names[2] = "cmdq"; + } else if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { + callbacks[2] = stats_request; + names[2] = "stats"; + } + err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, + names, NULL, NULL); + if (err) + goto err_find; vb->inflate_vq = vqs[0]; vb->deflate_vq = vqs[1]; - if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { + + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_CMD_VQ)) { + vb->cmd_vq = vqs[2]; + /* Prime the cmdq with the header buffer. */ + cmdq_hdr_add(vb->cmd_vq, &vb->cmdq_stats_hdr, 1); + cmdq_hdr_add(vb->cmd_vq, &vb->cmdq_unused_page_hdr, 1); + } else if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { struct scatterlist sg; unsigned int num_stats; vb->stats_vq = vqs[2]; @@ -520,6 +716,16 @@ static int init_vqs(struct virtio_balloon *vb) BUG(); virtqueue_kick(vb->stats_vq); } + +err_find: + kfree(names); +err_names: + kfree(callbacks); +err_callback: + kfree(vqs); +err_vq: + return err; + return 0; } @@ -640,7 +846,18 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out; } - INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func); + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_CMD_VQ)) { + vb->cmdq_stats_hdr.cmd = + cpu_to_le32(VIRTIO_BALLOON_CMDQ_REPORT_STATS); + vb->cmdq_stats_hdr.flags = 0; + vb->cmdq_unused_page_hdr.cmd = + cpu_to_le32(VIRTIO_BALLOON_CMDQ_REPORT_UNUSED_PAGES); + vb->cmdq_unused_page_hdr.flags = 0; + INIT_WORK(&vb->cmdq_handle_work, cmdq_handle_work_func); + } else if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { + INIT_WORK(&vb->update_balloon_stats_work, + update_balloon_stats_func); + } INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func); spin_lock_init(&vb->stop_update_lock); vb->stop_update = false; 
@@ -722,6 +939,7 @@ static void virtballoon_remove(struct virtio_device *vdev) spin_unlock_irq(&vb->stop_update_lock); cancel_work_sync(&vb->update_balloon_size_work); cancel_work_sync(&vb->update_balloon_stats_work); + cancel_work_sync(&vb->cmdq_handle_work); xb_empty(&vb->page_xb); remove_common(vb); @@ -776,6 +994,7 @@ static unsigned int features[] = { VIRTIO_BALLOON_F_STATS_VQ, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, VIRTIO_BALLOON_F_SG, + VIRTIO_BALLOON_F_CMD_VQ, }; static struct virtio_driver virtio_balloon_driver = { diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index b9d7e10..793de12 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -52,8 +52,13 @@ "%s:"fmt, (_vq)->vq.name, ##args); \ (_vq)->broken = true; \ } while (0) -#define START_USE(vq) -#define END_USE(vq) +#define START_USE(_vq) \ + do { \ + while ((_vq)->in_use) \ + cpu_relax(); \ + (_vq)->in_use = __LINE__; \ + } while (0) +#define END_USE(_vq) ((_vq)->in_use = 0) #endif struct vring_desc_state { @@ -101,9 +106,9 @@ struct vring_virtqueue { size_t queue_size_in_bytes; dma_addr_t queue_dma_addr; -#ifdef DEBUG /* They're supposed to lock for us. */ unsigned int in_use; +#ifdef DEBUG /* Figure out if their kicks are too delayed. 
*/ bool last_add_time_valid; @@ -845,6 +850,18 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head, } } +void virtqueue_detach_buf(struct virtqueue *_vq, unsigned int head, void **ctx) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + START_USE(vq); + + detach_buf(vq, head, ctx); + + END_USE(vq); +} +EXPORT_SYMBOL_GPL(virtqueue_detach_buf); + static inline bool more_used(const struct vring_virtqueue *vq) { return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx); @@ -1158,8 +1175,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->avail_idx_shadow = 0; vq->num_added = 0; list_add_tail(&vq->vq.list, &vdev->vqs); + vq->in_use = 0; #ifdef DEBUG - vq->in_use = false; vq->last_add_time_valid = false; #endif diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 9f27101..9df480b 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -88,6 +88,8 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len, void **ctx); +void virtqueue_detach_buf(struct virtqueue *_vq, unsigned int head, void **ctx); + void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 37780a7..b38c370 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -35,6 +35,7 @@ #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ #define VIRTIO_BALLOON_F_SG 3 /* Use sg instead of PFN lists */ +#define VIRTIO_BALLOON_F_CMD_VQ 4 /* Command virtqueue */ /* Size of a PFN in the balloon interface. 
*/ #define VIRTIO_BALLOON_PFN_SHIFT 12 @@ -83,4 +84,13 @@ struct virtio_balloon_stat { __virtio64 val; } __attribute__((packed)); +struct virtio_balloon_cmdq_hdr { +#define VIRTIO_BALLOON_CMDQ_REPORT_STATS 0 +#define VIRTIO_BALLOON_CMDQ_REPORT_UNUSED_PAGES 1 + __le32 cmd; +/* Flag to indicate the completion of handling a command */ +#define VIRTIO_BALLOON_CMDQ_F_COMPLETION 1 + __le32 flags; +}; + #endif /* _LINUX_VIRTIO_BALLOON_H */