Message ID | 1502940416-42944-4-git-send-email-wei.w.wang@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Aug 17, 2017 at 11:26:54AM +0800, Wei Wang wrote: > Add a new feature, VIRTIO_BALLOON_F_SG, which enables the transfer > of balloon (i.e. inflated/deflated) pages using scatter-gather lists > to the host. > > The implementation of the previous virtio-balloon is not very > efficient, because the balloon pages are transferred to the > host one by one. Here is the breakdown of the time in percentage > spent on each step of the balloon inflating process (inflating > 7GB of an 8GB idle guest). > > 1) allocating pages (6.5%) > 2) sending PFNs to host (68.3%) > 3) address translation (6.1%) > 4) madvise (19%) > > It takes about 4126ms for the inflating process to complete. > The above profiling shows that the bottlenecks are stage 2) > and stage 4). > > This patch optimizes step 2) by transferring pages to the host in > sgs. An sg describes a chunk of guest physically continuous pages. > With this mechanism, step 4) can also be optimized by doing address > translation and madvise() in chunks rather than page by page. > > With this new feature, the above ballooning process takes ~541ms > resulting in an improvement of ~87%. > > TODO: optimize stage 1) by allocating/freeing a chunk of pages > instead of a single page each time. > > Signed-off-by: Wei Wang <wei.w.wang@intel.com> > Signed-off-by: Liang Li <liang.z.li@intel.com> > Suggested-by: Michael S. Tsirkin <mst@redhat.com> > --- > drivers/virtio/virtio_balloon.c | 157 ++++++++++++++++++++++++++++++++---- > include/uapi/linux/virtio_balloon.h | 1 + > 2 files changed, 141 insertions(+), 17 deletions(-) > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > index f0b3a0b..72041b4 100644 > --- a/drivers/virtio/virtio_balloon.c > +++ b/drivers/virtio/virtio_balloon.c > @@ -32,6 +32,7 @@ > #include <linux/mm.h> > #include <linux/mount.h> > #include <linux/magic.h> > +#include <linux/xbitmap.h> > > /* > * Balloon device works in 4K page units. 
So each page is pointed to by > @@ -79,6 +80,9 @@ struct virtio_balloon { > /* Synchronize access/update to this struct virtio_balloon elements */ > struct mutex balloon_lock; > > + /* The xbitmap used to record ballooned pages */ > + struct xb page_xb; > + > /* The array of pfns we tell the Host about. */ > unsigned int num_pfns; > __virtio32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX]; > @@ -141,13 +145,98 @@ static void set_page_pfns(struct virtio_balloon *vb, > page_to_balloon_pfn(page) + i); > } > > +static int add_one_sg(struct virtqueue *vq, void *addr, uint32_t size) > +{ > + struct scatterlist sg; > + > + sg_init_one(&sg, addr, size); > + return virtqueue_add_inbuf(vq, &sg, 1, vq, GFP_KERNEL); > +} > + > +static void send_balloon_page_sg(struct virtio_balloon *vb, > + struct virtqueue *vq, > + void *addr, > + uint32_t size) > +{ > + unsigned int len; > + int ret; > + > + do { > + ret = add_one_sg(vq, addr, size); > + virtqueue_kick(vq); > + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); > + /* > + * It is uncommon to see the vq is full, because the sg is sent > + * one by one and the device is able to handle it in time. But > + * if that happens, we go back to retry after an entry gets > + * released. > + */ Why send one by one though? Why not batch some s/gs and wait for all of them to be completed? If memory if fragmented, waiting every time is worse than what we have now (VIRTIO_BALLOON_ARRAY_PFNS_MAX at a time). > + } while (unlikely(ret == -ENOSPC)); > +} > + > +/* > + * Send balloon pages in sgs to host. The balloon pages are recorded in the > + * page xbitmap. Each bit in the bitmap corresponds to a page of PAGE_SIZE. > + * The page xbitmap is searched for continuous "1" bits, which correspond > + * to continuous pages, to chunk into sgs. > + * > + * @page_xb_start and @page_xb_end form the range of bits in the xbitmap that > + * need to be searched. 
> + */ > +static void tell_host_sgs(struct virtio_balloon *vb, > + struct virtqueue *vq, > + unsigned long page_xb_start, > + unsigned long page_xb_end) > +{ > + unsigned long sg_pfn_start, sg_pfn_end; > + void *sg_addr; > + uint32_t sg_len, sg_max_len = round_down(UINT_MAX, PAGE_SIZE); > + > + sg_pfn_start = page_xb_start; > + while (sg_pfn_start < page_xb_end) { > + sg_pfn_start = xb_find_next_bit(&vb->page_xb, sg_pfn_start, > + page_xb_end, 1); > + if (sg_pfn_start == page_xb_end + 1) > + break; > + sg_pfn_end = xb_find_next_bit(&vb->page_xb, sg_pfn_start + 1, > + page_xb_end, 0); > + sg_addr = (void *)pfn_to_kaddr(sg_pfn_start); > + sg_len = (sg_pfn_end - sg_pfn_start) << PAGE_SHIFT; > + while (sg_len > sg_max_len) { > + send_balloon_page_sg(vb, vq, sg_addr, sg_max_len); > + sg_addr += sg_max_len; > + sg_len -= sg_max_len; > + } > + send_balloon_page_sg(vb, vq, sg_addr, sg_len); > + xb_zero(&vb->page_xb, sg_pfn_start, sg_pfn_end); > + sg_pfn_start = sg_pfn_end + 1; > + } > +} > + > +static inline void xb_set_page(struct virtio_balloon *vb, > + struct page *page, > + unsigned long *pfn_min, > + unsigned long *pfn_max) > +{ > + unsigned long pfn = page_to_pfn(page); > + > + *pfn_min = min(pfn, *pfn_min); > + *pfn_max = max(pfn, *pfn_max); > + xb_preload(GFP_KERNEL); > + xb_set_bit(&vb->page_xb, pfn); > + xb_preload_end(); > +} > + > static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) > { > struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; > unsigned num_allocated_pages; > + bool use_sg = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_SG); > + unsigned long pfn_max = 0, pfn_min = ULONG_MAX; > > /* We can only do one array worth at a time. 
*/ > - num = min(num, ARRAY_SIZE(vb->pfns)); > + if (!use_sg) > + num = min(num, ARRAY_SIZE(vb->pfns)); > > mutex_lock(&vb->balloon_lock); > for (vb->num_pfns = 0; vb->num_pfns < num; > @@ -162,7 +251,12 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) > msleep(200); > break; > } > - set_page_pfns(vb, vb->pfns + vb->num_pfns, page); > + > + if (use_sg) > + xb_set_page(vb, page, &pfn_min, &pfn_max); > + else > + set_page_pfns(vb, vb->pfns + vb->num_pfns, page); > + > vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; > if (!virtio_has_feature(vb->vdev, > VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) > @@ -171,8 +265,12 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) > > num_allocated_pages = vb->num_pfns; > /* Did we get any? */ > - if (vb->num_pfns != 0) > - tell_host(vb, vb->inflate_vq); > + if (vb->num_pfns) { > + if (use_sg) > + tell_host_sgs(vb, vb->inflate_vq, pfn_min, pfn_max); > + else > + tell_host(vb, vb->inflate_vq); > + } > mutex_unlock(&vb->balloon_lock); > > return num_allocated_pages; > @@ -198,9 +296,12 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) > struct page *page; > struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; > LIST_HEAD(pages); > + bool use_sg = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_SG); > + unsigned long pfn_max = 0, pfn_min = ULONG_MAX; > > - /* We can only do one array worth at a time. */ > - num = min(num, ARRAY_SIZE(vb->pfns)); > + /* Traditionally, we can only do one array worth at a time. 
*/ > + if (!use_sg) > + num = min(num, ARRAY_SIZE(vb->pfns)); > > mutex_lock(&vb->balloon_lock); > /* We can't release more pages than taken */ > @@ -210,7 +311,11 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) > page = balloon_page_dequeue(vb_dev_info); > if (!page) > break; > - set_page_pfns(vb, vb->pfns + vb->num_pfns, page); > + if (use_sg) > + xb_set_page(vb, page, &pfn_min, &pfn_max); > + else > + set_page_pfns(vb, vb->pfns + vb->num_pfns, page); > + > list_add(&page->lru, &pages); > vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE; > } > @@ -221,8 +326,12 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) > * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); > * is true, we *have* to do it in this order > */ > - if (vb->num_pfns != 0) > - tell_host(vb, vb->deflate_vq); > + if (vb->num_pfns) { > + if (use_sg) > + tell_host_sgs(vb, vb->deflate_vq, pfn_min, pfn_max); > + else > + tell_host(vb, vb->deflate_vq); > + } > release_pages_balloon(vb, &pages); > mutex_unlock(&vb->balloon_lock); > return num_freed_pages; > @@ -441,6 +550,7 @@ static int init_vqs(struct virtio_balloon *vb) > } > > #ifdef CONFIG_BALLOON_COMPACTION > + > /* > * virtballoon_migratepage - perform the balloon page migration on behalf of > * a compation thread. 
(called under page lock) > @@ -464,6 +574,7 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, > { > struct virtio_balloon *vb = container_of(vb_dev_info, > struct virtio_balloon, vb_dev_info); > + bool use_sg = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_SG); > unsigned long flags; > > /* > @@ -485,16 +596,24 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, > vb_dev_info->isolated_pages--; > __count_vm_event(BALLOON_MIGRATE); > spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); > - vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; > - set_page_pfns(vb, vb->pfns, newpage); > - tell_host(vb, vb->inflate_vq); > - > + if (use_sg) { > + send_balloon_page_sg(vb, vb->inflate_vq, page_address(newpage), > + PAGE_SIZE); > + } else { > + vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; > + set_page_pfns(vb, vb->pfns, newpage); > + tell_host(vb, vb->inflate_vq); > + } > /* balloon's page migration 2nd step -- deflate "page" */ > balloon_page_delete(page); > - vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; > - set_page_pfns(vb, vb->pfns, page); > - tell_host(vb, vb->deflate_vq); > - > + if (use_sg) { > + send_balloon_page_sg(vb, vb->deflate_vq, page_address(page), > + PAGE_SIZE); > + } else { > + vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; > + set_page_pfns(vb, vb->pfns, page); > + tell_host(vb, vb->deflate_vq); > + } > mutex_unlock(&vb->balloon_lock); > > put_page(page); /* balloon reference */ > @@ -553,6 +672,9 @@ static int virtballoon_probe(struct virtio_device *vdev) > if (err) > goto out_free_vb; > > + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_SG)) > + xb_init(&vb->page_xb); > + > vb->nb.notifier_call = virtballoon_oom_notify; > vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY; > err = register_oom_notifier(&vb->nb); > @@ -669,6 +791,7 @@ static unsigned int features[] = { > VIRTIO_BALLOON_F_MUST_TELL_HOST, > VIRTIO_BALLOON_F_STATS_VQ, > VIRTIO_BALLOON_F_DEFLATE_ON_OOM, > + VIRTIO_BALLOON_F_SG, > }; > > static 
struct virtio_driver virtio_balloon_driver = { > diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h > index 343d7dd..37780a7 100644 > --- a/include/uapi/linux/virtio_balloon.h > +++ b/include/uapi/linux/virtio_balloon.h > @@ -34,6 +34,7 @@ > #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ > #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ > #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ > +#define VIRTIO_BALLOON_F_SG 3 /* Use sg instead of PFN lists */ > > /* Size of a PFN in the balloon interface. */ > #define VIRTIO_BALLOON_PFN_SHIFT 12 > -- > 2.7.4
On 08/18/2017 10:22 AM, Michael S. Tsirkin wrote: > +static void send_balloon_page_sg(struct virtio_balloon *vb, > + struct virtqueue *vq, > + void *addr, > + uint32_t size) > +{ > + unsigned int len; > + int ret; > + > + do { > + ret = add_one_sg(vq, addr, size); > + virtqueue_kick(vq); > + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); > + /* > + * It is uncommon to see the vq is full, because the sg is sent > + * one by one and the device is able to handle it in time. But > + * if that happens, we go back to retry after an entry gets > + * released. > + */ > Why send one by one though? Why not batch some s/gs and wait for all > of them to be completed? If memory is fragmented, waiting every time is > worse than what we have now (VIRTIO_BALLOON_ARRAY_PFNS_MAX at a time). > OK, I'll do batching in some fashion. Best, Wei
Hi Wei, [auto build test ERROR on linus/master] [also build test ERROR on v4.13-rc5 next-20170817] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Wei-Wang/lib-xbitmap-Introduce-xbitmap/20170820-035516 config: xtensa-allmodconfig (attached as .config) compiler: xtensa-linux-gcc (GCC) 4.9.0 reproduce: wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree make.cross ARCH=xtensa All errors (new ones prefixed by >>): drivers/virtio/virtio_balloon.c: In function 'tell_host_sgs': >> drivers/virtio/virtio_balloon.c:203:3: error: implicit declaration of function 'pfn_to_kaddr' [-Werror=implicit-function-declaration] sg_addr = (void *)pfn_to_kaddr(sg_pfn_start); ^ cc1: some warnings being treated as errors vim +/pfn_to_kaddr +203 drivers/virtio/virtio_balloon.c 176 177 /* 178 * Send balloon pages in sgs to host. The balloon pages are recorded in the 179 * page xbitmap. Each bit in the bitmap corresponds to a page of PAGE_SIZE. 180 * The page xbitmap is searched for continuous "1" bits, which correspond 181 * to continuous pages, to chunk into sgs. 182 * 183 * @page_xb_start and @page_xb_end form the range of bits in the xbitmap that 184 * need to be searched. 
185 */ 186 static void tell_host_sgs(struct virtio_balloon *vb, 187 struct virtqueue *vq, 188 unsigned long page_xb_start, 189 unsigned long page_xb_end) 190 { 191 unsigned long sg_pfn_start, sg_pfn_end; 192 void *sg_addr; 193 uint32_t sg_len, sg_max_len = round_down(UINT_MAX, PAGE_SIZE); 194 195 sg_pfn_start = page_xb_start; 196 while (sg_pfn_start < page_xb_end) { 197 sg_pfn_start = xb_find_next_bit(&vb->page_xb, sg_pfn_start, 198 page_xb_end, 1); 199 if (sg_pfn_start == page_xb_end + 1) 200 break; 201 sg_pfn_end = xb_find_next_bit(&vb->page_xb, sg_pfn_start + 1, 202 page_xb_end, 0); > 203 sg_addr = (void *)pfn_to_kaddr(sg_pfn_start); 204 sg_len = (sg_pfn_end - sg_pfn_start) << PAGE_SHIFT; 205 while (sg_len > sg_max_len) { 206 send_balloon_page_sg(vb, vq, sg_addr, sg_max_len); 207 sg_addr += sg_max_len; 208 sg_len -= sg_max_len; 209 } 210 send_balloon_page_sg(vb, vq, sg_addr, sg_len); 211 xb_zero(&vb->page_xb, sg_pfn_start, sg_pfn_end); 212 sg_pfn_start = sg_pfn_end + 1; 213 } 214 } 215 --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
On Fri, Aug 18, 2017 at 03:39:27PM +0800, Wei Wang wrote: > On 08/18/2017 10:22 AM, Michael S. Tsirkin wrote: > > +static void send_balloon_page_sg(struct virtio_balloon *vb, > > + struct virtqueue *vq, > > + void *addr, > > + uint32_t size) > > +{ > > + unsigned int len; > > + int ret; > > + > > + do { > > + ret = add_one_sg(vq, addr, size); > > + virtqueue_kick(vq); > > + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); > > + /* > > + * It is uncommon to see the vq is full, because the sg is sent > > + * one by one and the device is able to handle it in time. But > > + * if that happens, we go back to retry after an entry gets > > + * released. > > + */ > > Why send one by one though? Why not batch some s/gs and wait for all > > of them to be completed? If memory is fragmented, waiting every time is > > worse than what we have now (VIRTIO_BALLOON_ARRAY_PFNS_MAX at a time). > > > > OK, I'll do batching in some fashion. > > > Best, > Wei > > btw you need to address the build errors that kbot has found.
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index f0b3a0b..72041b4 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -32,6 +32,7 @@ #include <linux/mm.h> #include <linux/mount.h> #include <linux/magic.h> +#include <linux/xbitmap.h> /* * Balloon device works in 4K page units. So each page is pointed to by @@ -79,6 +80,9 @@ struct virtio_balloon { /* Synchronize access/update to this struct virtio_balloon elements */ struct mutex balloon_lock; + /* The xbitmap used to record ballooned pages */ + struct xb page_xb; + /* The array of pfns we tell the Host about. */ unsigned int num_pfns; __virtio32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX]; @@ -141,13 +145,98 @@ static void set_page_pfns(struct virtio_balloon *vb, page_to_balloon_pfn(page) + i); } +static int add_one_sg(struct virtqueue *vq, void *addr, uint32_t size) +{ + struct scatterlist sg; + + sg_init_one(&sg, addr, size); + return virtqueue_add_inbuf(vq, &sg, 1, vq, GFP_KERNEL); +} + +static void send_balloon_page_sg(struct virtio_balloon *vb, + struct virtqueue *vq, + void *addr, + uint32_t size) +{ + unsigned int len; + int ret; + + do { + ret = add_one_sg(vq, addr, size); + virtqueue_kick(vq); + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); + /* + * It is uncommon to see the vq is full, because the sg is sent + * one by one and the device is able to handle it in time. But + * if that happens, we go back to retry after an entry gets + * released. + */ + } while (unlikely(ret == -ENOSPC)); +} + +/* + * Send balloon pages in sgs to host. The balloon pages are recorded in the + * page xbitmap. Each bit in the bitmap corresponds to a page of PAGE_SIZE. + * The page xbitmap is searched for continuous "1" bits, which correspond + * to continuous pages, to chunk into sgs. + * + * @page_xb_start and @page_xb_end form the range of bits in the xbitmap that + * need to be searched. 
+ */ +static void tell_host_sgs(struct virtio_balloon *vb, + struct virtqueue *vq, + unsigned long page_xb_start, + unsigned long page_xb_end) +{ + unsigned long sg_pfn_start, sg_pfn_end; + void *sg_addr; + uint32_t sg_len, sg_max_len = round_down(UINT_MAX, PAGE_SIZE); + + sg_pfn_start = page_xb_start; + while (sg_pfn_start < page_xb_end) { + sg_pfn_start = xb_find_next_bit(&vb->page_xb, sg_pfn_start, + page_xb_end, 1); + if (sg_pfn_start == page_xb_end + 1) + break; + sg_pfn_end = xb_find_next_bit(&vb->page_xb, sg_pfn_start + 1, + page_xb_end, 0); + sg_addr = (void *)pfn_to_kaddr(sg_pfn_start); + sg_len = (sg_pfn_end - sg_pfn_start) << PAGE_SHIFT; + while (sg_len > sg_max_len) { + send_balloon_page_sg(vb, vq, sg_addr, sg_max_len); + sg_addr += sg_max_len; + sg_len -= sg_max_len; + } + send_balloon_page_sg(vb, vq, sg_addr, sg_len); + xb_zero(&vb->page_xb, sg_pfn_start, sg_pfn_end); + sg_pfn_start = sg_pfn_end + 1; + } +} + +static inline void xb_set_page(struct virtio_balloon *vb, + struct page *page, + unsigned long *pfn_min, + unsigned long *pfn_max) +{ + unsigned long pfn = page_to_pfn(page); + + *pfn_min = min(pfn, *pfn_min); + *pfn_max = max(pfn, *pfn_max); + xb_preload(GFP_KERNEL); + xb_set_bit(&vb->page_xb, pfn); + xb_preload_end(); +} + static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) { struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; unsigned num_allocated_pages; + bool use_sg = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_SG); + unsigned long pfn_max = 0, pfn_min = ULONG_MAX; /* We can only do one array worth at a time. 
*/ - num = min(num, ARRAY_SIZE(vb->pfns)); + if (!use_sg) + num = min(num, ARRAY_SIZE(vb->pfns)); mutex_lock(&vb->balloon_lock); for (vb->num_pfns = 0; vb->num_pfns < num; @@ -162,7 +251,12 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) msleep(200); break; } - set_page_pfns(vb, vb->pfns + vb->num_pfns, page); + + if (use_sg) + xb_set_page(vb, page, &pfn_min, &pfn_max); + else + set_page_pfns(vb, vb->pfns + vb->num_pfns, page); + vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) @@ -171,8 +265,12 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) num_allocated_pages = vb->num_pfns; /* Did we get any? */ - if (vb->num_pfns != 0) - tell_host(vb, vb->inflate_vq); + if (vb->num_pfns) { + if (use_sg) + tell_host_sgs(vb, vb->inflate_vq, pfn_min, pfn_max); + else + tell_host(vb, vb->inflate_vq); + } mutex_unlock(&vb->balloon_lock); return num_allocated_pages; @@ -198,9 +296,12 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) struct page *page; struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; LIST_HEAD(pages); + bool use_sg = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_SG); + unsigned long pfn_max = 0, pfn_min = ULONG_MAX; - /* We can only do one array worth at a time. */ - num = min(num, ARRAY_SIZE(vb->pfns)); + /* Traditionally, we can only do one array worth at a time. 
*/ + if (!use_sg) + num = min(num, ARRAY_SIZE(vb->pfns)); mutex_lock(&vb->balloon_lock); /* We can't release more pages than taken */ @@ -210,7 +311,11 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) page = balloon_page_dequeue(vb_dev_info); if (!page) break; - set_page_pfns(vb, vb->pfns + vb->num_pfns, page); + if (use_sg) + xb_set_page(vb, page, &pfn_min, &pfn_max); + else + set_page_pfns(vb, vb->pfns + vb->num_pfns, page); + list_add(&page->lru, &pages); vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE; } @@ -221,8 +326,12 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); * is true, we *have* to do it in this order */ - if (vb->num_pfns != 0) - tell_host(vb, vb->deflate_vq); + if (vb->num_pfns) { + if (use_sg) + tell_host_sgs(vb, vb->deflate_vq, pfn_min, pfn_max); + else + tell_host(vb, vb->deflate_vq); + } release_pages_balloon(vb, &pages); mutex_unlock(&vb->balloon_lock); return num_freed_pages; @@ -441,6 +550,7 @@ static int init_vqs(struct virtio_balloon *vb) } #ifdef CONFIG_BALLOON_COMPACTION + /* * virtballoon_migratepage - perform the balloon page migration on behalf of * a compation thread. 
(called under page lock) @@ -464,6 +574,7 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, { struct virtio_balloon *vb = container_of(vb_dev_info, struct virtio_balloon, vb_dev_info); + bool use_sg = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_SG); unsigned long flags; /* @@ -485,16 +596,24 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, vb_dev_info->isolated_pages--; __count_vm_event(BALLOON_MIGRATE); spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); - vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; - set_page_pfns(vb, vb->pfns, newpage); - tell_host(vb, vb->inflate_vq); - + if (use_sg) { + send_balloon_page_sg(vb, vb->inflate_vq, page_address(newpage), + PAGE_SIZE); + } else { + vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; + set_page_pfns(vb, vb->pfns, newpage); + tell_host(vb, vb->inflate_vq); + } /* balloon's page migration 2nd step -- deflate "page" */ balloon_page_delete(page); - vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; - set_page_pfns(vb, vb->pfns, page); - tell_host(vb, vb->deflate_vq); - + if (use_sg) { + send_balloon_page_sg(vb, vb->deflate_vq, page_address(page), + PAGE_SIZE); + } else { + vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; + set_page_pfns(vb, vb->pfns, page); + tell_host(vb, vb->deflate_vq); + } mutex_unlock(&vb->balloon_lock); put_page(page); /* balloon reference */ @@ -553,6 +672,9 @@ static int virtballoon_probe(struct virtio_device *vdev) if (err) goto out_free_vb; + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_SG)) + xb_init(&vb->page_xb); + vb->nb.notifier_call = virtballoon_oom_notify; vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY; err = register_oom_notifier(&vb->nb); @@ -669,6 +791,7 @@ static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, + VIRTIO_BALLOON_F_SG, }; static struct virtio_driver virtio_balloon_driver = { diff --git a/include/uapi/linux/virtio_balloon.h 
b/include/uapi/linux/virtio_balloon.h index 343d7dd..37780a7 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -34,6 +34,7 @@ #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ +#define VIRTIO_BALLOON_F_SG 3 /* Use sg instead of PFN lists */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12