| Message ID | 20240529224252.80395-1-dongwon.kim@intel.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | ui/gtk: Wait until the current guest frame is rendered before switching to RUN_STATE_SAVE_VM |
Hi

On Thu, May 30, 2024 at 2:44 AM <dongwon.kim@intel.com> wrote:

> From: Dongwon <dongwon.kim@intel.com>
>
> Make sure rendering of the current frame is finished before switching
> the run state to RUN_STATE_SAVE_VM by waiting for egl-sync object to be
> signaled.

Can you expand on what this solves?

> Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
> Cc: Vivek Kasireddy <vivek.kasireddy@intel.com>
> Signed-off-by: Dongwon Kim <dongwon.kim@intel.com>
> ---
>  ui/egl-helpers.c |  2 --
>  ui/gtk.c         | 19 +++++++++++++++++++
>  2 files changed, 19 insertions(+), 2 deletions(-)
>
> diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c
> index 99b2ebbe23..dafeb36074 100644
> --- a/ui/egl-helpers.c
> +++ b/ui/egl-helpers.c
> @@ -396,8 +396,6 @@ void egl_dmabuf_create_fence(QemuDmaBuf *dmabuf)
>          fence_fd = eglDupNativeFenceFDANDROID(qemu_egl_display,
>                                                sync);
>          qemu_dmabuf_set_fence_fd(dmabuf, fence_fd);
> -        eglDestroySyncKHR(qemu_egl_display, sync);
> -        qemu_dmabuf_set_sync(dmabuf, NULL);

If this function is called multiple times, it will now set a new
fence_fd each time, and potentially leak older fd. Maybe it could first
check if a fence_fd exists instead.

>      }
>  }
>
> diff --git a/ui/gtk.c b/ui/gtk.c
> index 93b13b7a30..cf0dd6abed 100644
> --- a/ui/gtk.c
> +++ b/ui/gtk.c
> @@ -600,9 +600,12 @@ void gd_hw_gl_flushed(void *vcon)
>
>      fence_fd = qemu_dmabuf_get_fence_fd(dmabuf);
>      if (fence_fd >= 0) {
> +        void *sync = qemu_dmabuf_get_sync(dmabuf);
>          qemu_set_fd_handler(fence_fd, NULL, NULL, NULL);
>          close(fence_fd);
>          qemu_dmabuf_set_fence_fd(dmabuf, -1);
> +        eglDestroySyncKHR(qemu_egl_display, sync);
> +        qemu_dmabuf_set_sync(dmabuf, NULL);
>          graphic_hw_gl_block(vc->gfx.dcl.con, false);
>      }
>  }
> @@ -682,6 +685,22 @@ static const DisplayGLCtxOps egl_ctx_ops = {
>  static void gd_change_runstate(void *opaque, bool running, RunState state)
>  {
>      GtkDisplayState *s = opaque;
> +    QemuDmaBuf *dmabuf;
> +    int i;
> +
> +    if (state == RUN_STATE_SAVE_VM) {
> +        for (i = 0; i < s->nb_vcs; i++) {
> +            VirtualConsole *vc = &s->vc[i];
> +            dmabuf = vc->gfx.guest_fb.dmabuf;
> +            if (dmabuf && qemu_dmabuf_get_fence_fd(dmabuf) >= 0) {
> +                /* wait for the rendering to be completed */
> +                eglClientWaitSync(qemu_egl_display,
> +                                  qemu_dmabuf_get_sync(dmabuf),
> +                                  EGL_SYNC_FLUSH_COMMANDS_BIT_KHR,
> +                                  1000000000);

I don't think adding waiting points in the migration path is
appropriate. Perhaps once you explain the actual issue, it will be
easier to help.

> +            }
> +        }
> +    }
>
>      gd_update_caption(s);
>  }
> --
> 2.34.1
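A minimal sketch of the suggested check, using only the helpers already visible in this patch (an untested illustration, not the actual fix):

```c
void egl_dmabuf_create_fence(QemuDmaBuf *dmabuf)
{
    void *sync = qemu_dmabuf_get_sync(dmabuf);
    int fence_fd;

    /* A fence fd was already exported for this sync object; exporting
     * another one here would leak the previous fd. */
    if (qemu_dmabuf_get_fence_fd(dmabuf) >= 0) {
        return;
    }

    if (sync) {
        fence_fd = eglDupNativeFenceFDANDROID(qemu_egl_display, sync);
        qemu_dmabuf_set_fence_fd(dmabuf, fence_fd);
    }
}
```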
On 6/4/2024 4:12 AM, Marc-André Lureau wrote:
> Hi
>
> On Thu, May 30, 2024 at 2:44 AM <dongwon.kim@intel.com> wrote:
>
>> Make sure rendering of the current frame is finished before switching
>> the run state to RUN_STATE_SAVE_VM by waiting for egl-sync object to be
>> signaled.
>
> Can you expand on what this solves?

In the current scheme, the guest waits for the fence to be signaled for
each frame it submits before moving on to the next frame. If the guest's
state is saved while it is still waiting for the fence, then on restore
the guest will keep waiting for a fence that was already signaled in the
original instance. One way to prevent this is to have the guest finish
the current frame before the run state changes.

> If this function is called multiple times, it will now set a new
> fence_fd each time, and potentially leak older fd. Maybe it could first
> check if a fence_fd exists instead.

We can make that change.

> I don't think adding waiting points in the migration path is
> appropriate. Perhaps once you explain the actual issue, it will be
> easier to help.

[...]
Hi

On Tue, Jun 4, 2024 at 9:49 PM Kim, Dongwon <dongwon.kim@intel.com> wrote:

>> Can you expand on what this solves?
>
> In the current scheme, the guest waits for the fence to be signaled for
> each frame it submits before moving on to the next frame. If the guest's
> state is saved while it is still waiting for the fence, then on restore
> the guest will keep waiting for a fence that was already signaled in the
> original instance. One way to prevent this is to have the guest finish
> the current frame before the run state changes.

After the UI sets a fence, hw_ops->gl_block(true) gets called, which will
block virtio-gpu/virgl from processing commands (until the fence is
signaled and gl_block(false) is called again).

But this "blocking" state is not saved. So how does this affect
save/restore? Please give more details, thanks.

[...]
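For context, the block/unblock mechanism described above looks roughly like this on the virtio-gpu side (a simplified sketch from memory, not the verbatim QEMU code):

```c
/* Sketch: the UI raises the block after setting a fence; guest command
 * processing resumes only once the fence is signaled and the block is
 * dropped. renderer_blocked is the counter the device keeps for this. */
static void virtio_gpu_gl_block(void *opaque, bool block)
{
    VirtIOGPU *g = opaque;

    if (block) {
        g->parent_obj.renderer_blocked++;   /* fence set by the UI */
    } else {
        g->parent_obj.renderer_blocked--;   /* fence signaled */
    }

    if (g->parent_obj.renderer_blocked == 0) {
        /* resume the command queue; pending responses (e.g. for
         * RESOURCE_FLUSH) go out to the guest from here */
        virtio_gpu_process_cmdq(g);
    }
}
```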
Hi,

From: Marc-André Lureau <marcandre.lureau@gmail.com>
Sent: Wednesday, June 5, 2024 12:56 AM
To: Kim, Dongwon <dongwon.kim@intel.com>
Cc: qemu-devel@nongnu.org; Peter Xu <peterx@redhat.com>
Subject: Re: [PATCH] ui/gtk: Wait until the current guest frame is rendered before switching to RUN_STATE_SAVE_VM

> After the UI sets a fence, hw_ops->gl_block(true) gets called, which will
> block virtio-gpu/virgl from processing commands (until the fence is
> signaled and gl_block(false) is called again).
>
> But this "blocking" state is not saved. So how does this affect
> save/restore? Please give more details, thanks.

Yeah sure. The "blocking" state is not saved, but the guest's state is
saved while it is still waiting for the response to its last
resource-flush virtio message. That virtio response, by the way, is only
sent to the guest when the pipeline is unblocked (i.e. when the fence is
signaled). Once the guest's state is saved, the current instance of the
guest continues and receives the response as usual. The problem shows up
when we restore the saved guest state, because the restored guest is
still waiting for a response that was delivered, a while ago, to the
original instance.

[...]
Hi

On Wed, Jun 12, 2024 at 5:29 AM Kim, Dongwon <dongwon.kim@intel.com> wrote:

> Yeah sure. The "blocking" state is not saved, but the guest's state is
> saved while it is still waiting for the response to its last
> resource-flush virtio message. That virtio response, by the way, is only
> sent to the guest when the pipeline is unblocked (i.e. when the fence is
> signaled). Once the guest's state is saved, the current instance of the
> guest continues and receives the response as usual. The problem shows up
> when we restore the saved guest state, because the restored guest is
> still waiting for a response that was delivered, a while ago, to the
> original instance.

Where is the pending response saved? Can you detail how you test this?

thanks
On 6/11/2024 10:44 PM, Marc-André Lureau wrote:
> Where is the pending response saved? Can you detail how you test this?

There is no pending response at the guest's restored point, which is
exactly the problem. The response is sent out after the saving is done.

Normal cycle:

resource-flush (scanout flush) -> gl block -> render -> gl unblock
(after the fence is signaled) -> pending response sent out to the guest
-> guest (virtio-gpu drv) processes the next scanout frame -> (next
cycle) resource-flush -> gl block ......

When the VM state is saved in the middle:

resource-flush (scanout flush) -> gl block -> saving vm-state -> render
-> gl unblock -> pending response (resp #1) sent out to the guest ->
guest (virtio-gpu drv) processes the next scanout frame -> (next cycle)
resource-flush -> gl block ......

Now, we restore the vm-state we saved:

vm-state is restored -> guest (virtio-gpu drv) can't move on, as this
state is still waiting for the response (resp #1)

So we need to make sure the vm-state is saved after the cycle is
completed.

This situation only happens if you use blob=true with the virtio-gpu
driver as KMS on a Linux guest. Do you have any similar setup?
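To illustrate where that pending response lives: while the console is blocked, the flush command, and therefore its response, sits only in a host-memory queue, which is not part of the migrated guest state. A simplified sketch of the host-side processing loop (not the verbatim QEMU code):

```c
/* Sketch of virtio-gpu command processing: when the UI holds the block,
 * the RESOURCE_FLUSH command stays queued in host memory only, so the
 * saved guest state contains no trace of the response it is owed. */
void virtio_gpu_process_cmdq(VirtIOGPU *g)
{
    struct virtio_gpu_ctrl_command *cmd;

    while (!QTAILQ_EMPTY(&g->cmdq)) {
        if (g->parent_obj.renderer_blocked) {
            break;                      /* gl_block(true): stop here */
        }
        cmd = QTAILQ_FIRST(&g->cmdq);
        QTAILQ_REMOVE(&g->cmdq, cmd, next);
        /* processing the command eventually queues the response that
         * lets the guest driver move to its next scanout frame */
        virtio_gpu_simple_process_cmd(g, cmd);
    }
}
```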
Hi

On Wed, Jun 12, 2024 at 10:50 PM Kim, Dongwon <dongwon.kim@intel.com> wrote:

> There is no pending response at the guest's restored point, which is
> exactly the problem. The response is sent out after the saving is done.
>
> [...]
>
> Now, we restore the vm-state we saved:
>
> vm-state is restored -> guest (virtio-gpu drv) can't move on, as this
> state is still waiting for the response (resp #1)

Ok, so actually it's more of a device state issue than a UI/GTK one. We
end up not saving a state that reflects the guest state. My understanding
is that the guest is waiting for a fence reply, and we don't save that.

Imho, a better fix would be to either save the fenceq (but then, what
else is missing to complete the operation on resume?), or have a wait to
delay the migration until the fences are flushed.

> So we need to make sure the vm-state is saved after the cycle is
> completed.
>
> This situation only happens if you use blob=true with the virtio-gpu
> driver as KMS on a Linux guest. Do you have any similar setup?

No, further details to reproduce would help. Even better would be having
some automated test.
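To make the second option concrete, one possible shape is a pre_save hook on the device that drains outstanding fences before the state is serialized. This is a purely hypothetical sketch (virtio_gpu_pre_save_wait_fences does not exist in QEMU, and whether pre_save may safely spin the event loop would need checking):

```c
/* Hypothetical pre_save hook for virtio-gpu: wait for all outstanding
 * fences so the saved state never contains a guest blocked on a
 * response that was never sent. */
static int virtio_gpu_pre_save_wait_fences(void *opaque)
{
    VirtIOGPU *g = opaque;

    /* Run the event loop until the renderer signals the fences and the
     * queued responses have gone out to the guest. */
    while (!QTAILQ_EMPTY(&g->fenceq)) {
        aio_poll(qemu_get_aio_context(), true);
    }
    return 0;
}
```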
Hi Marc-André,

On 6/13/2024 6:16 AM, Marc-André Lureau wrote:
> Ok, so actually it's more of a device state issue than a UI/GTK one. We
> end up not saving a state that reflects the guest state. My
> understanding is that the guest is waiting for a fence reply, and we
> don't save that.
>
> Imho, a better fix would be to either save the fenceq (but then, what
> else is missing to complete the operation on resume?), or have a wait
> to delay the migration until the fences are flushed.

Regarding the second method you are proposing here, 'have a wait': I
understand you mean delaying the start point of migration, but don't you
think the current patch is basically doing a similar thing? Assuming an
EGL wait sync is what we need to use for the wait, do you have any
suggestion where it should be called other than 'gd_change_runstate'?

> No, further details to reproduce would help. Even better would be
> having some automated test.

I will think about this. We use a GPU shared via SR-IOV as the GPU
device and virtio-gpu as the display device on the guest. I think I need
to find and test more general cases.
Hi

On Thu, Jun 13, 2024 at 9:27 PM Kim, Dongwon <dongwon.kim@intel.com> wrote:

> Regarding the second method you are proposing here, 'have a wait': I
> understand you mean delaying the start point of migration, but don't
> you think the current patch is basically doing a similar thing?
> Assuming an EGL wait sync is what we need to use for the wait, do you
> have any suggestion where it should be called other than
> 'gd_change_runstate'?

It should be handled on the virtio-gpu side. I am not sure if the
runstate handler or pre_save is the right place.

Peter, what is the correct way to delay migration until the host
finishes some work? (in this case, we would need to wait for the
rendering/UI to signal pending fences)

thanks
```diff
diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c
index 99b2ebbe23..dafeb36074 100644
--- a/ui/egl-helpers.c
+++ b/ui/egl-helpers.c
@@ -396,8 +396,6 @@ void egl_dmabuf_create_fence(QemuDmaBuf *dmabuf)
         fence_fd = eglDupNativeFenceFDANDROID(qemu_egl_display,
                                               sync);
         qemu_dmabuf_set_fence_fd(dmabuf, fence_fd);
-        eglDestroySyncKHR(qemu_egl_display, sync);
-        qemu_dmabuf_set_sync(dmabuf, NULL);
     }
 }

diff --git a/ui/gtk.c b/ui/gtk.c
index 93b13b7a30..cf0dd6abed 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -600,9 +600,12 @@ void gd_hw_gl_flushed(void *vcon)

     fence_fd = qemu_dmabuf_get_fence_fd(dmabuf);
     if (fence_fd >= 0) {
+        void *sync = qemu_dmabuf_get_sync(dmabuf);
         qemu_set_fd_handler(fence_fd, NULL, NULL, NULL);
         close(fence_fd);
         qemu_dmabuf_set_fence_fd(dmabuf, -1);
+        eglDestroySyncKHR(qemu_egl_display, sync);
+        qemu_dmabuf_set_sync(dmabuf, NULL);
         graphic_hw_gl_block(vc->gfx.dcl.con, false);
     }
 }
@@ -682,6 +685,22 @@ static const DisplayGLCtxOps egl_ctx_ops = {
 static void gd_change_runstate(void *opaque, bool running, RunState state)
 {
     GtkDisplayState *s = opaque;
+    QemuDmaBuf *dmabuf;
+    int i;
+
+    if (state == RUN_STATE_SAVE_VM) {
+        for (i = 0; i < s->nb_vcs; i++) {
+            VirtualConsole *vc = &s->vc[i];
+            dmabuf = vc->gfx.guest_fb.dmabuf;
+            if (dmabuf && qemu_dmabuf_get_fence_fd(dmabuf) >= 0) {
+                /* wait for the rendering to be completed */
+                eglClientWaitSync(qemu_egl_display,
+                                  qemu_dmabuf_get_sync(dmabuf),
+                                  EGL_SYNC_FLUSH_COMMANDS_BIT_KHR,
+                                  1000000000);
+            }
+        }
+    }

     gd_update_caption(s);
 }
```
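One detail worth noting about the wait the patch adds: eglClientWaitSync reports timeouts through its return value, which the patch ignores. A sketch of a variant that checks it (warn_report is QEMU's standard warning helper; the message text is invented):

```c
EGLint ret = eglClientWaitSync(qemu_egl_display,
                               qemu_dmabuf_get_sync(dmabuf),
                               EGL_SYNC_FLUSH_COMMANDS_BIT_KHR,
                               1000000000); /* timeout: 1s, in ns */
if (ret != EGL_CONDITION_SATISFIED_KHR) {
    /* EGL_TIMEOUT_EXPIRED_KHR or EGL_FALSE: rendering may still be in
     * flight, so saving now could reintroduce the stale-fence problem */
    warn_report("gd_change_runstate: wait for render fence timed out");
}
```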