diff mbox series

[QEMU,1/1] virtgpu: do not destroy resources when guest suspend

Message ID 20230608025655.1674357-2-Jiqian.Chen@amd.com (mailing list archive)
State New, archived
Headers show
Series | expand

Commit Message

Chen, Jiqian June 8, 2023, 2:56 a.m. UTC
After suspending and resuming guest VM, you will get
a black screen, and the display can't come back.

This is because when guest did suspending, it called
into qemu to call virtio_gpu_gl_reset. In function
virtio_gpu_gl_reset, it destroyed resources and reset
renderer, which were used for display. As a result,
guest's screen can't come back to the time when it was
suspended and only showed black.

So, this patch adds a new ctrl message
VIRTIO_GPU_CMD_STATUS_FREEZING to get notification from
guest. If guest is during suspending, it sets freezing
status of virtgpu to true, this will prevent destroying
resources and resetting renderer when guest calls into
virtio_gpu_gl_reset. If guest is during resuming, it sets
freezing to false, and then virtio_gpu_gl_reset will keep
its origin actions and has no other impaction.

Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
---
 hw/display/virtio-gpu-gl.c                  |  9 ++++++-
 hw/display/virtio-gpu-virgl.c               |  3 +++
 hw/display/virtio-gpu.c                     | 26 +++++++++++++++++++--
 include/hw/virtio/virtio-gpu.h              |  3 +++
 include/standard-headers/linux/virtio_gpu.h |  9 +++++++
 5 files changed, 47 insertions(+), 3 deletions(-)

Comments

Bob Beckett June 8, 2023, 4:41 p.m. UTC | #1
On 08/06/2023 03:56, Jiqian Chen wrote:
> After suspending and resuming guest VM, you will get
> a black screen, and the display can't come back.
>
> This is because when guest did suspending, it called
> into qemu to call virtio_gpu_gl_reset. In function
> virtio_gpu_gl_reset, it destroyed resources and reset
> renderer, which were used for display. As a result,
> guest's screen can't come back to the time when it was
> suspended and only showed black.
>
> So, this patch adds a new ctrl message
> VIRTIO_GPU_CMD_STATUS_FREEZING to get notification from
> guest. If guest is during suspending, it sets freezing
> status of virtgpu to true, this will prevent destroying
> resources and resetting renderer when guest calls into
> virtio_gpu_gl_reset. If guest is during resuming, it sets
> freezing to false, and then virtio_gpu_gl_reset will keep
> its origin actions and has no other impaction.
>
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
> ---
>   hw/display/virtio-gpu-gl.c                  |  9 ++++++-
>   hw/display/virtio-gpu-virgl.c               |  3 +++
>   hw/display/virtio-gpu.c                     | 26 +++++++++++++++++++--
>   include/hw/virtio/virtio-gpu.h              |  3 +++
>   include/standard-headers/linux/virtio_gpu.h |  9 +++++++
>   5 files changed, 47 insertions(+), 3 deletions(-)
>
> diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
> index e06be60dfb..e11ad233eb 100644
> --- a/hw/display/virtio-gpu-gl.c
> +++ b/hw/display/virtio-gpu-gl.c
> @@ -100,7 +100,14 @@ static void virtio_gpu_gl_reset(VirtIODevice *vdev)
>        */
>       if (gl->renderer_inited && !gl->renderer_reset) {
>           virtio_gpu_virgl_reset_scanout(g);
> -        gl->renderer_reset = true;
> +        /*
> +         * If guest is suspending, we shouldn't reset renderer,
> +         * otherwise, the display can't come back to the time when
> +         * it was suspended after guest resumed.
> +         */
> +        if (!g->freezing) {
> +            gl->renderer_reset = true;
> +        }
>       }
>   }
>   
> diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
> index 73cb92c8d5..183ec92d53 100644
> --- a/hw/display/virtio-gpu-virgl.c
> +++ b/hw/display/virtio-gpu-virgl.c
> @@ -464,6 +464,9 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
>       case VIRTIO_GPU_CMD_GET_EDID:
>           virtio_gpu_get_edid(g, cmd);
>           break;
> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
> +        virtio_gpu_cmd_status_freezing(g, cmd);
> +        break;
>       default:
>           cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>           break;
> diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
> index 5e15c79b94..8f235d7848 100644
> --- a/hw/display/virtio-gpu.c
> +++ b/hw/display/virtio-gpu.c
> @@ -373,6 +373,16 @@ static void virtio_gpu_resource_create_blob(VirtIOGPU *g,
>       QTAILQ_INSERT_HEAD(&g->reslist, res, next);
>   }
>   
> +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
> +                         struct virtio_gpu_ctrl_command *cmd)
> +{
> +    struct virtio_gpu_status_freezing sf;
> +
> +    VIRTIO_GPU_FILL_CMD(sf);
> +    virtio_gpu_bswap_32(&sf, sizeof(sf));
> +    g->freezing = sf.freezing;
> +}
> +
>   static void virtio_gpu_disable_scanout(VirtIOGPU *g, int scanout_id)
>   {
>       struct virtio_gpu_scanout *scanout = &g->parent_obj.scanout[scanout_id];
> @@ -986,6 +996,9 @@ void virtio_gpu_simple_process_cmd(VirtIOGPU *g,
>       case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
>           virtio_gpu_resource_detach_backing(g, cmd);
>           break;
> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
> +        virtio_gpu_cmd_status_freezing(g, cmd);
> +        break;
>       default:
>           cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>           break;
> @@ -1344,6 +1357,8 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
>       QTAILQ_INIT(&g->reslist);
>       QTAILQ_INIT(&g->cmdq);
>       QTAILQ_INIT(&g->fenceq);
> +
> +    g->freezing = false;
>   }
>   
>   void virtio_gpu_reset(VirtIODevice *vdev)
> @@ -1352,8 +1367,15 @@ void virtio_gpu_reset(VirtIODevice *vdev)
>       struct virtio_gpu_simple_resource *res, *tmp;
>       struct virtio_gpu_ctrl_command *cmd;
>   
> -    QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
> -        virtio_gpu_resource_destroy(g, res);
> +    /*
> +     * If guest is suspending, we shouldn't destroy resources,
> +     * otherwise, the display can't come back to the time when
> +     * it was suspended after guest resumed.
> +     */


What should happen if qemu is torn down while the guest is suspended.
Currently there is no other place where the resources will be destroyed. 
While it is likely viable to rely on process auto tear down of mem and 
files from the host cleanup point of view, it feels bad to rely on that.
Perhaps an inverted conditional with destroy loop in 
virtio_gpu_device_unrealize() would suffice?

I am not a qemu expert, but I am assuming that the unrealize call will 
be called during machine destruction if the guest is suspended.
Also if qemu supports (or intends to support in future) hotplugging of 
the device, the would help avoid leaks until qemu exit too.


> +    if (!g->freezing) {
> +        QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
> +            virtio_gpu_resource_destroy(g, res);
> +        }
>       }
>   
>       while (!QTAILQ_EMPTY(&g->cmdq)) {
> diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
> index 2e28507efe..c21c2990fb 100644
> --- a/include/hw/virtio/virtio-gpu.h
> +++ b/include/hw/virtio/virtio-gpu.h
> @@ -173,6 +173,7 @@ struct VirtIOGPU {
>   
>       uint64_t hostmem;
>   
> +    bool freezing;
>       bool processing_cmdq;
>       QEMUTimer *fence_poll;
>       QEMUTimer *print_stats;
> @@ -284,5 +285,7 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g);
>   void virtio_gpu_virgl_reset(VirtIOGPU *g);
>   int virtio_gpu_virgl_init(VirtIOGPU *g);
>   int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g);
> +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
> +                         struct virtio_gpu_ctrl_command *cmd);
>   
>   #endif
> diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h
> index 2da48d3d4c..aefffbd751 100644
> --- a/include/standard-headers/linux/virtio_gpu.h
> +++ b/include/standard-headers/linux/virtio_gpu.h
> @@ -116,6 +116,9 @@ enum virtio_gpu_ctrl_type {
>   	VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID,
>   	VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID,
>   	VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER,
> +
> +	/* status */
> +	VIRTIO_GPU_CMD_STATUS_FREEZING = 0x1300,
>   };
>   
>   enum virtio_gpu_shm_id {
> @@ -453,4 +456,10 @@ struct virtio_gpu_resource_unmap_blob {
>   	uint32_t padding;
>   };
>   
> +/* VIRTIO_GPU_CMD_STATUS_FREEZING */
> +struct virtio_gpu_status_freezing {
> +	struct virtio_gpu_ctrl_hdr hdr;
> +	__u32 freezing;
> +};
> +
>   #endif
Chen, Jiqian June 9, 2023, 3:43 a.m. UTC | #2
On 2023/6/9 00:41, Robert Beckett wrote:
> 
> On 08/06/2023 03:56, Jiqian Chen wrote:
>> After suspending and resuming guest VM, you will get
>> a black screen, and the display can't come back.
>>
>> This is because when guest did suspending, it called
>> into qemu to call virtio_gpu_gl_reset. In function
>> virtio_gpu_gl_reset, it destroyed resources and reset
>> renderer, which were used for display. As a result,
>> guest's screen can't come back to the time when it was
>> suspended and only showed black.
>>
>> So, this patch adds a new ctrl message
>> VIRTIO_GPU_CMD_STATUS_FREEZING to get notification from
>> guest. If guest is during suspending, it sets freezing
>> status of virtgpu to true, this will prevent destroying
>> resources and resetting renderer when guest calls into
>> virtio_gpu_gl_reset. If guest is during resuming, it sets
>> freezing to false, and then virtio_gpu_gl_reset will keep
>> its origin actions and has no other impaction.
>>
>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
>> ---
>>   hw/display/virtio-gpu-gl.c                  |  9 ++++++-
>>   hw/display/virtio-gpu-virgl.c               |  3 +++
>>   hw/display/virtio-gpu.c                     | 26 +++++++++++++++++++--
>>   include/hw/virtio/virtio-gpu.h              |  3 +++
>>   include/standard-headers/linux/virtio_gpu.h |  9 +++++++
>>   5 files changed, 47 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
>> index e06be60dfb..e11ad233eb 100644
>> --- a/hw/display/virtio-gpu-gl.c
>> +++ b/hw/display/virtio-gpu-gl.c
>> @@ -100,7 +100,14 @@ static void virtio_gpu_gl_reset(VirtIODevice *vdev)
>>        */
>>       if (gl->renderer_inited && !gl->renderer_reset) {
>>           virtio_gpu_virgl_reset_scanout(g);
>> -        gl->renderer_reset = true;
>> +        /*
>> +         * If guest is suspending, we shouldn't reset renderer,
>> +         * otherwise, the display can't come back to the time when
>> +         * it was suspended after guest resumed.
>> +         */
>> +        if (!g->freezing) {
>> +            gl->renderer_reset = true;
>> +        }
>>       }
>>   }
>>   diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
>> index 73cb92c8d5..183ec92d53 100644
>> --- a/hw/display/virtio-gpu-virgl.c
>> +++ b/hw/display/virtio-gpu-virgl.c
>> @@ -464,6 +464,9 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
>>       case VIRTIO_GPU_CMD_GET_EDID:
>>           virtio_gpu_get_edid(g, cmd);
>>           break;
>> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
>> +        virtio_gpu_cmd_status_freezing(g, cmd);
>> +        break;
>>       default:
>>           cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>>           break;
>> diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
>> index 5e15c79b94..8f235d7848 100644
>> --- a/hw/display/virtio-gpu.c
>> +++ b/hw/display/virtio-gpu.c
>> @@ -373,6 +373,16 @@ static void virtio_gpu_resource_create_blob(VirtIOGPU *g,
>>       QTAILQ_INSERT_HEAD(&g->reslist, res, next);
>>   }
>>   +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
>> +                         struct virtio_gpu_ctrl_command *cmd)
>> +{
>> +    struct virtio_gpu_status_freezing sf;
>> +
>> +    VIRTIO_GPU_FILL_CMD(sf);
>> +    virtio_gpu_bswap_32(&sf, sizeof(sf));
>> +    g->freezing = sf.freezing;
>> +}
>> +
>>   static void virtio_gpu_disable_scanout(VirtIOGPU *g, int scanout_id)
>>   {
>>       struct virtio_gpu_scanout *scanout = &g->parent_obj.scanout[scanout_id];
>> @@ -986,6 +996,9 @@ void virtio_gpu_simple_process_cmd(VirtIOGPU *g,
>>       case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
>>           virtio_gpu_resource_detach_backing(g, cmd);
>>           break;
>> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
>> +        virtio_gpu_cmd_status_freezing(g, cmd);
>> +        break;
>>       default:
>>           cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>>           break;
>> @@ -1344,6 +1357,8 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
>>       QTAILQ_INIT(&g->reslist);
>>       QTAILQ_INIT(&g->cmdq);
>>       QTAILQ_INIT(&g->fenceq);
>> +
>> +    g->freezing = false;
>>   }
>>     void virtio_gpu_reset(VirtIODevice *vdev)
>> @@ -1352,8 +1367,15 @@ void virtio_gpu_reset(VirtIODevice *vdev)
>>       struct virtio_gpu_simple_resource *res, *tmp;
>>       struct virtio_gpu_ctrl_command *cmd;
>>   -    QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
>> -        virtio_gpu_resource_destroy(g, res);
>> +    /*
>> +     * If guest is suspending, we shouldn't destroy resources,
>> +     * otherwise, the display can't come back to the time when
>> +     * it was suspended after guest resumed.
>> +     */
> 
> 
> What should happen if qemu is torn down while the guest is suspended.
> Currently there is no other place where the resources will be destroyed. While it is likely viable to rely on process auto tear down of mem and files from the host cleanup point of view, it feels bad to rely on that.
> Perhaps an inverted conditional with destroy loop in virtio_gpu_device_unrealize() would suffice?
> 
> I am not a qemu expert, but I am assuming that the unrealize call will be called during machine destruction if the guest is suspended.
> Also if qemu supports (or intends to support in future) hotplugging of the device, the would help avoid leaks until qemu exit too.
> 

Hi Bob,

Thank you for reviewing.
I didn't consider that situation before. It seems leaks will happen in that situation. I will try to solve this problem, and then get you a response.

> 
>> +    if (!g->freezing) {
>> +        QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
>> +            virtio_gpu_resource_destroy(g, res);
>> +        }
>>       }
>>         while (!QTAILQ_EMPTY(&g->cmdq)) {
>> diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
>> index 2e28507efe..c21c2990fb 100644
>> --- a/include/hw/virtio/virtio-gpu.h
>> +++ b/include/hw/virtio/virtio-gpu.h
>> @@ -173,6 +173,7 @@ struct VirtIOGPU {
>>         uint64_t hostmem;
>>   +    bool freezing;
>>       bool processing_cmdq;
>>       QEMUTimer *fence_poll;
>>       QEMUTimer *print_stats;
>> @@ -284,5 +285,7 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g);
>>   void virtio_gpu_virgl_reset(VirtIOGPU *g);
>>   int virtio_gpu_virgl_init(VirtIOGPU *g);
>>   int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g);
>> +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
>> +                         struct virtio_gpu_ctrl_command *cmd);
>>     #endif
>> diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h
>> index 2da48d3d4c..aefffbd751 100644
>> --- a/include/standard-headers/linux/virtio_gpu.h
>> +++ b/include/standard-headers/linux/virtio_gpu.h
>> @@ -116,6 +116,9 @@ enum virtio_gpu_ctrl_type {
>>       VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID,
>>       VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID,
>>       VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER,
>> +
>> +    /* status */
>> +    VIRTIO_GPU_CMD_STATUS_FREEZING = 0x1300,
>>   };
>>     enum virtio_gpu_shm_id {
>> @@ -453,4 +456,10 @@ struct virtio_gpu_resource_unmap_blob {
>>       uint32_t padding;
>>   };
>>   +/* VIRTIO_GPU_CMD_STATUS_FREEZING */
>> +struct virtio_gpu_status_freezing {
>> +    struct virtio_gpu_ctrl_hdr hdr;
>> +    __u32 freezing;
>> +};
>> +
>>   #endif
Marc-André Lureau June 12, 2023, 12:42 p.m. UTC | #3
Hi

On Thu, Jun 8, 2023 at 6:26 AM Jiqian Chen <Jiqian.Chen@amd.com> wrote:

> After suspending and resuming guest VM, you will get
> a black screen, and the display can't come back.
>
> This is because when guest did suspending, it called
> into qemu to call virtio_gpu_gl_reset. In function
> virtio_gpu_gl_reset, it destroyed resources and reset
> renderer, which were used for display. As a result,
> guest's screen can't come back to the time when it was
> suspended and only showed black.
>
> So, this patch adds a new ctrl message
> VIRTIO_GPU_CMD_STATUS_FREEZING to get notification from
> guest. If guest is during suspending, it sets freezing
> status of virtgpu to true, this will prevent destroying
> resources and resetting renderer when guest calls into
> virtio_gpu_gl_reset. If guest is during resuming, it sets
> freezing to false, and then virtio_gpu_gl_reset will keep
> its origin actions and has no other impaction.
>
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
> ---
>  hw/display/virtio-gpu-gl.c                  |  9 ++++++-
>  hw/display/virtio-gpu-virgl.c               |  3 +++
>  hw/display/virtio-gpu.c                     | 26 +++++++++++++++++++--
>  include/hw/virtio/virtio-gpu.h              |  3 +++
>  include/standard-headers/linux/virtio_gpu.h |  9 +++++++
>  5 files changed, 47 insertions(+), 3 deletions(-)
>
> diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
> index e06be60dfb..e11ad233eb 100644
> --- a/hw/display/virtio-gpu-gl.c
> +++ b/hw/display/virtio-gpu-gl.c
> @@ -100,7 +100,14 @@ static void virtio_gpu_gl_reset(VirtIODevice *vdev)
>       */
>      if (gl->renderer_inited && !gl->renderer_reset) {
>          virtio_gpu_virgl_reset_scanout(g);
> -        gl->renderer_reset = true;
> +        /*
> +         * If guest is suspending, we shouldn't reset renderer,
> +         * otherwise, the display can't come back to the time when
> +         * it was suspended after guest resumed.
> +         */
> +        if (!g->freezing) {
> +            gl->renderer_reset = true;
> +        }
>      }
>  }
>
> diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
> index 73cb92c8d5..183ec92d53 100644
> --- a/hw/display/virtio-gpu-virgl.c
> +++ b/hw/display/virtio-gpu-virgl.c
> @@ -464,6 +464,9 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
>      case VIRTIO_GPU_CMD_GET_EDID:
>          virtio_gpu_get_edid(g, cmd);
>          break;
> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
> +        virtio_gpu_cmd_status_freezing(g, cmd);
> +        break;
>      default:
>          cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>          break;
> diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
> index 5e15c79b94..8f235d7848 100644
> --- a/hw/display/virtio-gpu.c
> +++ b/hw/display/virtio-gpu.c
> @@ -373,6 +373,16 @@ static void virtio_gpu_resource_create_blob(VirtIOGPU
> *g,
>      QTAILQ_INSERT_HEAD(&g->reslist, res, next);
>  }
>
> +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
> +                         struct virtio_gpu_ctrl_command *cmd)
> +{
> +    struct virtio_gpu_status_freezing sf;
> +
> +    VIRTIO_GPU_FILL_CMD(sf);
> +    virtio_gpu_bswap_32(&sf, sizeof(sf));
> +    g->freezing = sf.freezing;
> +}
> +
>  static void virtio_gpu_disable_scanout(VirtIOGPU *g, int scanout_id)
>  {
>      struct virtio_gpu_scanout *scanout =
> &g->parent_obj.scanout[scanout_id];
> @@ -986,6 +996,9 @@ void virtio_gpu_simple_process_cmd(VirtIOGPU *g,
>      case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
>          virtio_gpu_resource_detach_backing(g, cmd);
>          break;
> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
> +        virtio_gpu_cmd_status_freezing(g, cmd);
> +        break;
>      default:
>          cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>          break;
> @@ -1344,6 +1357,8 @@ void virtio_gpu_device_realize(DeviceState *qdev,
> Error **errp)
>      QTAILQ_INIT(&g->reslist);
>      QTAILQ_INIT(&g->cmdq);
>      QTAILQ_INIT(&g->fenceq);
> +
> +    g->freezing = false;
>  }
>
>  void virtio_gpu_reset(VirtIODevice *vdev)
> @@ -1352,8 +1367,15 @@ void virtio_gpu_reset(VirtIODevice *vdev)
>      struct virtio_gpu_simple_resource *res, *tmp;
>      struct virtio_gpu_ctrl_command *cmd;
>
> -    QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
> -        virtio_gpu_resource_destroy(g, res);
> +    /*
> +     * If guest is suspending, we shouldn't destroy resources,
> +     * otherwise, the display can't come back to the time when
> +     * it was suspended after guest resumed.
> +     */
> +    if (!g->freezing) {
> +        QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
> +            virtio_gpu_resource_destroy(g, res);
> +        }
>      }
>
>      while (!QTAILQ_EMPTY(&g->cmdq)) {
> diff --git a/include/hw/virtio/virtio-gpu.h
> b/include/hw/virtio/virtio-gpu.h
> index 2e28507efe..c21c2990fb 100644
> --- a/include/hw/virtio/virtio-gpu.h
> +++ b/include/hw/virtio/virtio-gpu.h
> @@ -173,6 +173,7 @@ struct VirtIOGPU {
>
>      uint64_t hostmem;
>
> +    bool freezing;
>      bool processing_cmdq;
>      QEMUTimer *fence_poll;
>      QEMUTimer *print_stats;
> @@ -284,5 +285,7 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g);
>  void virtio_gpu_virgl_reset(VirtIOGPU *g);
>  int virtio_gpu_virgl_init(VirtIOGPU *g);
>  int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g);
> +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
> +                         struct virtio_gpu_ctrl_command *cmd);
>
>  #endif
> diff --git a/include/standard-headers/linux/virtio_gpu.h
> b/include/standard-headers/linux/virtio_gpu.h
> index 2da48d3d4c..aefffbd751 100644
> --- a/include/standard-headers/linux/virtio_gpu.h
> +++ b/include/standard-headers/linux/virtio_gpu.h
> @@ -116,6 +116,9 @@ enum virtio_gpu_ctrl_type {
>         VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID,
>         VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID,
>         VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER,
> +
> +       /* status */
> +       VIRTIO_GPU_CMD_STATUS_FREEZING = 0x1300,
>  };
>

Adding a new command requires new feature flag (and maybe it should be in
the <0x1000 range instead)

But I am not sure we need a new message at the virtio-gpu level. Gerd, wdyt?

Maybe it's not a good place to reset all GPU resources during QEMU reset()
after all, if it's called during s3 and there is no mechanism to restore
it. Damien?

thanks


...

>  enum virtio_gpu_shm_id {
> @@ -453,4 +456,10 @@ struct virtio_gpu_resource_unmap_blob {
>         uint32_t padding;
>  };
>
> +/* VIRTIO_GPU_CMD_STATUS_FREEZING */
> +struct virtio_gpu_status_freezing {
> +       struct virtio_gpu_ctrl_hdr hdr;
> +       __u32 freezing;
> +};
> +
>  #endif
> --
> 2.34.1
>
>
>
Chen, Jiqian June 15, 2023, 7:14 a.m. UTC | #4
Hi Marc-André Lureau

On 2023/6/12 20:42, Marc-André Lureau wrote:
> Hi
> 
> On Thu, Jun 8, 2023 at 6:26 AM Jiqian Chen <Jiqian.Chen@amd.com <mailto:Jiqian.Chen@amd.com>> wrote:
> 
>     After suspending and resuming guest VM, you will get
>     a black screen, and the display can't come back.
> 
>     This is because when guest did suspending, it called
>     into qemu to call virtio_gpu_gl_reset. In function
>     virtio_gpu_gl_reset, it destroyed resources and reset
>     renderer, which were used for display. As a result,
>     guest's screen can't come back to the time when it was
>     suspended and only showed black.
> 
>     So, this patch adds a new ctrl message
>     VIRTIO_GPU_CMD_STATUS_FREEZING to get notification from
>     guest. If guest is during suspending, it sets freezing
>     status of virtgpu to true, this will prevent destroying
>     resources and resetting renderer when guest calls into
>     virtio_gpu_gl_reset. If guest is during resuming, it sets
>     freezing to false, and then virtio_gpu_gl_reset will keep
>     its origin actions and has no other impaction.
> 
>     Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com <mailto:Jiqian.Chen@amd.com>>
>     ---
>      hw/display/virtio-gpu-gl.c                  |  9 ++++++-
>      hw/display/virtio-gpu-virgl.c               |  3 +++
>      hw/display/virtio-gpu.c                     | 26 +++++++++++++++++++--
>      include/hw/virtio/virtio-gpu.h              |  3 +++
>      include/standard-headers/linux/virtio_gpu.h |  9 +++++++
>      5 files changed, 47 insertions(+), 3 deletions(-)
> 
>     diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
>     index e06be60dfb..e11ad233eb 100644
>     --- a/hw/display/virtio-gpu-gl.c
>     +++ b/hw/display/virtio-gpu-gl.c
>     @@ -100,7 +100,14 @@ static void virtio_gpu_gl_reset(VirtIODevice *vdev)
>           */
>          if (gl->renderer_inited && !gl->renderer_reset) {
>              virtio_gpu_virgl_reset_scanout(g);
>     -        gl->renderer_reset = true;
>     +        /*
>     +         * If guest is suspending, we shouldn't reset renderer,
>     +         * otherwise, the display can't come back to the time when
>     +         * it was suspended after guest resumed.
>     +         */
>     +        if (!g->freezing) {
>     +            gl->renderer_reset = true;
>     +        }
>          }
>      }
> 
>     diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
>     index 73cb92c8d5..183ec92d53 100644
>     --- a/hw/display/virtio-gpu-virgl.c
>     +++ b/hw/display/virtio-gpu-virgl.c
>     @@ -464,6 +464,9 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
>          case VIRTIO_GPU_CMD_GET_EDID:
>              virtio_gpu_get_edid(g, cmd);
>              break;
>     +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
>     +        virtio_gpu_cmd_status_freezing(g, cmd);
>     +        break;
>          default:
>              cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>              break;
>     diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
>     index 5e15c79b94..8f235d7848 100644
>     --- a/hw/display/virtio-gpu.c
>     +++ b/hw/display/virtio-gpu.c
>     @@ -373,6 +373,16 @@ static void virtio_gpu_resource_create_blob(VirtIOGPU *g,
>          QTAILQ_INSERT_HEAD(&g->reslist, res, next);
>      }
> 
>     +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
>     +                         struct virtio_gpu_ctrl_command *cmd)
>     +{
>     +    struct virtio_gpu_status_freezing sf;
>     +
>     +    VIRTIO_GPU_FILL_CMD(sf);
>     +    virtio_gpu_bswap_32(&sf, sizeof(sf));
>     +    g->freezing = sf.freezing;
>     +}
>     +
>      static void virtio_gpu_disable_scanout(VirtIOGPU *g, int scanout_id)
>      {
>          struct virtio_gpu_scanout *scanout = &g->parent_obj.scanout[scanout_id];
>     @@ -986,6 +996,9 @@ void virtio_gpu_simple_process_cmd(VirtIOGPU *g,
>          case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
>              virtio_gpu_resource_detach_backing(g, cmd);
>              break;
>     +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
>     +        virtio_gpu_cmd_status_freezing(g, cmd);
>     +        break;
>          default:
>              cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>              break;
>     @@ -1344,6 +1357,8 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
>          QTAILQ_INIT(&g->reslist);
>          QTAILQ_INIT(&g->cmdq);
>          QTAILQ_INIT(&g->fenceq);
>     +
>     +    g->freezing = false;
>      }
> 
>      void virtio_gpu_reset(VirtIODevice *vdev)
>     @@ -1352,8 +1367,15 @@ void virtio_gpu_reset(VirtIODevice *vdev)
>          struct virtio_gpu_simple_resource *res, *tmp;
>          struct virtio_gpu_ctrl_command *cmd;
> 
>     -    QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
>     -        virtio_gpu_resource_destroy(g, res);
>     +    /*
>     +     * If guest is suspending, we shouldn't destroy resources,
>     +     * otherwise, the display can't come back to the time when
>     +     * it was suspended after guest resumed.
>     +     */
>     +    if (!g->freezing) {
>     +        QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
>     +            virtio_gpu_resource_destroy(g, res);
>     +        }
>          }
> 
>          while (!QTAILQ_EMPTY(&g->cmdq)) {
>     diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
>     index 2e28507efe..c21c2990fb 100644
>     --- a/include/hw/virtio/virtio-gpu.h
>     +++ b/include/hw/virtio/virtio-gpu.h
>     @@ -173,6 +173,7 @@ struct VirtIOGPU {
> 
>          uint64_t hostmem;
> 
>     +    bool freezing;
>          bool processing_cmdq;
>          QEMUTimer *fence_poll;
>          QEMUTimer *print_stats;
>     @@ -284,5 +285,7 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g);
>      void virtio_gpu_virgl_reset(VirtIOGPU *g);
>      int virtio_gpu_virgl_init(VirtIOGPU *g);
>      int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g);
>     +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
>     +                         struct virtio_gpu_ctrl_command *cmd);
> 
>      #endif
>     diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h
>     index 2da48d3d4c..aefffbd751 100644
>     --- a/include/standard-headers/linux/virtio_gpu.h
>     +++ b/include/standard-headers/linux/virtio_gpu.h
>     @@ -116,6 +116,9 @@ enum virtio_gpu_ctrl_type {
>             VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID,
>             VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID,
>             VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER,
>     +
>     +       /* status */
>     +       VIRTIO_GPU_CMD_STATUS_FREEZING = 0x1300,
>      };
> 
> 

Thank you for reviewing!

> Adding a new command requires new feature flag (and maybe it should be in the <0x1000 range instead)
> 
> But I am not sure we need a new message at the virtio-gpu level. Gerd, wdyt?

I added this message to get notifications from guest. I don't know what level is more suitable. Could you please give some advice?

> 
> Maybe it's not a good place to reset all GPU resources during QEMU reset() after all, if it's called during s3 and there is no mechanism to restore it. Damien?

Yes, during s3, virtio_gpu_reset() is called, and all resources are destroyed. If there is a better place to destroy resources, I think the display problem may disappear and the new message I added may be not necessary.

> 
> thanks
> 
> 
> ...
> 
>      enum virtio_gpu_shm_id {
>     @@ -453,4 +456,10 @@ struct virtio_gpu_resource_unmap_blob {
>             uint32_t padding;
>      };
> 
>     +/* VIRTIO_GPU_CMD_STATUS_FREEZING */
>     +struct virtio_gpu_status_freezing {
>     +       struct virtio_gpu_ctrl_hdr hdr;
>     +       __u32 freezing;
>     +};
>     +
>      #endif
>     -- 
>     2.34.1
> 
> 
> 
> 
> -- 
> Marc-André Lureau
Chen, Jiqian June 15, 2023, 7:23 a.m. UTC | #5
On 2023/6/9 00:41, Robert Beckett wrote:
> 
> On 08/06/2023 03:56, Jiqian Chen wrote:
>> After suspending and resuming guest VM, you will get
>> a black screen, and the display can't come back.
>>
>> This is because when guest did suspending, it called
>> into qemu to call virtio_gpu_gl_reset. In function
>> virtio_gpu_gl_reset, it destroyed resources and reset
>> renderer, which were used for display. As a result,
>> guest's screen can't come back to the time when it was
>> suspended and only showed black.
>>
>> So, this patch adds a new ctrl message
>> VIRTIO_GPU_CMD_STATUS_FREEZING to get notification from
>> guest. If guest is during suspending, it sets freezing
>> status of virtgpu to true, this will prevent destroying
>> resources and resetting renderer when guest calls into
>> virtio_gpu_gl_reset. If guest is during resuming, it sets
>> freezing to false, and then virtio_gpu_gl_reset will keep
>> its origin actions and has no other impaction.
>>
>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
>> ---
>>   hw/display/virtio-gpu-gl.c                  |  9 ++++++-
>>   hw/display/virtio-gpu-virgl.c               |  3 +++
>>   hw/display/virtio-gpu.c                     | 26 +++++++++++++++++++--
>>   include/hw/virtio/virtio-gpu.h              |  3 +++
>>   include/standard-headers/linux/virtio_gpu.h |  9 +++++++
>>   5 files changed, 47 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
>> index e06be60dfb..e11ad233eb 100644
>> --- a/hw/display/virtio-gpu-gl.c
>> +++ b/hw/display/virtio-gpu-gl.c
>> @@ -100,7 +100,14 @@ static void virtio_gpu_gl_reset(VirtIODevice *vdev)
>>        */
>>       if (gl->renderer_inited && !gl->renderer_reset) {
>>           virtio_gpu_virgl_reset_scanout(g);
>> -        gl->renderer_reset = true;
>> +        /*
>> +         * If guest is suspending, we shouldn't reset renderer,
>> +         * otherwise, the display can't come back to the time when
>> +         * it was suspended after guest resumed.
>> +         */
>> +        if (!g->freezing) {
>> +            gl->renderer_reset = true;
>> +        }
>>       }
>>   }
>>   diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
>> index 73cb92c8d5..183ec92d53 100644
>> --- a/hw/display/virtio-gpu-virgl.c
>> +++ b/hw/display/virtio-gpu-virgl.c
>> @@ -464,6 +464,9 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
>>       case VIRTIO_GPU_CMD_GET_EDID:
>>           virtio_gpu_get_edid(g, cmd);
>>           break;
>> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
>> +        virtio_gpu_cmd_status_freezing(g, cmd);
>> +        break;
>>       default:
>>           cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>>           break;
>> diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
>> index 5e15c79b94..8f235d7848 100644
>> --- a/hw/display/virtio-gpu.c
>> +++ b/hw/display/virtio-gpu.c
>> @@ -373,6 +373,16 @@ static void virtio_gpu_resource_create_blob(VirtIOGPU *g,
>>       QTAILQ_INSERT_HEAD(&g->reslist, res, next);
>>   }
>>   +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
>> +                         struct virtio_gpu_ctrl_command *cmd)
>> +{
>> +    struct virtio_gpu_status_freezing sf;
>> +
>> +    VIRTIO_GPU_FILL_CMD(sf);
>> +    virtio_gpu_bswap_32(&sf, sizeof(sf));
>> +    g->freezing = sf.freezing;
>> +}
>> +
>>   static void virtio_gpu_disable_scanout(VirtIOGPU *g, int scanout_id)
>>   {
>>       struct virtio_gpu_scanout *scanout = &g->parent_obj.scanout[scanout_id];
>> @@ -986,6 +996,9 @@ void virtio_gpu_simple_process_cmd(VirtIOGPU *g,
>>       case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
>>           virtio_gpu_resource_detach_backing(g, cmd);
>>           break;
>> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
>> +        virtio_gpu_cmd_status_freezing(g, cmd);
>> +        break;
>>       default:
>>           cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>>           break;
>> @@ -1344,6 +1357,8 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
>>       QTAILQ_INIT(&g->reslist);
>>       QTAILQ_INIT(&g->cmdq);
>>       QTAILQ_INIT(&g->fenceq);
>> +
>> +    g->freezing = false;
>>   }
>>     void virtio_gpu_reset(VirtIODevice *vdev)
>> @@ -1352,8 +1367,15 @@ void virtio_gpu_reset(VirtIODevice *vdev)
>>       struct virtio_gpu_simple_resource *res, *tmp;
>>       struct virtio_gpu_ctrl_command *cmd;
>>   -    QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
>> -        virtio_gpu_resource_destroy(g, res);
>> +    /*
>> +     * If guest is suspending, we shouldn't destroy resources,
>> +     * otherwise, the display can't come back to the time when
>> +     * it was suspended after guest resumed.
>> +     */
> 
> 
> What should happen if qemu is torn down while the guest is suspended.

I don't know if there is a place to reclaim all memory in Qemu. If not, in the case you said, the memory of resources may leak.
But I ran "sudo xl destroy <domain id>" to destroy Qemu without suspending guest, and then I found it didn't destroy resources too (By adding printings in virtio_gpu_reset, but the function didn't be called when destroying Qemu). So, the leak may be an existing problem before using my patch.

> Currently there is no other place where the resources will be destroyed. While it is likely viable to rely on process auto tear down of mem and files from the host cleanup point of view, it feels bad to rely on that.
> Perhaps an inverted conditional with destroy loop in virtio_gpu_device_unrealize() would suffice?

But may the inverted conditional also need to rely on guest?
And I also added printings in virtio_gpu_device_unrealize(), but didn't get them when destroying Qemu.
If there is a more suitable place to destroy resources instead of during suspending process, the leak and the dependency problem will go.

> 
> I am not a qemu expert, but I am assuming that the unrealize call will be called during machine destruction if the guest is suspended.
> Also if qemu supports (or intends to support in future) hotplugging of the device, the would help avoid leaks until qemu exit too.
> 

I think so too. The hotplug function is beneficial to the leaks. But it seems Qemu doesn't support virtgpu hotplug now.
If virtgpu supports hotplug, its call stack may be:
main_destroy
	libxl_domain_destroy
		libxl__device_pci_destroy_all
			libxl__device_pci_remove_common
				do_pci_remove
					qmp_device_del
						hotplug_handler_unplug
							unrealize
> 
>> +    if (!g->freezing) {
>> +        QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
>> +            virtio_gpu_resource_destroy(g, res);
>> +        }
>>       }
>>         while (!QTAILQ_EMPTY(&g->cmdq)) {
>> diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
>> index 2e28507efe..c21c2990fb 100644
>> --- a/include/hw/virtio/virtio-gpu.h
>> +++ b/include/hw/virtio/virtio-gpu.h
>> @@ -173,6 +173,7 @@ struct VirtIOGPU {
>>         uint64_t hostmem;
>>   +    bool freezing;
>>       bool processing_cmdq;
>>       QEMUTimer *fence_poll;
>>       QEMUTimer *print_stats;
>> @@ -284,5 +285,7 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g);
>>   void virtio_gpu_virgl_reset(VirtIOGPU *g);
>>   int virtio_gpu_virgl_init(VirtIOGPU *g);
>>   int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g);
>> +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
>> +                         struct virtio_gpu_ctrl_command *cmd);
>>     #endif
>> diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h
>> index 2da48d3d4c..aefffbd751 100644
>> --- a/include/standard-headers/linux/virtio_gpu.h
>> +++ b/include/standard-headers/linux/virtio_gpu.h
>> @@ -116,6 +116,9 @@ enum virtio_gpu_ctrl_type {
>>       VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID,
>>       VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID,
>>       VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER,
>> +
>> +    /* status */
>> +    VIRTIO_GPU_CMD_STATUS_FREEZING = 0x1300,
>>   };
>>     enum virtio_gpu_shm_id {
>> @@ -453,4 +456,10 @@ struct virtio_gpu_resource_unmap_blob {
>>       uint32_t padding;
>>   };
>>   +/* VIRTIO_GPU_CMD_STATUS_FREEZING */
>> +struct virtio_gpu_status_freezing {
>> +    struct virtio_gpu_ctrl_hdr hdr;
>> +    __u32 freezing;
>> +};
>> +
>>   #endif
Gerd Hoffmann June 19, 2023, 12:51 p.m. UTC | #6
Hi, 
> Adding a new command requires new feature flag (and maybe it should be in
> the <0x1000 range instead)
> 
> But I am not sure we need a new message at the virtio-gpu level. Gerd, wdyt?
> 
> Maybe it's not a good place to reset all GPU resources during QEMU reset()
> after all, if it's called during s3 and there is no mechanism to restore
> it. Damien?

The guest driver should be able to restore resources after resume.

take care,
  Gerd
Chen, Jiqian June 20, 2023, 9:11 a.m. UTC | #7
Hi Gerd Hoffmann

On 2023/6/19 20:51, Gerd Hoffmann wrote:
>   Hi, 
>> Adding a new command requires new feature flag (and maybe it should be in
>> the <0x1000 range instead)
>>
>> But I am not sure we need a new message at the virtio-gpu level. Gerd, wdyt?
>>
>> Maybe it's not a good place to reset all GPU resources during QEMU reset()
>> after all, if it's called during s3 and there is no mechanism to restore
>> it. Damien?
> 
> The guest driver should be able to restore resources after resume.

Thank you for your suggestion!
As far as I know, resources are created on host side and guest has no backup, if resources are destroyed, guest can't restore them.
Or do you mean guest driver need to send commands to re-create resources after resume?
If so, I have some questions. Can guest re-create resources by using object(virtio_vpu_object) or others? Can the new resources replace the destroyed resources to continue the suspended display tasks after resume? I think those will help me improve my implementation, thank you!

> 
> take care,
>   Gerd
>
Gerd Hoffmann June 20, 2023, 9:41 a.m. UTC | #8
Hi,

> > The guest driver should be able to restore resources after resume.
> 
> Thank you for your suggestion!
> As far as I know, resources are created on host side and guest has no backup, if resources are destroyed, guest can't restore them.
> Or do you mean guest driver need to send commands to re-create resources after resume?

The later.  The guest driver knows which resources it has created,
it can restore them after suspend.

> If so, I have some questions. Can guest re-create resources by using
> object(virtio_vpu_object) or others? Can the new resources replace the
> destroyed resources to continue the suspended display tasks after
> resume?

Any display scanout information will be gone too, the guest driver needs
re-create this too (after re-creating the resources).

take care,
  Gerd
Bob Beckett June 20, 2023, 12:26 p.m. UTC | #9
On 20/06/2023 10:41, Gerd Hoffmann wrote:
>    Hi,
>
>>> The guest driver should be able to restore resources after resume.
>> Thank you for your suggestion!
>> As far as I know, resources are created on host side and guest has no backup, if resources are destroyed, guest can't restore them.
>> Or do you mean guest driver need to send commands to re-create resources after resume?
> The later.  The guest driver knows which resources it has created,
> it can restore them after suspend.


Are you sure that this is viable?

How would you propose that a guest kernel could reproduce a resource, 
including pixel data upload during a resume?

The kernel would not have any of the pixel data to transfer to host. 
This is normally achieved by guest apps calling GL calls and mesa asking 
the kernel to create the textures with the given data (often read from a 
file).
If your suggestion is to get the userland application to do it, that 
would entirely break how suspend/resume is meant to happen. It should be 
transparent to userland applications for the most part.

Could you explain how you anticipate the guest being able to reproduce 
the resources please?


>
>> If so, I have some questions. Can guest re-create resources by using
>> object(virtio_vpu_object) or others? Can the new resources replace the
>> destroyed resources to continue the suspended display tasks after
>> resume?
> Any display scanout information will be gone too, the guest driver needs
> re-create this too (after re-creating the resources).
>
> take care,
>    Gerd
>
Kim, Dongwon June 20, 2023, 5:57 p.m. UTC | #10
Hello,

I just came across this discussion regarding s3/s4 support in virtio-gpu 
driver and QEMU.

We saw similar problem a while ago (QEMU deletes all objects upon 
suspension) and

came up with an experimental solution that is basically making 
virtio-gpu driver to do object creation

for all existing resources once VM is resumed so that he QEMU recreate them.

This method has worked pretty well on our case. I submitted patches for 
this to dri-devel a while ago.

[RFC PATCH 0/2] drm/virtio:virtio-gpu driver freeze-and-restore 
implementation (lists.freedesktop.org) 
<https://lists.freedesktop.org/archives/dri-devel/2022-September/373892.html>

This is kernel driver only solution. Nothing has to be changed in QEMU.

Jiqian and other reviewers, can you check this old solution we suggested 
as well?

On 6/20/2023 5:26 AM, Robert Beckett wrote:

>
> On 20/06/2023 10:41, Gerd Hoffmann wrote:
>>    Hi,
>>
>>>> The guest driver should be able to restore resources after resume.
>>> Thank you for your suggestion!
>>> As far as I know, resources are created on host side and guest has 
>>> no backup, if resources are destroyed, guest can't restore them.
>>> Or do you mean guest driver need to send commands to re-create 
>>> resources after resume?
>> The later.  The guest driver knows which resources it has created,
>> it can restore them after suspend.
>
>
> Are you sure that this is viable?
>
> How would you propose that a guest kernel could reproduce a resource, 
> including pixel data upload during a resume?
>
> The kernel would not have any of the pixel data to transfer to host. 
> This is normally achieved by guest apps calling GL calls and mesa 
> asking the kernel to create the textures with the given data (often 
> read from a file).
> If your suggestion is to get the userland application to do it, that 
> would entirely break how suspend/resume is meant to happen. It should 
> be transparent to userland applications for the most part.
>
> Could you explain how you anticipate the guest being able to reproduce 
> the resources please?
>
>
>>
>>> If so, I have some questions. Can guest re-create resources by using
>>> object(virtio_vpu_object) or others? Can the new resources replace the
>>> destroyed resources to continue the suspended display tasks after
>>> resume?
>> Any display scanout information will be gone too, the guest driver needs
>> re-create this too (after re-creating the resources).
>>
>> take care,
>>    Gerd
>>
>
Gerd Hoffmann June 21, 2023, 8:39 a.m. UTC | #11
On Tue, Jun 20, 2023 at 01:26:15PM +0100, Robert Beckett wrote:
> 
> On 20/06/2023 10:41, Gerd Hoffmann wrote:
> >    Hi,
> > 
> > > > The guest driver should be able to restore resources after resume.
> > > Thank you for your suggestion!
> > > As far as I know, resources are created on host side and guest has no backup, if resources are destroyed, guest can't restore them.
> > > Or do you mean guest driver need to send commands to re-create resources after resume?
> > The later.  The guest driver knows which resources it has created,
> > it can restore them after suspend.
> 
> Are you sure that this is viable?
> 
> How would you propose that a guest kernel could reproduce a resource,
> including pixel data upload during a resume?
> 
> The kernel would not have any of the pixel data to transfer to host.

Depends on the of resource type.  For resources which are created by
uploading pixel data (using VIRTIO_GPU_CMD_TRANSFER_TO_HOST_*) a guest
mirror exists which can be used for re-upload.

For resources filled by gl rendering ops this is indeed not the case.

> Could you explain how you anticipate the guest being able to reproduce the
> resources please?

Same you do on physical hardware?  Suspend can poweroff your PCI
devices, so there must be some standard way to handle that situation
for resources stored in gpu device memory, which is very similar to
the problem we have here.

take care,
  Gerd
Bob Beckett June 21, 2023, 11:14 a.m. UTC | #12
On 21/06/2023 09:39, Gerd Hoffmann wrote:
> On Tue, Jun 20, 2023 at 01:26:15PM +0100, Robert Beckett wrote:
>> On 20/06/2023 10:41, Gerd Hoffmann wrote:
>>>     Hi,
>>>
>>>>> The guest driver should be able to restore resources after resume.
>>>> Thank you for your suggestion!
>>>> As far as I know, resources are created on host side and guest has no backup, if resources are destroyed, guest can't restore them.
>>>> Or do you mean guest driver need to send commands to re-create resources after resume?
>>> The later.  The guest driver knows which resources it has created,
>>> it can restore them after suspend.
>> Are you sure that this is viable?
>>
>> How would you propose that a guest kernel could reproduce a resource,
>> including pixel data upload during a resume?
>>
>> The kernel would not have any of the pixel data to transfer to host.
> Depends on the of resource type.  For resources which are created by
> uploading pixel data (using VIRTIO_GPU_CMD_TRANSFER_TO_HOST_*) a guest
> mirror exists which can be used for re-upload.

unfortunately this is not always the case.

https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/virgl/virgl_resource.c#L668

Often mesa will decide that it won't need to access a resource again 
after initial upload (textures etc). In this case, if it is able to copy 
back from host if needed, it will not maintain the guest shadow copy. 
Instead it will create a single page proxy object. The transfer to host 
will then over fill it to the correct size.

I think this was a fairly huge optimization for them.

>
> For resources filled by gl rendering ops this is indeed not the case.
>
>> Could you explain how you anticipate the guest being able to reproduce the
>> resources please?
> Same you do on physical hardware?  Suspend can poweroff your PCI
> devices, so there must be some standard way to handle that situation
> for resources stored in gpu device memory, which is very similar to
> the problem we have here.

In traditional PCI gfx card setups, TTM is used as the memory manager in 
the kernel. This is used to migrate the buffers back from VRAM to system 
pages during a suspend.

This would be suitable for use to track host blob buffers that get 
mapped to guest via the PCI BAR, though would be a significant 
re-architecting of virtio gpu driver.

It would not help with the previously mentioned proxied resources. 
Though in theory the driver could read the resources back from host to 
guest pages during suspend, this would then be potentially complicated 
by suspend time alloc failures etc.


As virtio drivers are by design paravirt drivers ,I think it is 
reasonable to accept some knowledge with and cooperation with the host 
to manage suspend/resume.

It seems to me like a lot of effort and long term maintenance to add 
support for transparent suspend/resume that would otherwise be unneeded.

Perhaps others have alternative designs for this?

>
> take care,
>    Gerd
>
Gerd Hoffmann June 21, 2023, 11:52 a.m. UTC | #13
Hi,

> As virtio drivers are by design paravirt drivers ,I think it is reasonable
> to accept some knowledge with and cooperation with the host to manage
> suspend/resume.

Fair point.

In any case this needs a feature flag, so guest and host can negotiate
whenever this is supported or not.

virtio spec needs an update for that, describing the exact behavior.

take care,
  Gerd
Kim, Dongwon June 29, 2023, 4:48 p.m. UTC | #14
On 6/21/2023 4:14 AM, Robert Beckett wrote:
>
> On 21/06/2023 09:39, Gerd Hoffmann wrote:
>> On Tue, Jun 20, 2023 at 01:26:15PM +0100, Robert Beckett wrote:
>>> On 20/06/2023 10:41, Gerd Hoffmann wrote:
>>>>     Hi,
>>>>
>>>>>> The guest driver should be able to restore resources after resume.
>>>>> Thank you for your suggestion!
>>>>> As far as I know, resources are created on host side and guest has 
>>>>> no backup, if resources are destroyed, guest can't restore them.
>>>>> Or do you mean guest driver need to send commands to re-create 
>>>>> resources after resume?
>>>> The later.  The guest driver knows which resources it has created,
>>>> it can restore them after suspend.
>>> Are you sure that this is viable?
>>>
>>> How would you propose that a guest kernel could reproduce a resource,
>>> including pixel data upload during a resume?
>>>
>>> The kernel would not have any of the pixel data to transfer to host.
>> Depends on the of resource type.  For resources which are created by
>> uploading pixel data (using VIRTIO_GPU_CMD_TRANSFER_TO_HOST_*) a guest
>> mirror exists which can be used for re-upload.
>
> unfortunately this is not always the case.
>
> https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/virgl/virgl_resource.c#L668 
>
>
> Often mesa will decide that it won't need to access a resource again 
> after initial upload (textures etc). In this case, if it is able to 
> copy back from host if needed, it will not maintain the guest shadow 
> copy. Instead it will create a single page proxy object. The transfer 
> to host will then over fill it to the correct size.
>
> I think this was a fairly huge optimization for them.
>
I have been only focused on scanout blob so didn't think too much about 
all virgl objects but aren't all the virtio-gpu-object will be 
maintained until they are removed by the driver regardless of the type 
of data they contain? Does Mesa (virgl) remove those objects after they 
are uploaded to the host?

>>
>> For resources filled by gl rendering ops this is indeed not the case.
>>
>>> Could you explain how you anticipate the guest being able to 
>>> reproduce the
>>> resources please?
>> Same you do on physical hardware?  Suspend can poweroff your PCI
>> devices, so there must be some standard way to handle that situation
>> for resources stored in gpu device memory, which is very similar to
>> the problem we have here.
>
> In traditional PCI gfx card setups, TTM is used as the memory manager 
> in the kernel. This is used to migrate the buffers back from VRAM to 
> system pages during a suspend.
>
> This would be suitable for use to track host blob buffers that get 
> mapped to guest via the PCI BAR, though would be a significant 
> re-architecting of virtio gpu driver.
>
> It would not help with the previously mentioned proxied resources. 
> Though in theory the driver could read the resources back from host to 
> guest pages during suspend, this would then be potentially complicated 
> by suspend time alloc failures etc.
>
>
> As virtio drivers are by design paravirt drivers ,I think it is 
> reasonable to accept some knowledge with and cooperation with the host 
> to manage suspend/resume.
>
> It seems to me like a lot of effort and long term maintenance to add 
> support for transparent suspend/resume that would otherwise be unneeded.
>
> Perhaps others have alternative designs for this?
>
>>
>> take care,
>>    Gerd
>>
>
Kim, Dongwon June 29, 2023, 4:53 p.m. UTC | #15
This method - letting QEMU not remove resources would work on S3 case 
but with S4, the QEMU would lose all the resources anyway as the process 
will be terminated. So objects restoring was only option for us as

in [RFC PATCH 2/2] drm/virtio: restore virtio_gpu_objects upon suspend 
and resume (lists.freedesktop.org) 
<https://lists.freedesktop.org/archives/dri-devel/2022-September/373894.html>

But I only considered and tested cases with scanout blob resources, so 
this may not cover other resource types...

On 6/7/2023 7:56 PM, Jiqian Chen wrote:
> After suspending and resuming guest VM, you will get
> a black screen, and the display can't come back.
>
> This is because when guest did suspending, it called
> into qemu to call virtio_gpu_gl_reset. In function
> virtio_gpu_gl_reset, it destroyed resources and reset
> renderer, which were used for display. As a result,
> guest's screen can't come back to the time when it was
> suspended and only showed black.
>
> So, this patch adds a new ctrl message
> VIRTIO_GPU_CMD_STATUS_FREEZING to get notification from
> guest. If guest is during suspending, it sets freezing
> status of virtgpu to true, this will prevent destroying
> resources and resetting renderer when guest calls into
> virtio_gpu_gl_reset. If guest is during resuming, it sets
> freezing to false, and then virtio_gpu_gl_reset will keep
> its origin actions and has no other impaction.
>
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
> ---
>   hw/display/virtio-gpu-gl.c                  |  9 ++++++-
>   hw/display/virtio-gpu-virgl.c               |  3 +++
>   hw/display/virtio-gpu.c                     | 26 +++++++++++++++++++--
>   include/hw/virtio/virtio-gpu.h              |  3 +++
>   include/standard-headers/linux/virtio_gpu.h |  9 +++++++
>   5 files changed, 47 insertions(+), 3 deletions(-)
>
> diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
> index e06be60dfb..e11ad233eb 100644
> --- a/hw/display/virtio-gpu-gl.c
> +++ b/hw/display/virtio-gpu-gl.c
> @@ -100,7 +100,14 @@ static void virtio_gpu_gl_reset(VirtIODevice *vdev)
>        */
>       if (gl->renderer_inited && !gl->renderer_reset) {
>           virtio_gpu_virgl_reset_scanout(g);
> -        gl->renderer_reset = true;
> +        /*
> +         * If guest is suspending, we shouldn't reset renderer,
> +         * otherwise, the display can't come back to the time when
> +         * it was suspended after guest resumed.
> +         */
> +        if (!g->freezing) {
> +            gl->renderer_reset = true;
> +        }
>       }
>   }
>   
> diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
> index 73cb92c8d5..183ec92d53 100644
> --- a/hw/display/virtio-gpu-virgl.c
> +++ b/hw/display/virtio-gpu-virgl.c
> @@ -464,6 +464,9 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
>       case VIRTIO_GPU_CMD_GET_EDID:
>           virtio_gpu_get_edid(g, cmd);
>           break;
> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
> +        virtio_gpu_cmd_status_freezing(g, cmd);
> +        break;
>       default:
>           cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>           break;
> diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
> index 5e15c79b94..8f235d7848 100644
> --- a/hw/display/virtio-gpu.c
> +++ b/hw/display/virtio-gpu.c
> @@ -373,6 +373,16 @@ static void virtio_gpu_resource_create_blob(VirtIOGPU *g,
>       QTAILQ_INSERT_HEAD(&g->reslist, res, next);
>   }
>   
> +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
> +                         struct virtio_gpu_ctrl_command *cmd)
> +{
> +    struct virtio_gpu_status_freezing sf;
> +
> +    VIRTIO_GPU_FILL_CMD(sf);
> +    virtio_gpu_bswap_32(&sf, sizeof(sf));
> +    g->freezing = sf.freezing;
> +}
> +
>   static void virtio_gpu_disable_scanout(VirtIOGPU *g, int scanout_id)
>   {
>       struct virtio_gpu_scanout *scanout = &g->parent_obj.scanout[scanout_id];
> @@ -986,6 +996,9 @@ void virtio_gpu_simple_process_cmd(VirtIOGPU *g,
>       case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
>           virtio_gpu_resource_detach_backing(g, cmd);
>           break;
> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
> +        virtio_gpu_cmd_status_freezing(g, cmd);
> +        break;
>       default:
>           cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>           break;
> @@ -1344,6 +1357,8 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
>       QTAILQ_INIT(&g->reslist);
>       QTAILQ_INIT(&g->cmdq);
>       QTAILQ_INIT(&g->fenceq);
> +
> +    g->freezing = false;
>   }
>   
>   void virtio_gpu_reset(VirtIODevice *vdev)
> @@ -1352,8 +1367,15 @@ void virtio_gpu_reset(VirtIODevice *vdev)
>       struct virtio_gpu_simple_resource *res, *tmp;
>       struct virtio_gpu_ctrl_command *cmd;
>   
> -    QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
> -        virtio_gpu_resource_destroy(g, res);
> +    /*
> +     * If guest is suspending, we shouldn't destroy resources,
> +     * otherwise, the display can't come back to the time when
> +     * it was suspended after guest resumed.
> +     */
> +    if (!g->freezing) {
> +        QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
> +            virtio_gpu_resource_destroy(g, res);
> +        }
>       }
>   
>       while (!QTAILQ_EMPTY(&g->cmdq)) {
> diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
> index 2e28507efe..c21c2990fb 100644
> --- a/include/hw/virtio/virtio-gpu.h
> +++ b/include/hw/virtio/virtio-gpu.h
> @@ -173,6 +173,7 @@ struct VirtIOGPU {
>   
>       uint64_t hostmem;
>   
> +    bool freezing;
>       bool processing_cmdq;
>       QEMUTimer *fence_poll;
>       QEMUTimer *print_stats;
> @@ -284,5 +285,7 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g);
>   void virtio_gpu_virgl_reset(VirtIOGPU *g);
>   int virtio_gpu_virgl_init(VirtIOGPU *g);
>   int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g);
> +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
> +                         struct virtio_gpu_ctrl_command *cmd);
>   
>   #endif
> diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h
> index 2da48d3d4c..aefffbd751 100644
> --- a/include/standard-headers/linux/virtio_gpu.h
> +++ b/include/standard-headers/linux/virtio_gpu.h
> @@ -116,6 +116,9 @@ enum virtio_gpu_ctrl_type {
>   	VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID,
>   	VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID,
>   	VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER,
> +
> +	/* status */
> +	VIRTIO_GPU_CMD_STATUS_FREEZING = 0x1300,
>   };
>   
>   enum virtio_gpu_shm_id {
> @@ -453,4 +456,10 @@ struct virtio_gpu_resource_unmap_blob {
>   	uint32_t padding;
>   };
>   
> +/* VIRTIO_GPU_CMD_STATUS_FREEZING */
> +struct virtio_gpu_status_freezing {
> +	struct virtio_gpu_ctrl_hdr hdr;
> +	__u32 freezing;
> +};
> +
>   #endif
Chen, Jiqian June 30, 2023, 7:14 a.m. UTC | #16
Hi Dongwon,

On 2023/6/30 00:53, Kim, Dongwon wrote:
> This method - letting QEMU not remove resources would work on S3 case but with S4, the QEMU would lose all the resources anyway as the process will be terminated. So objects restoring was only option for us as
My patch is for S3 function on Xen. I haven't tried S4 before, I will try S4 later.

> 
> in [RFC PATCH 2/2] drm/virtio: restore virtio_gpu_objects upon suspend and resume (lists.freedesktop.org) <https://lists.freedesktop.org/archives/dri-devel/2022-September/373894.html>
> 
> But I only considered and tested cases with scanout blob resources, so this may not cover other resource types...
I tried your patch, but I can't success to resume guest and guest crashed.

> 
> On 6/7/2023 7:56 PM, Jiqian Chen wrote:
>> After suspending and resuming guest VM, you will get
>> a black screen, and the display can't come back.
>>
>> This is because when guest did suspending, it called
>> into qemu to call virtio_gpu_gl_reset. In function
>> virtio_gpu_gl_reset, it destroyed resources and reset
>> renderer, which were used for display. As a result,
>> guest's screen can't come back to the time when it was
>> suspended and only showed black.
>>
>> So, this patch adds a new ctrl message
>> VIRTIO_GPU_CMD_STATUS_FREEZING to get notification from
>> guest. If guest is during suspending, it sets freezing
>> status of virtgpu to true, this will prevent destroying
>> resources and resetting renderer when guest calls into
>> virtio_gpu_gl_reset. If guest is during resuming, it sets
>> freezing to false, and then virtio_gpu_gl_reset will keep
>> its origin actions and has no other impaction.
>>
>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
>> ---
>>   hw/display/virtio-gpu-gl.c                  |  9 ++++++-
>>   hw/display/virtio-gpu-virgl.c               |  3 +++
>>   hw/display/virtio-gpu.c                     | 26 +++++++++++++++++++--
>>   include/hw/virtio/virtio-gpu.h              |  3 +++
>>   include/standard-headers/linux/virtio_gpu.h |  9 +++++++
>>   5 files changed, 47 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
>> index e06be60dfb..e11ad233eb 100644
>> --- a/hw/display/virtio-gpu-gl.c
>> +++ b/hw/display/virtio-gpu-gl.c
>> @@ -100,7 +100,14 @@ static void virtio_gpu_gl_reset(VirtIODevice *vdev)
>>        */
>>       if (gl->renderer_inited && !gl->renderer_reset) {
>>           virtio_gpu_virgl_reset_scanout(g);
>> -        gl->renderer_reset = true;
>> +        /*
>> +         * If guest is suspending, we shouldn't reset renderer,
>> +         * otherwise, the display can't come back to the time when
>> +         * it was suspended after guest resumed.
>> +         */
>> +        if (!g->freezing) {
>> +            gl->renderer_reset = true;
>> +        }
>>       }
>>   }
>>   diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
>> index 73cb92c8d5..183ec92d53 100644
>> --- a/hw/display/virtio-gpu-virgl.c
>> +++ b/hw/display/virtio-gpu-virgl.c
>> @@ -464,6 +464,9 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
>>       case VIRTIO_GPU_CMD_GET_EDID:
>>           virtio_gpu_get_edid(g, cmd);
>>           break;
>> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
>> +        virtio_gpu_cmd_status_freezing(g, cmd);
>> +        break;
>>       default:
>>           cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>>           break;
>> diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
>> index 5e15c79b94..8f235d7848 100644
>> --- a/hw/display/virtio-gpu.c
>> +++ b/hw/display/virtio-gpu.c
>> @@ -373,6 +373,16 @@ static void virtio_gpu_resource_create_blob(VirtIOGPU *g,
>>       QTAILQ_INSERT_HEAD(&g->reslist, res, next);
>>   }
>>   +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
>> +                         struct virtio_gpu_ctrl_command *cmd)
>> +{
>> +    struct virtio_gpu_status_freezing sf;
>> +
>> +    VIRTIO_GPU_FILL_CMD(sf);
>> +    virtio_gpu_bswap_32(&sf, sizeof(sf));
>> +    g->freezing = sf.freezing;
>> +}
>> +
>>   static void virtio_gpu_disable_scanout(VirtIOGPU *g, int scanout_id)
>>   {
>>       struct virtio_gpu_scanout *scanout = &g->parent_obj.scanout[scanout_id];
>> @@ -986,6 +996,9 @@ void virtio_gpu_simple_process_cmd(VirtIOGPU *g,
>>       case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
>>           virtio_gpu_resource_detach_backing(g, cmd);
>>           break;
>> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
>> +        virtio_gpu_cmd_status_freezing(g, cmd);
>> +        break;
>>       default:
>>           cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>>           break;
>> @@ -1344,6 +1357,8 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
>>       QTAILQ_INIT(&g->reslist);
>>       QTAILQ_INIT(&g->cmdq);
>>       QTAILQ_INIT(&g->fenceq);
>> +
>> +    g->freezing = false;
>>   }
>>     void virtio_gpu_reset(VirtIODevice *vdev)
>> @@ -1352,8 +1367,15 @@ void virtio_gpu_reset(VirtIODevice *vdev)
>>       struct virtio_gpu_simple_resource *res, *tmp;
>>       struct virtio_gpu_ctrl_command *cmd;
>>   -    QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
>> -        virtio_gpu_resource_destroy(g, res);
>> +    /*
>> +     * If guest is suspending, we shouldn't destroy resources,
>> +     * otherwise, the display can't come back to the time when
>> +     * it was suspended after guest resumed.
>> +     */
>> +    if (!g->freezing) {
>> +        QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
>> +            virtio_gpu_resource_destroy(g, res);
>> +        }
>>       }
>>         while (!QTAILQ_EMPTY(&g->cmdq)) {
>> diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
>> index 2e28507efe..c21c2990fb 100644
>> --- a/include/hw/virtio/virtio-gpu.h
>> +++ b/include/hw/virtio/virtio-gpu.h
>> @@ -173,6 +173,7 @@ struct VirtIOGPU {
>>         uint64_t hostmem;
>>   +    bool freezing;
>>       bool processing_cmdq;
>>       QEMUTimer *fence_poll;
>>       QEMUTimer *print_stats;
>> @@ -284,5 +285,7 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g);
>>   void virtio_gpu_virgl_reset(VirtIOGPU *g);
>>   int virtio_gpu_virgl_init(VirtIOGPU *g);
>>   int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g);
>> +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
>> +                         struct virtio_gpu_ctrl_command *cmd);
>>     #endif
>> diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h
>> index 2da48d3d4c..aefffbd751 100644
>> --- a/include/standard-headers/linux/virtio_gpu.h
>> +++ b/include/standard-headers/linux/virtio_gpu.h
>> @@ -116,6 +116,9 @@ enum virtio_gpu_ctrl_type {
>>       VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID,
>>       VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID,
>>       VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER,
>> +
>> +    /* status */
>> +    VIRTIO_GPU_CMD_STATUS_FREEZING = 0x1300,
>>   };
>>     enum virtio_gpu_shm_id {
>> @@ -453,4 +456,10 @@ struct virtio_gpu_resource_unmap_blob {
>>       uint32_t padding;
>>   };
>>   +/* VIRTIO_GPU_CMD_STATUS_FREEZING */
>> +struct virtio_gpu_status_freezing {
>> +    struct virtio_gpu_ctrl_hdr hdr;
>> +    __u32 freezing;
>> +};
>> +
>>   #endif
Kim, Dongwon June 30, 2023, 4:18 p.m. UTC | #17
Hi,

On 6/30/2023 12:14 AM, Chen, Jiqian wrote:
> Hi Dongwon,
>
> On 2023/6/30 00:53, Kim, Dongwon wrote:
>> This method - letting QEMU not remove resources would work on S3 case but with S4, the QEMU would lose all the resources anyway as the process will be terminated. So objects restoring was only option for us as
> My patch is for S3 function on Xen. I haven't tried S4 before, I will try S4 later.

I understand s3 is your priority but this code path will be executed for 
s4 as well, so I think we should make sure s4 is covered as well.

>> in [RFC PATCH 2/2] drm/virtio: restore virtio_gpu_objects upon suspend and resume (lists.freedesktop.org) <https://lists.freedesktop.org/archives/dri-devel/2022-September/373894.html>
>>
>> But I only considered and tested cases with scanout blob resources, so this may not cover other resource types...
> I tried your patch, but I can't success to resume guest and guest crashed.
Hmm, probably due to some difference in the setting. Are you using blob 
guest scanout (sharing display by sharing scatter-gather list of the 
buffer for zero copy)? We may have to debug it little bit. Anyway, the 
patch I shared is based on "recovery" instead of forcing QEMU to keep 
the resources. I think this is only way to cover both S3 and S4. Why 
don't we have some time to look into this path as well?
>> On 6/7/2023 7:56 PM, Jiqian Chen wrote:
>>> After suspending and resuming guest VM, you will get
>>> a black screen, and the display can't come back.
>>>
>>> This is because when guest did suspending, it called
>>> into qemu to call virtio_gpu_gl_reset. In function
>>> virtio_gpu_gl_reset, it destroyed resources and reset
>>> renderer, which were used for display. As a result,
>>> guest's screen can't come back to the time when it was
>>> suspended and only showed black.
>>>
>>> So, this patch adds a new ctrl message
>>> VIRTIO_GPU_CMD_STATUS_FREEZING to get notification from
>>> guest. If guest is during suspending, it sets freezing
>>> status of virtgpu to true, this will prevent destroying
>>> resources and resetting renderer when guest calls into
>>> virtio_gpu_gl_reset. If guest is during resuming, it sets
>>> freezing to false, and then virtio_gpu_gl_reset will keep
>>> its origin actions and has no other impaction.
>>>
>>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
>>> ---
>>>    hw/display/virtio-gpu-gl.c                  |  9 ++++++-
>>>    hw/display/virtio-gpu-virgl.c               |  3 +++
>>>    hw/display/virtio-gpu.c                     | 26 +++++++++++++++++++--
>>>    include/hw/virtio/virtio-gpu.h              |  3 +++
>>>    include/standard-headers/linux/virtio_gpu.h |  9 +++++++
>>>    5 files changed, 47 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
>>> index e06be60dfb..e11ad233eb 100644
>>> --- a/hw/display/virtio-gpu-gl.c
>>> +++ b/hw/display/virtio-gpu-gl.c
>>> @@ -100,7 +100,14 @@ static void virtio_gpu_gl_reset(VirtIODevice *vdev)
>>>         */
>>>        if (gl->renderer_inited && !gl->renderer_reset) {
>>>            virtio_gpu_virgl_reset_scanout(g);
>>> -        gl->renderer_reset = true;
>>> +        /*
>>> +         * If guest is suspending, we shouldn't reset renderer,
>>> +         * otherwise, the display can't come back to the time when
>>> +         * it was suspended after guest resumed.
>>> +         */
>>> +        if (!g->freezing) {
>>> +            gl->renderer_reset = true;
>>> +        }
>>>        }
>>>    }
>>>    diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
>>> index 73cb92c8d5..183ec92d53 100644
>>> --- a/hw/display/virtio-gpu-virgl.c
>>> +++ b/hw/display/virtio-gpu-virgl.c
>>> @@ -464,6 +464,9 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
>>>        case VIRTIO_GPU_CMD_GET_EDID:
>>>            virtio_gpu_get_edid(g, cmd);
>>>            break;
>>> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
>>> +        virtio_gpu_cmd_status_freezing(g, cmd);
>>> +        break;
>>>        default:
>>>            cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>>>            break;
>>> diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
>>> index 5e15c79b94..8f235d7848 100644
>>> --- a/hw/display/virtio-gpu.c
>>> +++ b/hw/display/virtio-gpu.c
>>> @@ -373,6 +373,16 @@ static void virtio_gpu_resource_create_blob(VirtIOGPU *g,
>>>        QTAILQ_INSERT_HEAD(&g->reslist, res, next);
>>>    }
>>>    +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
>>> +                         struct virtio_gpu_ctrl_command *cmd)
>>> +{
>>> +    struct virtio_gpu_status_freezing sf;
>>> +
>>> +    VIRTIO_GPU_FILL_CMD(sf);
>>> +    virtio_gpu_bswap_32(&sf, sizeof(sf));
>>> +    g->freezing = sf.freezing;
>>> +}
>>> +
>>>    static void virtio_gpu_disable_scanout(VirtIOGPU *g, int scanout_id)
>>>    {
>>>        struct virtio_gpu_scanout *scanout = &g->parent_obj.scanout[scanout_id];
>>> @@ -986,6 +996,9 @@ void virtio_gpu_simple_process_cmd(VirtIOGPU *g,
>>>        case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
>>>            virtio_gpu_resource_detach_backing(g, cmd);
>>>            break;
>>> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
>>> +        virtio_gpu_cmd_status_freezing(g, cmd);
>>> +        break;
>>>        default:
>>>            cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>>>            break;
>>> @@ -1344,6 +1357,8 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
>>>        QTAILQ_INIT(&g->reslist);
>>>        QTAILQ_INIT(&g->cmdq);
>>>        QTAILQ_INIT(&g->fenceq);
>>> +
>>> +    g->freezing = false;
>>>    }
>>>      void virtio_gpu_reset(VirtIODevice *vdev)
>>> @@ -1352,8 +1367,15 @@ void virtio_gpu_reset(VirtIODevice *vdev)
>>>        struct virtio_gpu_simple_resource *res, *tmp;
>>>        struct virtio_gpu_ctrl_command *cmd;
>>>    -    QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
>>> -        virtio_gpu_resource_destroy(g, res);
>>> +    /*
>>> +     * If guest is suspending, we shouldn't destroy resources,
>>> +     * otherwise, the display can't come back to the time when
>>> +     * it was suspended after guest resumed.
>>> +     */
>>> +    if (!g->freezing) {
>>> +        QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
>>> +            virtio_gpu_resource_destroy(g, res);
>>> +        }
>>>        }
>>>          while (!QTAILQ_EMPTY(&g->cmdq)) {
>>> diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
>>> index 2e28507efe..c21c2990fb 100644
>>> --- a/include/hw/virtio/virtio-gpu.h
>>> +++ b/include/hw/virtio/virtio-gpu.h
>>> @@ -173,6 +173,7 @@ struct VirtIOGPU {
>>>          uint64_t hostmem;
>>>    +    bool freezing;
>>>        bool processing_cmdq;
>>>        QEMUTimer *fence_poll;
>>>        QEMUTimer *print_stats;
>>> @@ -284,5 +285,7 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g);
>>>    void virtio_gpu_virgl_reset(VirtIOGPU *g);
>>>    int virtio_gpu_virgl_init(VirtIOGPU *g);
>>>    int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g);
>>> +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
>>> +                         struct virtio_gpu_ctrl_command *cmd);
>>>      #endif
>>> diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h
>>> index 2da48d3d4c..aefffbd751 100644
>>> --- a/include/standard-headers/linux/virtio_gpu.h
>>> +++ b/include/standard-headers/linux/virtio_gpu.h
>>> @@ -116,6 +116,9 @@ enum virtio_gpu_ctrl_type {
>>>        VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID,
>>>        VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID,
>>>        VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER,
>>> +
>>> +    /* status */
>>> +    VIRTIO_GPU_CMD_STATUS_FREEZING = 0x1300,
>>>    };
>>>      enum virtio_gpu_shm_id {
>>> @@ -453,4 +456,10 @@ struct virtio_gpu_resource_unmap_blob {
>>>        uint32_t padding;
>>>    };
>>>    +/* VIRTIO_GPU_CMD_STATUS_FREEZING */
>>> +struct virtio_gpu_status_freezing {
>>> +    struct virtio_gpu_ctrl_hdr hdr;
>>> +    __u32 freezing;
>>> +};
>>> +
>>>    #endif
diff mbox series

Patch

diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
index e06be60dfb..e11ad233eb 100644
--- a/hw/display/virtio-gpu-gl.c
+++ b/hw/display/virtio-gpu-gl.c
@@ -100,7 +100,14 @@  static void virtio_gpu_gl_reset(VirtIODevice *vdev)
      */
     if (gl->renderer_inited && !gl->renderer_reset) {
         virtio_gpu_virgl_reset_scanout(g);
-        gl->renderer_reset = true;
+        /*
+         * If guest is suspending, we shouldn't reset renderer,
+         * otherwise, the display can't come back to the time when
+         * it was suspended after guest resumed.
+         */
+        if (!g->freezing) {
+            gl->renderer_reset = true;
+        }
     }
 }
 
diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
index 73cb92c8d5..183ec92d53 100644
--- a/hw/display/virtio-gpu-virgl.c
+++ b/hw/display/virtio-gpu-virgl.c
@@ -464,6 +464,9 @@  void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
     case VIRTIO_GPU_CMD_GET_EDID:
         virtio_gpu_get_edid(g, cmd);
         break;
+    case VIRTIO_GPU_CMD_STATUS_FREEZING:
+        virtio_gpu_cmd_status_freezing(g, cmd);
+        break;
     default:
         cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
         break;
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 5e15c79b94..8f235d7848 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -373,6 +373,16 @@  static void virtio_gpu_resource_create_blob(VirtIOGPU *g,
     QTAILQ_INSERT_HEAD(&g->reslist, res, next);
 }
 
+void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
+                         struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_status_freezing sf;
+
+    VIRTIO_GPU_FILL_CMD(sf);
+    virtio_gpu_bswap_32(&sf, sizeof(sf));
+    g->freezing = sf.freezing;
+}
+
 static void virtio_gpu_disable_scanout(VirtIOGPU *g, int scanout_id)
 {
     struct virtio_gpu_scanout *scanout = &g->parent_obj.scanout[scanout_id];
@@ -986,6 +996,9 @@  void virtio_gpu_simple_process_cmd(VirtIOGPU *g,
     case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
         virtio_gpu_resource_detach_backing(g, cmd);
         break;
+    case VIRTIO_GPU_CMD_STATUS_FREEZING:
+        virtio_gpu_cmd_status_freezing(g, cmd);
+        break;
     default:
         cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
         break;
@@ -1344,6 +1357,8 @@  void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
     QTAILQ_INIT(&g->reslist);
     QTAILQ_INIT(&g->cmdq);
     QTAILQ_INIT(&g->fenceq);
+
+    g->freezing = false;
 }
 
 void virtio_gpu_reset(VirtIODevice *vdev)
@@ -1352,8 +1367,15 @@  void virtio_gpu_reset(VirtIODevice *vdev)
     struct virtio_gpu_simple_resource *res, *tmp;
     struct virtio_gpu_ctrl_command *cmd;
 
-    QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
-        virtio_gpu_resource_destroy(g, res);
+    /*
+     * If guest is suspending, we shouldn't destroy resources,
+     * otherwise, the display can't come back to the time when
+     * it was suspended after guest resumed.
+     */
+    if (!g->freezing) {
+        QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
+            virtio_gpu_resource_destroy(g, res);
+        }
     }
 
     while (!QTAILQ_EMPTY(&g->cmdq)) {
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index 2e28507efe..c21c2990fb 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -173,6 +173,7 @@  struct VirtIOGPU {
 
     uint64_t hostmem;
 
+    bool freezing;
     bool processing_cmdq;
     QEMUTimer *fence_poll;
     QEMUTimer *print_stats;
@@ -284,5 +285,7 @@  void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g);
 void virtio_gpu_virgl_reset(VirtIOGPU *g);
 int virtio_gpu_virgl_init(VirtIOGPU *g);
 int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g);
+void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
+                         struct virtio_gpu_ctrl_command *cmd);
 
 #endif
diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h
index 2da48d3d4c..aefffbd751 100644
--- a/include/standard-headers/linux/virtio_gpu.h
+++ b/include/standard-headers/linux/virtio_gpu.h
@@ -116,6 +116,9 @@  enum virtio_gpu_ctrl_type {
 	VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID,
 	VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID,
 	VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER,
+
+	/* status */
+	VIRTIO_GPU_CMD_STATUS_FREEZING = 0x1300,
 };
 
 enum virtio_gpu_shm_id {
@@ -453,4 +456,10 @@  struct virtio_gpu_resource_unmap_blob {
 	uint32_t padding;
 };
 
+/* VIRTIO_GPU_CMD_STATUS_FREEZING */
+struct virtio_gpu_status_freezing {
+	struct virtio_gpu_ctrl_hdr hdr;
+	__u32 freezing;
+};
+
 #endif