
[v2,1/2] virtio-pmem: Support PCI BAR-relative addresses

Message ID 20210715223505.GA29329@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net (mailing list archive)
State New, archived
Series [v2,1/2] virtio-pmem: Support PCI BAR-relative addresses

Commit Message

Taylor Stark July 15, 2021, 10:35 p.m. UTC
Update virtio-pmem to allow for the pmem region to be specified in either
guest absolute terms or as a PCI BAR-relative address. This is required
to support virtio-pmem in Hyper-V, since Hyper-V only allows PCI devices
to operate on PCI memory ranges defined via BARs.

Virtio-pmem will check for a shared memory window and use that if found;
otherwise it will fall back to the guest-absolute addresses in
virtio_pmem_config. This was chosen over defining a new feature bit,
since it's similar to how virtio-fs is configured.

Signed-off-by: Taylor Stark <tstark@microsoft.com>
---
 drivers/nvdimm/virtio_pmem.c | 21 +++++++++++++++++----
 drivers/nvdimm/virtio_pmem.h |  3 +++
 2 files changed, 20 insertions(+), 4 deletions(-)
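
For context on the mechanism: a device advertises such a BAR-relative window
through a virtio PCI shared-memory vendor capability, which
virtio_get_shm_region() parses on the guest side. Below is a rough sketch of
what that capability could look like, using the virtio_pci_cap64 layout from
include/uapi/linux/virtio_pci.h; the BAR number, offset, and 1 GiB length are
illustrative values only, not taken from the Hyper-V device implementation.

#include <linux/pci_regs.h>    /* PCI_CAP_ID_VNDR */
#include <linux/virtio_pci.h>  /* struct virtio_pci_cap64, VIRTIO_PCI_CAP_SHARED_MEMORY_CFG */

/*
 * Sketch of the capability a device could place in its PCI config space to
 * expose the pmem region as shared memory region 0 (the id the driver looks
 * up as VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION). All multi-byte fields are
 * little-endian on the wire; the values below are illustrative only.
 */
static const struct virtio_pci_cap64 pmem_shm_cap = {
	.cap = {
		.cap_vndr = PCI_CAP_ID_VNDR,
		.cap_len  = sizeof(struct virtio_pci_cap64),
		.cfg_type = VIRTIO_PCI_CAP_SHARED_MEMORY_CFG,
		.bar      = 2,            /* example: window lives in BAR 2 */
		.id       = 0,            /* shmid matched by virtio_get_shm_region() */
		.offset   = 0,            /* BAR-relative start of the window */
		.length   = 0x40000000,   /* low 32 bits: a 1 GiB window */
	},
	.offset_hi = 0,                   /* high 32 bits of offset */
	.length_hi = 0,                   /* high 32 bits of length */
};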

Comments

Pankaj Gupta July 19, 2021, 8:36 p.m. UTC | #1
> Update virtio-pmem to allow for the pmem region to be specified in either
> guest absolute terms or as a PCI BAR-relative address. This is required
> to support virtio-pmem in Hyper-V, since Hyper-V only allows PCI devices
> to operate on PCI memory ranges defined via BARs.
>
> Virtio-pmem will check for a shared memory window and use that if found,
> else it will fallback to using the guest absolute addresses in
> virtio_pmem_config. This was chosen over defining a new feature bit,
> since it's similar to how virtio-fs is configured.
>
> Signed-off-by: Taylor Stark <tstark@microsoft.com>
> ---
>  drivers/nvdimm/virtio_pmem.c | 21 +++++++++++++++++----
>  drivers/nvdimm/virtio_pmem.h |  3 +++
>  2 files changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> index 726c7354d465..43c1d835a449 100644
> --- a/drivers/nvdimm/virtio_pmem.c
> +++ b/drivers/nvdimm/virtio_pmem.c
> @@ -37,6 +37,8 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
>         struct virtio_pmem *vpmem;
>         struct resource res;
>         int err = 0;
> +       bool have_shm_region;
> +       struct virtio_shm_region pmem_region;
>
>         if (!vdev->config->get) {
>                 dev_err(&vdev->dev, "%s failure: config access disabled\n",
> @@ -58,10 +60,21 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
>                 goto out_err;
>         }
>
> -       virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> -                       start, &vpmem->start);
> -       virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> -                       size, &vpmem->size);
> +       /* Retrieve the pmem device's address and size. It may have been supplied
> +        * as a PCI BAR-relative shared memory region, or as a guest absolute address.
> +        */
> +       have_shm_region = virtio_get_shm_region(vpmem->vdev, &pmem_region,
> +                                               VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION);

The current implementation of the virtio-pmem device in QEMU does not
expose it as a PCI BAR, so I can't test this. Just curious: has the
device-side implementation also been tested for the asynchronous flush
case?

Thanks,
Pankaj

> +
> +       if (have_shm_region) {
> +               vpmem->start = pmem_region.addr;
> +               vpmem->size = pmem_region.len;
> +       } else {
> +               virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> +                               start, &vpmem->start);
> +               virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> +                               size, &vpmem->size);
> +       }
>
>         res.start = vpmem->start;
>         res.end   = vpmem->start + vpmem->size - 1;
> diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
> index 0dddefe594c4..62bb564e81cb 100644
> --- a/drivers/nvdimm/virtio_pmem.h
> +++ b/drivers/nvdimm/virtio_pmem.h
> @@ -50,6 +50,9 @@ struct virtio_pmem {
>         __u64 size;
>  };
>
> +/* For the id field in virtio_pci_shm_cap */
> +#define VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION 0
> +
>  void virtio_pmem_host_ack(struct virtqueue *vq);
>  int async_pmem_flush(struct nd_region *nd_region, struct bio *bio);
>  #endif
> --
> 2.32.0
>
>
Michael S. Tsirkin July 19, 2021, 9:17 p.m. UTC | #2
On Thu, Jul 15, 2021 at 03:35:05PM -0700, Taylor Stark wrote:
> Update virtio-pmem to allow for the pmem region to be specified in either
> guest absolute terms or as a PCI BAR-relative address. This is required
> to support virtio-pmem in Hyper-V, since Hyper-V only allows PCI devices
> to operate on PCI memory ranges defined via BARs.
> 
> Virtio-pmem will check for a shared memory window and use that if found,
> else it will fallback to using the guest absolute addresses in
> virtio_pmem_config. This was chosen over defining a new feature bit,
> since it's similar to how virtio-fs is configured.
> 
> Signed-off-by: Taylor Stark <tstark@microsoft.com>

This needs to be added to the device spec too.
Can you send a spec patch please?
It's a subscriber-only list virtio-comment@lists.oasis-open.org


> ---
>  drivers/nvdimm/virtio_pmem.c | 21 +++++++++++++++++----
>  drivers/nvdimm/virtio_pmem.h |  3 +++
>  2 files changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> index 726c7354d465..43c1d835a449 100644
> --- a/drivers/nvdimm/virtio_pmem.c
> +++ b/drivers/nvdimm/virtio_pmem.c
> @@ -37,6 +37,8 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
>  	struct virtio_pmem *vpmem;
>  	struct resource res;
>  	int err = 0;
> +	bool have_shm_region;
> +	struct virtio_shm_region pmem_region;
>  
>  	if (!vdev->config->get) {
>  		dev_err(&vdev->dev, "%s failure: config access disabled\n",
> @@ -58,10 +60,21 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
>  		goto out_err;
>  	}
>  
> -	virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> -			start, &vpmem->start);
> -	virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> -			size, &vpmem->size);
> +	/* Retrieve the pmem device's address and size. It may have been supplied
> +	 * as a PCI BAR-relative shared memory region, or as a guest absolute address.
> +	 */
> +	have_shm_region = virtio_get_shm_region(vpmem->vdev, &pmem_region,
> +						VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION);
> +
> +	if (have_shm_region) {
> +		vpmem->start = pmem_region.addr;
> +		vpmem->size = pmem_region.len;
> +	} else {
> +		virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> +				start, &vpmem->start);
> +		virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> +				size, &vpmem->size);
> +	}
>  
>  	res.start = vpmem->start;
>  	res.end   = vpmem->start + vpmem->size - 1;
> diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
> index 0dddefe594c4..62bb564e81cb 100644
> --- a/drivers/nvdimm/virtio_pmem.h
> +++ b/drivers/nvdimm/virtio_pmem.h
> @@ -50,6 +50,9 @@ struct virtio_pmem {
>  	__u64 size;
>  };
>  
> +/* For the id field in virtio_pci_shm_cap */
> +#define VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION 0
> +
>  void virtio_pmem_host_ack(struct virtqueue *vq);
>  int async_pmem_flush(struct nd_region *nd_region, struct bio *bio);
>  #endif
> -- 
> 2.32.0
> 
>
Taylor Stark July 20, 2021, 6:35 a.m. UTC | #3
On Mon, Jul 19, 2021 at 10:36:34PM +0200, Pankaj Gupta wrote:
> > Update virtio-pmem to allow for the pmem region to be specified in either
> > guest absolute terms or as a PCI BAR-relative address. This is required
> > to support virtio-pmem in Hyper-V, since Hyper-V only allows PCI devices
> > to operate on PCI memory ranges defined via BARs.
> >
> > Virtio-pmem will check for a shared memory window and use that if found,
> > else it will fallback to using the guest absolute addresses in
> > virtio_pmem_config. This was chosen over defining a new feature bit,
> > since it's similar to how virtio-fs is configured.
> >
> > Signed-off-by: Taylor Stark <tstark@microsoft.com>
> > ---
> >  drivers/nvdimm/virtio_pmem.c | 21 +++++++++++++++++----
> >  drivers/nvdimm/virtio_pmem.h |  3 +++
> >  2 files changed, 20 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> > index 726c7354d465..43c1d835a449 100644
> > --- a/drivers/nvdimm/virtio_pmem.c
> > +++ b/drivers/nvdimm/virtio_pmem.c
> > @@ -37,6 +37,8 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
> >         struct virtio_pmem *vpmem;
> >         struct resource res;
> >         int err = 0;
> > +       bool have_shm_region;
> > +       struct virtio_shm_region pmem_region;
> >
> >         if (!vdev->config->get) {
> >                 dev_err(&vdev->dev, "%s failure: config access disabled\n",
> > @@ -58,10 +60,21 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
> >                 goto out_err;
> >         }
> >
> > -       virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > -                       start, &vpmem->start);
> > -       virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > -                       size, &vpmem->size);
> > +       /* Retrieve the pmem device's address and size. It may have been supplied
> > +        * as a PCI BAR-relative shared memory region, or as a guest absolute address.
> > +        */
> > +       have_shm_region = virtio_get_shm_region(vpmem->vdev, &pmem_region,
> > +                                               VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION);
> 
> Current implementation of Virtio pmem device in Qemu does not expose
> it as PCI BAR.
> So, can't test it. Just curious if device side implementation is also
> tested for asynchronous
> flush case?
> 
> Thanks,
> Pankaj

Yes, I tested the async flush case as well. We basically call
FlushFileBuffers on the backing file, which is Windows' equivalent of
fsync. I also briefly tested with QEMU to ensure it still works with
the patch.

Thanks,
Taylor
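
For reference, the device side of that flush path is tiny. Here is a minimal
host-side sketch, assuming the request/response layout from
include/uapi/linux/virtio_pmem.h; the handler name and backing_fd parameter
are hypothetical, and a Windows host would call FlushFileBuffers() on the
backing handle instead of fsync().

#include <endian.h>
#include <unistd.h>
#include <linux/virtio_pmem.h>  /* virtio_pmem_req, virtio_pmem_resp, VIRTIO_PMEM_REQ_TYPE_FLUSH */

/*
 * Hypothetical handler for a request the device popped off the virtqueue.
 * Sets resp->ret to 0 on success and non-zero on failure, which the guest's
 * async flush path treats as an error.
 */
static void handle_pmem_request(const struct virtio_pmem_req *req,
				struct virtio_pmem_resp *resp, int backing_fd)
{
	int err = -1;

	if (le32toh(req->type) == VIRTIO_PMEM_REQ_TYPE_FLUSH)
		err = fsync(backing_fd);  /* FlushFileBuffers() on a Windows host */

	resp->ret = htole32(err ? 1 : 0);
}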
 
> > +
> > +       if (have_shm_region) {
> > +               vpmem->start = pmem_region.addr;
> > +               vpmem->size = pmem_region.len;
> > +       } else {
> > +               virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > +                               start, &vpmem->start);
> > +               virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > +                               size, &vpmem->size);
> > +       }
> >
> >         res.start = vpmem->start;
> >         res.end   = vpmem->start + vpmem->size - 1;
> > diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
> > index 0dddefe594c4..62bb564e81cb 100644
> > --- a/drivers/nvdimm/virtio_pmem.h
> > +++ b/drivers/nvdimm/virtio_pmem.h
> > @@ -50,6 +50,9 @@ struct virtio_pmem {
> >         __u64 size;
> >  };
> >
> > +/* For the id field in virtio_pci_shm_cap */
> > +#define VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION 0
> > +
> >  void virtio_pmem_host_ack(struct virtqueue *vq);
> >  int async_pmem_flush(struct nd_region *nd_region, struct bio *bio);
> >  #endif
> > --
> > 2.32.0
> >
> >
Taylor Stark July 20, 2021, 6:41 a.m. UTC | #4
On Mon, Jul 19, 2021 at 05:17:00PM -0400, Michael S. Tsirkin wrote:
> On Thu, Jul 15, 2021 at 03:35:05PM -0700, Taylor Stark wrote:
> > Update virtio-pmem to allow for the pmem region to be specified in either
> > guest absolute terms or as a PCI BAR-relative address. This is required
> > to support virtio-pmem in Hyper-V, since Hyper-V only allows PCI devices
> > to operate on PCI memory ranges defined via BARs.
> > 
> > Virtio-pmem will check for a shared memory window and use that if found,
> > else it will fallback to using the guest absolute addresses in
> > virtio_pmem_config. This was chosen over defining a new feature bit,
> > since it's similar to how virtio-fs is configured.
> > 
> > Signed-off-by: Taylor Stark <tstark@microsoft.com>
> 
> This needs to be added to the device spec too.
> Can you send a spec patch please?
> It's a subscriber-only list virtio-comment@lists.oasis-open.org

Absolutely! I tried looking in the virtio-spec repo on GitHub, but
I couldn't find a spec for virtio-pmem to update. There is this
issue (https://github.com/oasis-tcs/virtio-spec/issues/78) which
seems to indicate the virtio-pmem spec hasn't been added yet.
Any suggestions for where to send a patch?

Thanks,
Taylor 
 
> > ---
> >  drivers/nvdimm/virtio_pmem.c | 21 +++++++++++++++++----
> >  drivers/nvdimm/virtio_pmem.h |  3 +++
> >  2 files changed, 20 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> > index 726c7354d465..43c1d835a449 100644
> > --- a/drivers/nvdimm/virtio_pmem.c
> > +++ b/drivers/nvdimm/virtio_pmem.c
> > @@ -37,6 +37,8 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
> >  	struct virtio_pmem *vpmem;
> >  	struct resource res;
> >  	int err = 0;
> > +	bool have_shm_region;
> > +	struct virtio_shm_region pmem_region;
> >  
> >  	if (!vdev->config->get) {
> >  		dev_err(&vdev->dev, "%s failure: config access disabled\n",
> > @@ -58,10 +60,21 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
> >  		goto out_err;
> >  	}
> >  
> > -	virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > -			start, &vpmem->start);
> > -	virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > -			size, &vpmem->size);
> > +	/* Retrieve the pmem device's address and size. It may have been supplied
> > +	 * as a PCI BAR-relative shared memory region, or as a guest absolute address.
> > +	 */
> > +	have_shm_region = virtio_get_shm_region(vpmem->vdev, &pmem_region,
> > +						VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION);
> > +
> > +	if (have_shm_region) {
> > +		vpmem->start = pmem_region.addr;
> > +		vpmem->size = pmem_region.len;
> > +	} else {
> > +		virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > +				start, &vpmem->start);
> > +		virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > +				size, &vpmem->size);
> > +	}
> >  
> >  	res.start = vpmem->start;
> >  	res.end   = vpmem->start + vpmem->size - 1;
> > diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
> > index 0dddefe594c4..62bb564e81cb 100644
> > --- a/drivers/nvdimm/virtio_pmem.h
> > +++ b/drivers/nvdimm/virtio_pmem.h
> > @@ -50,6 +50,9 @@ struct virtio_pmem {
> >  	__u64 size;
> >  };
> >  
> > +/* For the id field in virtio_pci_shm_cap */
> > +#define VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION 0
> > +
> >  void virtio_pmem_host_ack(struct virtqueue *vq);
> >  int async_pmem_flush(struct nd_region *nd_region, struct bio *bio);
> >  #endif
> > -- 
> > 2.32.0
Pankaj Gupta July 20, 2021, 6:51 a.m. UTC | #5
> > >
> > > -       virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > -                       start, &vpmem->start);
> > > -       virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > -                       size, &vpmem->size);
> > > +       /* Retrieve the pmem device's address and size. It may have been supplied
> > > +        * as a PCI BAR-relative shared memory region, or as a guest absolute address.
> > > +        */
> > > +       have_shm_region = virtio_get_shm_region(vpmem->vdev, &pmem_region,
> > > +                                               VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION);
> >
> > Current implementation of Virtio pmem device in Qemu does not expose
> > it as PCI BAR.
> > So, can't test it. Just curious if device side implementation is also
> > tested for asynchronous
> > flush case?
> >
> > Thanks,
> > Pankaj
>
> Yes, I tested the async flush case as well. We basically call
> FlushFileBuffers on the backing file, which is Windows' equivalent of
> fsync. I also briefly tested with qemu to ensure that still works with
> the patch.

Thank you for the confirmation. This sounds really good.
I am also getting back to pending items for virtio-pmem.

On a side question: do you have an existing or planned Windows guest
implementation of virtio-pmem?

Thanks,
Pankaj
Michael S. Tsirkin July 20, 2021, 9:46 a.m. UTC | #6
On Mon, Jul 19, 2021 at 11:41:03PM -0700, Taylor Stark wrote:
> On Mon, Jul 19, 2021 at 05:17:00PM -0400, Michael S. Tsirkin wrote:
> > On Thu, Jul 15, 2021 at 03:35:05PM -0700, Taylor Stark wrote:
> > > Update virtio-pmem to allow for the pmem region to be specified in either
> > > guest absolute terms or as a PCI BAR-relative address. This is required
> > > to support virtio-pmem in Hyper-V, since Hyper-V only allows PCI devices
> > > to operate on PCI memory ranges defined via BARs.
> > > 
> > > Virtio-pmem will check for a shared memory window and use that if found,
> > > else it will fallback to using the guest absolute addresses in
> > > virtio_pmem_config. This was chosen over defining a new feature bit,
> > > since it's similar to how virtio-fs is configured.
> > > 
> > > Signed-off-by: Taylor Stark <tstark@microsoft.com>
> > 
> > This needs to be added to the device spec too.
> > Can you send a spec patch please?
> > It's a subscriber-only list virtio-comment@lists.oasis-open.org
> 
> Absolutely! I tried looking on the virtio-spec repo on github but
> I couldn't find a spec for virtio-pmem to update. There is this
> issue (https://github.com/oasis-tcs/virtio-spec/issues/78) which
> seems to indicate the virtio-pmem spec hasn't been added yet.
> Any suggestions for where to send a patch?
> 
> Thanks,
> Taylor 

Just apply that patch (whichever you think is right),
do yours on top, and indicate this in the email.
Send it to virtio-comment.

> > > ---
> > >  drivers/nvdimm/virtio_pmem.c | 21 +++++++++++++++++----
> > >  drivers/nvdimm/virtio_pmem.h |  3 +++
> > >  2 files changed, 20 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> > > index 726c7354d465..43c1d835a449 100644
> > > --- a/drivers/nvdimm/virtio_pmem.c
> > > +++ b/drivers/nvdimm/virtio_pmem.c
> > > @@ -37,6 +37,8 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
> > >  	struct virtio_pmem *vpmem;
> > >  	struct resource res;
> > >  	int err = 0;
> > > +	bool have_shm_region;
> > > +	struct virtio_shm_region pmem_region;
> > >  
> > >  	if (!vdev->config->get) {
> > >  		dev_err(&vdev->dev, "%s failure: config access disabled\n",
> > > @@ -58,10 +60,21 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
> > >  		goto out_err;
> > >  	}
> > >  
> > > -	virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > -			start, &vpmem->start);
> > > -	virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > -			size, &vpmem->size);
> > > +	/* Retrieve the pmem device's address and size. It may have been supplied
> > > +	 * as a PCI BAR-relative shared memory region, or as a guest absolute address.
> > > +	 */
> > > +	have_shm_region = virtio_get_shm_region(vpmem->vdev, &pmem_region,
> > > +						VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION);
> > > +
> > > +	if (have_shm_region) {
> > > +		vpmem->start = pmem_region.addr;
> > > +		vpmem->size = pmem_region.len;
> > > +	} else {
> > > +		virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > +				start, &vpmem->start);
> > > +		virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > +				size, &vpmem->size);
> > > +	}
> > >  
> > >  	res.start = vpmem->start;
> > >  	res.end   = vpmem->start + vpmem->size - 1;
> > > diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
> > > index 0dddefe594c4..62bb564e81cb 100644
> > > --- a/drivers/nvdimm/virtio_pmem.h
> > > +++ b/drivers/nvdimm/virtio_pmem.h
> > > @@ -50,6 +50,9 @@ struct virtio_pmem {
> > >  	__u64 size;
> > >  };
> > >  
> > > +/* For the id field in virtio_pci_shm_cap */
> > > +#define VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION 0
> > > +
> > >  void virtio_pmem_host_ack(struct virtqueue *vq);
> > >  int async_pmem_flush(struct nd_region *nd_region, struct bio *bio);
> > >  #endif
> > > -- 
> > > 2.32.0
Taylor Stark July 21, 2021, 9:18 p.m. UTC | #7
On Tue, Jul 20, 2021 at 05:46:30AM -0400, Michael S. Tsirkin wrote:
> On Mon, Jul 19, 2021 at 11:41:03PM -0700, Taylor Stark wrote:
> > On Mon, Jul 19, 2021 at 05:17:00PM -0400, Michael S. Tsirkin wrote:
> > > On Thu, Jul 15, 2021 at 03:35:05PM -0700, Taylor Stark wrote:
> > > > Update virtio-pmem to allow for the pmem region to be specified in either
> > > > guest absolute terms or as a PCI BAR-relative address. This is required
> > > > to support virtio-pmem in Hyper-V, since Hyper-V only allows PCI devices
> > > > to operate on PCI memory ranges defined via BARs.
> > > > 
> > > > Virtio-pmem will check for a shared memory window and use that if found,
> > > > else it will fallback to using the guest absolute addresses in
> > > > virtio_pmem_config. This was chosen over defining a new feature bit,
> > > > since it's similar to how virtio-fs is configured.
> > > > 
> > > > Signed-off-by: Taylor Stark <tstark@microsoft.com>
> > > 
> > > This needs to be added to the device spec too.
> > > Can you send a spec patch please?
> > > It's a subscriber-only list virtio-comment@lists.oasis-open.org
> > 
> > Absolutely! I tried looking on the virtio-spec repo on github but
> > I couldn't find a spec for virtio-pmem to update. There is this
> > issue (https://github.com/oasis-tcs/virtio-spec/issues/78) which
> > seems to indicate the virtio-pmem spec hasn't been added yet.
> > Any suggestions for where to send a patch?
> > 
> > Thanks,
> > Taylor 
> 
> Just apply that patch (whichever you think is right)
> and do yours on top and indicate this in the email.
> Send it to virtio-comments.

Posted the patch here for those that want to follow along:
https://lists.oasis-open.org/archives/virtio-comment/202107/msg00111.html
 
> > > > ---
> > > >  drivers/nvdimm/virtio_pmem.c | 21 +++++++++++++++++----
> > > >  drivers/nvdimm/virtio_pmem.h |  3 +++
> > > >  2 files changed, 20 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> > > > index 726c7354d465..43c1d835a449 100644
> > > > --- a/drivers/nvdimm/virtio_pmem.c
> > > > +++ b/drivers/nvdimm/virtio_pmem.c
> > > > @@ -37,6 +37,8 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
> > > >  	struct virtio_pmem *vpmem;
> > > >  	struct resource res;
> > > >  	int err = 0;
> > > > +	bool have_shm_region;
> > > > +	struct virtio_shm_region pmem_region;
> > > >  
> > > >  	if (!vdev->config->get) {
> > > >  		dev_err(&vdev->dev, "%s failure: config access disabled\n",
> > > > @@ -58,10 +60,21 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
> > > >  		goto out_err;
> > > >  	}
> > > >  
> > > > -	virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > > -			start, &vpmem->start);
> > > > -	virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > > -			size, &vpmem->size);
> > > > +	/* Retrieve the pmem device's address and size. It may have been supplied
> > > > +	 * as a PCI BAR-relative shared memory region, or as a guest absolute address.
> > > > +	 */
> > > > +	have_shm_region = virtio_get_shm_region(vpmem->vdev, &pmem_region,
> > > > +						VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION);
> > > > +
> > > > +	if (have_shm_region) {
> > > > +		vpmem->start = pmem_region.addr;
> > > > +		vpmem->size = pmem_region.len;
> > > > +	} else {
> > > > +		virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > > +				start, &vpmem->start);
> > > > +		virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > > +				size, &vpmem->size);
> > > > +	}
> > > >  
> > > >  	res.start = vpmem->start;
> > > >  	res.end   = vpmem->start + vpmem->size - 1;
> > > > diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
> > > > index 0dddefe594c4..62bb564e81cb 100644
> > > > --- a/drivers/nvdimm/virtio_pmem.h
> > > > +++ b/drivers/nvdimm/virtio_pmem.h
> > > > @@ -50,6 +50,9 @@ struct virtio_pmem {
> > > >  	__u64 size;
> > > >  };
> > > >  
> > > > +/* For the id field in virtio_pci_shm_cap */
> > > > +#define VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION 0
> > > > +
> > > >  void virtio_pmem_host_ack(struct virtqueue *vq);
> > > >  int async_pmem_flush(struct nd_region *nd_region, struct bio *bio);
> > > >  #endif
> > > > -- 
> > > > 2.32.0
Taylor Stark July 21, 2021, 10:08 p.m. UTC | #8
On Tue, Jul 20, 2021 at 08:51:04AM +0200, Pankaj Gupta wrote:
> > > >
> > > > -       virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > > -                       start, &vpmem->start);
> > > > -       virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > > -                       size, &vpmem->size);
> > > > +       /* Retrieve the pmem device's address and size. It may have been supplied
> > > > +        * as a PCI BAR-relative shared memory region, or as a guest absolute address.
> > > > +        */
> > > > +       have_shm_region = virtio_get_shm_region(vpmem->vdev, &pmem_region,
> > > > +                                               VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION);
> > >
> > > Current implementation of Virtio pmem device in Qemu does not expose
> > > it as PCI BAR.
> > > So, can't test it. Just curious if device side implementation is also
> > > tested for asynchronous
> > > flush case?
> > >
> > > Thanks,
> > > Pankaj
> >
> > Yes, I tested the async flush case as well. We basically call
> > FlushFileBuffers on the backing file, which is Windows' equivalent of
> > fsync. I also briefly tested with qemu to ensure that still works with
> > the patch.
> 
> Thank you for the confirmation. This sounds really good.
> I am also getting back to pending items for virtio-pmem.
> 
> On a side question: Do you guys have any or plan for Windows guest
> implementation
> for virtio-pmem?

Unfortunately, my team doesn't currently have any plans to add a Windows
virtio-pmem implementation. My team is primarily focused on virtualization
in client environments, which is a little different than server environments.
For our Windows-based scenarios, dynamically sized disks are important. It's
tricky to get that to work with pmem+DAX given that Windows isn't state separated.
Pankaj Gupta July 22, 2021, 4:40 a.m. UTC | #9
> > On a side question: Do you guys have any or plan for Windows guest
> > implementation
> > for virtio-pmem?
>
> Unfortunately, my team doesn't currently have any plans to add a Windows
> virtio-pmem implementation. My team is primarily focused on virtualization
> in client environments, which is a little different than server environments.
> For our Windows-based scenarios, dynamically sized disks are important. It's
> tricky to get that to work with pmem+DAX given that Windows isn't state separated.

I see. Thank you for the details.

Best regards,
Pankaj
Dan Williams Aug. 25, 2021, 12:29 a.m. UTC | #10
On Wed, Jul 21, 2021 at 3:09 PM Taylor Stark <tstark@linux.microsoft.com> wrote:
>
> On Tue, Jul 20, 2021 at 08:51:04AM +0200, Pankaj Gupta wrote:
> > > > >
> > > > > -       virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > > > -                       start, &vpmem->start);
> > > > > -       virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > > > -                       size, &vpmem->size);
> > > > > +       /* Retrieve the pmem device's address and size. It may have been supplied
> > > > > +        * as a PCI BAR-relative shared memory region, or as a guest absolute address.
> > > > > +        */
> > > > > +       have_shm_region = virtio_get_shm_region(vpmem->vdev, &pmem_region,
> > > > > +                                               VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION);
> > > >
> > > > Current implementation of Virtio pmem device in Qemu does not expose
> > > > it as PCI BAR.
> > > > So, can't test it. Just curious if device side implementation is also
> > > > tested for asynchronous
> > > > flush case?
> > > >
> > > > Thanks,
> > > > Pankaj
> > >
> > > Yes, I tested the async flush case as well. We basically call
> > > FlushFileBuffers on the backing file, which is Windows' equivalent of
> > > fsync. I also briefly tested with qemu to ensure that still works with
> > > the patch.
> >
> > Thank you for the confirmation. This sounds really good.
> > I am also getting back to pending items for virtio-pmem.
> >
> > On a side question: Do you guys have any or plan for Windows guest
> > implementation
> > for virtio-pmem?
>
> Unfortunately, my team doesn't currently have any plans to add a Windows
> virtio-pmem implementation. My team is primarily focused on virtualization
> in client environments, which is a little different than server environments.
> For our Windows-based scenarios, dynamically sized disks are important. It's
> tricky to get that to work with pmem+DAX given that Windows isn't state separated.

Pardon me for commenting on an old thread...

What does "state separated" mean here? There's configuration
flexibility in the driver to resize persistent memory namespaces.
Taylor Stark Aug. 25, 2021, 9:46 p.m. UTC | #11
On Tue, Aug 24, 2021 at 05:29:11PM -0700, Dan Williams wrote:
> On Wed, Jul 21, 2021 at 3:09 PM Taylor Stark <tstark@linux.microsoft.com> wrote:
> >
> > On Tue, Jul 20, 2021 at 08:51:04AM +0200, Pankaj Gupta wrote:
> > > > > >
> > > > > > -       virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > > > > -                       start, &vpmem->start);
> > > > > > -       virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > > > > -                       size, &vpmem->size);
> > > > > > +       /* Retrieve the pmem device's address and size. It may have been supplied
> > > > > > +        * as a PCI BAR-relative shared memory region, or as a guest absolute address.
> > > > > > +        */
> > > > > > +       have_shm_region = virtio_get_shm_region(vpmem->vdev, &pmem_region,
> > > > > > +                                               VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION);
> > > > >
> > > > > Current implementation of Virtio pmem device in Qemu does not expose
> > > > > it as PCI BAR.
> > > > > So, can't test it. Just curious if device side implementation is also
> > > > > tested for asynchronous
> > > > > flush case?
> > > > >
> > > > > Thanks,
> > > > > Pankaj
> > > >
> > > > Yes, I tested the async flush case as well. We basically call
> > > > FlushFileBuffers on the backing file, which is Windows' equivalent of
> > > > fsync. I also briefly tested with qemu to ensure that still works with
> > > > the patch.
> > >
> > > Thank you for the confirmation. This sounds really good.
> > > I am also getting back to pending items for virtio-pmem.
> > >
> > > On a side question: Do you guys have any or plan for Windows guest
> > > implementation
> > > for virtio-pmem?
> >
> > Unfortunately, my team doesn't currently have any plans to add a Windows
> > virtio-pmem implementation. My team is primarily focused on virtualization
> > in client environments, which is a little different than server environments.
> > For our Windows-based scenarios, dynamically sized disks are important. It's
> > tricky to get that to work with pmem+DAX given that Windows isn't state separated.
> 
> Pardon me for commenting on an old thread...
> 
> What does "state separated" mean here? There's configuration
> flexibility in the driver to resize persistent memory namespaces.

I think I might have been using Microsoft-specific terminology - my bad. By "state
separated" I mean the system is split into read-only and read-write partitions.
Typically OS state is on the read-only partition and user data is on the
read-write partition (for easier servicing/upgrade). One of our primary use cases
for virtio-pmem is WSL GUI app support. In that scenario, we have a read-only
system distro, and we let the user dynamically fill the read-write partitions with
as many apps as they want (and have space for - remembering that their Windows apps
on the host are eating up space as well). Windows is not state separated, so we
have OS state intermingled with user data/apps all on one read-write partition.

The crux of the problem isn't really related to state separation; it's how you
handle dynamically sized data with virtio-pmem. If there's a way to do that
already, I'm all ears :) But right now virtio-pmem is supplied with a fixed range
during init, so it wasn't immediately obvious to me how to make dynamically sized
data work. We'd have to pick a max size and expand the backing file on the host
on a second-level page fault when the guest touches a page past what's already
been allocated, or something along those lines. That's doable, but there are
gotchas around failure cases (do we have to kill the guest?), sharing disk space
between the Windows host and guest, etc. Getting back to why I said state
separation makes this easier: the read-only partitions are fixed size. So our WSL
system distro slots in nicely with virtio-pmem, but less so IMO for Windows
guests (at least for our use cases).

Long explanation - hope it helped to explain things. And if I'm missing something
obvious, please let me know! :)

Thanks,
Taylor
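
To make the "expand on second-level fault" idea above a bit more concrete: on
the host side it boils down to growing the backing file before mapping more of
it into the guest, capped at whatever maximum was advertised at init. A purely
hypothetical helper follows (none of this exists in WSL or Hyper-V, and the
names are invented):

#include <fcntl.h>
#include <sys/stat.h>

/*
 * Hypothetical host-side helper, called when the guest faults on a page past
 * the currently allocated end of the backing file. Grows the file so the
 * mapping can be extended, up to the fixed maximum advertised to the guest.
 * Returns 0 on success and non-zero on failure, at which point the host has
 * to decide what to do with the faulting guest.
 */
static int grow_backing_file(int fd, off_t fault_end, off_t max_size)
{
	struct stat st;

	if (fault_end > max_size)
		return -1;	/* guest touched past the advertised region */
	if (fstat(fd, &st) < 0)
		return -1;
	if (fault_end <= st.st_size)
		return 0;	/* already large enough */

	/* Allocate real blocks so later guest writes can't hit ENOSPC/SIGBUS. */
	return posix_fallocate(fd, st.st_size, fault_end - st.st_size);
}
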
Dan Williams Aug. 25, 2021, 9:59 p.m. UTC | #12
On Wed, Aug 25, 2021 at 2:46 PM Taylor Stark <tstark@linux.microsoft.com> wrote:
>
> On Tue, Aug 24, 2021 at 05:29:11PM -0700, Dan Williams wrote:
> > On Wed, Jul 21, 2021 at 3:09 PM Taylor Stark <tstark@linux.microsoft.com> wrote:
> > >
> > > On Tue, Jul 20, 2021 at 08:51:04AM +0200, Pankaj Gupta wrote:
> > > > > > >
> > > > > > > -       virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > > > > > -                       start, &vpmem->start);
> > > > > > > -       virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
> > > > > > > -                       size, &vpmem->size);
> > > > > > > +       /* Retrieve the pmem device's address and size. It may have been supplied
> > > > > > > +        * as a PCI BAR-relative shared memory region, or as a guest absolute address.
> > > > > > > +        */
> > > > > > > +       have_shm_region = virtio_get_shm_region(vpmem->vdev, &pmem_region,
> > > > > > > +                                               VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION);
> > > > > >
> > > > > > Current implementation of Virtio pmem device in Qemu does not expose
> > > > > > it as PCI BAR.
> > > > > > So, can't test it. Just curious if device side implementation is also
> > > > > > tested for asynchronous
> > > > > > flush case?
> > > > > >
> > > > > > Thanks,
> > > > > > Pankaj
> > > > >
> > > > > Yes, I tested the async flush case as well. We basically call
> > > > > FlushFileBuffers on the backing file, which is Windows' equivalent of
> > > > > fsync. I also briefly tested with qemu to ensure that still works with
> > > > > the patch.
> > > >
> > > > Thank you for the confirmation. This sounds really good.
> > > > I am also getting back to pending items for virtio-pmem.
> > > >
> > > > On a side question: Do you guys have any or plan for Windows guest
> > > > implementation
> > > > for virtio-pmem?
> > >
> > > Unfortunately, my team doesn't currently have any plans to add a Windows
> > > virtio-pmem implementation. My team is primarily focused on virtualization
> > > in client environments, which is a little different than server environments.
> > > For our Windows-based scenarios, dynamically sized disks are important. It's
> > > tricky to get that to work with pmem+DAX given that Windows isn't state separated.
> >
> > Pardon me for commenting on an old thread...
> >
> > What does "state separated" mean here? There's configuration
> > flexibility in the driver to resize persistent memory namespaces.
>
> I think I might have been using Microsoft specific terminology - my bad. By "state
> separated" I mean the system is split into read-only and read-write partitions.
> Typically OS state is on the read-only partition and user data is on the
> read-write partition (for easier servicing/upgrade). One of our primary use cases
> for virtio-pmem is WSL GUI app support. In that scenario, we have a read-only
> system distro, and we let the user dynamically fill the read-write partitions with
> as many apps as they want (and have space for - remembering that their Windows apps
> on the host are eating up space as well). Windows is not state separated, so we
> have OS state intermingled with user data/apps all on one read-write partition.
>
> The crux of the problem isn't really related to state separation, it's how do
> you handle dynamically sized data with virtio-pmem? If there's a way to do that
> already, I'm all ears :) But right now virtio-pmem is supplied with a fixed range
> during init, so it wasn't immediately obvious to me how to make dynamically sized
> data work. We'd have to like pick a max size, and expand the backing file on the
> host on second level page fault when the guest tries to touch a page past whats
> already been allocated or something. Which is doable, there are just gotchas around
> failure cases (do we have to kill the guest?), sharing disk space between the
> Windows host and guest, etc. Getting back to why I said state separation makes
> this easier, the read-only partitions are fixed size. So our WSL system distro
> slots in nicely with virtio-pmem, but less so IMO for Windows guests (at least for
> our use cases).
>
> Long explanation - hope it helped to explain things. And if I'm missing something
> obvious, please let me know! :)
>

Thanks for the explanation, it makes sense now. As for the dynamic
resize support, it should just be a "small matter of programming" to
add resize and revalidation support to the virtio-pmem driver. For
example, drivers/block/loop.c is a similar file-backed block driver
that supports resize; virtio-pmem is similar, just split across the VM
boundary.
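
For anyone who picks this up later: the driver-side entry point for that would
presumably be a config-changed callback along these lines. This is a
hypothetical sketch only; the actual nd_region revalidation/resize plumbing is
the "small matter of programming" and is left as a stub here.

/*
 * Hypothetical resize hook for drivers/nvdimm/virtio_pmem.c: re-read the size
 * when the device signals a config change, loop.c-style. Revalidating and
 * resizing the backing nd_region is the part that still needs to be written.
 */
static void virtio_pmem_config_changed(struct virtio_device *vdev)
{
	struct virtio_pmem *vpmem = vdev->priv;
	__u64 new_size;

	virtio_cread_le(vdev, struct virtio_pmem_config, size, &new_size);
	if (new_size == vpmem->size)
		return;

	dev_info(&vdev->dev, "pmem region resized: %llu -> %llu bytes\n",
		 (unsigned long long)vpmem->size,
		 (unsigned long long)new_size);
	/* TODO: revalidate/resize the nd_region before publishing the new size. */
	vpmem->size = new_size;
}

/* This would be wired up via the .config_changed member of struct virtio_driver. */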

Patch

diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index 726c7354d465..43c1d835a449 100644
--- a/drivers/nvdimm/virtio_pmem.c
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -37,6 +37,8 @@  static int virtio_pmem_probe(struct virtio_device *vdev)
 	struct virtio_pmem *vpmem;
 	struct resource res;
 	int err = 0;
+	bool have_shm_region;
+	struct virtio_shm_region pmem_region;
 
 	if (!vdev->config->get) {
 		dev_err(&vdev->dev, "%s failure: config access disabled\n",
@@ -58,10 +60,21 @@  static int virtio_pmem_probe(struct virtio_device *vdev)
 		goto out_err;
 	}
 
-	virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
-			start, &vpmem->start);
-	virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
-			size, &vpmem->size);
+	/* Retrieve the pmem device's address and size. It may have been supplied
+	 * as a PCI BAR-relative shared memory region, or as a guest absolute address.
+	 */
+	have_shm_region = virtio_get_shm_region(vpmem->vdev, &pmem_region,
+						VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION);
+
+	if (have_shm_region) {
+		vpmem->start = pmem_region.addr;
+		vpmem->size = pmem_region.len;
+	} else {
+		virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
+				start, &vpmem->start);
+		virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
+				size, &vpmem->size);
+	}
 
 	res.start = vpmem->start;
 	res.end   = vpmem->start + vpmem->size - 1;
diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
index 0dddefe594c4..62bb564e81cb 100644
--- a/drivers/nvdimm/virtio_pmem.h
+++ b/drivers/nvdimm/virtio_pmem.h
@@ -50,6 +50,9 @@  struct virtio_pmem {
 	__u64 size;
 };
 
+/* For the id field in virtio_pci_shm_cap */
+#define VIRTIO_PMEM_SHMCAP_ID_PMEM_REGION 0
+
 void virtio_pmem_host_ack(struct virtqueue *vq);
 int async_pmem_flush(struct nd_region *nd_region, struct bio *bio);
 #endif