diff mbox series

[RFC] powerpc/papr_scm: Implement scm async flush

Message ID 160682501436.2579014.14501834468510806255.stgit@lep8c.aus.stglabs.ibm.com (mailing list archive)
State New
Headers show
Series [RFC] powerpc/papr_scm: Implement scm async flush | expand

Commit Message

Shivaprasad G Bhat Dec. 1, 2020, 12:18 p.m. UTC
Tha patch implements SCM async-flush hcall and sets the
ND_REGION_ASYNC capability when the platform device tree
has "ibm,async-flush-required" set.

The below demonstration shows the map_sync behavior when
ibm,async-flush-required is present in device tree.
(https://github.com/avocado-framework-tests/avocado-misc-tests/blob/master/memory/ndctl.py.data/map_sync.c)

The pmem0 is from nvdimm without async-flush-required,
and pmem1 is from nvdimm with async-flush-required, mounted as
/dev/pmem0 on /mnt1 type xfs (rw,relatime,attr2,dax=always,inode64,logbufs=8,logbsize=32k,noquota)
/dev/pmem1 on /mnt2 type xfs (rw,relatime,attr2,dax=always,inode64,logbufs=8,logbsize=32k,noquota)

#./mapsync /mnt1/newfile    ----> Without async-flush-required
#./mapsync /mnt2/newfile    ----> With async-flush-required
Failed to mmap  with Operation not supported

Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
---
The HCALL semantics are in review, not final.

 Documentation/powerpc/papr_hcalls.rst     |   14 ++++++++++
 arch/powerpc/include/asm/hvcall.h         |    3 +-
 arch/powerpc/platforms/pseries/papr_scm.c |   39 +++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 1 deletion(-)

Comments

Pankaj Gupta Dec. 1, 2020, 12:47 p.m. UTC | #1
> Tha patch implements SCM async-flush hcall and sets the
> ND_REGION_ASYNC capability when the platform device tree
> has "ibm,async-flush-required" set.

So, you are reusing the existing ND_REGION_ASYNC flag for the
hypercall based async flush with device tree discovery?

Out of curiosity, does virtio based flush work in ppc? Was just thinking
if we can reuse virtio based flush present in virtio-pmem? Or anything
else we are trying to achieve here?

Thanks,
Pankaj
>
> The below demonstration shows the map_sync behavior when
> ibm,async-flush-required is present in device tree.
> (https://github.com/avocado-framework-tests/avocado-misc-tests/blob/master/memory/ndctl.py.data/map_sync.c)
>
> The pmem0 is from nvdimm without async-flush-required,
> and pmem1 is from nvdimm with async-flush-required, mounted as
> /dev/pmem0 on /mnt1 type xfs (rw,relatime,attr2,dax=always,inode64,logbufs=8,logbsize=32k,noquota)
> /dev/pmem1 on /mnt2 type xfs (rw,relatime,attr2,dax=always,inode64,logbufs=8,logbsize=32k,noquota)
>
> #./mapsync /mnt1/newfile    ----> Without async-flush-required
> #./mapsync /mnt2/newfile    ----> With async-flush-required
> Failed to mmap  with Operation not supported
>
> Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
> ---
> The HCALL semantics are in review, not final.

Any link of the discussion?

>
>  Documentation/powerpc/papr_hcalls.rst     |   14 ++++++++++
>  arch/powerpc/include/asm/hvcall.h         |    3 +-
>  arch/powerpc/platforms/pseries/papr_scm.c |   39 +++++++++++++++++++++++++++++
>  3 files changed, 55 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst
> index 48fcf1255a33..cc310814f24c 100644
> --- a/Documentation/powerpc/papr_hcalls.rst
> +++ b/Documentation/powerpc/papr_hcalls.rst
> @@ -275,6 +275,20 @@ Health Bitmap Flags:
>  Given a DRC Index collect the performance statistics for NVDIMM and copy them
>  to the resultBuffer.
>
> +**H_SCM_ASYNC_FLUSH**
> +
> +| Input: *drcIndex*
> +| Out: *continue-token*
> +| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY*
> +
> +Given a DRC Index Flush the data to backend NVDIMM device.
> +
> +The hcall returns H_BUSY when the flush takes longer time and the hcall needs
> +to be issued multiple times in order to be completely serviced. The
> +*continue-token* from the output to be passed in the argument list in
> +subsequent hcalls to the hypervisor until the hcall is completely serviced
> +at which point H_SUCCESS is returned by the hypervisor.
> +
>  References
>  ==========
>  .. [1] "Power Architecture Platform Reference"
> diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
> index c1fbccb04390..4a13074bc782 100644
> --- a/arch/powerpc/include/asm/hvcall.h
> +++ b/arch/powerpc/include/asm/hvcall.h
> @@ -306,7 +306,8 @@
>  #define H_SCM_HEALTH            0x400
>  #define H_SCM_PERFORMANCE_STATS 0x418
>  #define H_RPT_INVALIDATE       0x448
> -#define MAX_HCALL_OPCODE       H_RPT_INVALIDATE
> +#define H_SCM_ASYNC_FLUSH      0x4A0
> +#define MAX_HCALL_OPCODE       H_SCM_ASYNC_FLUSH
>
>  /* Scope args for H_SCM_UNBIND_ALL */
>  #define H_UNBIND_SCOPE_ALL (0x1)
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> index 835163f54244..1f8c5153cb3d 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -93,6 +93,7 @@ struct papr_scm_priv {
>         uint64_t block_size;
>         int metadata_size;
>         bool is_volatile;
> +       bool async_flush_required;
>
>         uint64_t bound_addr;
>
> @@ -117,6 +118,38 @@ struct papr_scm_priv {
>         size_t stat_buffer_len;
>  };
>
> +static int papr_scm_pmem_flush(struct nd_region *nd_region, struct bio *bio)
> +{
> +       unsigned long ret[PLPAR_HCALL_BUFSIZE];
> +       struct papr_scm_priv *p = nd_region_provider_data(nd_region);
> +       int64_t rc;
> +       uint64_t token = 0;
> +
> +       do {
> +               rc = plpar_hcall(H_SCM_ASYNC_FLUSH, ret, p->drc_index, token);
> +
> +               /* Check if we are stalled for some time */
> +               token = ret[0];
> +               if (H_IS_LONG_BUSY(rc)) {
> +                       msleep(get_longbusy_msecs(rc));
> +                       rc = H_BUSY;
> +               } else if (rc == H_BUSY) {
> +                       cond_resched();
> +               }
> +
> +       } while (rc == H_BUSY);
> +
> +       if (rc)
> +               dev_err(&p->pdev->dev, "flush error: %lld\n", rc);
> +       else
> +               dev_dbg(&p->pdev->dev, "flush drc 0x%x complete\n",
> +                       p->drc_index);
> +
> +       dev_dbg(&p->pdev->dev, "Flush call complete\n");
> +
> +       return rc;
> +}
> +
>  static LIST_HEAD(papr_nd_regions);
>  static DEFINE_MUTEX(papr_ndr_lock);
>
> @@ -943,6 +976,11 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
>         ndr_desc.num_mappings = 1;
>         ndr_desc.nd_set = &p->nd_set;
>
> +       if (p->async_flush_required) {
> +               set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
> +               ndr_desc.flush = papr_scm_pmem_flush;
> +       }
> +
>         if (p->is_volatile)
>                 p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
>         else {
> @@ -1088,6 +1126,7 @@ static int papr_scm_probe(struct platform_device *pdev)
>         p->block_size = block_size;
>         p->blocks = blocks;
>         p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
> +       p->async_flush_required = of_property_read_bool(dn, "ibm,async-flush-required");
>
>         /* We just need to ensure that set cookies are unique across */
>         uuid_parse(uuid_str, (uuid_t *) uuid);
>
> _______________________________________________
> Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
> To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
Aneesh Kumar K.V Dec. 1, 2020, 12:57 p.m. UTC | #2
On 12/1/20 6:17 PM, Pankaj Gupta wrote:
>> Tha patch implements SCM async-flush hcall and sets the
>> ND_REGION_ASYNC capability when the platform device tree
>> has "ibm,async-flush-required" set.
> 
> So, you are reusing the existing ND_REGION_ASYNC flag for the
> hypercall based async flush with device tree discovery?
> 
> Out of curiosity, does virtio based flush work in ppc? Was just thinking
> if we can reuse virtio based flush present in virtio-pmem? Or anything
> else we are trying to achieve here?
> 


Not with PAPR based pmem driver papr_scm.ko. The devices there are 
considered platform device and we use hypercalls to configure the 
device. On similar fashion we are now using hypercall to flush the host 
based caches.

-aneesh
Pankaj Gupta Dec. 1, 2020, 1:02 p.m. UTC | #3
> >> Tha patch implements SCM async-flush hcall and sets the
> >> ND_REGION_ASYNC capability when the platform device tree
> >> has "ibm,async-flush-required" set.
> >
> > So, you are reusing the existing ND_REGION_ASYNC flag for the
> > hypercall based async flush with device tree discovery?
> >
> > Out of curiosity, does virtio based flush work in ppc? Was just thinking
> > if we can reuse virtio based flush present in virtio-pmem? Or anything
> > else we are trying to achieve here?
> >
>
>
> Not with PAPR based pmem driver papr_scm.ko. The devices there are
> considered platform device and we use hypercalls to configure the
> device. On similar fashion we are now using hypercall to flush the host
> based caches.

o.k. Thanks for answering.

Best regards,
Pankaj

>
> -aneesh
diff mbox series

Patch

diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst
index 48fcf1255a33..cc310814f24c 100644
--- a/Documentation/powerpc/papr_hcalls.rst
+++ b/Documentation/powerpc/papr_hcalls.rst
@@ -275,6 +275,20 @@  Health Bitmap Flags:
 Given a DRC Index collect the performance statistics for NVDIMM and copy them
 to the resultBuffer.
 
+**H_SCM_ASYNC_FLUSH**
+
+| Input: *drcIndex*
+| Out: *continue-token*
+| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY*
+
+Given a DRC Index Flush the data to backend NVDIMM device.
+
+The hcall returns H_BUSY when the flush takes longer time and the hcall needs
+to be issued multiple times in order to be completely serviced. The
+*continue-token* from the output to be passed in the argument list in
+subsequent hcalls to the hypervisor until the hcall is completely serviced
+at which point H_SUCCESS is returned by the hypervisor.
+
 References
 ==========
 .. [1] "Power Architecture Platform Reference"
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index c1fbccb04390..4a13074bc782 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -306,7 +306,8 @@ 
 #define H_SCM_HEALTH            0x400
 #define H_SCM_PERFORMANCE_STATS 0x418
 #define H_RPT_INVALIDATE	0x448
-#define MAX_HCALL_OPCODE	H_RPT_INVALIDATE
+#define H_SCM_ASYNC_FLUSH	0x4A0
+#define MAX_HCALL_OPCODE	H_SCM_ASYNC_FLUSH
 
 /* Scope args for H_SCM_UNBIND_ALL */
 #define H_UNBIND_SCOPE_ALL (0x1)
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 835163f54244..1f8c5153cb3d 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -93,6 +93,7 @@  struct papr_scm_priv {
 	uint64_t block_size;
 	int metadata_size;
 	bool is_volatile;
+	bool async_flush_required;
 
 	uint64_t bound_addr;
 
@@ -117,6 +118,38 @@  struct papr_scm_priv {
 	size_t stat_buffer_len;
 };
 
+static int papr_scm_pmem_flush(struct nd_region *nd_region, struct bio *bio)
+{
+	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	struct papr_scm_priv *p = nd_region_provider_data(nd_region);
+	int64_t rc;
+	uint64_t token = 0;
+
+	do {
+		rc = plpar_hcall(H_SCM_ASYNC_FLUSH, ret, p->drc_index, token);
+
+		/* Check if we are stalled for some time */
+		token = ret[0];
+		if (H_IS_LONG_BUSY(rc)) {
+			msleep(get_longbusy_msecs(rc));
+			rc = H_BUSY;
+		} else if (rc == H_BUSY) {
+			cond_resched();
+		}
+
+	} while (rc == H_BUSY);
+
+	if (rc)
+		dev_err(&p->pdev->dev, "flush error: %lld\n", rc);
+	else
+		dev_dbg(&p->pdev->dev, "flush drc 0x%x complete\n",
+			p->drc_index);
+
+	dev_dbg(&p->pdev->dev, "Flush call complete\n");
+
+	return rc;
+}
+
 static LIST_HEAD(papr_nd_regions);
 static DEFINE_MUTEX(papr_ndr_lock);
 
@@ -943,6 +976,11 @@  static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	ndr_desc.num_mappings = 1;
 	ndr_desc.nd_set = &p->nd_set;
 
+	if (p->async_flush_required) {
+		set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
+		ndr_desc.flush = papr_scm_pmem_flush;
+	}
+
 	if (p->is_volatile)
 		p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
 	else {
@@ -1088,6 +1126,7 @@  static int papr_scm_probe(struct platform_device *pdev)
 	p->block_size = block_size;
 	p->blocks = blocks;
 	p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
+	p->async_flush_required = of_property_read_bool(dn, "ibm,async-flush-required");
 
 	/* We just need to ensure that set cookies are unique across */
 	uuid_parse(uuid_str, (uuid_t *) uuid);