diff mbox series

[RFC,v2,08/10] iommu/riscv: support nested iommu for flushing cache

Message ID 20240614142156.29420-9-zong.li@sifive.com (mailing list archive)
State RFC
Headers show
Series RISC-V IOMMU HPM and nested IOMMU support | expand

Checks

Context Check Description
conchuod/vmtest-fixes-PR fail merge-conflict

Commit Message

Zong Li June 14, 2024, 2:21 p.m. UTC
This patch implements cache_invalidate_user operation for the userspace
to flush the hardware caches for a nested domain through iommufd.

Signed-off-by: Zong Li <zong.li@sifive.com>
---
 drivers/iommu/riscv/iommu.c  | 90 ++++++++++++++++++++++++++++++++++--
 include/uapi/linux/iommufd.h | 11 +++++
 2 files changed, 97 insertions(+), 4 deletions(-)

Comments

Baolu Lu June 15, 2024, 3:22 a.m. UTC | #1
On 6/14/24 10:21 PM, Zong Li wrote:
> This patch implements cache_invalidate_user operation for the userspace
> to flush the hardware caches for a nested domain through iommufd.

$ grep "This patch" Documentation/process/submitting-patches.rst

Same to other commit messages.

Best regards,
baolu
Zong Li June 17, 2024, 2:16 a.m. UTC | #2
On Sat, Jun 15, 2024 at 11:24 AM Baolu Lu <baolu.lu@linux.intel.com> wrote:
>
> On 6/14/24 10:21 PM, Zong Li wrote:
> > This patch implements cache_invalidate_user operation for the userspace
> > to flush the hardware caches for a nested domain through iommufd.
>
> $ grep "This patch" Documentation/process/submitting-patches.rst
>
> Same to other commit messages.
>

Thank you for your tips. I will modify them in the next version.

> Best regards,
> baolu
Jason Gunthorpe June 19, 2024, 4:17 p.m. UTC | #3
On Fri, Jun 14, 2024 at 10:21:54PM +0800, Zong Li wrote:
> This patch implements cache_invalidate_user operation for the userspace
> to flush the hardware caches for a nested domain through iommufd.
> 
> Signed-off-by: Zong Li <zong.li@sifive.com>
> ---
>  drivers/iommu/riscv/iommu.c  | 90 ++++++++++++++++++++++++++++++++++--
>  include/uapi/linux/iommufd.h | 11 +++++
>  2 files changed, 97 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
> index 410b236e9b24..d08eb0a2939e 100644
> --- a/drivers/iommu/riscv/iommu.c
> +++ b/drivers/iommu/riscv/iommu.c
> @@ -1587,8 +1587,9 @@ static int riscv_iommu_attach_dev_nested(struct iommu_domain *domain, struct dev
>  	if (riscv_iommu_bond_link(riscv_domain, dev))
>  		return -ENOMEM;
>  
> -	riscv_iommu_iotlb_inval(riscv_domain, 0, ULONG_MAX);
> -	info->dc_user.ta |= RISCV_IOMMU_PC_TA_V;
> +	if (riscv_iommu_bond_link(info->domain, dev))
> +		return -ENOMEM;

?? Is this in the wrong patch then? Confused

>  	riscv_iommu_iodir_update(iommu, dev, &info->dc_user);
>  
>  	info->domain = riscv_domain;
> @@ -1611,13 +1612,92 @@ static void riscv_iommu_domain_free_nested(struct iommu_domain *domain)
>  	kfree(riscv_domain);
>  }
>  
> +static int riscv_iommu_fix_user_cmd(struct riscv_iommu_command *cmd,
> +				    unsigned int pscid, unsigned int gscid)
> +{
> +	u32 opcode = FIELD_GET(RISCV_IOMMU_CMD_OPCODE, cmd->dword0);
> +
> +	switch (opcode) {
> +	case RISCV_IOMMU_CMD_IOTINVAL_OPCODE:
> +		u32 func = FIELD_GET(RISCV_IOMMU_CMD_FUNC, cmd->dword0);
> +
> +		if (func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA &&
> +		    func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA) {
> +			pr_warn("The IOTINVAL function: 0x%x is not supported\n",
> +				func);
> +			return -EOPNOTSUPP;
> +		}
> +
> +		if (func == RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA) {
> +			cmd->dword0 &= ~RISCV_IOMMU_CMD_FUNC;
> +			cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_FUNC,
> +						  RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
> +		}
> +
> +		cmd->dword0 &= ~(RISCV_IOMMU_CMD_IOTINVAL_PSCID |
> +				 RISCV_IOMMU_CMD_IOTINVAL_GSCID);
> +		riscv_iommu_cmd_inval_set_pscid(cmd, pscid);
> +		riscv_iommu_cmd_inval_set_gscid(cmd, gscid);
> +		break;
> +	case RISCV_IOMMU_CMD_IODIR_OPCODE:
> +		/*
> +		 * Ensure the device ID is right. We expect that VMM has
> +		 * transferred the device ID to host's from guest's.
> +		 */

I'm not sure what this remark means, but I expect you will need to
translate any devices IDs from virtual to physical.

>  
>  static int
> -riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg)
> +riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg,
> +			struct riscv_iommu_domain *s1_domain)
>  {
>  	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
>  	struct riscv_iommu_device *iommu = dev_to_iommu(dev);
> @@ -1663,6 +1743,8 @@ riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_
>  		       riscv_iommu_get_dc(iommu, fwspec->ids[i]),
>  		       sizeof(struct riscv_iommu_dc));
>  		info->dc_user.fsc = dc.fsc;
> +		info->dc_user.ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, s1_domain->pscid) |
> +					      RISCV_IOMMU_PC_TA_V;
>  	}

It is really weird that the s1 domain has any kind of id. What is the
PSCID? Is it analogous to VMID on ARM?

Jason
Zong Li June 28, 2024, 8:19 a.m. UTC | #4
On Thu, Jun 20, 2024 at 12:17 AM Jason Gunthorpe <jgg@ziepe.ca> wrote:
>
> On Fri, Jun 14, 2024 at 10:21:54PM +0800, Zong Li wrote:
> > This patch implements cache_invalidate_user operation for the userspace
> > to flush the hardware caches for a nested domain through iommufd.
> >
> > Signed-off-by: Zong Li <zong.li@sifive.com>
> > ---
> >  drivers/iommu/riscv/iommu.c  | 90 ++++++++++++++++++++++++++++++++++--
> >  include/uapi/linux/iommufd.h | 11 +++++
> >  2 files changed, 97 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
> > index 410b236e9b24..d08eb0a2939e 100644
> > --- a/drivers/iommu/riscv/iommu.c
> > +++ b/drivers/iommu/riscv/iommu.c
> > @@ -1587,8 +1587,9 @@ static int riscv_iommu_attach_dev_nested(struct iommu_domain *domain, struct dev
> >       if (riscv_iommu_bond_link(riscv_domain, dev))
> >               return -ENOMEM;
> >
> > -     riscv_iommu_iotlb_inval(riscv_domain, 0, ULONG_MAX);
> > -     info->dc_user.ta |= RISCV_IOMMU_PC_TA_V;
> > +     if (riscv_iommu_bond_link(info->domain, dev))
> > +             return -ENOMEM;
>
> ?? Is this in the wrong patch then? Confused

Yes, it should be in the 7th patch of this series. I will fix it in the next version.

>
> >       riscv_iommu_iodir_update(iommu, dev, &info->dc_user);
> >
> >       info->domain = riscv_domain;
> > @@ -1611,13 +1612,92 @@ static void riscv_iommu_domain_free_nested(struct iommu_domain *domain)
> >       kfree(riscv_domain);
> >  }
> >
> > +static int riscv_iommu_fix_user_cmd(struct riscv_iommu_command *cmd,
> > +                                 unsigned int pscid, unsigned int gscid)
> > +{
> > +     u32 opcode = FIELD_GET(RISCV_IOMMU_CMD_OPCODE, cmd->dword0);
> > +
> > +     switch (opcode) {
> > +     case RISCV_IOMMU_CMD_IOTINVAL_OPCODE:
> > +             u32 func = FIELD_GET(RISCV_IOMMU_CMD_FUNC, cmd->dword0);
> > +
> > +             if (func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA &&
> > +                 func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA) {
> > +                     pr_warn("The IOTINVAL function: 0x%x is not supported\n",
> > +                             func);
> > +                     return -EOPNOTSUPP;
> > +             }
> > +
> > +             if (func == RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA) {
> > +                     cmd->dword0 &= ~RISCV_IOMMU_CMD_FUNC;
> > +                     cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_FUNC,
> > +                                               RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
> > +             }
> > +
> > +             cmd->dword0 &= ~(RISCV_IOMMU_CMD_IOTINVAL_PSCID |
> > +                              RISCV_IOMMU_CMD_IOTINVAL_GSCID);
> > +             riscv_iommu_cmd_inval_set_pscid(cmd, pscid);
> > +             riscv_iommu_cmd_inval_set_gscid(cmd, gscid);
> > +             break;
> > +     case RISCV_IOMMU_CMD_IODIR_OPCODE:
> > +             /*
> > +              * Ensure the device ID is right. We expect that VMM has
> > +              * transferred the device ID to host's from guest's.
> > +              */
>
> I'm not sure what this remark means, but I expect you will need to
> translate any devices IDs from virtual to physical.

I think we need some data structure to map it. I didn't do that here
because our internal implementation translates the IDs in the VMM,
but as you mentioned, we can't expect that the VMM will do that for
the kernel.

>
> >
> >  static int
> > -riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg)
> > +riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg,
> > +                     struct riscv_iommu_domain *s1_domain)
> >  {
> >       struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> >       struct riscv_iommu_device *iommu = dev_to_iommu(dev);
> > @@ -1663,6 +1743,8 @@ riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_
> >                      riscv_iommu_get_dc(iommu, fwspec->ids[i]),
> >                      sizeof(struct riscv_iommu_dc));
> >               info->dc_user.fsc = dc.fsc;
> > +             info->dc_user.ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, s1_domain->pscid) |
> > +                                           RISCV_IOMMU_PC_TA_V;
> >       }
>
> It is really weird that the s1 domain has any kind of id. What is the
> PSCID? Is it analogous to VMID on ARM?

I think the VMID is closer to the GSCID. The PSCID might be more like
the ASID, as it is used as the address space ID for the process
identified by the first-stage page table.
The GSCID is used to tag the G-stage TLB, the PSCID is used to tag the
single-stage TLB, and the tuple {GSCID, PSCID} is used to tag the
VS-stage TLB. The IOTINVAL.VMA command can flush the mappings by
matching the GSCID only, the PSCID only, or the tuple {GSCID, PSCID}.
Consider the situation where two devices are passed through to a
guest: then we will have two s1 domains under the same s2 domain, and
we can flush their mappings by {GSCID, PSCID} and {GSCID, PSCID'}
respectively.

>
> Jason
Jason Gunthorpe June 28, 2024, 10:26 p.m. UTC | #5
On Fri, Jun 28, 2024 at 04:19:28PM +0800, Zong Li wrote:

> > > +     case RISCV_IOMMU_CMD_IODIR_OPCODE:
> > > +             /*
> > > +              * Ensure the device ID is right. We expect that VMM has
> > > +              * transferred the device ID to host's from guest's.
> > > +              */
> >
> > I'm not sure what this remark means, but I expect you will need to
> > translate any devices IDs from virtual to physical.
> 
> I think we need some data structure to map it. I didn't do that here
> because our internal implementation translates the right ID in VMM,
> but as you mentioned, we can't expect that VMM will do that for
> kernel.

Yes, you need the viommu stuff Nicolin is working on to hold the
translation, same as the ARM driver.

In the mean time you can't support this invalidation opcode.
 
> > >  static int
> > > -riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg)
> > > +riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg,
> > > +                     struct riscv_iommu_domain *s1_domain)
> > >  {
> > >       struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> > >       struct riscv_iommu_device *iommu = dev_to_iommu(dev);
> > > @@ -1663,6 +1743,8 @@ riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_
> > >                      riscv_iommu_get_dc(iommu, fwspec->ids[i]),
> > >                      sizeof(struct riscv_iommu_dc));
> > >               info->dc_user.fsc = dc.fsc;
> > > +             info->dc_user.ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, s1_domain->pscid) |
> > > +                                           RISCV_IOMMU_PC_TA_V;
> > >       }
> >
> > It is really weird that the s1 domain has any kind of id. What is the
> > PSCID? Is it analogous to VMID on ARM?
> 
> I think the VMID is closer to the GSCID. The PSCID might be more like
> the ASID, as it is used as the address space ID for the process
> identified by the first-stage page table.

That does sound like the ASID, but I would expect this to work by
using the VM provided PSCID and just flowing the PSCID through
transparently during the invalidation.

Why have the kernel allocate and override a PSCID when the PSCID is
scoped by the GSCID and can be safely delegated to the VM?

This is going to be necessary if you ever want to support the direct
invalidate queues like ARM/AMD have already as it will not be
desirable to translate the PSCID on that performance path.

It will also be necessary to implement the viommu invalidation path
since there is no domain there, which is needed for the ATS as above.

Jason
diff mbox series

Patch

diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 410b236e9b24..d08eb0a2939e 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -1587,8 +1587,9 @@  static int riscv_iommu_attach_dev_nested(struct iommu_domain *domain, struct dev
 	if (riscv_iommu_bond_link(riscv_domain, dev))
 		return -ENOMEM;
 
-	riscv_iommu_iotlb_inval(riscv_domain, 0, ULONG_MAX);
-	info->dc_user.ta |= RISCV_IOMMU_PC_TA_V;
+	if (riscv_iommu_bond_link(info->domain, dev))
+		return -ENOMEM;
+
 	riscv_iommu_iodir_update(iommu, dev, &info->dc_user);
 
 	info->domain = riscv_domain;
@@ -1611,13 +1612,92 @@  static void riscv_iommu_domain_free_nested(struct iommu_domain *domain)
 	kfree(riscv_domain);
 }
 
+static int riscv_iommu_fix_user_cmd(struct riscv_iommu_command *cmd,
+				    unsigned int pscid, unsigned int gscid)
+{
+	u32 opcode = FIELD_GET(RISCV_IOMMU_CMD_OPCODE, cmd->dword0);
+
+	switch (opcode) {
+	case RISCV_IOMMU_CMD_IOTINVAL_OPCODE:
+		u32 func = FIELD_GET(RISCV_IOMMU_CMD_FUNC, cmd->dword0);
+
+		if (func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA &&
+		    func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA) {
+			pr_warn("The IOTINVAL function: 0x%x is not supported\n",
+				func);
+			return -EOPNOTSUPP;
+		}
+
+		if (func == RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA) {
+			cmd->dword0 &= ~RISCV_IOMMU_CMD_FUNC;
+			cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_FUNC,
+						  RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
+		}
+
+		cmd->dword0 &= ~(RISCV_IOMMU_CMD_IOTINVAL_PSCID |
+				 RISCV_IOMMU_CMD_IOTINVAL_GSCID);
+		riscv_iommu_cmd_inval_set_pscid(cmd, pscid);
+		riscv_iommu_cmd_inval_set_gscid(cmd, gscid);
+		break;
+	case RISCV_IOMMU_CMD_IODIR_OPCODE:
+		/*
+		 * Ensure the device ID is right. We expect that VMM has
+		 * transferred the device ID to host's from guest's.
+		 */
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int riscv_iommu_cache_invalidate_user(struct iommu_domain *domain,
+					     struct iommu_user_data_array *array)
+{
+	struct riscv_iommu_domain *riscv_domain = iommu_domain_to_riscv(domain);
+	struct iommu_hwpt_riscv_iommu_invalidate inv_info;
+	int ret, index;
+
+	if (array->type != IOMMU_HWPT_INVALIDATE_DATA_RISCV_IOMMU) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	for (index = 0; index < array->entry_num; index++) {
+		ret = iommu_copy_struct_from_user_array(&inv_info, array,
+							IOMMU_HWPT_INVALIDATE_DATA_RISCV_IOMMU,
+							index, cmd);
+		if (ret)
+			break;
+
+		ret = riscv_iommu_fix_user_cmd((struct riscv_iommu_command *)inv_info.cmd,
+					       riscv_domain->pscid,
+					       riscv_domain->s2->gscid);
+		if (ret == -EOPNOTSUPP)
+			continue;
+
+		riscv_iommu_cmd_send(riscv_domain->iommu,
+				     (struct riscv_iommu_command *)inv_info.cmd);
+		riscv_iommu_cmd_sync(riscv_domain->iommu,
+				     RISCV_IOMMU_IOTINVAL_TIMEOUT);
+	}
+
+out:
+	array->entry_num = index;
+
+	return ret;
+}
+
 static const struct iommu_domain_ops riscv_iommu_nested_domain_ops = {
 	.attach_dev	= riscv_iommu_attach_dev_nested,
 	.free		= riscv_iommu_domain_free_nested,
+	.cache_invalidate_user = riscv_iommu_cache_invalidate_user,
 };
 
 static int
-riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg)
+riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg,
+			struct riscv_iommu_domain *s1_domain)
 {
 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
 	struct riscv_iommu_device *iommu = dev_to_iommu(dev);
@@ -1663,6 +1743,8 @@  riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_
 		       riscv_iommu_get_dc(iommu, fwspec->ids[i]),
 		       sizeof(struct riscv_iommu_dc));
 		info->dc_user.fsc = dc.fsc;
+		info->dc_user.ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, s1_domain->pscid) |
+					      RISCV_IOMMU_PC_TA_V;
 	}
 
 	return 0;
@@ -1708,7 +1790,7 @@  riscv_iommu_domain_alloc_nested(struct device *dev,
 	}
 
 	/* Get device context of stage-1 from user*/
-	ret = riscv_iommu_get_dc_user(dev, &arg);
+	ret = riscv_iommu_get_dc_user(dev, &arg, s1_domain);
 	if (ret) {
 		kfree(s1_domain);
 		return ERR_PTR(-EINVAL);
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 514463fe85d3..876cbe980a42 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -653,9 +653,11 @@  struct iommu_hwpt_get_dirty_bitmap {
  * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
  *                                        Data Type
  * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
+ * @IOMMU_HWPT_INVALIDATE_DATA_RISCV_IOMMU: Invalidation data for RISCV_IOMMU
  */
 enum iommu_hwpt_invalidate_data_type {
 	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
+	IOMMU_HWPT_INVALIDATE_DATA_RISCV_IOMMU,
 };
 
 /**
@@ -694,6 +696,15 @@  struct iommu_hwpt_vtd_s1_invalidate {
 	__u32 __reserved;
 };
 
+/**
+ * struct iommu_hwpt_riscv_iommu_invalidate - RISCV IOMMU cache invalidation
+ *                                            (IOMMU_HWPT_TYPE_RISCV_IOMMU)
+ * @cmd: An array holds a command for cache invalidation
+ */
+struct iommu_hwpt_riscv_iommu_invalidate {
+	__aligned_u64 cmd[2];
+};
+
 /**
  * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
  * @size: sizeof(struct iommu_hwpt_invalidate)