Message ID | 20240614142156.29420-9-zong.li@sifive.com (mailing list archive)
---|---
State | RFC
Series | RISC-V IOMMU HPM and nested IOMMU support

Context | Check | Description
---|---|---
conchuod/vmtest-fixes-PR | fail | merge-conflict
On 6/14/24 10:21 PM, Zong Li wrote:
> This patch implements cache_invalidate_user operation for the userspace
> to flush the hardware caches for a nested domain through iommufd.

$ grep "This patch" Documentation/process/submitting-patches.rst

The same applies to the other commit messages.

Best regards,
baolu
On Sat, Jun 15, 2024 at 11:24 AM Baolu Lu <baolu.lu@linux.intel.com> wrote:
>
> On 6/14/24 10:21 PM, Zong Li wrote:
> > This patch implements cache_invalidate_user operation for the userspace
> > to flush the hardware caches for a nested domain through iommufd.
>
> $ grep "This patch" Documentation/process/submitting-patches.rst
>
> The same applies to the other commit messages.
>

Thank you for the tip. I will modify them in the next version.

> Best regards,
> baolu
On Fri, Jun 14, 2024 at 10:21:54PM +0800, Zong Li wrote:
> This patch implements cache_invalidate_user operation for the userspace
> to flush the hardware caches for a nested domain through iommufd.
>
> Signed-off-by: Zong Li <zong.li@sifive.com>
> ---
>  drivers/iommu/riscv/iommu.c  | 90 ++++++++++++++++++++++++++++++++++--
>  include/uapi/linux/iommufd.h | 11 +++++
>  2 files changed, 97 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
> index 410b236e9b24..d08eb0a2939e 100644
> --- a/drivers/iommu/riscv/iommu.c
> +++ b/drivers/iommu/riscv/iommu.c
> @@ -1587,8 +1587,9 @@ static int riscv_iommu_attach_dev_nested(struct iommu_domain *domain, struct dev
>  	if (riscv_iommu_bond_link(riscv_domain, dev))
>  		return -ENOMEM;
>
> -	riscv_iommu_iotlb_inval(riscv_domain, 0, ULONG_MAX);
> -	info->dc_user.ta |= RISCV_IOMMU_PC_TA_V;
> +	if (riscv_iommu_bond_link(info->domain, dev))
> +		return -ENOMEM;

?? Is this in the wrong patch then? Confused

>  	riscv_iommu_iodir_update(iommu, dev, &info->dc_user);
>
>  	info->domain = riscv_domain;
> @@ -1611,13 +1612,92 @@ static void riscv_iommu_domain_free_nested(struct iommu_domain *domain)
>  	kfree(riscv_domain);
>  }
>
> +static int riscv_iommu_fix_user_cmd(struct riscv_iommu_command *cmd,
> +				    unsigned int pscid, unsigned int gscid)
> +{
> +	u32 opcode = FIELD_GET(RISCV_IOMMU_CMD_OPCODE, cmd->dword0);
> +
> +	switch (opcode) {
> +	case RISCV_IOMMU_CMD_IOTINVAL_OPCODE:
> +		u32 func = FIELD_GET(RISCV_IOMMU_CMD_FUNC, cmd->dword0);
> +
> +		if (func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA &&
> +		    func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA) {
> +			pr_warn("The IOTINVAL function: 0x%x is not supported\n",
> +				func);
> +			return -EOPNOTSUPP;
> +		}
> +
> +		if (func == RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA) {
> +			cmd->dword0 &= ~RISCV_IOMMU_CMD_FUNC;
> +			cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_FUNC,
> +						  RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
> +		}
> +
> +		cmd->dword0 &= ~(RISCV_IOMMU_CMD_IOTINVAL_PSCID |
> +				 RISCV_IOMMU_CMD_IOTINVAL_GSCID);
> +		riscv_iommu_cmd_inval_set_pscid(cmd, pscid);
> +		riscv_iommu_cmd_inval_set_gscid(cmd, gscid);
> +		break;
> +	case RISCV_IOMMU_CMD_IODIR_OPCODE:
> +		/*
> +		 * Ensure the device ID is right. We expect that VMM has
> +		 * transferred the device ID to host's from guest's.
> +		 */

I'm not sure what this remark means, but I expect you will need to
translate any device IDs from virtual to physical.

>
>  static int
> -riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg)
> +riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg,
> +			struct riscv_iommu_domain *s1_domain)
>  {
>  	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
>  	struct riscv_iommu_device *iommu = dev_to_iommu(dev);
> @@ -1663,6 +1743,8 @@ riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_
>  			       riscv_iommu_get_dc(iommu, fwspec->ids[i]),
>  			       sizeof(struct riscv_iommu_dc));
>  		info->dc_user.fsc = dc.fsc;
> +		info->dc_user.ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, s1_domain->pscid) |
> +				   RISCV_IOMMU_PC_TA_V;
>  	}

It is really weird that the s1 domain has any kind of id. What is the
PSCID? Is it analogous to VMID on ARM?

Jason
On Thu, Jun 20, 2024 at 12:17 AM Jason Gunthorpe <jgg@ziepe.ca> wrote:
>
> On Fri, Jun 14, 2024 at 10:21:54PM +0800, Zong Li wrote:
> > This patch implements cache_invalidate_user operation for the userspace
> > to flush the hardware caches for a nested domain through iommufd.
> >
> > Signed-off-by: Zong Li <zong.li@sifive.com>
> > ---
> >  drivers/iommu/riscv/iommu.c  | 90 ++++++++++++++++++++++++++++++++++--
> >  include/uapi/linux/iommufd.h | 11 +++++
> >  2 files changed, 97 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
> > index 410b236e9b24..d08eb0a2939e 100644
> > --- a/drivers/iommu/riscv/iommu.c
> > +++ b/drivers/iommu/riscv/iommu.c
> > @@ -1587,8 +1587,9 @@ static int riscv_iommu_attach_dev_nested(struct iommu_domain *domain, struct dev
> >  	if (riscv_iommu_bond_link(riscv_domain, dev))
> >  		return -ENOMEM;
> >
> > -	riscv_iommu_iotlb_inval(riscv_domain, 0, ULONG_MAX);
> > -	info->dc_user.ta |= RISCV_IOMMU_PC_TA_V;
> > +	if (riscv_iommu_bond_link(info->domain, dev))
> > +		return -ENOMEM;
>
> ?? Is this in the wrong patch then? Confused

Yes, it should be in the 7th patch of this series. I will fix it in the
next version.

> >  	riscv_iommu_iodir_update(iommu, dev, &info->dc_user);
> >
> >  	info->domain = riscv_domain;
> > @@ -1611,13 +1612,92 @@ static void riscv_iommu_domain_free_nested(struct iommu_domain *domain)
> >  	kfree(riscv_domain);
> >  }
> >
> > +static int riscv_iommu_fix_user_cmd(struct riscv_iommu_command *cmd,
> > +				    unsigned int pscid, unsigned int gscid)
> > +{
> > +	u32 opcode = FIELD_GET(RISCV_IOMMU_CMD_OPCODE, cmd->dword0);
> > +
> > +	switch (opcode) {
> > +	case RISCV_IOMMU_CMD_IOTINVAL_OPCODE:
> > +		u32 func = FIELD_GET(RISCV_IOMMU_CMD_FUNC, cmd->dword0);
> > +
> > +		if (func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA &&
> > +		    func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA) {
> > +			pr_warn("The IOTINVAL function: 0x%x is not supported\n",
> > +				func);
> > +			return -EOPNOTSUPP;
> > +		}
> > +
> > +		if (func == RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA) {
> > +			cmd->dword0 &= ~RISCV_IOMMU_CMD_FUNC;
> > +			cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_FUNC,
> > +						  RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
> > +		}
> > +
> > +		cmd->dword0 &= ~(RISCV_IOMMU_CMD_IOTINVAL_PSCID |
> > +				 RISCV_IOMMU_CMD_IOTINVAL_GSCID);
> > +		riscv_iommu_cmd_inval_set_pscid(cmd, pscid);
> > +		riscv_iommu_cmd_inval_set_gscid(cmd, gscid);
> > +		break;
> > +	case RISCV_IOMMU_CMD_IODIR_OPCODE:
> > +		/*
> > +		 * Ensure the device ID is right. We expect that VMM has
> > +		 * transferred the device ID to host's from guest's.
> > +		 */
>
> I'm not sure what this remark means, but I expect you will need to
> translate any device IDs from virtual to physical.

I think we need some data structure to map it. I didn't do that here
because our internal implementation translates the right ID in the VMM,
but as you mentioned, we can't expect that the VMM will do that for the
kernel.

> >
> >  static int
> > -riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg)
> > +riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg,
> > +			struct riscv_iommu_domain *s1_domain)
> >  {
> >  	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> >  	struct riscv_iommu_device *iommu = dev_to_iommu(dev);
> > @@ -1663,6 +1743,8 @@ riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_
> >  			       riscv_iommu_get_dc(iommu, fwspec->ids[i]),
> >  			       sizeof(struct riscv_iommu_dc));
> >  		info->dc_user.fsc = dc.fsc;
> > +		info->dc_user.ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, s1_domain->pscid) |
> > +				   RISCV_IOMMU_PC_TA_V;
> >  	}
>
> It is really weird that the s1 domain has any kind of id. What is the
> PSCID? Is it analogous to VMID on ARM?

I think the VMID is closer to the GSCID. The PSCID might be more like
the ASID, as it is used as the address space ID for the process
identified by the first-stage page table. The GSCID is used to tag the
G-stage TLB, the PSCID is used to tag the single-stage TLB, and the
tuple {GSCID, PSCID} is used to tag the VS-stage TLB. The IOTINVAL.VMA
command can flush a mapping by matching the GSCID only, the PSCID only,
or the tuple {GSCID, PSCID}. Consider the situation where two devices
are passed through to a guest: we then have two s1 domains under the
same s2 domain, and we can flush their mappings by {GSCID, PSCID} and
{GSCID, PSCID'} respectively.

>
> Jason
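[To make the {GSCID, PSCID} scoping described above concrete, here is a
minimal sketch, not taken from the patch, of how a VS-stage flush for one
s1 domain could be assembled from the command fields and helpers the patch
already uses. It assumes the riscv_iommu_cmd_inval_set_gscid()/set_pscid()
helpers also set the corresponding scope-valid bits in the command, and
that riscv_iommu_cmd_send()/riscv_iommu_cmd_sync() behave as in
riscv_iommu_cache_invalidate_user() below.]

/* Sketch only: flush the VS-stage TLB entries tagged {GSCID, PSCID}. */
static void sketch_flush_s1_domain(struct riscv_iommu_device *iommu,
				   unsigned int gscid, unsigned int pscid)
{
	struct riscv_iommu_command cmd = {};

	/* IOTINVAL.VMA: the same opcode/func pair fix_user_cmd() forces. */
	cmd.dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE,
				RISCV_IOMMU_CMD_IOTINVAL_OPCODE) |
		     FIELD_PREP(RISCV_IOMMU_CMD_FUNC,
				RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);

	/*
	 * Scope the flush: GSCID picks the guest (s2 domain), PSCID picks
	 * the address space (s1 domain) within that guest. A second device
	 * passed through to the same guest would reuse the GSCID with its
	 * own PSCID'.
	 */
	riscv_iommu_cmd_inval_set_gscid(&cmd, gscid);
	riscv_iommu_cmd_inval_set_pscid(&cmd, pscid);

	riscv_iommu_cmd_send(iommu, &cmd);
	riscv_iommu_cmd_sync(iommu, RISCV_IOMMU_IOTINVAL_TIMEOUT);
}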
On Fri, Jun 28, 2024 at 04:19:28PM +0800, Zong Li wrote:
> > > +	case RISCV_IOMMU_CMD_IODIR_OPCODE:
> > > +		/*
> > > +		 * Ensure the device ID is right. We expect that VMM has
> > > +		 * transferred the device ID to host's from guest's.
> > > +		 */
> >
> > I'm not sure what this remark means, but I expect you will need to
> > translate any device IDs from virtual to physical.
>
> I think we need some data structure to map it. I didn't do that here
> because our internal implementation translates the right ID in the VMM,
> but as you mentioned, we can't expect that the VMM will do that for the
> kernel.

Yes, you need the viommu stuff Nicolin is working on to hold the
translation, same as the ARM driver. In the meantime you can't support
this invalidation opcode.

> > >  static int
> > > -riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg)
> > > +riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg,
> > > +			struct riscv_iommu_domain *s1_domain)
> > >  {
> > >  	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> > >  	struct riscv_iommu_device *iommu = dev_to_iommu(dev);
> > > @@ -1663,6 +1743,8 @@ riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_
> > >  			       riscv_iommu_get_dc(iommu, fwspec->ids[i]),
> > >  			       sizeof(struct riscv_iommu_dc));
> > >  		info->dc_user.fsc = dc.fsc;
> > > +		info->dc_user.ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, s1_domain->pscid) |
> > > +				   RISCV_IOMMU_PC_TA_V;
> > >  	}
> >
> > It is really weird that the s1 domain has any kind of id. What is the
> > PSCID? Is it analogous to VMID on ARM?
>
> I think the VMID is closer to the GSCID. The PSCID might be more like
> the ASID, as it is used as the address space ID for the process
> identified by the first-stage page table.

That does sound like the ASID, but I would expect this to work by using
the VM-provided PSCID and just flowing the PSCID through transparently
during the invalidation. Why have the kernel allocate and override a
PSCID when the PSCID is scoped by the GSCID and can be safely delegated
to the VM?

This is going to be necessary if you ever want to support the direct
invalidate queues that ARM/AMD already have, as it will not be desirable
to translate the PSCID on that performance path.

It will also be necessary to implement the viommu invalidation path,
since there is no domain there, which is needed for the ATS as above.

Jason
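[As a strawman for the guest-to-host device ID translation discussed in
this exchange, the sketch below shows one possible shape: a per-vIOMMU
xarray keyed by guest device ID. All names here are hypothetical; nothing
like this exists in the patch, and the real lookup is expected to come
from the viommu infrastructure mentioned above.]

#include <linux/xarray.h>

/* Hypothetical per-vIOMMU table: guest device ID -> host device ID. */
struct riscv_iommu_vdev_map {
	struct xarray ids;	/* entries stored with xa_mk_value() */
};

/* Resolve the device ID in a guest IODIR command before forwarding it. */
static int riscv_iommu_translate_dev_id(struct riscv_iommu_vdev_map *map,
					unsigned long guest_id,
					unsigned int *host_id)
{
	void *entry = xa_load(&map->ids, guest_id);

	/*
	 * Reject IDs the VMM never registered; a guest must not be able
	 * to invalidate device contexts it does not own.
	 */
	if (!entry)
		return -EINVAL;

	*host_id = xa_to_value(entry);
	return 0;
}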
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 410b236e9b24..d08eb0a2939e 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -1587,8 +1587,9 @@ static int riscv_iommu_attach_dev_nested(struct iommu_domain *domain, struct dev
 	if (riscv_iommu_bond_link(riscv_domain, dev))
 		return -ENOMEM;
 
-	riscv_iommu_iotlb_inval(riscv_domain, 0, ULONG_MAX);
-	info->dc_user.ta |= RISCV_IOMMU_PC_TA_V;
+	if (riscv_iommu_bond_link(info->domain, dev))
+		return -ENOMEM;
+
 	riscv_iommu_iodir_update(iommu, dev, &info->dc_user);
 
 	info->domain = riscv_domain;
@@ -1611,13 +1612,92 @@ static void riscv_iommu_domain_free_nested(struct iommu_domain *domain)
 	kfree(riscv_domain);
 }
 
+static int riscv_iommu_fix_user_cmd(struct riscv_iommu_command *cmd,
+				    unsigned int pscid, unsigned int gscid)
+{
+	u32 opcode = FIELD_GET(RISCV_IOMMU_CMD_OPCODE, cmd->dword0);
+
+	switch (opcode) {
+	case RISCV_IOMMU_CMD_IOTINVAL_OPCODE:
+		u32 func = FIELD_GET(RISCV_IOMMU_CMD_FUNC, cmd->dword0);
+
+		if (func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA &&
+		    func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA) {
+			pr_warn("The IOTINVAL function: 0x%x is not supported\n",
+				func);
+			return -EOPNOTSUPP;
+		}
+
+		if (func == RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA) {
+			cmd->dword0 &= ~RISCV_IOMMU_CMD_FUNC;
+			cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_FUNC,
+						  RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
+		}
+
+		cmd->dword0 &= ~(RISCV_IOMMU_CMD_IOTINVAL_PSCID |
+				 RISCV_IOMMU_CMD_IOTINVAL_GSCID);
+		riscv_iommu_cmd_inval_set_pscid(cmd, pscid);
+		riscv_iommu_cmd_inval_set_gscid(cmd, gscid);
+		break;
+	case RISCV_IOMMU_CMD_IODIR_OPCODE:
+		/*
+		 * Ensure the device ID is right. We expect that VMM has
+		 * transferred the device ID to host's from guest's.
+		 */
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int riscv_iommu_cache_invalidate_user(struct iommu_domain *domain,
+					     struct iommu_user_data_array *array)
+{
+	struct riscv_iommu_domain *riscv_domain = iommu_domain_to_riscv(domain);
+	struct iommu_hwpt_riscv_iommu_invalidate inv_info;
+	int ret, index;
+
+	if (array->type != IOMMU_HWPT_INVALIDATE_DATA_RISCV_IOMMU) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	for (index = 0; index < array->entry_num; index++) {
+		ret = iommu_copy_struct_from_user_array(&inv_info, array,
+							IOMMU_HWPT_INVALIDATE_DATA_RISCV_IOMMU,
+							index, cmd);
+		if (ret)
+			break;
+
+		ret = riscv_iommu_fix_user_cmd((struct riscv_iommu_command *)inv_info.cmd,
+					       riscv_domain->pscid,
+					       riscv_domain->s2->gscid);
+		if (ret == -EOPNOTSUPP)
+			continue;
+
+		riscv_iommu_cmd_send(riscv_domain->iommu,
+				     (struct riscv_iommu_command *)inv_info.cmd);
+		riscv_iommu_cmd_sync(riscv_domain->iommu,
+				     RISCV_IOMMU_IOTINVAL_TIMEOUT);
+	}
+
+out:
+	array->entry_num = index;
+
+	return ret;
+}
+
 static const struct iommu_domain_ops riscv_iommu_nested_domain_ops = {
 	.attach_dev		= riscv_iommu_attach_dev_nested,
 	.free			= riscv_iommu_domain_free_nested,
+	.cache_invalidate_user	= riscv_iommu_cache_invalidate_user,
 };
 
 static int
-riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg)
+riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_arg,
+			struct riscv_iommu_domain *s1_domain)
 {
 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
 	struct riscv_iommu_device *iommu = dev_to_iommu(dev);
@@ -1663,6 +1743,8 @@ riscv_iommu_get_dc_user(struct device *dev, struct iommu_hwpt_riscv_iommu *user_
 			       riscv_iommu_get_dc(iommu, fwspec->ids[i]),
 			       sizeof(struct riscv_iommu_dc));
 		info->dc_user.fsc = dc.fsc;
+		info->dc_user.ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, s1_domain->pscid) |
+				   RISCV_IOMMU_PC_TA_V;
 	}
 
 	return 0;
@@ -1708,7 +1790,7 @@ riscv_iommu_domain_alloc_nested(struct device *dev,
 	}
 
 	/* Get device context of stage-1 from user*/
-	ret = riscv_iommu_get_dc_user(dev, &arg);
+	ret = riscv_iommu_get_dc_user(dev, &arg, s1_domain);
 	if (ret) {
 		kfree(s1_domain);
 		return ERR_PTR(-EINVAL);
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 514463fe85d3..876cbe980a42 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -653,9 +653,11 @@ struct iommu_hwpt_get_dirty_bitmap {
  * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
  *                                        Data Type
  * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
+ * @IOMMU_HWPT_INVALIDATE_DATA_RISCV_IOMMU: Invalidation data for RISCV_IOMMU
  */
 enum iommu_hwpt_invalidate_data_type {
 	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
+	IOMMU_HWPT_INVALIDATE_DATA_RISCV_IOMMU,
 };
 
 /**
@@ -694,6 +696,15 @@ struct iommu_hwpt_vtd_s1_invalidate {
 	__u32 __reserved;
 };
 
+/**
+ * struct iommu_hwpt_riscv_iommu_invalidate - RISCV IOMMU cache invalidation
+ *                                            (IOMMU_HWPT_TYPE_RISCV_IOMMU)
+ * @cmd: An array holds a command for cache invalidation
+ */
+struct iommu_hwpt_riscv_iommu_invalidate {
+	__aligned_u64 cmd[2];
+};
+
 /**
  * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
  * @size: sizeof(struct iommu_hwpt_invalidate)
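[For completeness, a hedged userspace sketch of how a VMM might feed one
guest command into this path via the IOMMU_HWPT_INVALIDATE ioctl. It
assumes a kernel with this series applied, so that linux/iommufd.h carries
the proposed RISCV_IOMMU data type, and an hwpt_id obtained earlier from
IOMMU_HWPT_ALLOC of the nested domain.]

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

/* Forward one raw guest command (dword0/dword1) to the nested domain. */
static int flush_nested_domain(int iommufd, uint32_t hwpt_id,
			       uint64_t dword0, uint64_t dword1)
{
	struct iommu_hwpt_riscv_iommu_invalidate entry = {
		.cmd = { dword0, dword1 },
	};
	struct iommu_hwpt_invalidate cmd = {
		.size = sizeof(cmd),
		.hwpt_id = hwpt_id,		/* the nested (s1) hw pagetable */
		.data_uptr = (uintptr_t)&entry,
		.data_type = IOMMU_HWPT_INVALIDATE_DATA_RISCV_IOMMU,
		.entry_len = sizeof(entry),
		.entry_num = 1,
	};

	/* On return the kernel writes back how many entries it consumed. */
	return ioctl(iommufd, IOMMU_HWPT_INVALIDATE, &cmd);
}

[On the kernel side, riscv_iommu_fix_user_cmd() then rewrites the
PSCID/GSCID fields of the forwarded command before it is queued.]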
This patch implements cache_invalidate_user operation for the userspace
to flush the hardware caches for a nested domain through iommufd.

Signed-off-by: Zong Li <zong.li@sifive.com>
---
 drivers/iommu/riscv/iommu.c  | 90 ++++++++++++++++++++++++++++++++++--
 include/uapi/linux/iommufd.h | 11 +++++
 2 files changed, 97 insertions(+), 4 deletions(-)