From patchwork Wed Jun 24 08:55:14 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yi Liu X-Patchwork-Id: 11622681 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 5062E92A for ; Wed, 24 Jun 2020 08:50:38 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 3434E207DD for ; Wed, 24 Jun 2020 08:50:38 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2388745AbgFXIs6 (ORCPT ); Wed, 24 Jun 2020 04:48:58 -0400 Received: from mga01.intel.com ([192.55.52.88]:1312 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388488AbgFXIs5 (ORCPT ); Wed, 24 Jun 2020 04:48:57 -0400 IronPort-SDR: 5A/ZSPL9rARm2/YshQjjp/BGuZYVp9klWxuA9DI+BDTbjmojm47maNBTyyN+vXrc86XUEmw60U 6GGl5ngEQiwQ== X-IronPort-AV: E=McAfee;i="6000,8403,9661"; a="162484866" X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="162484866" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Jun 2020 01:48:56 -0700 IronPort-SDR: Mshv1yZSW/RDg9neKSin4mVPpUS/9+04XFNCAwKPiJJ5pGmG1KAcsuq7yVn6T5A5Mkcta4MLFd sIt1hCcEprBQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="275624493" Received: from jacob-builder.jf.intel.com ([10.7.199.155]) by orsmga003.jf.intel.com with ESMTP; 24 Jun 2020 01:48:55 -0700 From: Liu Yi L To: alex.williamson@redhat.com, eric.auger@redhat.com, baolu.lu@linux.intel.com, joro@8bytes.org Cc: kevin.tian@intel.com, jacob.jun.pan@linux.intel.com, ashok.raj@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com, yi.y.sun@intel.com, jean-philippe@linaro.org, peterx@redhat.com, hao.wu@intel.com, iommu@lists.linux-foundation.org, kvm@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v3 01/14] vfio/type1: Refactor vfio_iommu_type1_ioctl() Date: Wed, 24 Jun 2020 01:55:14 -0700 Message-Id: <1592988927-48009-2-git-send-email-yi.l.liu@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> References: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org This patch refactors the vfio_iommu_type1_ioctl() to use switch instead of if-else, and each cmd got a helper function. Cc: Kevin Tian CC: Jacob Pan Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Suggested-by: Christoph Hellwig Signed-off-by: Liu Yi L --- drivers/vfio/vfio_iommu_type1.c | 392 ++++++++++++++++++++++------------------ 1 file changed, 213 insertions(+), 179 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 5e556ac..7accb59 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2453,6 +2453,23 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) return ret; } +static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, + unsigned long arg) +{ + switch (arg) { + case VFIO_TYPE1_IOMMU: + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_NESTING_IOMMU: + return 1; + case VFIO_DMA_CC_IOMMU: + if (!iommu) + return 0; + return vfio_domains_have_iommu_cache(iommu); + default: + return 0; + } +} + static int vfio_iommu_iova_add_cap(struct vfio_info_cap *caps, struct vfio_iommu_type1_info_cap_iova_range *cap_iovas, size_t size) @@ -2529,238 +2546,255 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu, return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig)); } -static long vfio_iommu_type1_ioctl(void *iommu_data, - unsigned int cmd, unsigned long arg) +static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu, + unsigned long arg) { - struct vfio_iommu *iommu = iommu_data; + struct vfio_iommu_type1_info info; unsigned long minsz; + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; + unsigned long capsz; + int ret; - if (cmd == VFIO_CHECK_EXTENSION) { - switch (arg) { - case VFIO_TYPE1_IOMMU: - case VFIO_TYPE1v2_IOMMU: - case VFIO_TYPE1_NESTING_IOMMU: - return 1; - case VFIO_DMA_CC_IOMMU: - if (!iommu) - return 0; - return vfio_domains_have_iommu_cache(iommu); - default: - return 0; - } - } else if (cmd == VFIO_IOMMU_GET_INFO) { - struct vfio_iommu_type1_info info; - struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; - unsigned long capsz; - int ret; - - minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); + minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); - /* For backward compatibility, cannot require this */ - capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset); + /* For backward compatibility, cannot require this */ + capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset); - if (copy_from_user(&info, (void __user *)arg, minsz)) - return -EFAULT; + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; - if (info.argsz < minsz) - return -EINVAL; + if (info.argsz < minsz) + return -EINVAL; - if (info.argsz >= capsz) { - minsz = capsz; - info.cap_offset = 0; /* output, no-recopy necessary */ - } + if (info.argsz >= capsz) { + minsz = capsz; + info.cap_offset = 0; /* output, no-recopy necessary */ + } - mutex_lock(&iommu->lock); - info.flags = VFIO_IOMMU_INFO_PGSIZES; + mutex_lock(&iommu->lock); + info.flags = VFIO_IOMMU_INFO_PGSIZES; - info.iova_pgsizes = iommu->pgsize_bitmap; + info.iova_pgsizes = iommu->pgsize_bitmap; - ret = vfio_iommu_migration_build_caps(iommu, &caps); + ret = vfio_iommu_migration_build_caps(iommu, &caps); - if (!ret) - ret = vfio_iommu_iova_build_caps(iommu, &caps); + if (!ret) + ret = vfio_iommu_iova_build_caps(iommu, &caps); - mutex_unlock(&iommu->lock); + mutex_unlock(&iommu->lock); - if (ret) - return ret; + if (ret) + return ret; - if (caps.size) { - info.flags |= VFIO_IOMMU_INFO_CAPS; + if (caps.size) { + info.flags |= VFIO_IOMMU_INFO_CAPS; - if (info.argsz < sizeof(info) + caps.size) { - info.argsz = sizeof(info) + caps.size; - } else { - vfio_info_cap_shift(&caps, sizeof(info)); - if (copy_to_user((void __user *)arg + - sizeof(info), caps.buf, - caps.size)) { - kfree(caps.buf); - return -EFAULT; - } - info.cap_offset = sizeof(info); + if (info.argsz < sizeof(info) + caps.size) { + info.argsz = sizeof(info) + caps.size; + } else { + vfio_info_cap_shift(&caps, sizeof(info)); + if (copy_to_user((void __user *)arg + + sizeof(info), caps.buf, + caps.size)) { + kfree(caps.buf); + return -EFAULT; } - - kfree(caps.buf); + info.cap_offset = sizeof(info); } - return copy_to_user((void __user *)arg, &info, minsz) ? - -EFAULT : 0; + kfree(caps.buf); + } - } else if (cmd == VFIO_IOMMU_MAP_DMA) { - struct vfio_iommu_type1_dma_map map; - uint32_t mask = VFIO_DMA_MAP_FLAG_READ | - VFIO_DMA_MAP_FLAG_WRITE; + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; +} - minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); +static int vfio_iommu_type1_map_dma(struct vfio_iommu *iommu, + unsigned long arg) +{ + struct vfio_iommu_type1_dma_map map; + unsigned long minsz; + uint32_t mask = VFIO_DMA_MAP_FLAG_READ | + VFIO_DMA_MAP_FLAG_WRITE; - if (copy_from_user(&map, (void __user *)arg, minsz)) - return -EFAULT; + minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); - if (map.argsz < minsz || map.flags & ~mask) - return -EINVAL; + if (copy_from_user(&map, (void __user *)arg, minsz)) + return -EFAULT; - return vfio_dma_do_map(iommu, &map); + if (map.argsz < minsz || map.flags & ~mask) + return -EINVAL; - } else if (cmd == VFIO_IOMMU_UNMAP_DMA) { - struct vfio_iommu_type1_dma_unmap unmap; - struct vfio_bitmap bitmap = { 0 }; - int ret; + return vfio_dma_do_map(iommu, &map); +} - minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); +static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu, + unsigned long arg) +{ + struct vfio_iommu_type1_dma_unmap unmap; + struct vfio_bitmap bitmap = { 0 }; + unsigned long minsz; + long ret; - if (copy_from_user(&unmap, (void __user *)arg, minsz)) - return -EFAULT; + minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); - if (unmap.argsz < minsz || - unmap.flags & ~VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) - return -EINVAL; + if (copy_from_user(&unmap, (void __user *)arg, minsz)) + return -EFAULT; - if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { - unsigned long pgshift; + if (unmap.argsz < minsz || + unmap.flags & ~VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) + return -EINVAL; - if (unmap.argsz < (minsz + sizeof(bitmap))) - return -EINVAL; + if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { + unsigned long pgshift; - if (copy_from_user(&bitmap, - (void __user *)(arg + minsz), - sizeof(bitmap))) - return -EFAULT; + if (unmap.argsz < (minsz + sizeof(bitmap))) + return -EINVAL; - if (!access_ok((void __user *)bitmap.data, bitmap.size)) - return -EINVAL; + if (copy_from_user(&bitmap, + (void __user *)(arg + minsz), + sizeof(bitmap))) + return -EFAULT; - pgshift = __ffs(bitmap.pgsize); - ret = verify_bitmap_size(unmap.size >> pgshift, - bitmap.size); - if (ret) - return ret; - } + if (!access_ok((void __user *)bitmap.data, bitmap.size)) + return -EINVAL; - ret = vfio_dma_do_unmap(iommu, &unmap, &bitmap); + pgshift = __ffs(bitmap.pgsize); + ret = verify_bitmap_size(unmap.size >> pgshift, + bitmap.size); if (ret) return ret; + } + + ret = vfio_dma_do_unmap(iommu, &unmap, &bitmap); + if (ret) + return ret; - return copy_to_user((void __user *)arg, &unmap, minsz) ? + return copy_to_user((void __user *)arg, &unmap, minsz) ? -EFAULT : 0; - } else if (cmd == VFIO_IOMMU_DIRTY_PAGES) { - struct vfio_iommu_type1_dirty_bitmap dirty; - uint32_t mask = VFIO_IOMMU_DIRTY_PAGES_FLAG_START | - VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP | - VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; - int ret = 0; +} - if (!iommu->v2) - return -EACCES; +static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, + unsigned long arg) +{ + struct vfio_iommu_type1_dirty_bitmap dirty; + uint32_t mask = VFIO_IOMMU_DIRTY_PAGES_FLAG_START | + VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP | + VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; + unsigned long minsz; + int ret = 0; - minsz = offsetofend(struct vfio_iommu_type1_dirty_bitmap, - flags); + if (!iommu->v2) + return -EACCES; - if (copy_from_user(&dirty, (void __user *)arg, minsz)) - return -EFAULT; + minsz = offsetofend(struct vfio_iommu_type1_dirty_bitmap, + flags); - if (dirty.argsz < minsz || dirty.flags & ~mask) - return -EINVAL; + if (copy_from_user(&dirty, (void __user *)arg, minsz)) + return -EFAULT; + + if (dirty.argsz < minsz || dirty.flags & ~mask) + return -EINVAL; + + /* only one flag should be set at a time */ + if (__ffs(dirty.flags) != __fls(dirty.flags)) + return -EINVAL; + + if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) { + size_t pgsize; - /* only one flag should be set at a time */ - if (__ffs(dirty.flags) != __fls(dirty.flags)) + mutex_lock(&iommu->lock); + pgsize = 1 << __ffs(iommu->pgsize_bitmap); + if (!iommu->dirty_page_tracking) { + ret = vfio_dma_bitmap_alloc_all(iommu, pgsize); + if (!ret) + iommu->dirty_page_tracking = true; + } + mutex_unlock(&iommu->lock); + return ret; + } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) { + mutex_lock(&iommu->lock); + if (iommu->dirty_page_tracking) { + iommu->dirty_page_tracking = false; + vfio_dma_bitmap_free_all(iommu); + } + mutex_unlock(&iommu->lock); + return 0; + } else if (dirty.flags & + VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) { + struct vfio_iommu_type1_dirty_bitmap_get range; + unsigned long pgshift; + size_t data_size = dirty.argsz - minsz; + size_t iommu_pgsize; + + if (!data_size || data_size < sizeof(range)) return -EINVAL; - if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) { - size_t pgsize; + if (copy_from_user(&range, (void __user *)(arg + minsz), + sizeof(range))) + return -EFAULT; - mutex_lock(&iommu->lock); - pgsize = 1 << __ffs(iommu->pgsize_bitmap); - if (!iommu->dirty_page_tracking) { - ret = vfio_dma_bitmap_alloc_all(iommu, pgsize); - if (!ret) - iommu->dirty_page_tracking = true; - } - mutex_unlock(&iommu->lock); + if (range.iova + range.size < range.iova) + return -EINVAL; + if (!access_ok((void __user *)range.bitmap.data, + range.bitmap.size)) + return -EINVAL; + + pgshift = __ffs(range.bitmap.pgsize); + ret = verify_bitmap_size(range.size >> pgshift, + range.bitmap.size); + if (ret) return ret; - } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) { - mutex_lock(&iommu->lock); - if (iommu->dirty_page_tracking) { - iommu->dirty_page_tracking = false; - vfio_dma_bitmap_free_all(iommu); - } - mutex_unlock(&iommu->lock); - return 0; - } else if (dirty.flags & - VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) { - struct vfio_iommu_type1_dirty_bitmap_get range; - unsigned long pgshift; - size_t data_size = dirty.argsz - minsz; - size_t iommu_pgsize; - - if (!data_size || data_size < sizeof(range)) - return -EINVAL; - - if (copy_from_user(&range, (void __user *)(arg + minsz), - sizeof(range))) - return -EFAULT; - if (range.iova + range.size < range.iova) - return -EINVAL; - if (!access_ok((void __user *)range.bitmap.data, - range.bitmap.size)) - return -EINVAL; + mutex_lock(&iommu->lock); - pgshift = __ffs(range.bitmap.pgsize); - ret = verify_bitmap_size(range.size >> pgshift, - range.bitmap.size); - if (ret) - return ret; + iommu_pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap); - mutex_lock(&iommu->lock); + /* allow only smallest supported pgsize */ + if (range.bitmap.pgsize != iommu_pgsize) { + ret = -EINVAL; + goto out_unlock; + } + if (range.iova & (iommu_pgsize - 1)) { + ret = -EINVAL; + goto out_unlock; + } + if (!range.size || range.size & (iommu_pgsize - 1)) { + ret = -EINVAL; + goto out_unlock; + } - iommu_pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap); + if (iommu->dirty_page_tracking) + ret = vfio_iova_dirty_bitmap(range.bitmap.data, + iommu, range.iova, range.size, + range.bitmap.pgsize); + else + ret = -EINVAL; +out_unlock: + mutex_unlock(&iommu->lock); - /* allow only smallest supported pgsize */ - if (range.bitmap.pgsize != iommu_pgsize) { - ret = -EINVAL; - goto out_unlock; - } - if (range.iova & (iommu_pgsize - 1)) { - ret = -EINVAL; - goto out_unlock; - } - if (!range.size || range.size & (iommu_pgsize - 1)) { - ret = -EINVAL; - goto out_unlock; - } + return ret; + } - if (iommu->dirty_page_tracking) - ret = vfio_iova_dirty_bitmap(range.bitmap.data, - iommu, range.iova, range.size, - range.bitmap.pgsize); - else - ret = -EINVAL; -out_unlock: - mutex_unlock(&iommu->lock); + return -EINVAL; +} - return ret; - } +static long vfio_iommu_type1_ioctl(void *iommu_data, + unsigned int cmd, unsigned long arg) +{ + struct vfio_iommu *iommu = iommu_data; + + switch (cmd) { + case VFIO_CHECK_EXTENSION: + return vfio_iommu_type1_check_extension(iommu, arg); + case VFIO_IOMMU_GET_INFO: + return vfio_iommu_type1_get_info(iommu, arg); + case VFIO_IOMMU_MAP_DMA: + return vfio_iommu_type1_map_dma(iommu, arg); + case VFIO_IOMMU_UNMAP_DMA: + return vfio_iommu_type1_unmap_dma(iommu, arg); + case VFIO_IOMMU_DIRTY_PAGES: + return vfio_iommu_type1_dirty_pages(iommu, arg); } return -ENOTTY; From patchwork Wed Jun 24 08:55:15 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yi Liu X-Patchwork-Id: 11622651 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 6F9D8912 for ; Wed, 24 Jun 2020 08:49:00 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 56F902088E for ; Wed, 24 Jun 2020 08:49:00 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2388798AbgFXIs7 (ORCPT ); Wed, 24 Jun 2020 04:48:59 -0400 Received: from mga01.intel.com ([192.55.52.88]:1312 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388627AbgFXIs6 (ORCPT ); Wed, 24 Jun 2020 04:48:58 -0400 IronPort-SDR: TkiZ07cZ2DcNPiP3jt/NNjwEJ6cpaRDcn9weCllO1/RLIPxWG5+kvEiUIw7HwDLh2oGzBPNDHQ XB9jz3DQAqIw== X-IronPort-AV: E=McAfee;i="6000,8403,9661"; a="162484868" X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="162484868" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Jun 2020 01:48:56 -0700 IronPort-SDR: H7pfF6pVsFfantbj3FlPTng1XOH3Wr2GzXJBZGsTYaTbZl0ony5hF+i9sJks/onbo/JRtsSkFb wBQoI34QZHmQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="275624496" Received: from jacob-builder.jf.intel.com ([10.7.199.155]) by orsmga003.jf.intel.com with ESMTP; 24 Jun 2020 01:48:55 -0700 From: Liu Yi L To: alex.williamson@redhat.com, eric.auger@redhat.com, baolu.lu@linux.intel.com, joro@8bytes.org Cc: kevin.tian@intel.com, jacob.jun.pan@linux.intel.com, ashok.raj@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com, yi.y.sun@intel.com, jean-philippe@linaro.org, peterx@redhat.com, hao.wu@intel.com, iommu@lists.linux-foundation.org, kvm@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v3 02/14] iommu: Report domain nesting info Date: Wed, 24 Jun 2020 01:55:15 -0700 Message-Id: <1592988927-48009-3-git-send-email-yi.l.liu@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> References: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org IOMMUs that support nesting translation needs report the capability info to userspace, e.g. the format of first level/stage paging structures. This patch reports nesting info by DOMAIN_ATTR_NESTING. Caller can get nesting info after setting DOMAIN_ATTR_NESTING. v2 -> v3: *) remvoe cap/ecap_mask in iommu_nesting_info. *) reuse DOMAIN_ATTR_NESTING to get nesting info. *) return an empty iommu_nesting_info for SMMU drivers per Jean' suggestion. Cc: Kevin Tian CC: Jacob Pan Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Liu Yi L Signed-off-by: Jacob Pan --- drivers/iommu/arm-smmu-v3.c | 29 ++++++++++++++++++++-- drivers/iommu/arm-smmu.c | 29 ++++++++++++++++++++-- include/uapi/linux/iommu.h | 59 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index f578677..0c45d4d 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -3019,6 +3019,32 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev) return group; } +static int arm_smmu_domain_nesting_info(struct arm_smmu_domain *smmu_domain, + void *data) +{ + struct iommu_nesting_info *info = (struct iommu_nesting_info *) data; + u32 size; + + if (!info || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED) + return -ENODEV; + + size = sizeof(struct iommu_nesting_info); + + /* + * if provided buffer size is not equal to the size, should + * return 0 and also the expected buffer size to caller. + */ + if (info->size != size) { + info->size = size; + return 0; + } + + /* report an empty iommu_nesting_info for now */ + memset(info, 0x0, size); + info->size = size; + return 0; +} + static int arm_smmu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr attr, void *data) { @@ -3028,8 +3054,7 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, case IOMMU_DOMAIN_UNMANAGED: switch (attr) { case DOMAIN_ATTR_NESTING: - *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); - return 0; + return arm_smmu_domain_nesting_info(smmu_domain, data); default: return -ENODEV; } diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 243bc4c..908607d 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1506,6 +1506,32 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev) return group; } +static int arm_smmu_domain_nesting_info(struct arm_smmu_domain *smmu_domain, + void *data) +{ + struct iommu_nesting_info *info = (struct iommu_nesting_info *) data; + u32 size; + + if (!info || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED) + return -ENODEV; + + size = sizeof(struct iommu_nesting_info); + + /* + * if provided buffer size is not equal to the size, should + * return 0 and also the expected buffer size to caller. + */ + if (info->size != size) { + info->size = size; + return 0; + } + + /* report an empty iommu_nesting_info for now */ + memset(info, 0x0, size); + info->size = size; + return 0; +} + static int arm_smmu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr attr, void *data) { @@ -1515,8 +1541,7 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, case IOMMU_DOMAIN_UNMANAGED: switch (attr) { case DOMAIN_ATTR_NESTING: - *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); - return 0; + return arm_smmu_domain_nesting_info(smmu_domain, data); default: return -ENODEV; } diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index 1afc661..898c99a 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -332,4 +332,63 @@ struct iommu_gpasid_bind_data { } vendor; }; +/* + * struct iommu_nesting_info - Information for nesting-capable IOMMU. + * user space should check it before using + * nesting capability. + * + * @size: size of the whole structure + * @format: PASID table entry format, the same definition with + * @format of struct iommu_gpasid_bind_data. + * @features: supported nesting features. + * @flags: currently reserved for future extension. + * @data: vendor specific cap info. + * + * +---------------+----------------------------------------------------+ + * | feature | Notes | + * +===============+====================================================+ + * | SYSWIDE_PASID | Kernel manages PASID in system wide, PASIDs used | + * | | in the system should be allocated by host kernel | + * +---------------+----------------------------------------------------+ + * | BIND_PGTBL | bind page tables to host PASID, the PASID could | + * | | either be a host PASID passed in bind request or | + * | | default PASIDs (e.g. default PASID of aux-domain) | + * +---------------+----------------------------------------------------+ + * | CACHE_INVLD | mandatory feature for nesting capable IOMMU | + * +---------------+----------------------------------------------------+ + * + */ +struct iommu_nesting_info { + __u32 size; + __u32 format; + __u32 features; +#define IOMMU_NESTING_FEAT_SYSWIDE_PASID (1 << 0) +#define IOMMU_NESTING_FEAT_BIND_PGTBL (1 << 1) +#define IOMMU_NESTING_FEAT_CACHE_INVLD (1 << 2) + __u32 flags; + __u8 data[]; +}; + +/* + * struct iommu_nesting_info_vtd - Intel VT-d specific nesting info + * + * + * @flags: VT-d specific flags. Currently reserved for future + * extension. + * @addr_width: The output addr width of first level/stage translation + * @pasid_bits: Maximum supported PASID bits, 0 represents no PASID + * support. + * @cap_reg: Describe basic capabilities as defined in VT-d capability + * register. + * @ecap_reg: Describe the extended capabilities as defined in VT-d + * extended capability register. + */ +struct iommu_nesting_info_vtd { + __u32 flags; + __u16 addr_width; + __u16 pasid_bits; + __u64 cap_reg; + __u64 ecap_reg; +}; + #endif /* _UAPI_IOMMU_H */ From patchwork Wed Jun 24 08:55:16 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yi Liu X-Patchwork-Id: 11622683 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id ECB71912 for ; Wed, 24 Jun 2020 08:50:42 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id DD36A207DD for ; Wed, 24 Jun 2020 08:50:42 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2389587AbgFXIuh (ORCPT ); Wed, 24 Jun 2020 04:50:37 -0400 Received: from mga01.intel.com ([192.55.52.88]:1309 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388751AbgFXIs6 (ORCPT ); Wed, 24 Jun 2020 04:48:58 -0400 IronPort-SDR: Mgw+p3TqEkD/19/cuzxRIpbThfaJBu7BhKjsC8Vxez0Z6QPF+hFYHGzcJSzsC5npE6dIkyL8Xr TXNgTrHvv+ow== X-IronPort-AV: E=McAfee;i="6000,8403,9661"; a="162484870" X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="162484870" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Jun 2020 01:48:56 -0700 IronPort-SDR: ErmQurzQgDq9ukx/suh91j2ddBBL+uI3lDxNeSyEYCBuXHjYlADG2368Ri0BrSgsMxhH1/N0pW 6Ii595tlRKOQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="275624498" Received: from jacob-builder.jf.intel.com ([10.7.199.155]) by orsmga003.jf.intel.com with ESMTP; 24 Jun 2020 01:48:55 -0700 From: Liu Yi L To: alex.williamson@redhat.com, eric.auger@redhat.com, baolu.lu@linux.intel.com, joro@8bytes.org Cc: kevin.tian@intel.com, jacob.jun.pan@linux.intel.com, ashok.raj@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com, yi.y.sun@intel.com, jean-philippe@linaro.org, peterx@redhat.com, hao.wu@intel.com, iommu@lists.linux-foundation.org, kvm@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v3 03/14] vfio/type1: Report iommu nesting info to userspace Date: Wed, 24 Jun 2020 01:55:16 -0700 Message-Id: <1592988927-48009-4-git-send-email-yi.l.liu@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> References: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org This patch exports iommu nesting capability info to user space through VFIO. User space is expected to check this info for supported uAPIs (e.g. PASID alloc/free, bind page table, and cache invalidation) and the vendor specific format information for first level/stage page table that will be bound to. The nesting info is available only after the nesting iommu type is set for a container. Current implementation imposes one limitation - one nesting container should include at most one group. The philosophy of vfio container is having all groups/devices within the container share the same IOMMU context. When vSVA is enabled, one IOMMU context could include one 2nd-level address space and multiple 1st-level address spaces. While the 2nd-leve address space is reasonably sharable by multiple groups , blindly sharing 1st-level address spaces across all groups within the container might instead break the guest expectation. In the future sub/ super container concept might be introduced to allow partial address space sharing within an IOMMU context. But for now let's go with this restriction by requiring singleton container for using nesting iommu features. Below link has the related discussion about this decision. https://lkml.org/lkml/2020/5/15/1028 Cc: Kevin Tian CC: Jacob Pan Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Liu Yi L --- drivers/vfio/vfio_iommu_type1.c | 73 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/vfio.h | 9 +++++ 2 files changed, 82 insertions(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 7accb59..8c143d5 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -72,6 +72,7 @@ struct vfio_iommu { uint64_t pgsize_bitmap; bool v2; bool nesting; + struct iommu_nesting_info *nesting_info; bool dirty_page_tracking; bool pinned_page_dirty_scope; }; @@ -130,6 +131,9 @@ struct vfio_regions { #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \ (!list_empty(&iommu->domain_list)) +#define IS_DOMAIN_IN_CONTAINER(iommu) ((iommu->external_domain) || \ + (!list_empty(&iommu->domain_list))) + #define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE) /* @@ -1959,6 +1963,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, } } + /* Nesting type container can include only one group */ + if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) { + mutex_unlock(&iommu->lock); + return -EINVAL; + } + group = kzalloc(sizeof(*group), GFP_KERNEL); domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!group || !domain) { @@ -2029,6 +2039,36 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if (ret) goto out_domain; + /* Nesting cap info is available only after attaching */ + if (iommu->nesting) { + struct iommu_nesting_info tmp; + struct iommu_nesting_info *info; + + /* First get the size of vendor specific nesting info */ + ret = iommu_domain_get_attr(domain->domain, + DOMAIN_ATTR_NESTING, + &tmp); + if (ret) + goto out_detach; + + info = kzalloc(tmp.size, GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto out_detach; + } + + /* Now get the nesting info */ + info->size = tmp.size; + ret = iommu_domain_get_attr(domain->domain, + DOMAIN_ATTR_NESTING, + info); + if (ret) { + kfree(info); + goto out_detach; + } + iommu->nesting_info = info; + } + /* Get aperture info */ iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo); @@ -2138,6 +2178,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, return 0; out_detach: + kfree(iommu->nesting_info); vfio_iommu_detach_group(domain, group); out_domain: iommu_domain_free(domain->domain); @@ -2338,6 +2379,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, vfio_iommu_unmap_unpin_all(iommu); else vfio_iommu_unmap_unpin_reaccount(iommu); + + kfree(iommu->nesting_info); } iommu_domain_free(domain->domain); list_del(&domain->next); @@ -2546,6 +2589,30 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu, return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig)); } +static int vfio_iommu_info_add_nesting_cap(struct vfio_iommu *iommu, + struct vfio_info_cap *caps) +{ + struct vfio_info_cap_header *header; + struct vfio_iommu_type1_info_cap_nesting *nesting_cap; + size_t size; + + size = sizeof(*nesting_cap) + iommu->nesting_info->size; + + header = vfio_info_cap_add(caps, size, + VFIO_IOMMU_TYPE1_INFO_CAP_NESTING, 1); + if (IS_ERR(header)) + return PTR_ERR(header); + + nesting_cap = container_of(header, + struct vfio_iommu_type1_info_cap_nesting, + header); + + memcpy(&nesting_cap->info, iommu->nesting_info, + iommu->nesting_info->size); + + return 0; +} + static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu, unsigned long arg) { @@ -2586,6 +2653,12 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu, if (ret) return ret; + if (iommu->nesting_info) { + ret = vfio_iommu_info_add_nesting_cap(iommu, &caps); + if (ret) + return ret; + } + if (caps.size) { info.flags |= VFIO_IOMMU_INFO_CAPS; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index eca66926..f1f39e1 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -14,6 +14,7 @@ #include #include +#include #define VFIO_API_VERSION 0 @@ -1039,6 +1040,14 @@ struct vfio_iommu_type1_info_cap_migration { __u64 max_dirty_bitmap_size; /* in bytes */ }; +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3 + +struct vfio_iommu_type1_info_cap_nesting { + struct vfio_info_cap_header header; + __u32 flags; + __u8 info[]; +}; + #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) /** From patchwork Wed Jun 24 08:55:17 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yi Liu X-Patchwork-Id: 11622661 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A4BE56C1 for ; Wed, 24 Jun 2020 08:49:31 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 8BAA9214D8 for ; Wed, 24 Jun 2020 08:49:31 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2389065AbgFXItY (ORCPT ); Wed, 24 Jun 2020 04:49:24 -0400 Received: from mga01.intel.com ([192.55.52.88]:1313 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388758AbgFXItG (ORCPT ); Wed, 24 Jun 2020 04:49:06 -0400 IronPort-SDR: qIUddvJuuKUSQ4hPOUkAkzGDM7d4kJqj2x4ONeWHRoX4t/5ns23NuBSSydgwpkGQmdUMT5Uits YeomXqylye1Q== X-IronPort-AV: E=McAfee;i="6000,8403,9661"; a="162484872" X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="162484872" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Jun 2020 01:48:56 -0700 IronPort-SDR: 07bEmCiGFRvzyqlaclakKOX8fXkYsNBYeuDamlaPpJpZ0vOq5Qu5BZrPnTb0tgMj2nqsvX6eE0 6I3yAsYyDUZg== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="275624502" Received: from jacob-builder.jf.intel.com ([10.7.199.155]) by orsmga003.jf.intel.com with ESMTP; 24 Jun 2020 01:48:55 -0700 From: Liu Yi L To: alex.williamson@redhat.com, eric.auger@redhat.com, baolu.lu@linux.intel.com, joro@8bytes.org Cc: kevin.tian@intel.com, jacob.jun.pan@linux.intel.com, ashok.raj@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com, yi.y.sun@intel.com, jean-philippe@linaro.org, peterx@redhat.com, hao.wu@intel.com, iommu@lists.linux-foundation.org, kvm@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v3 04/14] vfio: Add PASID allocation/free support Date: Wed, 24 Jun 2020 01:55:17 -0700 Message-Id: <1592988927-48009-5-git-send-email-yi.l.liu@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> References: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org Shared Virtual Addressing (a.k.a Shared Virtual Memory) allows sharing multiple process virtual address spaces with the device for simplified programming model. PASID is used to tag an virtual address space in DMA requests and to identify the related translation structure in IOMMU. When a PASID-capable device is assigned to a VM, we want the same capability of using PASID to tag guest process virtual address spaces to achieve virtual SVA (vSVA). PASID management for guest is vendor specific. Some vendors (e.g. Intel VT-d) requires system-wide managed PASIDs cross all devices, regardless of whether a device is used by host or assigned to guest. Other vendors (e.g. ARM SMMU) may allow PASIDs managed per-device thus could be fully delegated to the guest for assigned devices. For system-wide managed PASIDs, this patch introduces a vfio module to handle explicit PASID alloc/free requests from guest. Allocated PASIDs are associated to a process (or, mm_struct) in IOASID core. A vfio_mm object is introduced to track mm_struct. Multiple VFIO containers within a process share the same vfio_mm object. A quota mechanism is provided to prevent malicious user from exhausting available PASIDs. Currently the quota is a global parameter applied to all VFIO devices. In the future per-device quota might be supported too. Cc: Kevin Tian CC: Jacob Pan Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Suggested-by: Alex Williamson Signed-off-by: Liu Yi L --- v1 -> v2: *) added in v2, split from the pasid alloc/free support of v1 --- drivers/vfio/Kconfig | 5 ++ drivers/vfio/Makefile | 1 + drivers/vfio/vfio_pasid.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/vfio.h | 28 +++++++++ 4 files changed, 185 insertions(+) create mode 100644 drivers/vfio/vfio_pasid.c diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index fd17db9..3d8a108 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -19,6 +19,11 @@ config VFIO_VIRQFD depends on VFIO && EVENTFD default n +config VFIO_PASID + tristate + depends on IOASID && VFIO + default n + menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index de67c47..bb836a3 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -3,6 +3,7 @@ vfio_virqfd-y := virqfd.o obj-$(CONFIG_VFIO) += vfio.o obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o +obj-$(CONFIG_VFIO_PASID) += vfio_pasid.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o diff --git a/drivers/vfio/vfio_pasid.c b/drivers/vfio/vfio_pasid.c new file mode 100644 index 0000000..dd5b6d1 --- /dev/null +++ b/drivers/vfio/vfio_pasid.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Intel Corporation. + * Author: Liu Yi L + * + */ + +#include +#include +#include +#include +#include +#include + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Liu Yi L " +#define DRIVER_DESC "PASID management for VFIO bus drivers" + +#define VFIO_DEFAULT_PASID_QUOTA 1000 +static int pasid_quota = VFIO_DEFAULT_PASID_QUOTA; +module_param_named(pasid_quota, pasid_quota, uint, 0444); +MODULE_PARM_DESC(pasid_quota, + " Set the quota for max number of PASIDs that an application is allowed to request (default 1000)"); + +struct vfio_mm_token { + unsigned long long val; +}; + +struct vfio_mm { + struct kref kref; + struct vfio_mm_token token; + int ioasid_sid; + int pasid_quota; + struct list_head next; +}; + +static struct vfio_pasid { + struct mutex vfio_mm_lock; + struct list_head vfio_mm_list; +} vfio_pasid; + +/* called with vfio.vfio_mm_lock held */ +static void vfio_mm_release(struct kref *kref) +{ + struct vfio_mm *vmm = container_of(kref, struct vfio_mm, kref); + + list_del(&vmm->next); + mutex_unlock(&vfio_pasid.vfio_mm_lock); + ioasid_free_set(vmm->ioasid_sid, true); + kfree(vmm); +} + +void vfio_mm_put(struct vfio_mm *vmm) +{ + kref_put_mutex(&vmm->kref, vfio_mm_release, &vfio_pasid.vfio_mm_lock); +} + +static void vfio_mm_get(struct vfio_mm *vmm) +{ + kref_get(&vmm->kref); +} + +struct vfio_mm *vfio_mm_get_from_task(struct task_struct *task) +{ + struct mm_struct *mm = get_task_mm(task); + struct vfio_mm *vmm; + unsigned long long val = (unsigned long long) mm; + int ret; + + mutex_lock(&vfio_pasid.vfio_mm_lock); + /* Search existing vfio_mm with current mm pointer */ + list_for_each_entry(vmm, &vfio_pasid.vfio_mm_list, next) { + if (vmm->token.val == val) { + vfio_mm_get(vmm); + goto out; + } + } + + vmm = kzalloc(sizeof(*vmm), GFP_KERNEL); + if (!vmm) + return ERR_PTR(-ENOMEM); + + /* + * IOASID core provides a 'IOASID set' concept to track all + * PASIDs associated with a token. Here we use mm_struct as + * the token and create a IOASID set per mm_struct. All the + * containers of the process share the same IOASID set. + */ + ret = ioasid_alloc_set((struct ioasid_set *) mm, pasid_quota, + &vmm->ioasid_sid); + if (ret) { + kfree(vmm); + return ERR_PTR(ret); + } + + kref_init(&vmm->kref); + vmm->token.val = (unsigned long long) mm; + vmm->pasid_quota = pasid_quota; + + list_add(&vmm->next, &vfio_pasid.vfio_mm_list); +out: + mutex_unlock(&vfio_pasid.vfio_mm_lock); + mmput(mm); + return vmm; +} + +int vfio_pasid_alloc(struct vfio_mm *vmm, int min, int max) +{ + ioasid_t pasid; + + pasid = ioasid_alloc(vmm->ioasid_sid, min, max, NULL); + + return (pasid == INVALID_IOASID) ? -ENOSPC : pasid; +} + +void vfio_pasid_free_range(struct vfio_mm *vmm, + ioasid_t min, ioasid_t max) +{ + ioasid_t pasid = min; + + if (min > max) + return; + + /* + * IOASID core will notify PASID users (e.g. IOMMU driver) to + * teardown necessary structures depending on the to-be-freed + * PASID. + */ + for (; pasid <= max; pasid++) + ioasid_free(pasid); +} + +static int __init vfio_pasid_init(void) +{ + mutex_init(&vfio_pasid.vfio_mm_lock); + INIT_LIST_HEAD(&vfio_pasid.vfio_mm_list); + return 0; +} + +static void __exit vfio_pasid_exit(void) +{ + WARN_ON(!list_empty(&vfio_pasid.vfio_mm_list)); +} + +module_init(vfio_pasid_init); +module_exit(vfio_pasid_exit); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 38d3c6a..74e077d 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -97,6 +97,34 @@ extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); extern void vfio_unregister_iommu_driver( const struct vfio_iommu_driver_ops *ops); +struct vfio_mm; +#if IS_ENABLED(CONFIG_VFIO_PASID) +extern struct vfio_mm *vfio_mm_get_from_task(struct task_struct *task); +extern void vfio_mm_put(struct vfio_mm *vmm); +extern int vfio_pasid_alloc(struct vfio_mm *vmm, int min, int max); +extern void vfio_pasid_free_range(struct vfio_mm *vmm, + ioasid_t min, ioasid_t max); +#else +static inline struct vfio_mm *vfio_mm_get_from_task(struct task_struct *task) +{ + return NULL; +} + +static inline void vfio_mm_put(struct vfio_mm *vmm) +{ +} + +static inline int vfio_pasid_alloc(struct vfio_mm *vmm, int min, int max) +{ + return -ENOTTY; +} + +static inline void vfio_pasid_free_range(struct vfio_mm *vmm, + ioasid_t min, ioasid_t max) +{ +} +#endif /* CONFIG_VFIO_PASID */ + /* * External user API */ From patchwork Wed Jun 24 08:55:18 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yi Liu X-Patchwork-Id: 11622679 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3E05A912 for ; Wed, 24 Jun 2020 08:50:34 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 309E420C09 for ; Wed, 24 Jun 2020 08:50:34 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2389538AbgFXIu2 (ORCPT ); Wed, 24 Jun 2020 04:50:28 -0400 Received: from mga01.intel.com ([192.55.52.88]:1312 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388773AbgFXIs7 (ORCPT ); Wed, 24 Jun 2020 04:48:59 -0400 IronPort-SDR: ZYGk/OAEeBhJy4kprwxl79iOSks1nyv0SKJIcAEtmAGuZxkRyArINaaJeJZZozw50156s6P/vM D1C8crctM3Lg== X-IronPort-AV: E=McAfee;i="6000,8403,9661"; a="162484873" X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208,223";a="162484873" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Jun 2020 01:48:56 -0700 IronPort-SDR: Hc3byCbM5bJ/HI10zQqtmurLDwV0bymRvZWu2TH8Js2Udb2kizvjba0tY+UqMnro+Hf16oGDEn 0EIEyfKMYbhw== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208,223";a="275624504" Received: from jacob-builder.jf.intel.com ([10.7.199.155]) by orsmga003.jf.intel.com with ESMTP; 24 Jun 2020 01:48:55 -0700 From: Liu Yi L To: alex.williamson@redhat.com, eric.auger@redhat.com, baolu.lu@linux.intel.com, joro@8bytes.org Cc: kevin.tian@intel.com, jacob.jun.pan@linux.intel.com, ashok.raj@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com, yi.y.sun@intel.com, jean-philippe@linaro.org, peterx@redhat.com, hao.wu@intel.com, iommu@lists.linux-foundation.org, kvm@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v3 05/14] iommu/vt-d: Support setting ioasid set to domain Date: Wed, 24 Jun 2020 01:55:18 -0700 Message-Id: <1592988927-48009-6-git-send-email-yi.l.liu@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> References: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org From IOMMU p.o.v., PASIDs allocated and managed by external components (e.g. VFIO) will be passed in for gpasid_bind/unbind operation. IOMMU needs some knowledge to check the PASID ownership, hence add an interface for those components to tell the PASID owner. In latest kernel design, PASID ownership is managed by IOASID set where the PASID is allocated from. This patch adds support for setting ioasid set ID to the domains used for nesting/vSVA. Subsequent SVA operations on the PASID will be checked against its IOASID set for proper ownership. Cc: Kevin Tian CC: Jacob Pan Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Liu Yi L Signed-off-by: Jacob Pan --- drivers/iommu/intel/iommu.c | 16 ++++++++++++++++ include/linux/intel-iommu.h | 4 ++++ include/linux/iommu.h | 1 + 3 files changed, 21 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 1dcdac8..b139ab4 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1787,6 +1787,7 @@ static struct dmar_domain *alloc_domain(int flags) if (first_level_by_default()) domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL; domain->has_iotlb_device = false; + domain->ioasid_sid = INVALID_IOASID_SET; INIT_LIST_HEAD(&domain->devices); return domain; @@ -6031,6 +6032,21 @@ intel_iommu_domain_set_attr(struct iommu_domain *domain, } spin_unlock_irqrestore(&device_domain_lock, flags); break; + case DOMAIN_ATTR_IOASID_SID: + if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE)) { + ret = -ENODEV; + break; + } + if ((dmar_domain->ioasid_sid != INVALID_IOASID_SET) && + (dmar_domain->ioasid_sid != (*(int *) data))) { + pr_warn_ratelimited("multi ioasid_set (%d:%d) setting", + dmar_domain->ioasid_sid, + (*(int *) data)); + ret = -EBUSY; + break; + } + dmar_domain->ioasid_sid = *(int *) data; + break; default: ret = -EINVAL; break; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 02d312a..07b3195 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -548,6 +548,10 @@ struct dmar_domain { 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ u64 max_addr; /* maximum mapped address */ + int ioasid_sid; /* + * the ioasid set which tracks all + * PASIDs used by the domain. + */ int default_pasid; /* * The default pasid used for non-SVM * traffic on mediated devices. diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2567c33..21d32be 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -124,6 +124,7 @@ enum iommu_attr { DOMAIN_ATTR_FSL_PAMUV1, DOMAIN_ATTR_NESTING, /* two stages of translation */ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, + DOMAIN_ATTR_IOASID_SID, DOMAIN_ATTR_MAX, }; From patchwork Wed Jun 24 08:55:19 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yi Liu X-Patchwork-Id: 11622653 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 5BDCC6C1 for ; Wed, 24 Jun 2020 08:49:07 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 48D8520B80 for ; Wed, 24 Jun 2020 08:49:07 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2388949AbgFXItF (ORCPT ); Wed, 24 Jun 2020 04:49:05 -0400 Received: from mga01.intel.com ([192.55.52.88]:1309 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388787AbgFXItA (ORCPT ); Wed, 24 Jun 2020 04:49:00 -0400 IronPort-SDR: pf2z4HwoM8bdbCTl9aKojEHyGvzv0ybCaS1a1JcmZo1BK6uVasbuYMlZ0bhTQWkv7euyT5gcOp a1P1Ad2EiEZQ== X-IronPort-AV: E=McAfee;i="6000,8403,9661"; a="162484875" X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="162484875" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Jun 2020 01:48:57 -0700 IronPort-SDR: ZuNjk+8VQJ/PKrC5QryMb7568r+qxl4BDN2mSL+nKSXPaaZp0j754iOps4QVoQIMXENV2RNi9j gngxAlzWHpQg== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="275624508" Received: from jacob-builder.jf.intel.com ([10.7.199.155]) by orsmga003.jf.intel.com with ESMTP; 24 Jun 2020 01:48:55 -0700 From: Liu Yi L To: alex.williamson@redhat.com, eric.auger@redhat.com, baolu.lu@linux.intel.com, joro@8bytes.org Cc: kevin.tian@intel.com, jacob.jun.pan@linux.intel.com, ashok.raj@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com, yi.y.sun@intel.com, jean-philippe@linaro.org, peterx@redhat.com, hao.wu@intel.com, iommu@lists.linux-foundation.org, kvm@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v3 06/14] vfio/type1: Add VFIO_IOMMU_PASID_REQUEST (alloc/free) Date: Wed, 24 Jun 2020 01:55:19 -0700 Message-Id: <1592988927-48009-7-git-send-email-yi.l.liu@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> References: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org This patch allows user space to request PASID allocation/free, e.g. when serving the request from the guest. PASIDs that are not freed by userspace are automatically freed when the IOASID set is destroyed when process exits. Cc: Kevin Tian CC: Jacob Pan Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Liu Yi L Signed-off-by: Yi Sun Signed-off-by: Jacob Pan --- v1 -> v2: *) move the vfio_mm related code to be a seprate module *) use a single structure for alloc/free, could support a range of PASIDs *) fetch vfio_mm at group_attach time instead of at iommu driver open time --- drivers/vfio/Kconfig | 1 + drivers/vfio/vfio_iommu_type1.c | 96 ++++++++++++++++++++++++++++++++++++++++- drivers/vfio/vfio_pasid.c | 10 +++++ include/linux/vfio.h | 6 +++ include/uapi/linux/vfio.h | 36 ++++++++++++++++ 5 files changed, 147 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 3d8a108..95d90c6 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -2,6 +2,7 @@ config VFIO_IOMMU_TYPE1 tristate depends on VFIO + select VFIO_PASID if (X86) default n config VFIO_IOMMU_SPAPR_TCE diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 8c143d5..d0891c5 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -73,6 +73,7 @@ struct vfio_iommu { bool v2; bool nesting; struct iommu_nesting_info *nesting_info; + struct vfio_mm *vmm; bool dirty_page_tracking; bool pinned_page_dirty_scope; }; @@ -1933,6 +1934,17 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu, list_splice_tail(iova_copy, iova); } + +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu) +{ + if (iommu->vmm) { + vfio_mm_put(iommu->vmm); + iommu->vmm = NULL; + } + + kfree(iommu->nesting_info); +} + static int vfio_iommu_type1_attach_group(void *iommu_data, struct iommu_group *iommu_group) { @@ -2067,6 +2079,25 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_detach; } iommu->nesting_info = info; + + if (info->features & IOMMU_NESTING_FEAT_SYSWIDE_PASID) { + struct vfio_mm *vmm; + int sid; + + vmm = vfio_mm_get_from_task(current); + if (IS_ERR(vmm)) { + ret = PTR_ERR(vmm); + goto out_detach; + } + iommu->vmm = vmm; + + sid = vfio_mm_ioasid_sid(vmm); + ret = iommu_domain_set_attr(domain->domain, + DOMAIN_ATTR_IOASID_SID, + &sid); + if (ret) + goto out_detach; + } } /* Get aperture info */ @@ -2178,7 +2209,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, return 0; out_detach: - kfree(iommu->nesting_info); + if (iommu->nesting_info) + vfio_iommu_release_nesting_info(iommu); vfio_iommu_detach_group(domain, group); out_domain: iommu_domain_free(domain->domain); @@ -2380,7 +2412,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, else vfio_iommu_unmap_unpin_reaccount(iommu); - kfree(iommu->nesting_info); + if (iommu->nesting_info) + vfio_iommu_release_nesting_info(iommu); } iommu_domain_free(domain->domain); list_del(&domain->next); @@ -2852,6 +2885,63 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, return -EINVAL; } +static int vfio_iommu_type1_pasid_alloc(struct vfio_iommu *iommu, + unsigned int min, + unsigned int max) +{ + int ret = -ENOTSUPP; + + mutex_lock(&iommu->lock); + if (iommu->vmm) + ret = vfio_pasid_alloc(iommu->vmm, min, max); + mutex_unlock(&iommu->lock); + return ret; +} + +static int vfio_iommu_type1_pasid_free(struct vfio_iommu *iommu, + unsigned int min, + unsigned int max) +{ + int ret = -ENOTSUPP; + + mutex_lock(&iommu->lock); + if (iommu->vmm) { + vfio_pasid_free_range(iommu->vmm, min, max); + ret = 0; + } + mutex_unlock(&iommu->lock); + return ret; +} + +static int vfio_iommu_type1_pasid_request(struct vfio_iommu *iommu, + unsigned long arg) +{ + struct vfio_iommu_type1_pasid_request req; + unsigned long minsz; + + minsz = offsetofend(struct vfio_iommu_type1_pasid_request, range); + + if (copy_from_user(&req, (void __user *)arg, minsz)) + return -EFAULT; + + if (req.argsz < minsz || (req.flags & ~VFIO_PASID_REQUEST_MASK)) + return -EINVAL; + + if (req.range.min > req.range.max) + return -EINVAL; + + switch (req.flags & VFIO_PASID_REQUEST_MASK) { + case VFIO_IOMMU_ALLOC_PASID: + return vfio_iommu_type1_pasid_alloc(iommu, + req.range.min, req.range.max); + case VFIO_IOMMU_FREE_PASID: + return vfio_iommu_type1_pasid_free(iommu, + req.range.min, req.range.max); + default: + return -EINVAL; + } +} + static long vfio_iommu_type1_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -2868,6 +2958,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, return vfio_iommu_type1_unmap_dma(iommu, arg); case VFIO_IOMMU_DIRTY_PAGES: return vfio_iommu_type1_dirty_pages(iommu, arg); + case VFIO_IOMMU_PASID_REQUEST: + return vfio_iommu_type1_pasid_request(iommu, arg); } return -ENOTTY; diff --git a/drivers/vfio/vfio_pasid.c b/drivers/vfio/vfio_pasid.c index dd5b6d1..2ea9f1a 100644 --- a/drivers/vfio/vfio_pasid.c +++ b/drivers/vfio/vfio_pasid.c @@ -54,6 +54,7 @@ void vfio_mm_put(struct vfio_mm *vmm) { kref_put_mutex(&vmm->kref, vfio_mm_release, &vfio_pasid.vfio_mm_lock); } +EXPORT_SYMBOL_GPL(vfio_mm_put); static void vfio_mm_get(struct vfio_mm *vmm) { @@ -103,6 +104,13 @@ struct vfio_mm *vfio_mm_get_from_task(struct task_struct *task) mmput(mm); return vmm; } +EXPORT_SYMBOL_GPL(vfio_mm_get_from_task); + +int vfio_mm_ioasid_sid(struct vfio_mm *vmm) +{ + return vmm->ioasid_sid; +} +EXPORT_SYMBOL_GPL(vfio_mm_ioasid_sid); int vfio_pasid_alloc(struct vfio_mm *vmm, int min, int max) { @@ -112,6 +120,7 @@ int vfio_pasid_alloc(struct vfio_mm *vmm, int min, int max) return (pasid == INVALID_IOASID) ? -ENOSPC : pasid; } +EXPORT_SYMBOL_GPL(vfio_pasid_alloc); void vfio_pasid_free_range(struct vfio_mm *vmm, ioasid_t min, ioasid_t max) @@ -129,6 +138,7 @@ void vfio_pasid_free_range(struct vfio_mm *vmm, for (; pasid <= max; pasid++) ioasid_free(pasid); } +EXPORT_SYMBOL_GPL(vfio_pasid_free_range); static int __init vfio_pasid_init(void) { diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 74e077d..8e60a32 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -101,6 +101,7 @@ struct vfio_mm; #if IS_ENABLED(CONFIG_VFIO_PASID) extern struct vfio_mm *vfio_mm_get_from_task(struct task_struct *task); extern void vfio_mm_put(struct vfio_mm *vmm); +int vfio_mm_ioasid_sid(struct vfio_mm *vmm); extern int vfio_pasid_alloc(struct vfio_mm *vmm, int min, int max); extern void vfio_pasid_free_range(struct vfio_mm *vmm, ioasid_t min, ioasid_t max); @@ -114,6 +115,11 @@ static inline void vfio_mm_put(struct vfio_mm *vmm) { } +static inline int vfio_mm_ioasid_sid(struct vfio_mm *vmm) +{ + return -ENOTTY; +} + static inline int vfio_pasid_alloc(struct vfio_mm *vmm, int min, int max) { return -ENOTTY; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index f1f39e1..657b2db 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1162,6 +1162,42 @@ struct vfio_iommu_type1_dirty_bitmap_get { #define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) +/** + * VFIO_IOMMU_PASID_REQUEST - _IOWR(VFIO_TYPE, VFIO_BASE + 18, + * struct vfio_iommu_type1_pasid_request) + * + * PASID (Processor Address Space ID) is a PCIe concept for tagging + * address spaces in DMA requests. When system-wide PASID allocation + * is required by underlying iommu driver (e.g. Intel VT-d), this + * provides an interface for userspace to request pasid alloc/free + * for its assigned devices. Userspace should check the availability + * of this API through VFIO_IOMMU_GET_INFO. + * + * @flags=VFIO_IOMMU_ALLOC_PASID, allocate a single PASID within @range. + * @flags=VFIO_IOMMU_FREE_PASID, free the PASIDs within @range. + * @range is [min, max], which means both @min and @max are inclusive. + * ALLOC_PASID and FREE_PASID are mutually exclusive. + * + * returns: allocated PASID value on success, -errno on failure for + * ALLOC_PASID; + * 0 for FREE_PASID operation; + */ +struct vfio_iommu_type1_pasid_request { + __u32 argsz; +#define VFIO_IOMMU_ALLOC_PASID (1 << 0) +#define VFIO_IOMMU_FREE_PASID (1 << 1) + __u32 flags; + struct { + __u32 min; + __u32 max; + } range; +}; + +#define VFIO_PASID_REQUEST_MASK (VFIO_IOMMU_ALLOC_PASID | \ + VFIO_IOMMU_FREE_PASID) + +#define VFIO_IOMMU_PASID_REQUEST _IO(VFIO_TYPE, VFIO_BASE + 18) + /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ /* From patchwork Wed Jun 24 08:55:20 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yi Liu X-Patchwork-Id: 11622659 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 29E11912 for ; Wed, 24 Jun 2020 08:49:23 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 1A32B20C09 for ; Wed, 24 Jun 2020 08:49:23 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2388979AbgFXItH (ORCPT ); Wed, 24 Jun 2020 04:49:07 -0400 Received: from mga01.intel.com ([192.55.52.88]:1312 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388830AbgFXItA (ORCPT ); Wed, 24 Jun 2020 04:49:00 -0400 IronPort-SDR: x+Ktyql2Lq+s3D+21HysRuToSdNyuVvvlqN/T6J0dymAxOKCM62JJyr2ZKgeQqFc0vZQWJ5cyc QsengJnTppiA== X-IronPort-AV: E=McAfee;i="6000,8403,9661"; a="162484880" X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="162484880" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Jun 2020 01:48:57 -0700 IronPort-SDR: 91O4qvDAA1/KciOEwqFBpU/Blf0Qf4VKnay75aZ4HLXHC3RzcRBx2/W8sC1RY5tkXd+8rJMQ7F 63K7ZvHm4+Pg== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="275624512" Received: from jacob-builder.jf.intel.com ([10.7.199.155]) by orsmga003.jf.intel.com with ESMTP; 24 Jun 2020 01:48:56 -0700 From: Liu Yi L To: alex.williamson@redhat.com, eric.auger@redhat.com, baolu.lu@linux.intel.com, joro@8bytes.org Cc: kevin.tian@intel.com, jacob.jun.pan@linux.intel.com, ashok.raj@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com, yi.y.sun@intel.com, jean-philippe@linaro.org, peterx@redhat.com, hao.wu@intel.com, iommu@lists.linux-foundation.org, kvm@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v3 07/14] iommu: Pass domain to sva_unbind_gpasid() Date: Wed, 24 Jun 2020 01:55:20 -0700 Message-Id: <1592988927-48009-8-git-send-email-yi.l.liu@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> References: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org From: Yi Sun Current interface is good enough for SVA virtualization on an assigned physical PCI device, but when it comes to mediated devices, a physical device may attached with multiple aux-domains. Also, for guest unbind, the PASID to be unbind should be allocated to the VM. This check requires to know the ioasid_set which is associated with the domain. So this interface needs to pass in domain info. Then the iommu driver is able to know which domain will be used for the 2nd stage translation of the nesting mode and also be able to do PASID ownership check. This patch passes @domain per the above reason. Cc: Kevin Tian CC: Jacob Pan Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Yi Sun Signed-off-by: Liu Yi L --- v2 -> v3: *) pass in domain info only *) use ioasid_t for pasid instead of int type v1 -> v2: *) added in v2. --- drivers/iommu/intel/svm.c | 3 ++- drivers/iommu/iommu.c | 2 +- include/linux/intel-iommu.h | 3 ++- include/linux/iommu.h | 3 ++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index e995e1a..1e567a1 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -436,7 +436,8 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, return ret; } -int intel_svm_unbind_gpasid(struct device *dev, int pasid) +int intel_svm_unbind_gpasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); struct intel_svm_dev *sdev; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 595527e..5f74837 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2081,7 +2081,7 @@ int __iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, if (unlikely(!domain->ops->sva_unbind_gpasid)) return -ENODEV; - return domain->ops->sva_unbind_gpasid(dev, data->hpasid); + return domain->ops->sva_unbind_gpasid(domain, dev, data->hpasid); } EXPORT_SYMBOL_GPL(__iommu_sva_unbind_gpasid); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 07b3195..a6f8f41 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -737,7 +737,8 @@ extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, struct iommu_gpasid_bind_data *data); -int intel_svm_unbind_gpasid(struct device *dev, int pasid); +int intel_svm_unbind_gpasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata); void intel_svm_unbind(struct iommu_sva *handle); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 21d32be..22f0730 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -303,7 +303,8 @@ struct iommu_ops { int (*sva_bind_gpasid)(struct iommu_domain *domain, struct device *dev, struct iommu_gpasid_bind_data *data); - int (*sva_unbind_gpasid)(struct device *dev, int pasid); + int (*sva_unbind_gpasid)(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); int (*def_domain_type)(struct device *dev); From patchwork Wed Jun 24 08:55:21 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yi Liu X-Patchwork-Id: 11622677 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 5CC2F912 for ; Wed, 24 Jun 2020 08:50:16 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 4C1A82082F for ; Wed, 24 Jun 2020 08:50:16 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2389136AbgFXIuP (ORCPT ); Wed, 24 Jun 2020 04:50:15 -0400 Received: from mga01.intel.com ([192.55.52.88]:1312 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388488AbgFXIs7 (ORCPT ); Wed, 24 Jun 2020 04:48:59 -0400 IronPort-SDR: P4sOO4Us0GBiX+/+1FiPGtQiAygTPUeqjqy+1djJrAAMP7cfGsRtNU7d5oK8qjE4qYXwd17BAK 1Yyul5k2dt5Q== X-IronPort-AV: E=McAfee;i="6000,8403,9661"; a="162484876" X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="162484876" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Jun 2020 01:48:57 -0700 IronPort-SDR: piGJ++LaamA9YfyuMqCwvU5vZ7l/wzJwV4Bw4Dq47ONOCUSOEPD/GlUNxfV5Vrfp/NSTbReAMh 1qWzMEW5nQGw== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="275624514" Received: from jacob-builder.jf.intel.com ([10.7.199.155]) by orsmga003.jf.intel.com with ESMTP; 24 Jun 2020 01:48:56 -0700 From: Liu Yi L To: alex.williamson@redhat.com, eric.auger@redhat.com, baolu.lu@linux.intel.com, joro@8bytes.org Cc: kevin.tian@intel.com, jacob.jun.pan@linux.intel.com, ashok.raj@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com, yi.y.sun@intel.com, jean-philippe@linaro.org, peterx@redhat.com, hao.wu@intel.com, iommu@lists.linux-foundation.org, kvm@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v3 08/14] iommu/vt-d: Check ownership for PASIDs from user-space Date: Wed, 24 Jun 2020 01:55:21 -0700 Message-Id: <1592988927-48009-9-git-send-email-yi.l.liu@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> References: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org When an IOMMU domain with nesting attribute is used for guest SVA, a system-wide PASID is allocated for binding with the device and the domain. For security reason, we need to check the PASID passsed from user-space. e.g. page table bind/unbind and PASID related cache invalidation. Cc: Kevin Tian CC: Jacob Pan Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Liu Yi L Signed-off-by: Jacob Pan --- drivers/iommu/intel/iommu.c | 10 ++++++++++ drivers/iommu/intel/svm.c | 7 +++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b139ab4..b50395e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5429,6 +5429,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, int granu = 0; u64 pasid = 0; u64 addr = 0; + void *pdata; granu = to_vtd_granularity(cache_type, inv_info->granularity); if (granu == -EINVAL) { @@ -5448,6 +5449,15 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, (inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID)) pasid = inv_info->granu.addr_info.pasid; + pdata = ioasid_find(dmar_domain->ioasid_sid, pasid, NULL); + if (!pdata) { + ret = -EINVAL; + goto out_unlock; + } else if (IS_ERR(pdata)) { + ret = PTR_ERR(pdata); + goto out_unlock; + } + switch (BIT(cache_type)) { case IOMMU_CACHE_INV_TYPE_IOTLB: /* HW will ignore LSB bits based on address mask */ diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 1e567a1..aa2e1aa 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -323,7 +323,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, dmar_domain = to_dmar_domain(domain); mutex_lock(&pasid_mutex); - svm = ioasid_find(INVALID_IOASID_SET, data->hpasid, NULL); + svm = ioasid_find(dmar_domain->ioasid_sid, data->hpasid, NULL); if (IS_ERR(svm)) { ret = PTR_ERR(svm); goto out; @@ -440,6 +440,7 @@ int intel_svm_unbind_gpasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct dmar_domain *dmar_domain; struct intel_svm_dev *sdev; struct intel_svm *svm; int ret = -EINVAL; @@ -447,8 +448,10 @@ int intel_svm_unbind_gpasid(struct iommu_domain *domain, if (WARN_ON(!iommu)) return -EINVAL; + dmar_domain = to_dmar_domain(domain); + mutex_lock(&pasid_mutex); - svm = ioasid_find(INVALID_IOASID_SET, pasid, NULL); + svm = ioasid_find(dmar_domain->ioasid_sid, pasid, NULL); if (!svm) { ret = -EINVAL; goto out; From patchwork Wed Jun 24 08:55:22 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yi Liu X-Patchwork-Id: 11622657 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 1B1D0912 for ; Wed, 24 Jun 2020 08:49:20 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id F27FF2088E for ; Wed, 24 Jun 2020 08:49:19 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2389016AbgFXItI (ORCPT ); Wed, 24 Jun 2020 04:49:08 -0400 Received: from mga01.intel.com ([192.55.52.88]:1312 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388846AbgFXItC (ORCPT ); Wed, 24 Jun 2020 04:49:02 -0400 IronPort-SDR: YGeht1RB56DKH2P4m5yyK2VVEVQj7Gr1JZe/dWEQnD6EziADdz8YWU48nBpM544ZZpEDq2COia N3Ki4+ScSAiQ== X-IronPort-AV: E=McAfee;i="6000,8403,9661"; a="162484881" X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="162484881" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Jun 2020 01:48:57 -0700 IronPort-SDR: tzd1NU+54w2Raar+gnIp7Rg5cailNVzUj8j9pd1NcaInHzj6yUCevy+b7eDwV+a9uNZBoXre2L HhWeGD7U0yog== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="275624518" Received: from jacob-builder.jf.intel.com ([10.7.199.155]) by orsmga003.jf.intel.com with ESMTP; 24 Jun 2020 01:48:56 -0700 From: Liu Yi L To: alex.williamson@redhat.com, eric.auger@redhat.com, baolu.lu@linux.intel.com, joro@8bytes.org Cc: kevin.tian@intel.com, jacob.jun.pan@linux.intel.com, ashok.raj@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com, yi.y.sun@intel.com, jean-philippe@linaro.org, peterx@redhat.com, hao.wu@intel.com, iommu@lists.linux-foundation.org, kvm@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v3 09/14] vfio/type1: Support binding guest page tables to PASID Date: Wed, 24 Jun 2020 01:55:22 -0700 Message-Id: <1592988927-48009-10-git-send-email-yi.l.liu@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> References: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org Nesting translation allows two-levels/stages page tables, with 1st level for guest translations (e.g. GVA->GPA), 2nd level for host translations (e.g. GPA->HPA). This patch adds interface for binding guest page tables to a PASID. This PASID must have been allocated to user space before the binding request. Cc: Kevin Tian CC: Jacob Pan Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Jean-Philippe Brucker Signed-off-by: Liu Yi L Signed-off-by: Jacob Pan --- v2 -> v3: *) use __iommu_sva_unbind_gpasid() for unbind call issued by VFIO https://lore.kernel.org/linux-iommu/1592931837-58223-6-git-send-email-jacob.jun.pan@linux.intel.com/ v1 -> v2: *) rename subject from "vfio/type1: Bind guest page tables to host" *) remove VFIO_IOMMU_BIND, introduce VFIO_IOMMU_NESTING_OP to support bind/ unbind guet page table *) replaced vfio_iommu_for_each_dev() with a group level loop since this series enforces one group per container w/ nesting type as start. *) rename vfio_bind/unbind_gpasid_fn() to vfio_dev_bind/unbind_gpasid_fn() *) vfio_dev_unbind_gpasid() always successful *) use vfio_mm->pasid_lock to avoid race between PASID free and page table bind/unbind --- drivers/vfio/vfio_iommu_type1.c | 169 ++++++++++++++++++++++++++++++++++++++++ drivers/vfio/vfio_pasid.c | 30 +++++++ include/linux/vfio.h | 20 +++++ include/uapi/linux/vfio.h | 30 +++++++ 4 files changed, 249 insertions(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index d0891c5..5926533 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -148,6 +148,33 @@ struct vfio_regions { #define DIRTY_BITMAP_PAGES_MAX ((u64)INT_MAX) #define DIRTY_BITMAP_SIZE_MAX DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX) +struct domain_capsule { + struct vfio_group *group; + struct iommu_domain *domain; + void *data; +}; + +/* iommu->lock must be held */ +static struct vfio_group *vfio_find_nesting_group(struct vfio_iommu *iommu) +{ + struct vfio_domain *d; + struct vfio_group *g, *group = NULL; + + if (!iommu->nesting_info) + return NULL; + + /* only support singleton container with nesting type */ + list_for_each_entry(d, &iommu->domain_list, next) { + list_for_each_entry(g, &d->group_list, next) { + if (!group) { + group = g; + break; + } + } + } + return group; +} + static int put_pfn(unsigned long pfn, int prot); static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, @@ -2351,6 +2378,48 @@ static int vfio_iommu_resv_refresh(struct vfio_iommu *iommu, return ret; } +static int vfio_dev_bind_gpasid_fn(struct device *dev, void *data) +{ + struct domain_capsule *dc = (struct domain_capsule *)data; + unsigned long arg = *(unsigned long *) dc->data; + + return iommu_sva_bind_gpasid(dc->domain, dev, (void __user *) arg); +} + +static int vfio_dev_unbind_gpasid_fn(struct device *dev, void *data) +{ + struct domain_capsule *dc = (struct domain_capsule *)data; + unsigned long arg = *(unsigned long *) dc->data; + + iommu_sva_unbind_gpasid(dc->domain, dev, (void __user *) arg); + return 0; +} + +static int __vfio_dev_unbind_gpasid_fn(struct device *dev, void *data) +{ + struct domain_capsule *dc = (struct domain_capsule *)data; + struct iommu_gpasid_bind_data *unbind_data = + (struct iommu_gpasid_bind_data *) dc->data; + + __iommu_sva_unbind_gpasid(dc->domain, dev, unbind_data); + return 0; +} + +static void vfio_group_unbind_gpasid_fn(ioasid_t pasid, void *data) +{ + struct domain_capsule *dc = (struct domain_capsule *) data; + struct iommu_gpasid_bind_data unbind_data; + + unbind_data.argsz = offsetof(struct iommu_gpasid_bind_data, vendor); + unbind_data.flags = 0; + unbind_data.hpasid = pasid; + + dc->data = &unbind_data; + + iommu_group_for_each_dev(dc->group->iommu_group, + dc, __vfio_dev_unbind_gpasid_fn); +} + static void vfio_iommu_type1_detach_group(void *iommu_data, struct iommu_group *iommu_group) { @@ -2394,6 +2463,21 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, if (!group) continue; + if (iommu->nesting_info && iommu->vmm && + (iommu->nesting_info->features & + IOMMU_NESTING_FEAT_BIND_PGTBL)) { + struct domain_capsule dc = { .group = group, + .domain = domain->domain, + .data = NULL }; + + /* + * Unbind page tables bound with system wide PASIDs + * which are allocated to user space. + */ + vfio_mm_for_each_pasid(iommu->vmm, &dc, + vfio_group_unbind_gpasid_fn); + } + vfio_iommu_detach_group(domain, group); update_dirty_scope = !group->pinned_page_dirty_scope; list_del(&group->next); @@ -2942,6 +3026,89 @@ static int vfio_iommu_type1_pasid_request(struct vfio_iommu *iommu, } } +static long vfio_iommu_handle_pgtbl_op(struct vfio_iommu *iommu, + bool is_bind, unsigned long arg) +{ + struct iommu_nesting_info *info; + struct domain_capsule dc = { .data = &arg }; + struct vfio_group *group; + struct vfio_domain *domain; + int ret; + + mutex_lock(&iommu->lock); + + info = iommu->nesting_info; + if (!info || !(info->features & IOMMU_NESTING_FEAT_BIND_PGTBL)) { + ret = -ENOTSUPP; + goto out_unlock_iommu; + } + + if (!iommu->vmm) { + ret = -EINVAL; + goto out_unlock_iommu; + } + + group = vfio_find_nesting_group(iommu); + if (!group) { + ret = -EINVAL; + goto out_unlock_iommu; + } + + domain = list_first_entry(&iommu->domain_list, + struct vfio_domain, next); + dc.group = group; + dc.domain = domain->domain; + + /* Avoid race with other containers within the same process */ + vfio_mm_pasid_lock(iommu->vmm); + + if (is_bind) { + ret = iommu_group_for_each_dev(group->iommu_group, &dc, + vfio_dev_bind_gpasid_fn); + if (ret) + iommu_group_for_each_dev(group->iommu_group, &dc, + vfio_dev_unbind_gpasid_fn); + } else { + iommu_group_for_each_dev(group->iommu_group, + &dc, vfio_dev_unbind_gpasid_fn); + ret = 0; + } + + vfio_mm_pasid_unlock(iommu->vmm); +out_unlock_iommu: + mutex_unlock(&iommu->lock); + return ret; +} + +static long vfio_iommu_type1_nesting_op(struct vfio_iommu *iommu, + unsigned long arg) +{ + struct vfio_iommu_type1_nesting_op hdr; + unsigned int minsz; + int ret; + + minsz = offsetofend(struct vfio_iommu_type1_nesting_op, flags); + + if (copy_from_user(&hdr, (void __user *)arg, minsz)) + return -EFAULT; + + if (hdr.argsz < minsz || hdr.flags & ~VFIO_NESTING_OP_MASK) + return -EINVAL; + + switch (hdr.flags & VFIO_NESTING_OP_MASK) { + case VFIO_IOMMU_NESTING_OP_BIND_PGTBL: + ret = vfio_iommu_handle_pgtbl_op(iommu, true, arg + minsz); + break; + case VFIO_IOMMU_NESTING_OP_UNBIND_PGTBL: + ret = vfio_iommu_handle_pgtbl_op(iommu, false, arg + minsz); + break; + default: + ret = -EINVAL; + } + + return ret; +} + static long vfio_iommu_type1_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -2960,6 +3127,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, return vfio_iommu_type1_dirty_pages(iommu, arg); case VFIO_IOMMU_PASID_REQUEST: return vfio_iommu_type1_pasid_request(iommu, arg); + case VFIO_IOMMU_NESTING_OP: + return vfio_iommu_type1_nesting_op(iommu, arg); } return -ENOTTY; diff --git a/drivers/vfio/vfio_pasid.c b/drivers/vfio/vfio_pasid.c index 2ea9f1a..20f1e72 100644 --- a/drivers/vfio/vfio_pasid.c +++ b/drivers/vfio/vfio_pasid.c @@ -30,6 +30,7 @@ struct vfio_mm { struct kref kref; struct vfio_mm_token token; int ioasid_sid; + struct mutex pasid_lock; int pasid_quota; struct list_head next; }; @@ -97,6 +98,7 @@ struct vfio_mm *vfio_mm_get_from_task(struct task_struct *task) kref_init(&vmm->kref); vmm->token.val = (unsigned long long) mm; vmm->pasid_quota = pasid_quota; + mutex_init(&vmm->pasid_lock); list_add(&vmm->next, &vfio_pasid.vfio_mm_list); out: @@ -134,12 +136,40 @@ void vfio_pasid_free_range(struct vfio_mm *vmm, * IOASID core will notify PASID users (e.g. IOMMU driver) to * teardown necessary structures depending on the to-be-freed * PASID. + * Hold pasid_lock to avoid race with PASID usages like bind/ + * unbind page tables to requested PASID. */ + mutex_lock(&vmm->pasid_lock); for (; pasid <= max; pasid++) ioasid_free(pasid); + mutex_unlock(&vmm->pasid_lock); } EXPORT_SYMBOL_GPL(vfio_pasid_free_range); +int vfio_mm_for_each_pasid(struct vfio_mm *vmm, void *data, + void (*fn)(ioasid_t id, void *data)) +{ + int ret; + + mutex_lock(&vmm->pasid_lock); + ret = ioasid_set_for_each_ioasid(vmm->ioasid_sid, fn, data); + mutex_unlock(&vmm->pasid_lock); + return ret; +} +EXPORT_SYMBOL_GPL(vfio_mm_for_each_pasid); + +void vfio_mm_pasid_lock(struct vfio_mm *vmm) +{ + mutex_lock(&vmm->pasid_lock); +} +EXPORT_SYMBOL_GPL(vfio_mm_pasid_lock); + +void vfio_mm_pasid_unlock(struct vfio_mm *vmm) +{ + mutex_unlock(&vmm->pasid_lock); +} +EXPORT_SYMBOL_GPL(vfio_mm_pasid_unlock); + static int __init vfio_pasid_init(void) { mutex_init(&vfio_pasid.vfio_mm_lock); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 8e60a32..9028a09 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -105,6 +105,11 @@ int vfio_mm_ioasid_sid(struct vfio_mm *vmm); extern int vfio_pasid_alloc(struct vfio_mm *vmm, int min, int max); extern void vfio_pasid_free_range(struct vfio_mm *vmm, ioasid_t min, ioasid_t max); +extern int vfio_mm_for_each_pasid(struct vfio_mm *vmm, void *data, + void (*fn)(ioasid_t id, void *data)); +extern void vfio_mm_pasid_lock(struct vfio_mm *vmm); +extern void vfio_mm_pasid_unlock(struct vfio_mm *vmm); + #else static inline struct vfio_mm *vfio_mm_get_from_task(struct task_struct *task) { @@ -129,6 +134,21 @@ static inline void vfio_pasid_free_range(struct vfio_mm *vmm, ioasid_t min, ioasid_t max) { } + +static inline int vfio_mm_for_each_pasid(struct vfio_mm *vmm, void *data, + void (*fn)(ioasid_t id, void *data)) +{ + return -ENOTTY; +} + +static inline void vfio_mm_pasid_lock(struct vfio_mm *vmm) +{ +} + +static inline void vfio_mm_pasid_unlock(struct vfio_mm *vmm) +{ +} + #endif /* CONFIG_VFIO_PASID */ /* diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 657b2db..2c9def8 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1198,6 +1198,36 @@ struct vfio_iommu_type1_pasid_request { #define VFIO_IOMMU_PASID_REQUEST _IO(VFIO_TYPE, VFIO_BASE + 18) +/** + * VFIO_IOMMU_NESTING_OP - _IOW(VFIO_TYPE, VFIO_BASE + 19, + * struct vfio_iommu_type1_nesting_op) + * + * This interface allows user space to utilize the nesting IOMMU + * capabilities as reported through VFIO_IOMMU_GET_INFO. + * + * @data[] types defined for each op: + * +=================+===============================================+ + * | NESTING OP | @data[] | + * +=================+===============================================+ + * | BIND_PGTBL | struct iommu_gpasid_bind_data | + * +-----------------+-----------------------------------------------+ + * | UNBIND_PGTBL | struct iommu_gpasid_bind_data | + * +-----------------+-----------------------------------------------+ + * + * returns: 0 on success, -errno on failure. + */ +struct vfio_iommu_type1_nesting_op { + __u32 argsz; + __u32 flags; +#define VFIO_NESTING_OP_MASK (0xffff) /* lower 16-bits for op */ + __u8 data[]; +}; + +#define VFIO_IOMMU_NESTING_OP_BIND_PGTBL (0) +#define VFIO_IOMMU_NESTING_OP_UNBIND_PGTBL (1) + +#define VFIO_IOMMU_NESTING_OP _IO(VFIO_TYPE, VFIO_BASE + 19) + /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ /* From patchwork Wed Jun 24 08:55:23 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yi Liu X-Patchwork-Id: 11622671 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 5D1C96C1 for ; Wed, 24 Jun 2020 08:49:59 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 4BE0B2082F for ; Wed, 24 Jun 2020 08:49:59 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2388910AbgFXItE (ORCPT ); Wed, 24 Jun 2020 04:49:04 -0400 Received: from mga01.intel.com ([192.55.52.88]:1309 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388857AbgFXItB (ORCPT ); Wed, 24 Jun 2020 04:49:01 -0400 IronPort-SDR: 59+k0MuE5Z5+Xb3kGLVdv09Oe/tpxDcN0a66q3DLBBrsMgbOj/oo0Wfm58jH3kxvHTLQoZ5W6g UPJWFphWjwHQ== X-IronPort-AV: E=McAfee;i="6000,8403,9661"; a="162484882" X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="162484882" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Jun 2020 01:48:57 -0700 IronPort-SDR: E8W7F/cBC6NIiYaEoaRkxgCm4w8jP534zpS6pw/OoG9br0vMDHC6/kHM31ekJnejcyYl6iFFhI KhTZWk7uBceA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="275624521" Received: from jacob-builder.jf.intel.com ([10.7.199.155]) by orsmga003.jf.intel.com with ESMTP; 24 Jun 2020 01:48:56 -0700 From: Liu Yi L To: alex.williamson@redhat.com, eric.auger@redhat.com, baolu.lu@linux.intel.com, joro@8bytes.org Cc: kevin.tian@intel.com, jacob.jun.pan@linux.intel.com, ashok.raj@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com, yi.y.sun@intel.com, jean-philippe@linaro.org, peterx@redhat.com, hao.wu@intel.com, iommu@lists.linux-foundation.org, kvm@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v3 10/14] vfio/type1: Allow invalidating first-level/stage IOMMU cache Date: Wed, 24 Jun 2020 01:55:23 -0700 Message-Id: <1592988927-48009-11-git-send-email-yi.l.liu@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> References: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org This patch provides an interface allowing the userspace to invalidate IOMMU cache for first-level page table. It is required when the first level IOMMU page table is not managed by the host kernel in the nested translation setup. Cc: Kevin Tian CC: Jacob Pan Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Liu Yi L Signed-off-by: Eric Auger Signed-off-by: Jacob Pan --- v1 -> v2: *) rename from "vfio/type1: Flush stage-1 IOMMU cache for nesting type" *) rename vfio_cache_inv_fn() to vfio_dev_cache_invalidate_fn() *) vfio_dev_cache_inv_fn() always successful *) remove VFIO_IOMMU_CACHE_INVALIDATE, and reuse VFIO_IOMMU_NESTING_OP --- drivers/vfio/vfio_iommu_type1.c | 52 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/vfio.h | 3 +++ 2 files changed, 55 insertions(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 5926533..4c21300 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -3080,6 +3080,53 @@ static long vfio_iommu_handle_pgtbl_op(struct vfio_iommu *iommu, return ret; } +static int vfio_dev_cache_invalidate_fn(struct device *dev, void *data) +{ + struct domain_capsule *dc = (struct domain_capsule *)data; + unsigned long arg = *(unsigned long *) dc->data; + + iommu_cache_invalidate(dc->domain, dev, (void __user *) arg); + return 0; +} + +static long vfio_iommu_invalidate_cache(struct vfio_iommu *iommu, + unsigned long arg) +{ + struct domain_capsule dc = { .data = &arg }; + struct vfio_group *group; + struct vfio_domain *domain; + int ret = 0; + struct iommu_nesting_info *info; + + mutex_lock(&iommu->lock); + /* + * Cache invalidation is required for any nesting IOMMU, + * so no need to check system-wide PASID support. + */ + info = iommu->nesting_info; + if (!info || !(info->features & IOMMU_NESTING_FEAT_CACHE_INVLD)) { + ret = -ENOTSUPP; + goto out_unlock; + } + + group = vfio_find_nesting_group(iommu); + if (!group) { + ret = -EINVAL; + goto out_unlock; + } + + domain = list_first_entry(&iommu->domain_list, + struct vfio_domain, next); + dc.group = group; + dc.domain = domain->domain; + iommu_group_for_each_dev(group->iommu_group, &dc, + vfio_dev_cache_invalidate_fn); + +out_unlock: + mutex_unlock(&iommu->lock); + return ret; +} + static long vfio_iommu_type1_nesting_op(struct vfio_iommu *iommu, unsigned long arg) { @@ -3102,6 +3149,11 @@ static long vfio_iommu_type1_nesting_op(struct vfio_iommu *iommu, case VFIO_IOMMU_NESTING_OP_UNBIND_PGTBL: ret = vfio_iommu_handle_pgtbl_op(iommu, false, arg + minsz); break; + case VFIO_IOMMU_NESTING_OP_CACHE_INVLD: + { + ret = vfio_iommu_invalidate_cache(iommu, arg + minsz); + break; + } default: ret = -EINVAL; } diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 2c9def8..7f8678e 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1213,6 +1213,8 @@ struct vfio_iommu_type1_pasid_request { * +-----------------+-----------------------------------------------+ * | UNBIND_PGTBL | struct iommu_gpasid_bind_data | * +-----------------+-----------------------------------------------+ + * | CACHE_INVLD | struct iommu_cache_invalidate_info | + * +-----------------+-----------------------------------------------+ * * returns: 0 on success, -errno on failure. */ @@ -1225,6 +1227,7 @@ struct vfio_iommu_type1_nesting_op { #define VFIO_IOMMU_NESTING_OP_BIND_PGTBL (0) #define VFIO_IOMMU_NESTING_OP_UNBIND_PGTBL (1) +#define VFIO_IOMMU_NESTING_OP_CACHE_INVLD (2) #define VFIO_IOMMU_NESTING_OP _IO(VFIO_TYPE, VFIO_BASE + 19) From patchwork Wed Jun 24 08:55:24 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yi Liu X-Patchwork-Id: 11622673 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 55CA792A for ; Wed, 24 Jun 2020 08:50:12 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 45A3B20B80 for ; Wed, 24 Jun 2020 08:50:12 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2389425AbgFXIuC (ORCPT ); Wed, 24 Jun 2020 04:50:02 -0400 Received: from mga01.intel.com ([192.55.52.88]:1309 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388627AbgFXItC (ORCPT ); Wed, 24 Jun 2020 04:49:02 -0400 IronPort-SDR: XOGK6MIGM+rF5RYvYWTBpm6eVcT8fjPhavwPfmqDCRELtFYeSrxGjFV6mTv8KJ6xJYLoiHnEG1 s5cydVAjyegA== X-IronPort-AV: E=McAfee;i="6000,8403,9661"; a="162484884" X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="162484884" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Jun 2020 01:48:57 -0700 IronPort-SDR: THVJXCEkFHZnO1tmq2+rljLU7NZo52RLORfu12BgWwchCSjBKNX/3VBUhd1P+aXw6WSzoAr1Zp o3g7OVZO80gQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="275624524" Received: from jacob-builder.jf.intel.com ([10.7.199.155]) by orsmga003.jf.intel.com with ESMTP; 24 Jun 2020 01:48:56 -0700 From: Liu Yi L To: alex.williamson@redhat.com, eric.auger@redhat.com, baolu.lu@linux.intel.com, joro@8bytes.org Cc: kevin.tian@intel.com, jacob.jun.pan@linux.intel.com, ashok.raj@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com, yi.y.sun@intel.com, jean-philippe@linaro.org, peterx@redhat.com, hao.wu@intel.com, iommu@lists.linux-foundation.org, kvm@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v3 11/14] vfio/type1: Add vSVA support for IOMMU-backed mdevs Date: Wed, 24 Jun 2020 01:55:24 -0700 Message-Id: <1592988927-48009-12-git-send-email-yi.l.liu@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> References: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org Recent years, mediated device pass-through framework (e.g. vfio-mdev) is used to achieve flexible device sharing across domains (e.g. VMs). Also there are hardware assisted mediated pass-through solutions from platform vendors. e.g. Intel VT-d scalable mode which supports Intel Scalable I/O Virtualization technology. Such mdevs are called IOMMU- backed mdevs as there are IOMMU enforced DMA isolation for such mdevs. In kernel, IOMMU-backed mdevs are exposed to IOMMU layer by aux-domain concept, which means mdevs are protected by an iommu domain which is auxiliary to the domain that the kernel driver primarily uses for DMA API. Details can be found in the KVM presentation as below: https://events19.linuxfoundation.org/wp-content/uploads/2017/12/\ Hardware-Assisted-Mediated-Pass-Through-with-VFIO-Kevin-Tian-Intel.pdf This patch extends NESTING_IOMMU ops to IOMMU-backed mdev devices. The main requirement is to use the auxiliary domain associated with mdev. Cc: Kevin Tian CC: Jacob Pan CC: Jun Tian Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Liu Yi L --- v1 -> v2: *) check the iommu_device to ensure the handling mdev is IOMMU-backed --- drivers/vfio/vfio_iommu_type1.c | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 4c21300..e1a794c 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2378,20 +2378,41 @@ static int vfio_iommu_resv_refresh(struct vfio_iommu *iommu, return ret; } +static struct device *vfio_get_iommu_device(struct vfio_group *group, + struct device *dev) +{ + if (group->mdev_group) + return vfio_mdev_get_iommu_device(dev); + else + return dev; +} + static int vfio_dev_bind_gpasid_fn(struct device *dev, void *data) { struct domain_capsule *dc = (struct domain_capsule *)data; unsigned long arg = *(unsigned long *) dc->data; + struct device *iommu_device; + + iommu_device = vfio_get_iommu_device(dc->group, dev); + if (!iommu_device) + return -EINVAL; - return iommu_sva_bind_gpasid(dc->domain, dev, (void __user *) arg); + return iommu_sva_bind_gpasid(dc->domain, iommu_device, + (void __user *) arg); } static int vfio_dev_unbind_gpasid_fn(struct device *dev, void *data) { struct domain_capsule *dc = (struct domain_capsule *)data; unsigned long arg = *(unsigned long *) dc->data; + struct device *iommu_device; - iommu_sva_unbind_gpasid(dc->domain, dev, (void __user *) arg); + iommu_device = vfio_get_iommu_device(dc->group, dev); + if (!iommu_device) + return -EINVAL; + + iommu_sva_unbind_gpasid(dc->domain, iommu_device, + (void __user *) arg); return 0; } @@ -2400,8 +2421,13 @@ static int __vfio_dev_unbind_gpasid_fn(struct device *dev, void *data) struct domain_capsule *dc = (struct domain_capsule *)data; struct iommu_gpasid_bind_data *unbind_data = (struct iommu_gpasid_bind_data *) dc->data; + struct device *iommu_device; + + iommu_device = vfio_get_iommu_device(dc->group, dev); + if (!iommu_device) + return -EINVAL; - __iommu_sva_unbind_gpasid(dc->domain, dev, unbind_data); + __iommu_sva_unbind_gpasid(dc->domain, iommu_device, unbind_data); return 0; } @@ -3084,8 +3110,14 @@ static int vfio_dev_cache_invalidate_fn(struct device *dev, void *data) { struct domain_capsule *dc = (struct domain_capsule *)data; unsigned long arg = *(unsigned long *) dc->data; + struct device *iommu_device; + + iommu_device = vfio_get_iommu_device(dc->group, dev); + if (!iommu_device) + return -EINVAL; - iommu_cache_invalidate(dc->domain, dev, (void __user *) arg); + iommu_cache_invalidate(dc->domain, iommu_device, + (void __user *) arg); return 0; } From patchwork Wed Jun 24 08:55:25 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yi Liu X-Patchwork-Id: 11622675 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 1211D92A for ; Wed, 24 Jun 2020 08:50:14 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id E9D1920C09 for ; Wed, 24 Jun 2020 08:50:13 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2389386AbgFXIuA (ORCPT ); Wed, 24 Jun 2020 04:50:00 -0400 Received: from mga01.intel.com ([192.55.52.88]:1312 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388660AbgFXItD (ORCPT ); Wed, 24 Jun 2020 04:49:03 -0400 IronPort-SDR: mxR51ucmsAByMv8Mj3avbsNbKpPWNGljqx0QCmxI2yJoWulyRFti+rIdcAdXoUEBpwi44/lbgI J5N2w6YH256w== X-IronPort-AV: E=McAfee;i="6000,8403,9661"; a="162484885" X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="162484885" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Jun 2020 01:48:57 -0700 IronPort-SDR: L6FoifsQFx4bA/nHCHryDV9h3rCjZnQr9jzuF8ltGq+kNNkgdJO6S4ZIx+OJJBPduTU03ZVA7l xdsrMNCFhUow== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="275624527" Received: from jacob-builder.jf.intel.com ([10.7.199.155]) by orsmga003.jf.intel.com with ESMTP; 24 Jun 2020 01:48:56 -0700 From: Liu Yi L To: alex.williamson@redhat.com, eric.auger@redhat.com, baolu.lu@linux.intel.com, joro@8bytes.org Cc: kevin.tian@intel.com, jacob.jun.pan@linux.intel.com, ashok.raj@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com, yi.y.sun@intel.com, jean-philippe@linaro.org, peterx@redhat.com, hao.wu@intel.com, iommu@lists.linux-foundation.org, kvm@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v3 12/14] vfio/pci: Expose PCIe PASID capability to guest Date: Wed, 24 Jun 2020 01:55:25 -0700 Message-Id: <1592988927-48009-13-git-send-email-yi.l.liu@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> References: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org This patch exposes PCIe PASID capability to guest for assigned devices. Existing vfio_pci driver hides it from guest by setting the capability length as 0 in pci_ext_cap_length[]. And this patch only exposes PASID capability for devices which has PCIe PASID extended struture in its configuration space. So VFs, will will not see PASID capability on VFs as VF doesn't implement PASID extended structure in its configuration space. For VF, it is a TODO in future. Related discussion can be found in below link: https://lkml.org/lkml/2020/4/7/693 Cc: Kevin Tian CC: Jacob Pan Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Liu Yi L --- v1 -> v2: *) added in v2, but it was sent in a separate patchseries before --- drivers/vfio/pci/vfio_pci_config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 8746c94..56d126b 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -95,7 +95,7 @@ static const u16 pci_ext_cap_length[PCI_EXT_CAP_ID_MAX + 1] = { [PCI_EXT_CAP_ID_LTR] = PCI_EXT_CAP_LTR_SIZEOF, [PCI_EXT_CAP_ID_SECPCI] = 0, /* not yet */ [PCI_EXT_CAP_ID_PMUX] = 0, /* not yet */ - [PCI_EXT_CAP_ID_PASID] = 0, /* not yet */ + [PCI_EXT_CAP_ID_PASID] = PCI_EXT_CAP_PASID_SIZEOF, }; /* From patchwork Wed Jun 24 08:55:26 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yi Liu X-Patchwork-Id: 11622667 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 310BF912 for ; Wed, 24 Jun 2020 08:49:51 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 21FF920B80 for ; Wed, 24 Jun 2020 08:49:51 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2389211AbgFXItk (ORCPT ); Wed, 24 Jun 2020 04:49:40 -0400 Received: from mga01.intel.com ([192.55.52.88]:1309 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388913AbgFXItE (ORCPT ); Wed, 24 Jun 2020 04:49:04 -0400 IronPort-SDR: Sz7rPHSjVSe5wPBT/oWXGkLFpCRSJ67uOkW9bRYloZL87hOVAOn9zAemEwM1I8yQJNZRMlKSAC mYqW75pGZdmg== X-IronPort-AV: E=McAfee;i="6000,8403,9661"; a="162484888" X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="162484888" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Jun 2020 01:48:57 -0700 IronPort-SDR: NZ7BSxtJikNVPALknIpfd8Y5zn1lr07WGKCF0SseHKBjTTjrYjk1sSEIJFHaoEweL5NrqzVDov mRwKvnqc5kxw== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="275624532" Received: from jacob-builder.jf.intel.com ([10.7.199.155]) by orsmga003.jf.intel.com with ESMTP; 24 Jun 2020 01:48:56 -0700 From: Liu Yi L To: alex.williamson@redhat.com, eric.auger@redhat.com, baolu.lu@linux.intel.com, joro@8bytes.org Cc: kevin.tian@intel.com, jacob.jun.pan@linux.intel.com, ashok.raj@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com, yi.y.sun@intel.com, jean-philippe@linaro.org, peterx@redhat.com, hao.wu@intel.com, iommu@lists.linux-foundation.org, kvm@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v3 13/14] vfio: Document dual stage control Date: Wed, 24 Jun 2020 01:55:26 -0700 Message-Id: <1592988927-48009-14-git-send-email-yi.l.liu@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> References: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org From: Eric Auger The VFIO API was enhanced to support nested stage control: a bunch of new iotcls and usage guideline. Let's document the process to follow to set up nested mode. Cc: Kevin Tian CC: Jacob Pan Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Eric Auger Signed-off-by: Liu Yi L Reviewed-by: Stefan Hajnoczi --- v2 -> v3: *) address comments from Stefan Hajnoczi v1 -> v2: *) new in v2, compared with Eric's original version, pasid table bind and fault reporting is removed as this series doesn't cover them. Original version from Eric. https://lkml.org/lkml/2020/3/20/700 Documentation/driver-api/vfio.rst | 67 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst index f1a4d3c..639890f 100644 --- a/Documentation/driver-api/vfio.rst +++ b/Documentation/driver-api/vfio.rst @@ -239,6 +239,73 @@ group and can access them as follows:: /* Gratuitous device reset and go... */ ioctl(device, VFIO_DEVICE_RESET); +IOMMU Dual Stage Control +------------------------ + +Some IOMMUs support 2 stages/levels of translation. Stage corresponds to +the ARM terminology while level corresponds to Intel's VTD terminology. +In the following text we use either without distinction. + +This is useful when the guest is exposed with a virtual IOMMU and some +devices are assigned to the guest through VFIO. Then the guest OS can use +stage 1 (GIOVA -> GPA or GVA->GPA), while the hypervisor uses stage 2 for +VM isolation (GPA -> HPA). + +Under dual stage translation, the guest gets ownership of the stage 1 page +tables and also owns stage 1 configuration structures. The hypervisor owns +the root configuration structure (for security reason), including stage 2 +configuration. This works as long as configuration structures and page table +formats are compatible between the virtual IOMMU and the physical IOMMU. + +Assuming the HW supports it, this nested mode is selected by choosing the +VFIO_TYPE1_NESTING_IOMMU type through: + + ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_NESTING_IOMMU); + +This forces the hypervisor to use the stage 2, leaving stage 1 available +for guest usage. The guest stage 1 format depends on IOMMU vendor, and +it is the same with the nesting configuration method. User space should +check the format and configuration method after setting nesting type by +using: + + ioctl(container->fd, VFIO_IOMMU_GET_INFO, &nesting_info); + +Details can be found in Documentation/userspace-api/iommu.rst. For Intel +VT-d, each stage 1 page table is bound to host by: + + nesting_op->flags = VFIO_IOMMU_NESTING_OP_BIND_PGTBL; + memcpy(&nesting_op->data, &bind_data, sizeof(bind_data)); + ioctl(container->fd, VFIO_IOMMU_NESTING_OP, nesting_op); + +As mentioned above, guest OS may use stage 1 for GIOVA->GPA or GVA->GPA. +GVA->GPA page tables are available when PASID (Process Address Space ID) +is exposed to guest. e.g. guest with PASID-capable devices assigned. For +such page table binding, the bind_data should include PASID info, which +is allocated by guest itself or by host. This depends on hardware vendor +e.g. Intel VT-d requires to allocate PASID from host. This requirement is +defined by the Virtual Command Support in VT-d 3.0 spec, guest software +running on VT-d should allocate PASID from host kernel. To allocate PASID +from host, user space should +check the IOMMU_NESTING_FEAT_SYSWIDE_PASID +bit of the nesting info reported from host kernel. VFIO reports the nesting +info by VFIO_IOMMU_GET_INFO. User space could allocate PASID from host by: + + req.flags = VFIO_IOMMU_ALLOC_PASID; + ioctl(container, VFIO_IOMMU_PASID_REQUEST, &req); + +With first stage/level page table bound to host, it allows to combine the +guest stage 1 translation along with the hypervisor stage 2 translation to +get final address. + +When the guest invalidates stage 1 related caches, invalidations must be +forwarded to the host through + + nesting_op->flags = VFIO_IOMMU_NESTING_OP_CACHE_INVLD; + memcpy(&nesting_op->data, &inv_data, sizeof(inv_data)); + ioctl(container->fd, VFIO_IOMMU_NESTING_OP, nesting_op); + +Those invalidations can happen at various granularity levels, page, context, +... + VFIO User API ------------------------------------------------------------------------------- From patchwork Wed Jun 24 08:55:27 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yi Liu X-Patchwork-Id: 11622665 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 4E4DE6C1 for ; Wed, 24 Jun 2020 08:49:42 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 3B74120C09 for ; Wed, 24 Jun 2020 08:49:42 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2389248AbgFXItl (ORCPT ); Wed, 24 Jun 2020 04:49:41 -0400 Received: from mga01.intel.com ([192.55.52.88]:1312 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388912AbgFXItE (ORCPT ); Wed, 24 Jun 2020 04:49:04 -0400 IronPort-SDR: AM7guxDdpUih3ezsBnau+fAlnOyfMcyO0WcqZnbBqRMxjtEiTxJ8y79BkeCHr6alel0lKHP0O1 VxwPUN75cKMA== X-IronPort-AV: E=McAfee;i="6000,8403,9661"; a="162484886" X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="162484886" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Jun 2020 01:48:57 -0700 IronPort-SDR: P+2ooJdc8y2i6peOdRCmA1aCnjyRXqRY0Kpqn2cbWKpyBr12xultIKfWxKmzcdYIA9paOfjLoD bU6HrPn/svjg== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.75,274,1589266800"; d="scan'208";a="275624531" Received: from jacob-builder.jf.intel.com ([10.7.199.155]) by orsmga003.jf.intel.com with ESMTP; 24 Jun 2020 01:48:56 -0700 From: Liu Yi L To: alex.williamson@redhat.com, eric.auger@redhat.com, baolu.lu@linux.intel.com, joro@8bytes.org Cc: kevin.tian@intel.com, jacob.jun.pan@linux.intel.com, ashok.raj@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com, yi.y.sun@intel.com, jean-philippe@linaro.org, peterx@redhat.com, hao.wu@intel.com, iommu@lists.linux-foundation.org, kvm@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v3 14/14] iommu/vt-d: Support reporting nesting capability info Date: Wed, 24 Jun 2020 01:55:27 -0700 Message-Id: <1592988927-48009-15-git-send-email-yi.l.liu@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> References: <1592988927-48009-1-git-send-email-yi.l.liu@intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org Cc: Kevin Tian CC: Jacob Pan Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Liu Yi L Signed-off-by: Jacob Pan --- v2 -> v3: *) remove cap/ecap_mask in iommu_nesting_info. --- drivers/iommu/intel/iommu.c | 79 +++++++++++++++++++++++++++++++++++++++++++-- include/linux/intel-iommu.h | 16 +++++++++ 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b50395e..5f36894 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5651,12 +5651,16 @@ static inline bool iommu_pasid_support(void) static inline bool nested_mode_support(void) { struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; + struct intel_iommu *iommu, *prev = NULL; bool ret = true; rcu_read_lock(); for_each_active_iommu(iommu, drhd) { - if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) { + if (!prev) + prev = iommu; + if (!sm_supported(iommu) || !ecap_nest(iommu->ecap) || + (VTD_CAP_MASK & (iommu->cap ^ prev->cap)) || + (VTD_ECAP_MASK & (iommu->ecap ^ prev->ecap))) { ret = false; break; } @@ -6065,11 +6069,82 @@ intel_iommu_domain_set_attr(struct iommu_domain *domain, return ret; } +static int intel_iommu_get_nesting_info(struct iommu_domain *domain, + struct iommu_nesting_info *info) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + u64 cap = VTD_CAP_MASK, ecap = VTD_ECAP_MASK; + struct device_domain_info *domain_info; + struct iommu_nesting_info_vtd vtd; + unsigned long flags; + u32 size; + + if ((domain->type != IOMMU_DOMAIN_UNMANAGED) || + !(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE)) + return -ENODEV; + + if (!info) + return -EINVAL; + + size = sizeof(struct iommu_nesting_info) + + sizeof(struct iommu_nesting_info_vtd); + /* + * if provided buffer size is not equal to the size, should + * return 0 and also the expected buffer size to caller. + */ + if (info->size != size) { + info->size = size; + return 0; + } + + spin_lock_irqsave(&device_domain_lock, flags); + /* + * arbitrary select the first domain_info as all nesting + * related capabilities should be consistent across iommu + * units. + */ + domain_info = list_first_entry(&dmar_domain->devices, + struct device_domain_info, link); + cap &= domain_info->iommu->cap; + ecap &= domain_info->iommu->ecap; + spin_unlock_irqrestore(&device_domain_lock, flags); + + info->format = IOMMU_PASID_FORMAT_INTEL_VTD; + info->features = IOMMU_NESTING_FEAT_SYSWIDE_PASID | + IOMMU_NESTING_FEAT_BIND_PGTBL | + IOMMU_NESTING_FEAT_CACHE_INVLD; + vtd.flags = 0; + vtd.addr_width = dmar_domain->gaw; + vtd.pasid_bits = ilog2(intel_pasid_max_id); + vtd.cap_reg = cap; + vtd.ecap_reg = ecap; + + memcpy(info->data, &vtd, sizeof(vtd)); + return 0; +} + +static int intel_iommu_domain_get_attr(struct iommu_domain *domain, + enum iommu_attr attr, void *data) +{ + switch (attr) { + case DOMAIN_ATTR_NESTING: + { + struct iommu_nesting_info *info = + (struct iommu_nesting_info *) data; + + return intel_iommu_get_nesting_info(domain, info); + } + default: + return -ENODEV; + } +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, .domain_free = intel_iommu_domain_free, .domain_set_attr = intel_iommu_domain_set_attr, + .domain_get_attr = intel_iommu_domain_get_attr, .attach_dev = intel_iommu_attach_device, .detach_dev = intel_iommu_detach_device, .aux_attach_dev = intel_iommu_aux_attach_device, diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index a6f8f41..a76cd45 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -196,6 +196,22 @@ #define ecap_max_handle_mask(e) ((e >> 20) & 0xf) #define ecap_sc_support(e) ((e >> 7) & 0x1) /* Snooping Control */ +/* Nesting Support Capability Alignment */ +#define VTD_CAP_FL1GP (1ULL << 56) +#define VTD_CAP_FL5LP (1ULL << 60) +#define VTD_ECAP_PRS (1ULL << 29) +#define VTD_ECAP_ERS (1ULL << 30) +#define VTD_ECAP_SRS (1ULL << 31) +#define VTD_ECAP_EAFS (1ULL << 34) +#define VTD_ECAP_PASID (1ULL << 40) + +/* Only capabilities marked in below MASKs are reported */ +#define VTD_CAP_MASK (VTD_CAP_FL1GP | VTD_CAP_FL5LP) + +#define VTD_ECAP_MASK (VTD_ECAP_PRS | VTD_ECAP_ERS | \ + VTD_ECAP_SRS | VTD_ECAP_EAFS | \ + VTD_ECAP_PASID) + /* Virtual command interface capability */ #define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */