From patchwork Mon May 7 07:21:49 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Du, Changbin" X-Patchwork-Id: 10383359 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork.web.codeaurora.org (Postfix) with ESMTP id 69C1460159 for ; Mon, 7 May 2018 07:32:29 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 568B628999 for ; Mon, 7 May 2018 07:32:29 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 4B14A289A0; Mon, 7 May 2018 07:32:29 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00, MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 66A8228999 for ; Mon, 7 May 2018 07:32:28 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 0562C6E38B; Mon, 7 May 2018 07:32:27 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by gabe.freedesktop.org (Postfix) with ESMTPS id 27DEE6E37C; Mon, 7 May 2018 07:32:24 +0000 (UTC) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by orsmga106.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 07 May 2018 00:32:23 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.49,373,1520924400"; d="scan'208";a="52095056" Received: from gvt-dell.bj.intel.com (HELO gvt-dell-host.bj.intel.com) ([10.238.154.59]) by fmsmga004.fm.intel.com with ESMTP; 07 May 2018 00:32:22 -0700 From: changbin.du@intel.com To: intel-gvt-dev@lists.freedesktop.org Date: Mon, 7 May 2018 15:21:49 +0800 Message-Id: <1525677713-8395-11-git-send-email-changbin.du@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1525677713-8395-1-git-send-email-changbin.du@intel.com> References: <1525677713-8395-1-git-send-email-changbin.du@intel.com> Subject: [Intel-gfx] [PATCH v5 10/14] drm/i915/kvmgt: Support setting dma map for huge pages X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: intel-gfx@lists.freedesktop.org MIME-Version: 1.0 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP From: Changbin Du To support huge gtt, we need to support huge pages in kvmgt first. This patch adds a 'size' param to the intel_gvt_mpt::dma_map_guest_page API and implements it in kvmgt. v2: rebase. Signed-off-by: Changbin Du --- drivers/gpu/drm/i915/gvt/gtt.c | 6 +- drivers/gpu/drm/i915/gvt/hypercall.h | 2 +- drivers/gpu/drm/i915/gvt/kvmgt.c | 130 +++++++++++++++++++++++++---------- drivers/gpu/drm/i915/gvt/mpt.h | 7 +- 4 files changed, 101 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 2f13464..ffeecda 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1104,7 +1104,7 @@ static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, - start_gfn + i, &dma_addr); + start_gfn + i, PAGE_SIZE, &dma_addr); if (ret) return ret; @@ -1150,7 +1150,7 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, }; /* direct shadow */ - ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr); + ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr); if (ret) return -ENXIO; @@ -2078,7 +2078,7 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, } ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, - &dma_addr); + PAGE_SIZE, &dma_addr); if (ret) { gvt_vgpu_err("fail to populate guest ggtt entry\n"); /* guest driver may read/write the entry when partial diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index f6dd9f7..5af11cf 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -53,7 +53,7 @@ struct intel_gvt_mpt { unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn); int (*dma_map_guest_page)(unsigned long handle, unsigned long gfn, - dma_addr_t *dma_addr); + unsigned long size, dma_addr_t *dma_addr); void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr); int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn, diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index df4e4a0..4d2f53a 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -94,6 +94,7 @@ struct gvt_dma { struct rb_node dma_addr_node; gfn_t gfn; dma_addr_t dma_addr; + unsigned long size; struct kref ref; }; @@ -106,51 +107,103 @@ static int kvmgt_guest_init(struct mdev_device *mdev); static void intel_vgpu_release_work(struct work_struct *work); static bool kvmgt_guest_exit(struct kvmgt_guest_info *info); -static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, - dma_addr_t *dma_addr) +static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, + unsigned long size) { - struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; - struct page *page; - unsigned long pfn; + int total_pages; + int npage; int ret; - /* Pin the page first. */ - ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1, - IOMMU_READ | IOMMU_WRITE, &pfn); - if (ret != 1) { - gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", - gfn, ret); - return -EINVAL; + total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; + + for (npage = 0; npage < total_pages; npage++) { + unsigned long cur_gfn = gfn + npage; + + ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1); + WARN_ON(ret != 1); } +} - if (!pfn_valid(pfn)) { - gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn); - vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); - return -EINVAL; +/* Pin a normal or compound guest page for dma. */ +static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, + unsigned long size, struct page **page) +{ + unsigned long base_pfn = 0; + int total_pages; + int npage; + int ret; + + total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; + /* + * We pin the pages one-by-one to avoid allocating a big arrary + * on stack to hold pfns. + */ + for (npage = 0; npage < total_pages; npage++) { + unsigned long cur_gfn = gfn + npage; + unsigned long pfn; + + ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1, + IOMMU_READ | IOMMU_WRITE, &pfn); + if (ret != 1) { + gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n", + cur_gfn, ret); + goto err; + } + + if (!pfn_valid(pfn)) { + gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn); + npage++; + ret = -EFAULT; + goto err; + } + + if (npage == 0) + base_pfn = pfn; + else if (base_pfn + npage != pfn) { + gvt_vgpu_err("The pages are not continuous\n"); + ret = -EINVAL; + npage++; + goto err; + } } + *page = pfn_to_page(base_pfn); + return 0; +err: + gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE); + return ret; +} + +static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, + dma_addr_t *dma_addr, unsigned long size) +{ + struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; + struct page *page = NULL; + int ret; + + ret = gvt_pin_guest_page(vgpu, gfn, size, &page); + if (ret) + return ret; + /* Setup DMA mapping. */ - page = pfn_to_page(pfn); - *dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, *dma_addr)) { - gvt_vgpu_err("DMA mapping failed for gfn 0x%lx\n", gfn); - vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); - return -ENOMEM; + *dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL); + ret = dma_mapping_error(dev, *dma_addr); + if (ret) { + gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n", + page_to_pfn(page), ret); + gvt_unpin_guest_page(vgpu, gfn, size); } - return 0; + return ret; } static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn, - dma_addr_t dma_addr) + dma_addr_t dma_addr, unsigned long size) { struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; - int ret; - dma_unmap_page(dev, dma_addr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); - WARN_ON(ret != 1); + dma_unmap_page(dev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); + gvt_unpin_guest_page(vgpu, gfn, size); } static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu, @@ -191,7 +244,7 @@ static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn) } static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, - dma_addr_t dma_addr) + dma_addr_t dma_addr, unsigned long size) { struct gvt_dma *new, *itr; struct rb_node **link, *parent = NULL; @@ -203,6 +256,7 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, new->vgpu = vgpu; new->gfn = gfn; new->dma_addr = dma_addr; + new->size = size; kref_init(&new->ref); /* gfn_cache maps gfn to struct gvt_dma. */ @@ -260,7 +314,7 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu) break; } dma = rb_entry(node, struct gvt_dma, gfn_node); - gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr); + gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size); __gvt_cache_remove_entry(vgpu, dma); mutex_unlock(&vgpu->vdev.cache_lock); } @@ -515,7 +569,8 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb, if (!entry) continue; - gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr); + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, + entry->size); __gvt_cache_remove_entry(vgpu, entry); } mutex_unlock(&vgpu->vdev.cache_lock); @@ -1648,7 +1703,7 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) } int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, - dma_addr_t *dma_addr) + unsigned long size, dma_addr_t *dma_addr) { struct kvmgt_guest_info *info; struct intel_vgpu *vgpu; @@ -1665,11 +1720,11 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, entry = __gvt_cache_find_gfn(info->vgpu, gfn); if (!entry) { - ret = gvt_dma_map_page(vgpu, gfn, dma_addr); + ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size); if (ret) goto err_unlock; - ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr); + ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size); if (ret) goto err_unmap; } else { @@ -1681,7 +1736,7 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, return 0; err_unmap: - gvt_dma_unmap_page(vgpu, gfn, *dma_addr); + gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size); err_unlock: mutex_unlock(&info->vgpu->vdev.cache_lock); return ret; @@ -1691,7 +1746,8 @@ static void __gvt_dma_release(struct kref *ref) { struct gvt_dma *entry = container_of(ref, typeof(*entry), ref); - gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr); + gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr, + entry->size); __gvt_cache_remove_entry(entry->vgpu, entry); } diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 32ffcd5..67f19992 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -230,17 +230,18 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn( /** * intel_gvt_hypervisor_dma_map_guest_page - setup dma map for guest page * @vgpu: a vGPU - * @gpfn: guest pfn + * @gfn: guest pfn + * @size: page size * @dma_addr: retrieve allocated dma addr * * Returns: * 0 on success, negative error code if failed. */ static inline int intel_gvt_hypervisor_dma_map_guest_page( - struct intel_vgpu *vgpu, unsigned long gfn, + struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size, dma_addr_t *dma_addr) { - return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn, + return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn, size, dma_addr); }