[2/3] vfio/iommu_type1: Optimize dirty bitmap population based on iommu HWDBM

Message ID 20210413091445.7448-3-zhukeqian1@huawei.com (mailing list archive)
State New, archived
Series vfio/iommu_type1: Implement dirty log tracking based on IOMMU HWDBM

Commit Message

Keqian Zhu April 13, 2021, 9:14 a.m. UTC
From: Kunkun Jiang <jiangkunkun@huawei.com>

In the past, if the vfio_iommu is not of pinned_page_dirty_scope and
a vfio_dma is iommu_mapped, we populate the full dirty bitmap for
this vfio_dma. Now we can try to get the dirty log from the IOMMU
before making that lousy decision.

The bitmap population:

In detail, if all vfio_groups are of pinned_page_dirty_scope, the
dirty bitmap population is not affected. If some vfio_groups are not
of pinned_page_dirty_scope but all of their domains support HWDBM,
we can try to get the dirty log from the IOMMU. Otherwise, we fall
back to the full dirty bitmap.

Consider DMA and group hotplug:

Start dirty log tracking for a newly added DMA range, and stop it
for a DMA range that is about to be removed.

A domain may not support HWDBM at start but come to support it after
some group hotplug (the first HWDBM-capable group is attached, or all
groups without HWDBM are detached). Conversely, a domain may support
HWDBM at start but lose that support afterwards (a group without
HWDBM is attached, or the last group is detached). So our policy is
to switch dirty log tracking for domains dynamically.
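
The switch decision is two predicates, as in vfio_iommu_update_hwdbm()
in the patch below (a sketch of the same logic):

	/* log was enabled iff the domain had no non-HWDBM groups,
	 * unless this attach adds the very first group */
	log_enabled = !old_num_non_hwdbm && !(attach && singular);
	/* log should be enabled iff it still has none,
	 * unless this detach removes the last group */
	should_enable = !domain->num_non_hwdbm_groups && !(!attach && singular);

	if (should_enable && !log_enabled)
		vfio_domain_dirty_log_switch(iommu, domain, true);
	else if (!should_enable && log_enabled)
		vfio_domain_dirty_log_switch(iommu, domain, false);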

Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
 drivers/vfio/vfio_iommu_type1.c | 166 ++++++++++++++++++++++++++++++--
 1 file changed, 159 insertions(+), 7 deletions(-)

Comments

kernel test robot April 13, 2021, 6:05 p.m. UTC | #1
Hi Keqian,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on vfio/next]
[also build test ERROR on linux/master linus/master v5.12-rc7 next-20210413]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Keqian-Zhu/vfio-iommu_type1-Implement-dirty-log-tracking-based-on-IOMMU-HWDBM/20210413-171632
base:   https://github.com/awilliam/linux-vfio.git next
config: arm-randconfig-r015-20210413 (attached as .config)
compiler: arm-linux-gnueabi-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/5553c39f302409e175a70157c47679e61297dec5
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Keqian-Zhu/vfio-iommu_type1-Implement-dirty-log-tracking-based-on-IOMMU-HWDBM/20210413-171632
        git checkout 5553c39f302409e175a70157c47679e61297dec5
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arm 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   drivers/vfio/vfio_iommu_type1.c: In function 'vfio_iommu_dirty_log_clear':
>> drivers/vfio/vfio_iommu_type1.c:1215:9: error: implicit declaration of function 'iommu_clear_dirty_log' [-Werror=implicit-function-declaration]
    1215 |   ret = iommu_clear_dirty_log(d->domain, start_iova, size,
         |         ^~~~~~~~~~~~~~~~~~~~~
   drivers/vfio/vfio_iommu_type1.c: In function 'vfio_iommu_dirty_log_sync':
>> drivers/vfio/vfio_iommu_type1.c:1234:9: error: implicit declaration of function 'iommu_sync_dirty_log' [-Werror=implicit-function-declaration]
    1234 |   ret = iommu_sync_dirty_log(d->domain, dma->iova, dma->size,
         |         ^~~~~~~~~~~~~~~~~~~~
   In file included from arch/arm/include/asm/bug.h:60,
                    from include/linux/bug.h:5,
                    from include/linux/thread_info.h:12,
                    from include/asm-generic/preempt.h:5,
                    from ./arch/arm/include/generated/asm/preempt.h:1,
                    from include/linux/preempt.h:78,
                    from include/linux/spinlock.h:51,
                    from include/linux/ipc.h:5,
                    from include/uapi/linux/sem.h:5,
                    from include/linux/sem.h:5,
                    from include/linux/compat.h:14,
                    from drivers/vfio/vfio_iommu_type1.c:24:
   drivers/vfio/vfio_iommu_type1.c: In function 'vfio_dma_dirty_log_switch':
>> drivers/vfio/vfio_iommu_type1.c:1373:11: error: implicit declaration of function 'iommu_switch_dirty_log' [-Werror=implicit-function-declaration]
    1373 |   WARN_ON(iommu_switch_dirty_log(d->domain, enable, dma->iova,
         |           ^~~~~~~~~~~~~~~~~~~~~~
   include/asm-generic/bug.h:188:25: note: in definition of macro 'WARN_ON'
     188 |  int __ret_warn_on = !!(condition);    \
         |                         ^~~~~~~~~
   drivers/vfio/vfio_iommu_type1.c: In function 'vfio_group_supports_hwdbm':
   drivers/vfio/vfio_iommu_type1.c:2360:33: error: 'IOMMU_DEV_FEAT_HWDBM' undeclared (first use in this function); did you mean 'IOMMU_DEV_FEAT_SVA'?
    2360 |  enum iommu_dev_features feat = IOMMU_DEV_FEAT_HWDBM;
         |                                 ^~~~~~~~~~~~~~~~~~~~
         |                                 IOMMU_DEV_FEAT_SVA
   drivers/vfio/vfio_iommu_type1.c:2360:33: note: each undeclared identifier is reported only once for each function it appears in
   cc1: some warnings being treated as errors
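
Note on the errors above: iommu_switch_dirty_log(), iommu_sync_dirty_log(),
iommu_clear_dirty_log() and IOMMU_DEV_FEAT_HWDBM are not in any of the trees
the robot built against; they presumably come from a prerequisite IOMMU
series this one depends on, which matches the robot's "wrong git tree" hint.
Independent of that, configs that compile the IOMMU API out would still need
inline stubs next to the declarations in include/linux/iommu.h. A minimal
sketch, with prototypes inferred from the call sites in this patch and stub
error values that are an assumption, not taken from the series:

	#ifdef CONFIG_IOMMU_API
	int iommu_switch_dirty_log(struct iommu_domain *domain, bool enable,
				   dma_addr_t iova, size_t size, int prot);
	int iommu_sync_dirty_log(struct iommu_domain *domain, dma_addr_t iova,
				 size_t size, unsigned long *bitmap,
				 dma_addr_t base_iova, unsigned long pgshift);
	int iommu_clear_dirty_log(struct iommu_domain *domain, dma_addr_t iova,
				  size_t size, unsigned long *bitmap,
				  dma_addr_t base_iova, unsigned long pgshift);
	#else
	static inline int
	iommu_switch_dirty_log(struct iommu_domain *domain, bool enable,
			       dma_addr_t iova, size_t size, int prot)
	{
		return -ENODEV;	/* assumed errno, to be matched to the series */
	}
	static inline int
	iommu_sync_dirty_log(struct iommu_domain *domain, dma_addr_t iova,
			     size_t size, unsigned long *bitmap,
			     dma_addr_t base_iova, unsigned long pgshift)
	{
		return -ENODEV;
	}
	static inline int
	iommu_clear_dirty_log(struct iommu_domain *domain, dma_addr_t iova,
			      size_t size, unsigned long *bitmap,
			      dma_addr_t base_iova, unsigned long pgshift)
	{
		return -ENODEV;
	}
	#endif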


vim +/iommu_clear_dirty_log +1215 drivers/vfio/vfio_iommu_type1.c

  1204	
  1205	static int vfio_iommu_dirty_log_clear(struct vfio_iommu *iommu,
  1206					      dma_addr_t start_iova, size_t size,
  1207					      unsigned long *bitmap_buffer,
  1208					      dma_addr_t base_iova,
  1209					      unsigned long pgshift)
  1210	{
  1211		struct vfio_domain *d;
  1212		int ret = 0;
  1213	
  1214		list_for_each_entry(d, &iommu->domain_list, next) {
> 1215			ret = iommu_clear_dirty_log(d->domain, start_iova, size,
  1216						    bitmap_buffer, base_iova, pgshift);
  1217			if (ret) {
  1218				pr_warn("vfio_iommu dirty log clear failed!\n");
  1219				break;
  1220			}
  1221		}
  1222	
  1223		return ret;
  1224	}
  1225	
  1226	static int vfio_iommu_dirty_log_sync(struct vfio_iommu *iommu,
  1227					     struct vfio_dma *dma,
  1228					     unsigned long pgshift)
  1229	{
  1230		struct vfio_domain *d;
  1231		int ret = 0;
  1232	
  1233		list_for_each_entry(d, &iommu->domain_list, next) {
> 1234			ret = iommu_sync_dirty_log(d->domain, dma->iova, dma->size,
  1235						   dma->bitmap, dma->iova, pgshift);
  1236			if (ret) {
  1237				pr_warn("vfio_iommu dirty log sync failed!\n");
  1238				break;
  1239			}
  1240		}
  1241	
  1242		return ret;
  1243	}
  1244	
  1245	static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
  1246				      struct vfio_dma *dma, dma_addr_t base_iova,
  1247				      size_t pgsize)
  1248	{
  1249		unsigned long pgshift = __ffs(pgsize);
  1250		unsigned long nbits = dma->size >> pgshift;
  1251		unsigned long bit_offset = (dma->iova - base_iova) >> pgshift;
  1252		unsigned long copy_offset = bit_offset / BITS_PER_LONG;
  1253		unsigned long shift = bit_offset % BITS_PER_LONG;
  1254		unsigned long leftover;
  1255		int ret;
  1256	
  1257		if (!iommu->num_non_pinned_groups || !dma->iommu_mapped) {
  1258			/* nothing to do */
  1259		} else if (!iommu->num_non_hwdbm_groups) {
  1260			/* try to get dirty log from IOMMU */
  1261			ret = vfio_iommu_dirty_log_sync(iommu, dma, pgshift);
  1262			if (ret)
  1263				return ret;
  1264		} else {
  1265			/*
  1266			 * mark all pages dirty if any IOMMU capable device is not able
  1267			 * to report dirty pages and all pages are pinned and mapped.
  1268			 */
  1269			bitmap_set(dma->bitmap, 0, nbits);
  1270		}
  1271	
  1272		if (shift) {
  1273			bitmap_shift_left(dma->bitmap, dma->bitmap, shift,
  1274					  nbits + shift);
  1275	
  1276			if (copy_from_user(&leftover,
  1277					   (void __user *)(bitmap + copy_offset),
  1278					   sizeof(leftover)))
  1279				return -EFAULT;
  1280	
  1281			bitmap_or(dma->bitmap, dma->bitmap, &leftover, shift);
  1282		}
  1283	
  1284		if (copy_to_user((void __user *)(bitmap + copy_offset), dma->bitmap,
  1285				 DIRTY_BITMAP_BYTES(nbits + shift)))
  1286			return -EFAULT;
  1287	
  1288		/* Recover the bitmap if it'll be used to clear hardware dirty log */
  1289		if (shift && iommu->num_non_pinned_groups && dma->iommu_mapped &&
  1290		    !iommu->num_non_hwdbm_groups)
  1291			bitmap_shift_right(dma->bitmap, dma->bitmap, shift,
  1292					   nbits + shift);
  1293	
  1294		return 0;
  1295	}
  1296	
  1297	static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
  1298					  dma_addr_t iova, size_t size, size_t pgsize)
  1299	{
  1300		struct vfio_dma *dma;
  1301		struct rb_node *n;
  1302		unsigned long pgshift = __ffs(pgsize);
  1303		int ret;
  1304	
  1305		/*
  1306		 * GET_BITMAP request must fully cover vfio_dma mappings.  Multiple
  1307		 * vfio_dma mappings may be clubbed by specifying large ranges, but
  1308		 * there must not be any previous mappings bisected by the range.
  1309		 * An error will be returned if these conditions are not met.
  1310		 */
  1311		dma = vfio_find_dma(iommu, iova, 1);
  1312		if (dma && dma->iova != iova)
  1313			return -EINVAL;
  1314	
  1315		dma = vfio_find_dma(iommu, iova + size - 1, 0);
  1316		if (dma && dma->iova + dma->size != iova + size)
  1317			return -EINVAL;
  1318	
  1319		for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
  1320			struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
  1321	
  1322			if (dma->iova < iova)
  1323				continue;
  1324	
  1325			if (dma->iova > iova + size - 1)
  1326				break;
  1327	
  1328			ret = update_user_bitmap(bitmap, iommu, dma, iova, pgsize);
  1329			if (ret)
  1330				return ret;
  1331	
  1332			/* Clear iommu dirty log to re-enable dirty log tracking */
  1333			if (iommu->num_non_pinned_groups && dma->iommu_mapped &&
  1334			    !iommu->num_non_hwdbm_groups) {
  1335				ret = vfio_iommu_dirty_log_clear(iommu,	dma->iova,
  1336						dma->size, dma->bitmap, dma->iova,
  1337						pgshift);
  1338				if (ret)
  1339					return ret;
  1340			}
  1341	
  1342			/*
  1343			 * Re-populate bitmap to include all pinned pages which are
  1344			 * considered as dirty but exclude pages which are unpinned and
  1345			 * pages which are marked dirty by vfio_dma_rw()
  1346			 */
  1347			bitmap_clear(dma->bitmap, 0, dma->size >> pgshift);
  1348			vfio_dma_populate_bitmap(dma, pgsize);
  1349		}
  1350		return 0;
  1351	}
  1352	
  1353	static int verify_bitmap_size(uint64_t npages, uint64_t bitmap_size)
  1354	{
  1355		if (!npages || !bitmap_size || (bitmap_size > DIRTY_BITMAP_SIZE_MAX) ||
  1356		    (bitmap_size < DIRTY_BITMAP_BYTES(npages)))
  1357			return -EINVAL;
  1358	
  1359		return 0;
  1360	}
  1361	
  1362	static void vfio_dma_dirty_log_switch(struct vfio_iommu *iommu,
  1363					      struct vfio_dma *dma, bool enable)
  1364	{
  1365		struct vfio_domain *d;
  1366	
  1367		if (!dma->iommu_mapped)
  1368			return;
  1369	
  1370		list_for_each_entry(d, &iommu->domain_list, next) {
  1371			if (d->num_non_hwdbm_groups)
  1372				continue;
> 1373			WARN_ON(iommu_switch_dirty_log(d->domain, enable, dma->iova,
  1374						       dma->size, d->prot | dma->prot));
  1375		}
  1376	}
  1377	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

Patch

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 9cb9ce021b22..77950e47f56f 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1202,6 +1202,46 @@  static void vfio_update_pgsize_bitmap(struct vfio_iommu *iommu)
 	}
 }
 
+static int vfio_iommu_dirty_log_clear(struct vfio_iommu *iommu,
+				      dma_addr_t start_iova, size_t size,
+				      unsigned long *bitmap_buffer,
+				      dma_addr_t base_iova,
+				      unsigned long pgshift)
+{
+	struct vfio_domain *d;
+	int ret = 0;
+
+	list_for_each_entry(d, &iommu->domain_list, next) {
+		ret = iommu_clear_dirty_log(d->domain, start_iova, size,
+					    bitmap_buffer, base_iova, pgshift);
+		if (ret) {
+			pr_warn("vfio_iommu dirty log clear failed!\n");
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static int vfio_iommu_dirty_log_sync(struct vfio_iommu *iommu,
+				     struct vfio_dma *dma,
+				     unsigned long pgshift)
+{
+	struct vfio_domain *d;
+	int ret = 0;
+
+	list_for_each_entry(d, &iommu->domain_list, next) {
+		ret = iommu_sync_dirty_log(d->domain, dma->iova, dma->size,
+					   dma->bitmap, dma->iova, pgshift);
+		if (ret) {
+			pr_warn("vfio_iommu dirty log sync failed!\n");
+			break;
+		}
+	}
+
+	return ret;
+}
+
 static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
 			      struct vfio_dma *dma, dma_addr_t base_iova,
 			      size_t pgsize)
@@ -1212,13 +1252,22 @@  static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
 	unsigned long copy_offset = bit_offset / BITS_PER_LONG;
 	unsigned long shift = bit_offset % BITS_PER_LONG;
 	unsigned long leftover;
+	int ret;
 
-	/*
-	 * mark all pages dirty if any IOMMU capable device is not able
-	 * to report dirty pages and all pages are pinned and mapped.
-	 */
-	if (iommu->num_non_pinned_groups && dma->iommu_mapped)
+	if (!iommu->num_non_pinned_groups || !dma->iommu_mapped) {
+		/* nothing to do */
+	} else if (!iommu->num_non_hwdbm_groups) {
+		/* try to get dirty log from IOMMU */
+		ret = vfio_iommu_dirty_log_sync(iommu, dma, pgshift);
+		if (ret)
+			return ret;
+	} else {
+		/*
+		 * mark all pages dirty if any IOMMU capable device is not able
+		 * to report dirty pages and all pages are pinned and mapped.
+		 */
 		bitmap_set(dma->bitmap, 0, nbits);
+	}
 
 	if (shift) {
 		bitmap_shift_left(dma->bitmap, dma->bitmap, shift,
@@ -1236,6 +1285,12 @@  static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
 			 DIRTY_BITMAP_BYTES(nbits + shift)))
 		return -EFAULT;
 
+	/* Recover the bitmap if it'll be used to clear hardware dirty log */
+	if (shift && iommu->num_non_pinned_groups && dma->iommu_mapped &&
+	    !iommu->num_non_hwdbm_groups)
+		bitmap_shift_right(dma->bitmap, dma->bitmap, shift,
+				   nbits + shift);
+
 	return 0;
 }
 
@@ -1274,6 +1329,16 @@  static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
 		if (ret)
 			return ret;
 
+		/* Clear iommu dirty log to re-enable dirty log tracking */
+		if (iommu->num_non_pinned_groups && dma->iommu_mapped &&
+		    !iommu->num_non_hwdbm_groups) {
+			ret = vfio_iommu_dirty_log_clear(iommu, dma->iova,
+					dma->size, dma->bitmap, dma->iova,
+					pgshift);
+			if (ret)
+				return ret;
+		}
+
 		/*
 		 * Re-populate bitmap to include all pinned pages which are
 		 * considered as dirty but exclude pages which are unpinned and
@@ -1294,6 +1359,22 @@  static int verify_bitmap_size(uint64_t npages, uint64_t bitmap_size)
 	return 0;
 }
 
+static void vfio_dma_dirty_log_switch(struct vfio_iommu *iommu,
+				      struct vfio_dma *dma, bool enable)
+{
+	struct vfio_domain *d;
+
+	if (!dma->iommu_mapped)
+		return;
+
+	list_for_each_entry(d, &iommu->domain_list, next) {
+		if (d->num_non_hwdbm_groups)
+			continue;
+		WARN_ON(iommu_switch_dirty_log(d->domain, enable, dma->iova,
+					       dma->size, d->prot | dma->prot));
+	}
+}
+
 static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 			     struct vfio_iommu_type1_dma_unmap *unmap,
 			     struct vfio_bitmap *bitmap)
@@ -1446,6 +1527,10 @@  static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 				break;
 		}
 
+		/* Stop log for removed dma */
+		if (iommu->dirty_page_tracking)
+			vfio_dma_dirty_log_switch(iommu, dma, false);
+
 		unmapped += dma->size;
 		n = rb_next(n);
 		vfio_remove_dma(iommu, dma);
@@ -1677,8 +1762,13 @@  static int vfio_dma_do_map(struct vfio_iommu *iommu,
 
 	if (!ret && iommu->dirty_page_tracking) {
 		ret = vfio_dma_bitmap_alloc(dma, pgsize);
-		if (ret)
+		if (ret) {
 			vfio_remove_dma(iommu, dma);
+			goto out_unlock;
+		}
+
+		/* Start dirty log for newly added dma */
+		vfio_dma_dirty_log_switch(iommu, dma, true);
 	}
 
 out_unlock:
@@ -2273,6 +2363,21 @@  static bool vfio_group_supports_hwdbm(struct vfio_group *group)
 					 vfio_dev_enable_feature);
 }
 
+static void vfio_domain_dirty_log_switch(struct vfio_iommu *iommu,
+					 struct vfio_domain *d, bool enable)
+{
+	struct rb_node *n;
+	struct vfio_dma *dma;
+
+	for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
+		dma = rb_entry(n, struct vfio_dma, node);
+		if (!dma->iommu_mapped)
+			continue;
+		WARN_ON(iommu_switch_dirty_log(d->domain, enable, dma->iova,
+					       dma->size, d->prot | dma->prot));
+	}
+}
+
 /*
  * Called after a new group is added to the group_list of domain, or before an
  * old group is removed from the group_list of domain.
@@ -2282,6 +2387,10 @@  static void vfio_iommu_update_hwdbm(struct vfio_iommu *iommu,
 				    struct vfio_group *group,
 				    bool attach)
 {
+	uint64_t old_num_non_hwdbm = domain->num_non_hwdbm_groups;
+	bool singular = list_is_singular(&domain->group_list);
+	bool log_enabled, should_enable;
+
 	/* Update the HWDBM status of group, domain and iommu */
 	group->iommu_hwdbm = vfio_group_supports_hwdbm(group);
 	if (!group->iommu_hwdbm && attach) {
@@ -2291,6 +2400,30 @@  static void vfio_iommu_update_hwdbm(struct vfio_iommu *iommu,
 		domain->num_non_hwdbm_groups--;
 		iommu->num_non_hwdbm_groups--;
 	}
+
+	if (!iommu->dirty_page_tracking)
+		return;
+
+	/*
+	 * The vfio_domain can switch dirty log tracking dynamically on
+	 * group attach/detach. The basic idea is to convert the current
+	 * dirty log status into the desired one.
+	 *
+	 * The dirty log was enabled iff old_num_non_hwdbm was zero, unless
+	 * this attach adds the first group to the domain.
+	 *
+	 * The dirty log should be enabled iff the updated
+	 * num_non_hwdbm_groups is zero, unless this detach removes the
+	 * last group from the domain.
+	 */
+	log_enabled = !old_num_non_hwdbm && !(attach && singular);
+	should_enable = !domain->num_non_hwdbm_groups && !(!attach && singular);
+
+	/* Switch dirty log tracking when status changed */
+	if (should_enable && !log_enabled)
+		vfio_domain_dirty_log_switch(iommu, domain, true);
+	else if (!should_enable && log_enabled)
+		vfio_domain_dirty_log_switch(iommu, domain, false);
 }
 
 static int vfio_iommu_type1_attach_group(void *iommu_data,
@@ -3046,6 +3179,22 @@  static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
 			-EFAULT : 0;
 }
 
+static void vfio_iommu_dirty_log_switch(struct vfio_iommu *iommu, bool enable)
+{
+	struct vfio_domain *d;
+
+	/*
+	 * We enable dirty log tracking for the vfio_domains that support
+	 * HWDBM. Even if no iommu domain supports HWDBM now, one may come
+	 * to support it after some groups are detached.
+	 */
+	list_for_each_entry(d, &iommu->domain_list, next) {
+		if (d->num_non_hwdbm_groups)
+			continue;
+		vfio_domain_dirty_log_switch(iommu, d, enable);
+	}
+}
+
 static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
 					unsigned long arg)
 {
@@ -3078,8 +3227,10 @@  static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
 		pgsize = 1 << __ffs(iommu->pgsize_bitmap);
 		if (!iommu->dirty_page_tracking) {
 			ret = vfio_dma_bitmap_alloc_all(iommu, pgsize);
-			if (!ret)
+			if (!ret) {
 				iommu->dirty_page_tracking = true;
+				vfio_iommu_dirty_log_switch(iommu, true);
+			}
 		}
 		mutex_unlock(&iommu->lock);
 		return ret;
@@ -3088,6 +3239,7 @@  static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
 		if (iommu->dirty_page_tracking) {
 			iommu->dirty_page_tracking = false;
 			vfio_dma_bitmap_free_all(iommu);
+			vfio_iommu_dirty_log_switch(iommu, false);
 		}
 		mutex_unlock(&iommu->lock);
 		return 0;
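
A closing note on the shift/leftover handling in update_user_bitmap():
it merges the per-dma bitmap into the user buffer when dma->iova is not
aligned to a BITS_PER_LONG-page boundary relative to base_iova. A worked
example with assumed numbers (4K pages, 64-bit longs):

	/*
	 * base_iova = 0, pgsize = 4K, dma->iova = 0x14000:
	 *   bit_offset  = (0x14000 - 0) >> 12 = 20
	 *   copy_offset = 20 / 64 = 0   (start at user word 0)
	 *   shift       = 20 % 64 = 20  (not word aligned)
	 *
	 * The local bitmap is shifted left by 20 bits, the user's existing
	 * word 0 is read back and OR-ed into the vacated low bits, and the
	 * merged result is copied out, preserving bits 0-19 that belong to
	 * the previous vfio_dma. The final bitmap_shift_right() undoes the
	 * shift so the unshifted bitmap can be handed to
	 * vfio_iommu_dirty_log_clear() to clear the hardware dirty log.
	 */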