diff mbox

[v6,15/15] xfs, dax: introduce xfs_break_dax_layouts()

Message ID 152112917064.24669.8101553386217458496.stgit@dwillia2-desk3.amr.corp.intel.com (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Dan Williams March 15, 2018, 3:52 p.m. UTC
xfs_break_dax_layouts(), similar to xfs_break_leased_layouts(), scans
for busy / pinned dax pages and waits for those pages to go idle before
any potential extent unmap operation.

dax_layout_busy_page() handles synchronizing against new page-busy
events (get_user_pages). It invalidates all mappings to trigger the
get_user_pages slow path which will eventually block on the xfs inode
lock held in XFS_MMAPLOCK_EXCL mode. If dax_layout_busy_page() finds a
busy page it returns it for xfs to wait for the page-idle event that
will fire when the page reference count reaches 1 (recall ZONE_DEVICE
pages are idle at count 1).

While waiting, the XFS_MMAPLOCK_EXCL lock is dropped so as not to
deadlock a process that may be trying to elevate the page count of
more pages before arranging for any of them to go idle. For example, in
the typical case of submitting I/O, iov_iter_get_pages() elevates the
reference count of all pages in the I/O before starting I/O on the first
page.

Cc: Jan Kara <jack@suse.cz>
Cc: Dave Chinner <david@fromorbit.com>
Cc: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/xfs/xfs_file.c |   67 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 54 insertions(+), 13 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Christoph Hellwig March 16, 2018, 7:09 p.m. UTC | #1
Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
kernel test robot March 17, 2018, 10:11 p.m. UTC | #2
Hi Dan,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.16-rc5]
[cannot apply to next-20180316]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Dan-Williams/dax-fix-dma-vs-truncate-hole-punch/20180318-050250
config: x86_64-randconfig-x014-201811 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   fs/xfs/xfs_file.c: In function 'xfs_break_dax_layouts':
>> fs/xfs/xfs_file.c:786:8: error: implicit declaration of function '___wait_var_event'; did you mean 'xfs_wait_var_event'? [-Werror=implicit-function-declaration]
     ret = ___wait_var_event(&page->_refcount,
           ^~~~~~~~~~~~~~~~~
           xfs_wait_var_event
>> fs/xfs/xfs_file.c:788:10: error: invalid use of void expression
       0, 0, xfs_wait_var_event(inode, iolock));
             ^~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +786 fs/xfs/xfs_file.c

   773	
   774	static int
   775	xfs_break_dax_layouts(
   776		struct inode		*inode,
   777		uint			iolock)
   778	{
   779		struct page		*page;
   780		int			ret;
   781	
   782		page = dax_layout_busy_page(inode->i_mapping);
   783		if (!page)
   784			return 0;
   785	
 > 786		ret = ___wait_var_event(&page->_refcount,
   787				atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
 > 788				0, 0, xfs_wait_var_event(inode, iolock));
   789		if (ret < 0)
   790			return ret;
   791		return 1;
   792	}
   793	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot March 17, 2018, 11:47 p.m. UTC | #3
Hi Dan,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.16-rc5]
[cannot apply to next-20180316]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Dan-Williams/dax-fix-dma-vs-truncate-hole-punch/20180318-050250
config: x86_64-randconfig-s2-03180641 (attached as .config)
compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   fs//xfs/xfs_file.c: In function 'xfs_break_dax_layouts':
>> fs//xfs/xfs_file.c:786:8: error: implicit declaration of function '___wait_var_event' [-Werror=implicit-function-declaration]
     ret = ___wait_var_event(&page->_refcount,
           ^~~~~~~~~~~~~~~~~
   fs//xfs/xfs_file.c:788:4: error: invalid use of void expression
       0, 0, xfs_wait_var_event(inode, iolock));
       ^
   cc1: some warnings being treated as errors
--
   drivers//dax/super.c: In function 'generic_dax_pagefree':
>> drivers//dax/super.c:170:2: error: implicit declaration of function 'wake_up_var' [-Werror=implicit-function-declaration]
     wake_up_var(&page->_refcount);
     ^~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +/___wait_var_event +786 fs//xfs/xfs_file.c

   773	
   774	static int
   775	xfs_break_dax_layouts(
   776		struct inode		*inode,
   777		uint			iolock)
   778	{
   779		struct page		*page;
   780		int			ret;
   781	
   782		page = dax_layout_busy_page(inode->i_mapping);
   783		if (!page)
   784			return 0;
   785	
 > 786		ret = ___wait_var_event(&page->_refcount,
   787				atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
   788				0, 0, xfs_wait_var_event(inode, iolock));
   789		if (ret < 0)
   790			return ret;
   791		return 1;
   792	}
   793	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 399c5221f101..2ccdbb19e31a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -759,6 +759,38 @@  xfs_file_write_iter(
 	return ret;
 }
 
+static void
+xfs_wait_var_event(
+	struct inode		*inode,
+	uint			iolock)
+{
+	struct xfs_inode        *ip = XFS_I(inode);
+
+	xfs_iunlock(ip, iolock);
+	schedule();
+	xfs_ilock(ip, iolock);
+}
+
+static int
+xfs_break_dax_layouts(
+	struct inode		*inode,
+	uint			iolock)
+{
+	struct page		*page;
+	int			ret;
+
+	page = dax_layout_busy_page(inode->i_mapping);
+	if (!page)
+		return 0;
+
+	ret = ___wait_var_event(&page->_refcount,
+			atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
+			0, 0, xfs_wait_var_event(inode, iolock));
+	if (ret < 0)
+		return ret;
+	return 1;
+}
+
 int
 xfs_break_layouts(
 	struct inode		*inode,
@@ -766,23 +798,32 @@  xfs_break_layouts(
 	enum layout_break_reason reason)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
-	int			ret;
+	int			ret = 0;
 
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL
 				| XFS_MMAPLOCK_EXCL));
 
-	switch (reason) {
-	case BREAK_TRUNCATE:
-		/* fall through */
-	case BREAK_WRITE:
-		ret = xfs_break_leased_layouts(inode, iolock);
-		if (ret > 0)
-			ret = 0;
-		break;
-	default:
-		ret = -EINVAL;
-		break;
-	}
+	do {
+		switch (reason) {
+		case BREAK_TRUNCATE:
+			ret = xfs_break_dax_layouts(inode, *iolock);
+			/* fall through */
+		case BREAK_WRITE:
+			if (ret != 0)
+				break;
+			ret = xfs_break_leased_layouts(inode, iolock);
+			break;
+		default:
+			ret = -EINVAL;
+			break;
+		}
+		/*
+		 * This loop terminates when either layout break attempt
+		 * returns an error, or both layout break attempts
+		 * return 0, i.e. layouts are verified broken while
+		 * holding all required locks.
+		 */
+	} while (ret > 0);
 
 	return ret;
 }