@@ -29,6 +29,11 @@
#include <linux/uio.h>
#include <linux/vmstat.h>
+/*
+ * dax_clear_blocks() is called from within transaction context from XFS,
+ * and hence the stack from this point must follow GFP_NOFS semantics for
+ * all operations.
+ */
int dax_clear_blocks(struct inode *inode, sector_t block, long size)
{
struct block_device *bdev = inode->i_sb->s_bdev;
@@ -1284,15 +1284,12 @@ xfs_map_direct(
trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
- /* XXX: preparation for removing unwritten extents in DAX */
-#if 0
if (dax_fault) {
ASSERT(type == XFS_IO_OVERWRITE);
trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
imap);
return;
}
-#endif
if (bh_result->b_private) {
ioend = bh_result->b_private;
@@ -1420,10 +1417,12 @@ __xfs_get_blocks(
if (error)
goto out_unlock;
+ /* for DAX, we convert unwritten extents directly */
if (create &&
(!nimaps ||
(imap.br_startblock == HOLESTARTBLOCK ||
- imap.br_startblock == DELAYSTARTBLOCK))) {
+ imap.br_startblock == DELAYSTARTBLOCK) ||
+ (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
if (direct || xfs_get_extsz_hint(ip)) {
/*
* Drop the ilock in preparation for starting the block
@@ -1468,6 +1467,12 @@ __xfs_get_blocks(
goto out_unlock;
}
+ if (IS_DAX(inode) && create) {
+ ASSERT(!ISUNWRITTEN(&imap));
+ /* zeroing is not needed at a higher layer */
+ new = 0;
+ }
+
/* trim mapping down to size requested */
if (direct || size > (1 << inode->i_blkbits))
xfs_map_trim_size(inode, iblock, bh_result,
@@ -131,6 +131,7 @@ xfs_iomap_write_direct(
uint qblocks, resblks, resrtextents;
int committed;
int error;
+ int bmapi_flags = XFS_BMAPI_PREALLOC;
error = xfs_qm_dqattach(ip, 0);
if (error)
@@ -196,13 +197,26 @@ xfs_iomap_write_direct(
xfs_trans_ijoin(tp, ip, 0);
/*
+ * For DAX, we do not allocate unwritten extents, but instead we zero
+ * the block before we commit the transaction. Ideally we'd like to do
+ * this outside the transaction context, but if we commit and then crash
+ * we may not have zeroed the blocks and stale data will be exposed on
+ * recovery of the allocation. Hence we must zero before commit.
+ * Further, if we are mapping unwritten extents here, we need to zero
+ * and convert them to written so that we don't need an unwritten extent
+ * callback for DAX.
+ */
+ if (IS_DAX(VFS_I(ip)))
+ bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
+
+ /*
* From this point onwards we overwrite the imap pointer that the
* caller gave to us.
*/
xfs_bmap_init(&free_list, &firstfsb);
nimaps = 1;
error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
- XFS_BMAPI_PREALLOC, &firstfsb, 0,
+ bmapi_flags, &firstfsb, 0,
imap, &nimaps, &free_list);
if (error)
goto out_bmap_cancel;
@@ -213,6 +227,7 @@ xfs_iomap_write_direct(
error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error)
goto out_bmap_cancel;
+
error = xfs_trans_commit(tp);
if (error)
goto out_unlock;