@@ -378,78 +378,101 @@ xfs_map_blocks(
struct inode *inode,
loff_t offset,
struct xfs_bmbt_irec *imap,
- int type)
+ int *type)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
ssize_t count = i_blocksize(inode);
xfs_fileoff_t offset_fsb, end_fsb;
+ int whichfork = XFS_DATA_FORK;
int error = 0;
- int bmapi_flags = XFS_BMAPI_ENTIRE;
int nimaps = 1;
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- /*
- * Truncate can race with writeback since writeback doesn't take the
- * iolock and truncate decreases the file size before it starts
- * truncating the pages between new_size and old_size. Therefore, we
- * can end up in the situation where writeback gets a CoW fork mapping
- * but the truncate makes the mapping invalid and we end up in here
- * trying to get a new mapping. Bail out here so that we simply never
- * get a valid mapping and so we drop the write altogether. The page
- * truncation will kill the contents anyway.
- */
- if (type == XFS_IO_COW && offset > i_size_read(inode))
- return 0;
-
- ASSERT(type != XFS_IO_COW);
- if (type == XFS_IO_UNWRITTEN)
- bmapi_flags |= XFS_BMAPI_IGSTATE;
-
xfs_ilock(ip, XFS_ILOCK_SHARED);
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
(ip->i_df.if_flags & XFS_IFEXTENTS));
ASSERT(offset <= mp->m_super->s_maxbytes);
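+ /*
+ * Look for a pending CoW mapping first: if writeback covers a range
+ * with a copy-on-write reservation, the data has to be written to
+ * the blocks held in the CoW fork, not the data fork.
+ */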
+ if (xfs_is_reflink_inode(ip) &&
+ xfs_reflink_find_cow_mapping(ip, offset, imap)) {
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ /*
+ * Truncate can race with writeback since writeback doesn't
+ * take the iolock and truncate decreases the file size before
+ * it starts truncating the pages between new_size and old_size.
+ * Therefore, we can end up in the situation where writeback
+ * gets a CoW fork mapping but the truncate makes the mapping
+ * invalid and we end up in here trying to get a new mapping.
+ * Bail out here so that we simply never get a valid mapping
+ * and so we drop the write altogether. The page truncation
+ * will kill the contents anyway.
+ */
+ if (offset > i_size_read(inode))
+ return 0;
+ whichfork = XFS_COW_FORK;
+ *type = XFS_IO_COW;
+ goto done;
+ }
+
if (offset > mp->m_super->s_maxbytes - count)
count = mp->m_super->s_maxbytes - offset;
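+ /*
+ * Convert the byte range to filesystem blocks: XFS_B_TO_FSB rounds
+ * the end offset up to the next block boundary, while XFS_B_TO_FSBT
+ * truncates the start offset down to its containing block.
+ */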
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
- imap, &nimaps, bmapi_flags);
- /*
- * Truncate an overwrite extent if there's a pending CoW
- * reservation before the end of this extent. This forces us
- * to come back to writepage to take care of the CoW.
- */
- if (nimaps && type == XFS_IO_OVERWRITE)
+ imap, &nimaps, XFS_BMAPI_ENTIRE);
+ if (!nimaps) {
+ /*
+ * Lookup returned no match? Beyond EOF? Regardless, return the
+ * range as a hole so we don't write it.
+ */
+ imap->br_startoff = offset_fsb;
+ imap->br_blockcount = end_fsb - offset_fsb;
+ imap->br_startblock = HOLESTARTBLOCK;
+ *type = XFS_IO_HOLE;
+ } else if (imap->br_startblock == HOLESTARTBLOCK) {
+ /* landed in a hole */
+ *type = XFS_IO_HOLE;
+ } else if (isnullstartblock(imap->br_startblock)) {
+ /* got a delalloc extent */
+ *type = XFS_IO_DELALLOC;
+ } else {
+ /*
+ * Got an existing extent for overwrite. Truncate it if there
+ * is a pending CoW reservation before the end of this extent,
+ * so that we pick up the CoW extents in the next iteration.
+ */
xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, imap);
+ if (imap->br_state == XFS_EXT_UNWRITTEN)
+ *type = XFS_IO_UNWRITTEN;
+ else
+ *type = XFS_IO_OVERWRITE;
+ }
xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
if (error)
return error;
- if (type == XFS_IO_DELALLOC &&
- (!nimaps || isnullstartblock(imap->br_startblock))) {
- error = xfs_iomap_write_allocate(ip, XFS_DATA_FORK, offset,
- imap);
- if (!error)
- trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
- return error;
- }
-
-#ifdef DEBUG
- if (type == XFS_IO_UNWRITTEN) {
- ASSERT(nimaps);
- ASSERT(imap->br_startblock != HOLESTARTBLOCK);
- ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+done:
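+ /*
+ * Found extents and holes need no further work here; delalloc and
+ * CoW extents still need real blocks allocated before the write
+ * can proceed.
+ */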
+ switch (*type) {
+ case XFS_IO_HOLE:
+ case XFS_IO_OVERWRITE:
+ case XFS_IO_UNWRITTEN:
+ /* nothing to do! */
+ trace_xfs_map_blocks_found(ip, offset, count, *type, imap);
+ return 0;
+ case XFS_IO_DELALLOC:
+ case XFS_IO_COW:
+ error = xfs_iomap_write_allocate(ip, whichfork, offset, imap);
+ if (error)
+ return error;
+ trace_xfs_map_blocks_alloc(ip, offset, count, *type, imap);
+ return 0;
+ default:
+ ASSERT(0);
+ return -EFSCORRUPTED;
}
-#endif
- if (nimaps)
- trace_xfs_map_blocks_found(ip, offset, count, type, imap);
- return 0;
}
STATIC bool
@@ -709,6 +732,14 @@ xfs_map_at_offset(
set_buffer_mapped(bh);
clear_buffer_delay(bh);
clear_buffer_unwritten(bh);
+
+ /*
+ * If this is a realtime file, data may be on a different device
+ * to that pointed to by the buffer_head's b_bdev currently. We
+ * can't trust that the bufferhead has already been mapped
+ * correctly, so set the bdev now.
+ */
+ bh->b_bdev = xfs_find_bdev_for_inode(inode);
}
STATIC void
@@ -769,56 +800,6 @@ xfs_aops_discard_page(
xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
}
-static int
-xfs_map_cow(
- struct xfs_writepage_ctx *wpc,
- struct inode *inode,
- loff_t offset,
- unsigned int *new_type)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_bmbt_irec imap;
- bool is_cow = false;
- int error;
-
- /*
- * If we already have a valid COW mapping keep using it.
- */
- if (wpc->io_type == XFS_IO_COW) {
- wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset);
- if (wpc->imap_valid) {
- *new_type = XFS_IO_COW;
- return 0;
- }
- }
-
- /*
- * Else we need to check if there is a COW mapping at this offset.
- */
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
- if (!is_cow)
- return 0;
-
- /*
- * And if the COW mapping has a delayed extent here we need to
- * allocate real space for it now.
- */
- if (isnullstartblock(imap.br_startblock)) {
- error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
- &imap);
- if (error)
- return error;
- }
-
- wpc->io_type = *new_type = XFS_IO_COW;
- wpc->imap_valid = true;
- wpc->imap = imap;
- return 0;
-}
-
/*
* We implement an immediate ioend submission policy here to avoid needing to
* chain multiple ioends and hence nest mempool allocations which can violate
@@ -845,85 +826,81 @@ xfs_writepage_map(
{
LIST_HEAD(submit_list);
struct xfs_ioend *ioend, *next;
- struct buffer_head *bh, *head;
+ struct buffer_head *bh;
ssize_t len = i_blocksize(inode);
- uint64_t offset;
int error = 0;
int count = 0;
- int uptodate = 1;
- unsigned int new_type;
+ bool uptodate = true;
+ loff_t file_offset; /* file offset of page */
+ unsigned poffset; /* offset into page */
- bh = head = page_buffers(page);
- offset = page_offset(page);
- do {
- if (offset >= end_offset)
+ /*
+ * Walk the blocks on the page, and if we run off the end of the
+ * current map or find the current map invalid, grab a new one.
+ * We only use bufferheads here to check per-block state - they no
+ * longer control the iteration through the page. This allows us to
+ * replace the bufferhead with some other state tracking mechanism in
+ * future.
+ */
+ file_offset = page_offset(page);
+ bh = page_buffers(page);
+ for (poffset = 0;
+ poffset < PAGE_SIZE;
+ poffset += len, file_offset += len, bh = bh->b_this_page) {
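+ /*
+ * Each iteration advances by one filesystem block, keeping the
+ * bufferhead, page offset and file offset in step.
+ */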
+ /* past the range we are writing, so nothing more to write. */
+ if (file_offset >= end_offset)
break;
- if (!buffer_uptodate(bh))
- uptodate = 0;
/*
- * set_page_dirty dirties all buffers in a page, independent
- * of their state. The dirty state however is entirely
- * meaningless for holes (!mapped && uptodate), so skip
- * buffers covering holes here.
+ * This block does not contain valid data: skip it and mark the
+ * current map invalid because we have a discontiguity. This
+ * ensures we put subsequent writeable buffers into a new ioend.
*/
- if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
- wpc->imap_valid = false;
- continue;
- }
-
- if (buffer_unwritten(bh))
- new_type = XFS_IO_UNWRITTEN;
- else if (buffer_delay(bh))
- new_type = XFS_IO_DELALLOC;
- else if (buffer_uptodate(bh))
- new_type = XFS_IO_OVERWRITE;
- else {
+ if (!buffer_uptodate(bh)) {
if (PageUptodate(page))
ASSERT(buffer_mapped(bh));
- /*
- * This buffer is not uptodate and will not be
- * written to disk. Ensure that we will put any
- * subsequent writeable buffers into a new
- * ioend.
- */
+ uptodate = false;
wpc->imap_valid = false;
continue;
}
- if (xfs_is_reflink_inode(XFS_I(inode))) {
- error = xfs_map_cow(wpc, inode, offset, &new_type);
- if (error)
- goto out;
- }
-
- if (wpc->io_type != new_type) {
- wpc->io_type = new_type;
- wpc->imap_valid = false;
- }
-
+ /* Check to see if the current map spans this file offset */
if (wpc->imap_valid)
wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
- offset);
+ file_offset);
+ /*
+ * If we don't have a valid map, now it's time to get a new one
+ * for this offset. This will convert delayed allocations
+ * (including CoW ones) into real extents. If we land in a hole,
+ * the mapping comes back with the XFS_IO_HOLE type and we skip
+ * the block below.
+ */
if (!wpc->imap_valid) {
- error = xfs_map_blocks(inode, offset, &wpc->imap,
- wpc->io_type);
+ error = xfs_map_blocks(inode, file_offset, &wpc->imap,
+ &wpc->io_type);
if (error)
goto out;
wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
- offset);
+ file_offset);
}
- if (wpc->imap_valid) {
- lock_buffer(bh);
- if (wpc->io_type != XFS_IO_OVERWRITE)
- xfs_map_at_offset(inode, bh, &wpc->imap, offset);
- xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
- count++;
+
+ if (!wpc->imap_valid || wpc->io_type == XFS_IO_HOLE) {
+ /*
+ * set_page_dirty dirties all buffers in a page, independent
+ * of their state. The dirty state however is entirely
+ * meaningless for holes (!mapped && uptodate), so skip buffers
+ * covering holes here and continue to the next block.
+ */
+ continue;
}
- } while (offset += len, ((bh = bh->b_this_page) != head));
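+ /*
+ * We have a writeable, up-to-date block over a valid mapping: map
+ * the buffer to the extent and add it to the ioend for submission.
+ */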
+ lock_buffer(bh);
+ xfs_map_at_offset(inode, bh, &wpc->imap, file_offset);
+ xfs_add_to_ioend(inode, bh, file_offset, wpc, wbc, &submit_list);
+ count++;
+ }
- if (uptodate && bh == head)
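+ /*
+ * Mark the page uptodate if every block we walked was uptodate
+ * and we reached the end of the page.
+ */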
+ if (uptodate && poffset == PAGE_SIZE)
SetPageUptodate(page);
ASSERT(wpc->ioend || list_empty(&submit_list));
@@ -29,6 +29,7 @@ enum {
XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */
XFS_IO_OVERWRITE, /* covers already allocated extent */
XFS_IO_COW, /* covers copy-on-write extent */
+ XFS_IO_HOLE, /* covers region without any block allocation */
};
#define XFS_IO_TYPES \
@@ -36,7 +37,8 @@ enum {
{ XFS_IO_DELALLOC, "delalloc" }, \
{ XFS_IO_UNWRITTEN, "unwritten" }, \
{ XFS_IO_OVERWRITE, "overwrite" }, \
- { XFS_IO_COW, "CoW" }
+ { XFS_IO_COW, "CoW" }, \
+ { XFS_IO_HOLE, "hole" }
/*
* Structure for buffered I/O completions.