@@ -580,9 +580,24 @@ xfs_report_dioalign(
struct xfs_buftarg *target = xfs_inode_buftarg(ip);
struct block_device *bdev = target->bt_bdev;
- stat->result_mask |= STATX_DIOALIGN;
+ stat->result_mask |= STATX_DIOALIGN | STATX_DIO_READ_ALIGN;
stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
- stat->dio_offset_align = bdev_logical_block_size(bdev);
+
+ /*
+ * For COW inodes, we can only perform out of place writes of entire
+ * allocation units (blocks or RT extents).
+ * For writes smaller than the allocation unit, we must fall back to
+ * buffered I/O to perform read-modify-write cycles. At best this is
+ * highly inefficient; at worst it leads to page cache invalidation
+ * races. Tell applications to avoid this by reporting the larger write
+ * alignment in dio_offset_align, and the smaller read alignment in
+ * dio_read_offset_align.
+ */
+ stat->dio_read_offset_align = bdev_logical_block_size(bdev);
+ if (xfs_is_cow_inode(ip))
+ stat->dio_offset_align = xfs_inode_alloc_unitsize(ip);
+ else
+ stat->dio_offset_align = stat->dio_read_offset_align;
}
static void
@@ -658,7 +673,7 @@ xfs_vn_getattr(
stat->rdev = inode->i_rdev;
break;
case S_IFREG:
- if (request_mask & STATX_DIOALIGN)
+ if (request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN))
xfs_report_dioalign(ip, stat);
if (request_mask & STATX_WRITE_ATOMIC)
xfs_report_atomic_write(ip, stat);