@@ -1037,6 +1037,63 @@ static int dax_iomap_direct_access(struct iomap *iomap, loff_t pos, size_t size,
return rc;
}
+/*
+ * Copy the head and tail parts of the pages not covered by the write but
+ * required for CoW, because pos and pos + length are not page-aligned.  In
+ * the dax page-fault case, however, the range is page-aligned, so the whole
+ * range of data must be copied.  Use copy_edge to distinguish these cases.
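+ *
+ * For example, with align_size == 0x1000, pos == 0x2100 and length ==
+ * 0x200: head_off is 0x100, pg_end is 0x3000, the head copy fills
+ * daddr[0..0x100) and the tail copy fills daddr[0x300..0x1000), leaving
+ * daddr[0x100..0x300) to be filled by the write itself.
+ */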
+static int dax_iomap_cow_copy(loff_t pos, loff_t length, size_t align_size,
+ struct iomap *srcmap, void *daddr, bool copy_edge)
+{
+ loff_t head_off = pos & (align_size - 1);
+ size_t size = ALIGN(head_off + length, align_size);
+ loff_t end = pos + length;
+ loff_t pg_end = round_up(end, align_size);
+	void *saddr = NULL;
+ int ret = 0;
+
+ ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL);
+ if (ret)
+ return ret;
+
+	if (!copy_edge) {
+		if (saddr)
+			return copy_mc_to_kernel(daddr, saddr, length) ? -EIO : 0;
+		memset(daddr, 0, length);
+		return 0;
+	}
+
+	/* Copy the head part of the range: head_off doubles as its length. */
+	if (head_off) {
+		if (saddr)
+			ret = copy_mc_to_kernel(daddr, saddr, head_off);
+		else
+			memset(daddr, 0, head_off);
+		if (ret)
+			return -EIO;
+	}
+
+ /* Copy the tail part of the range */
+ if (end < pg_end) {
+ loff_t tail_off = head_off + length;
+ loff_t tail_len = pg_end - end;
+
+ if (saddr)
+ ret = copy_mc_to_kernel(daddr + tail_off,
+ saddr + tail_off, tail_len);
+ else
+ memset(daddr + tail_off, 0, tail_len);
+ }
+
+	return ret ? -EIO : 0;
+}
+
/*
* The user has performed a load from a hole in the file. Allocating a new
* page in the file would cause excessive storage usage for workloads with
@@ -1166,11 +1223,12 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
struct dax_device *dax_dev = iomap->dax_dev;
struct iov_iter *iter = data;
loff_t end = pos + length, done = 0;
+ bool write = iov_iter_rw(iter) == WRITE;
ssize_t ret = 0;
size_t xfer;
int id;
- if (iov_iter_rw(iter) == READ) {
+ if (!write) {
end = min(end, i_size_read(inode));
if (pos >= end)
return 0;
@@ -1179,7 +1237,12 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
return iov_iter_zero(min(length, end - pos), iter);
}
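+	/*
+	 * A CoW write carries IOMAP_F_SHARED, and its destination extent
+	 * need not be IOMAP_MAPPED yet, so let such writes through as well.
+	 */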
- if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
+ if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED &&
+ !(iomap->flags & IOMAP_F_SHARED)))
return -EIO;
/*
@@ -1218,6 +1281,19 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
break;
}
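+		/*
+		 * srcmap and iomap describe different extents only when this
+		 * write must CoW: srcmap holds the old data, iomap the newly
+		 * allocated extent.  Copy the old bytes surrounding the write
+		 * range into the new extent before writing over it.
+		 */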
+ if (write && srcmap->addr != iomap->addr) {
+ ret = dax_iomap_cow_copy(pos, length, PAGE_SIZE, srcmap,
+ kaddr, true);
+ if (ret)
+ break;
+ }
+
map_len = PFN_PHYS(map_len);
kaddr += offset;
map_len -= offset;
@@ -1229,7 +1305,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
* validated via access_ok() in either vfs_read() or
* vfs_write(), depending on which operation we are doing.
*/
- if (iov_iter_rw(iter) == WRITE)
+ if (write)
xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
map_len, iter);
else
@@ -1379,6 +1455,7 @@ static vm_fault_t dax_fault_actor(struct vm_fault *vmf, pfn_t *pfnp,
bool sync = dax_fault_is_synchronous(flags, vmf->vma, iomap);
int err = 0;
pfn_t pfn;
+ void *kaddr;
/* if we are reading UNWRITTEN and HOLE, return a hole. */
if (!write &&
@@ -1389,18 +1466,31 @@ static vm_fault_t dax_fault_actor(struct vm_fault *vmf, pfn_t *pfnp,
return dax_pmd_load_hole(xas, vmf, iomap, &entry);
}
- if (iomap->type != IOMAP_MAPPED) {
+ if (iomap->type != IOMAP_MAPPED && !(iomap->flags & IOMAP_F_SHARED)) {
WARN_ON_ONCE(1);
return VM_FAULT_SIGBUS;
}
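+	/* Also fetch the kernel address: it is the CoW copy destination. */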
- err = dax_iomap_direct_access(iomap, pos, size, NULL, &pfn);
+ err = dax_iomap_direct_access(iomap, pos, size, &kaddr, &pfn);
if (err)
return dax_fault_return(err);
entry = dax_insert_entry(xas, mapping, vmf, entry, pfn, 0,
write && !sync);
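+
+	/*
+	 * Unlike the write path, the fault range is aligned to the fault
+	 * granularity (PTE or PMD size), so the whole range is copied:
+	 * hence align_size == size and copy_edge == false.
+	 */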
+ if (write && srcmap->addr != iomap->addr) {
+ err = dax_iomap_cow_copy(pos, size, size, srcmap, kaddr, false);
+ if (err)
+ return dax_fault_return(err);
+ }
+
if (sync)
return dax_fault_synchronous_pfnp(pfnp, pfn);