@@ -22,6 +22,7 @@ Currently, these files are in /proc/sys/fs:
- aio-max-nr
- aio-nr
- dentry-state
+- dio_short_writes
- dquot-max
- dquot-nr
- file-max
@@ -76,6 +77,19 @@ dcache isn't pruned yet.
==============================================================
+dio_short_writes:
+
+Without this flag, when Direct I/O encounters a transient error it
+returns the error code, even if it has performed part of the write.
+This flag, if on (default), will return the number of bytes written
+so far, as write(2) semantics require. However, some older applications
+still consider a direct write as an error if not all of the I/O
+submitted is complete, i.e. write(fd, buf, count) != count.
+This option can be disabled on systems in order to support
+existing applications which do not expect short writes.
+
+==============================================================
+
dquot-max & dquot-nr:
The file dquot-max shows the maximum number of cached disk
@@ -409,7 +409,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
if (!ret)
ret = blk_status_to_errno(dio->bio.bi_status);
- if (likely(!ret))
+ if (likely(dio->size))
ret = dio->size;
bio_put(&dio->bio);
@@ -151,6 +151,7 @@ struct dio {
} ____cacheline_aligned_in_smp;
static struct kmem_cache *dio_cache __read_mostly;
+unsigned int sysctl_dio_short_writes = 1;
/*
* How many pages are in the queue?
@@ -262,7 +263,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
ret = dio->page_errors;
if (ret == 0)
ret = dio->io_error;
- if (ret == 0)
+ if (!sysctl_dio_short_writes && (ret == 0))
ret = transferred;
if (dio->end_io) {
@@ -310,7 +311,9 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
}
kmem_cache_free(dio_cache, dio);
- return ret;
+ if (!sysctl_dio_short_writes)
+ return ret;
+ return transferred ? transferred : ret;
}
static void dio_aio_complete_work(struct work_struct *work)
@@ -716,23 +716,24 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
struct kiocb *iocb = dio->iocb;
struct inode *inode = file_inode(iocb->ki_filp);
loff_t offset = iocb->ki_pos;
- ssize_t ret;
+ ssize_t err;
+ ssize_t transferred = dio->size;
if (dio->end_io) {
- ret = dio->end_io(iocb,
- dio->error ? dio->error : dio->size,
- dio->flags);
+ err = dio->end_io(iocb,
+ (transferred && sysctl_dio_short_writes) ?
+ transferred : dio->error,
+ dio->flags);
} else {
- ret = dio->error;
+ err = dio->error;
}
- if (likely(!ret)) {
- ret = dio->size;
+ if (likely(transferred)) {
/* check for short read */
- if (offset + ret > dio->i_size &&
+ if (offset + transferred > dio->i_size &&
!(dio->flags & IOMAP_DIO_WRITE))
- ret = dio->i_size - offset;
- iocb->ki_pos += ret;
+ transferred = dio->i_size - offset;
+ iocb->ki_pos += transferred;
}
/*
@@ -759,7 +760,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
inode_dio_end(file_inode(iocb->ki_filp));
kfree(dio);
- return ret;
+ return (transferred && sysctl_dio_short_writes) ? transferred : err;
}
static void iomap_dio_complete_work(struct work_struct *work)
@@ -1469,6 +1469,7 @@ static inline void i_gid_write(struct inode *inode, gid_t gid)
}
extern struct timespec current_time(struct inode *inode);
+extern unsigned int sysctl_dio_short_writes;
/*
* Snapshotting support.
@@ -1844,6 +1844,15 @@ static struct ctl_table fs_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
+ {
+ .procname = "dio_short_writes",
+ .data = &sysctl_dio_short_writes,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};