Message ID | 20220227093434.2889464-3-jhubbard@nvidia.com (mailing list archive) |
---|---|
State | Deferred, archived |
Headers | show |
Series | block, fs: convert most Direct IO cases to FOLL_PIN | expand |
> +ssize_t iov_iter_pin_pages(struct iov_iter *i, > + struct page **pages, size_t maxsize, unsigned int maxpages, > + size_t *start) > +{ > + size_t len; > + int n, res; > + > + if (maxsize > i->count) > + maxsize = i->count; > + if (!maxsize) > + return 0; > + > + WARN_ON_ONCE(!iter_is_iovec(i)); > + > + if (likely(iter_is_iovec(i))) { > + unsigned int gup_flags = 0; > + unsigned long addr; > + > + if (iov_iter_rw(i) != WRITE) > + gup_flags |= FOLL_WRITE; > + if (i->nofault) > + gup_flags |= FOLL_NOFAULT; > + > + addr = first_iovec_segment(i, &len, start, maxsize, maxpages); > + n = DIV_ROUND_UP(len, PAGE_SIZE); > + res = pin_user_pages_fast(addr, n, gup_flags, pages); > + if (unlikely(res <= 0)) > + return res; > + return (res == n ? len : res * PAGE_SIZE) - *start; Trying to be clever like that just makes the code a lot less readable. I should not have to reason about a return value. Same in the other function.
On 2/27/22 13:57, Jens Axboe wrote: ... >> + return res; >> + return (res == n ? len : res * PAGE_SIZE) - *start; > > Trying to be clever like that just makes the code a lot less readable. I > should not have to reason about a return value. Same in the other > function. > No argument there. This was shamelessly lifted from iov_iter_get_pages(), and I initially calculated that keeping it identical to that known-good code, where possible, was better than fixing it up. However, I'll go ahead and simplify it--with pleasure--based on this feedback. thanks,
On 2/27/22 13:57, Jens Axboe wrote: >> +ssize_t iov_iter_pin_pages(struct iov_iter *i, >> + struct page **pages, size_t maxsize, unsigned int maxpages, >> + size_t *start) >> +{ >> + size_t len; >> + int n, res; >> + >> + if (maxsize > i->count) >> + maxsize = i->count; >> + if (!maxsize) >> + return 0; >> + >> + WARN_ON_ONCE(!iter_is_iovec(i)); >> + >> + if (likely(iter_is_iovec(i))) { >> + unsigned int gup_flags = 0; >> + unsigned long addr; >> + >> + if (iov_iter_rw(i) != WRITE) >> + gup_flags |= FOLL_WRITE; >> + if (i->nofault) >> + gup_flags |= FOLL_NOFAULT; >> + >> + addr = first_iovec_segment(i, &len, start, maxsize, maxpages); >> + n = DIV_ROUND_UP(len, PAGE_SIZE); >> + res = pin_user_pages_fast(addr, n, gup_flags, pages); >> + if (unlikely(res <= 0)) >> + return res; >> + return (res == n ? len : res * PAGE_SIZE) - *start; > > Trying to be clever like that just makes the code a lot less readable. I > should not have to reason about a return value. Same in the other > function. > Here is a differential patch on top of this one, and only showing one of the two routines. How does this direction look to you? diff --git a/lib/iov_iter.c b/lib/iov_iter.c index e64e8e4edd0c..8e96f1e9ebc6 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1588,7 +1588,17 @@ ssize_t iov_iter_pin_pages(struct iov_iter *i, res = pin_user_pages_fast(addr, n, gup_flags, pages); if (unlikely(res <= 0)) return res; - return (res == n ? len : res * PAGE_SIZE) - *start; + + /* Cap len at the number of pages that were actually pinned: */ + if (res < n) + len = res * PAGE_SIZE; + + /* + * The return value is the amount pinned in bytes that the + * caller will actually use. So, reduce it by the offset into + * the first page: + */ + return len - *start; } return -EFAULT; thanks,
diff --git a/include/linux/uio.h b/include/linux/uio.h index 739285fe5a2f..208020c2b75a 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -236,6 +236,10 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); +ssize_t iov_iter_pin_pages(struct iov_iter *i, struct page **pages, + size_t maxsize, unsigned int maxpages, size_t *start); +ssize_t iov_iter_pin_pages_alloc(struct iov_iter *i, struct page ***pages, + size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 6dd5330f7a99..e64e8e4edd0c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1560,6 +1560,41 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, } EXPORT_SYMBOL(iov_iter_get_pages); +ssize_t iov_iter_pin_pages(struct iov_iter *i, + struct page **pages, size_t maxsize, unsigned int maxpages, + size_t *start) +{ + size_t len; + int n, res; + + if (maxsize > i->count) + maxsize = i->count; + if (!maxsize) + return 0; + + WARN_ON_ONCE(!iter_is_iovec(i)); + + if (likely(iter_is_iovec(i))) { + unsigned int gup_flags = 0; + unsigned long addr; + + if (iov_iter_rw(i) != WRITE) + gup_flags |= FOLL_WRITE; + if (i->nofault) + gup_flags |= FOLL_NOFAULT; + + addr = first_iovec_segment(i, &len, start, maxsize, maxpages); + n = DIV_ROUND_UP(len, PAGE_SIZE); + res = pin_user_pages_fast(addr, n, gup_flags, pages); + if (unlikely(res <= 0)) + return res; + return (res == n ? len : res * PAGE_SIZE) - *start; + } + + return -EFAULT; +} +EXPORT_SYMBOL(iov_iter_pin_pages); + static struct page **get_pages_array(size_t n) { return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL); @@ -1696,6 +1731,49 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, } EXPORT_SYMBOL(iov_iter_get_pages_alloc); +ssize_t iov_iter_pin_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, + size_t *start) +{ + struct page **p; + size_t len; + int n, res; + + if (maxsize > i->count) + maxsize = i->count; + if (!maxsize) + return 0; + + WARN_ON_ONCE(!iter_is_iovec(i)); + + if (likely(iter_is_iovec(i))) { + unsigned int gup_flags = 0; + unsigned long addr; + + if (iov_iter_rw(i) != WRITE) + gup_flags |= FOLL_WRITE; + if (i->nofault) + gup_flags |= FOLL_NOFAULT; + + addr = first_iovec_segment(i, &len, start, maxsize, ~0U); + n = DIV_ROUND_UP(len, PAGE_SIZE); + p = get_pages_array(n); + if (!p) + return -ENOMEM; + res = pin_user_pages_fast(addr, n, gup_flags, p); + if (unlikely(res <= 0)) { + kvfree(p); + *pages = NULL; + return res; + } + *pages = p; + return (res == n ? len : res * PAGE_SIZE) - *start; + } + + return -EFAULT; +} +EXPORT_SYMBOL(iov_iter_pin_pages_alloc); + size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) {