diff mbox

[V4,1/2] ceph: Implement readv/preadv for sync operation.

Message ID 201309121325129235088@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

majianpeng Sept. 12, 2013, 5:25 a.m. UTC
For readv/preadv sync operations, ceph only handles the first iov and
ignores the remaining iovs. Implement support for all of them.

V4:
  - fix one bug.
V3:
  - fix some bugs.
V2:
  - add generic_segment_checks
  - use struct iov_iter instead of cloning the iovs.
  - return the number of bytes already copied successfully if
    ceph_copy_page_vector_to_user hits an error.


Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>

Reviewed-by: Yan, Zheng <zheng.z.yan@intel.com>

---
  fs/ceph/file.c | 157 ++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 106 insertions(+), 51 deletions(-)

-- 
1.8.1.2

Comments

Yan, Zheng Sept. 12, 2013, 5:38 a.m. UTC | #1
Reviewed-by: Yan, Zheng <zheng.z.yan@intel.com>

On 09/12/2013 01:25 PM, majianpeng wrote:
> For readv/preadv sync-operatoin, ceph only do the first iov.
> It don't think other iovs.Now implement this.
> 
> V4:
> 	modify one bug.
> V3: 
>    modify some bug.
> V2:
>   -add generic_segment_checks
>   -using struct iov_iter replace cloning the iovs.
>   -return previous successfully copied if ceph_copy_page_vector_to_user
>    met error.
> 
> 
> Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
> Reviewed-by: Yan, Zheng <zheng.z.yan@intel.com>
> ---
>   fs/ceph/file.c | 157 ++++++++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 106 insertions(+), 51 deletions(-)
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 3de8982..bc7fa52 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -408,51 +408,94 @@ more:
>   *
>   * If the read spans object boundary, just do multiple reads.
>   */
> -static ssize_t ceph_sync_read(struct file *file, char __user *data,
> -			      unsigned len, loff_t *poff, int *checkeof)
> +static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
> +				int *checkeof)
>  {
> +	struct file *file = iocb->ki_filp;
>  	struct inode *inode = file_inode(file);
>  	struct page **pages;
> -	u64 off = *poff;
> +	u64 off = iocb->ki_pos;
>  	int num_pages, ret;
>  
> -	dout("sync_read on file %p %llu~%u %s\n", file, off, len,
> +	dout("sync_read on file %p %llu~%u %s\n", file, off,
> +	     (unsigned)iocb->ki_left,
>  	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
> -
> -	if (file->f_flags & O_DIRECT) {
> -		num_pages = calc_pages_for((unsigned long)data, len);
> -		pages = ceph_get_direct_page_vector(data, num_pages, true);
> -	} else {
> -		num_pages = calc_pages_for(off, len);
> -		pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
> -	}
> -	if (IS_ERR(pages))
> -		return PTR_ERR(pages);
> -
>  	/*
>  	 * flush any page cache pages in this range.  this
>  	 * will make concurrent normal and sync io slow,
>  	 * but it will at least behave sensibly when they are
>  	 * in sequence.
>  	 */
> -	ret = filemap_write_and_wait(inode->i_mapping);
> +	ret = filemap_write_and_wait_range(inode->i_mapping, off,
> +						off + iocb->ki_left);
>  	if (ret < 0)
> -		goto done;
> -
> -	ret = striped_read(inode, off, len, pages, num_pages, checkeof,
> -			   file->f_flags & O_DIRECT,
> -			   (unsigned long)data & ~PAGE_MASK);
> +		return ret;
>  
> -	if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
> -		ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
> -	if (ret >= 0)
> -		*poff = off + ret;
> +	if (file->f_flags & O_DIRECT) {
> +		while (iov_iter_count(i)) {
> +			void __user *data = i->iov[0].iov_base + i->iov_offset;
> +			size_t len = i->iov[0].iov_len - i->iov_offset;
> +
> +			num_pages = calc_pages_for((unsigned long)data, len);
> +			pages = ceph_get_direct_page_vector(data,
> +							    num_pages, true);
> +			if (IS_ERR(pages))
> +				return PTR_ERR(pages);
> +
> +			ret = striped_read(inode, off, len,
> +					   pages, num_pages, checkeof,
> +					   1, (unsigned long)data & ~PAGE_MASK);
> +			ceph_put_page_vector(pages, num_pages, true);
> +
> +			if (ret <= 0)
> +				break;
> +			off += ret;
> +			iov_iter_advance(i, ret);
> +			if (ret < len)
> +				break;
> +		}
> +	} else {
> +		size_t len = iocb->ki_left;
>  
> -done:
> -	if (file->f_flags & O_DIRECT)
> -		ceph_put_page_vector(pages, num_pages, true);
> -	else
> +		num_pages = calc_pages_for(off, len);
> +		pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
> +		if (IS_ERR(pages))
> +			return PTR_ERR(pages);
> +		ret = striped_read(inode, off, len, pages,
> +					num_pages, checkeof, 0, 0);
> +		if (ret > 0) {
> +			int l, k = 0;
> +			size_t left = len = ret;
> +
> +			while (left) {
> +				void __user *data = i->iov[0].iov_base
> +							+ i->iov_offset;
> +				l = min(i->iov[0].iov_len - i->iov_offset,
> +					left);
> +
> +				ret = ceph_copy_page_vector_to_user(&pages[k],
> +								    data, off,
> +								    l);
> +				if (ret > 0) {
> +					iov_iter_advance(i, ret);
> +					left -= ret;
> +					off += ret;
> +					k = calc_pages_for(iocb->ki_pos,
> +							   len - left + 1) - 1;
> +					BUG_ON(k >= num_pages && left);
> +				} else
> +					break;
> +			}
> +		}
>  		ceph_release_page_vector(pages, num_pages);
> +	}
> +
> +	if (off > iocb->ki_pos) {
> +		ret = off - iocb->ki_pos;
> +		iocb->ki_pos = off;
> +		iocb->ki_left -= ret;
> +	}
> +
>  	dout("sync_read result %d\n", ret);
>  	return ret;
>  }
> @@ -647,55 +690,67 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
>  {
>  	struct file *filp = iocb->ki_filp;
>  	struct ceph_file_info *fi = filp->private_data;
> -	loff_t *ppos = &iocb->ki_pos;
> -	size_t len = iov->iov_len;
> +	size_t len = 0;
>  	struct inode *inode = file_inode(filp);
>  	struct ceph_inode_info *ci = ceph_inode(inode);
> -	void __user *base = iov->iov_base;
>  	ssize_t ret;
>  	int want, got = 0;
>  	int checkeof = 0, read = 0;
>  
>  	dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
>  	     inode, ceph_vinop(inode), pos, (unsigned)len, inode);
> -again:
> +
> +	ret = generic_segment_checks(iov, &nr_segs, &len, VERIFY_WRITE);
> +	if (ret)
> +		return ret;
> +
>  	if (fi->fmode & CEPH_FILE_MODE_LAZY)
>  		want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
>  	else
>  		want = CEPH_CAP_FILE_CACHE;
>  	ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
>  	if (ret < 0)
> -		goto out;
> +		return ret;
> +
>  	dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
>  	     inode, ceph_vinop(inode), pos, (unsigned)len,
>  	     ceph_cap_string(got));
>  
>  	if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
>  	    (iocb->ki_filp->f_flags & O_DIRECT) ||
> -	    (fi->flags & CEPH_F_SYNC))
> +	    (fi->flags & CEPH_F_SYNC)) {
> +		struct iov_iter i;
> +
> +		iocb->ki_left = len;
> +		iov_iter_init(&i, iov, nr_segs, len, 0);
> +again:
>  		/* hmm, this isn't really async... */
> -		ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
> -	else
> +		ret = ceph_sync_read(iocb, &i, &checkeof);
> +
> +		if (checkeof && ret >= 0) {
> +			int statret = ceph_do_getattr(inode,
> +						      CEPH_STAT_CAP_SIZE);
> +
> +			/* hit EOF or hole? */
> +			if (statret == 0 && iocb->ki_pos < inode->i_size &&
> +				iocb->ki_left) {
> +				dout("sync_read hit hole, ppos %lld < size %lld"
> +				     ", reading more\n", iocb->ki_pos,
> +				     inode->i_size);
> +
> +				read += ret;
> +				checkeof = 0;
> +				goto again;
> +			}
> +		}
> +
> +	} else
>  		ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
>  
> -out:
>  	dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
>  	     inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
>  	ceph_put_cap_refs(ci, got);
>  
> -	if (checkeof && ret >= 0) {
> -		int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
> -
> -		/* hit EOF or hole? */
> -		if (statret == 0 && *ppos < inode->i_size) {
> -			dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size);
> -			read += ret;
> -			base += ret;
> -			len -= ret;
> -			checkeof = 0;
> -			goto again;
> -		}
> -	}
>  	if (ret >= 0)
>  		ret += read;
>  
> 

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yan, Zheng Sept. 22, 2013, 2:04 a.m. UTC | #2
On Thu, Sep 12, 2013 at 1:25 PM, majianpeng <majianpeng@gmail.com> wrote:
> For readv/preadv sync-operatoin, ceph only do the first iov.
> It don't think other iovs.Now implement this.
>
> V4:
>         modify one bug.
> V3:
>    modify some bug.
> V2:
>   -add generic_segment_checks
>   -using struct iov_iter replace cloning the iovs.
>   -return previous successfully copied if ceph_copy_page_vector_to_user
>    met error.
>
>
> Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
> Reviewed-by: Yan, Zheng <zheng.z.yan@intel.com>
> ---
>   fs/ceph/file.c | 157 ++++++++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 106 insertions(+), 51 deletions(-)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 3de8982..bc7fa52 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -408,51 +408,94 @@ more:
>   *
>   * If the read spans object boundary, just do multiple reads.
>   */
> -static ssize_t ceph_sync_read(struct file *file, char __user *data,
> -                             unsigned len, loff_t *poff, int *checkeof)
> +static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
> +                               int *checkeof)
>  {
> +       struct file *file = iocb->ki_filp;
>         struct inode *inode = file_inode(file);
>         struct page **pages;
> -       u64 off = *poff;
> +       u64 off = iocb->ki_pos;
>         int num_pages, ret;
>
> -       dout("sync_read on file %p %llu~%u %s\n", file, off, len,
> +       dout("sync_read on file %p %llu~%u %s\n", file, off,
> +            (unsigned)iocb->ki_left,
>              (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
> -
> -       if (file->f_flags & O_DIRECT) {
> -               num_pages = calc_pages_for((unsigned long)data, len);
> -               pages = ceph_get_direct_page_vector(data, num_pages, true);
> -       } else {
> -               num_pages = calc_pages_for(off, len);
> -               pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
> -       }
> -       if (IS_ERR(pages))
> -               return PTR_ERR(pages);
> -
>         /*
>          * flush any page cache pages in this range.  this
>          * will make concurrent normal and sync io slow,
>          * but it will at least behave sensibly when they are
>          * in sequence.
>          */
> -       ret = filemap_write_and_wait(inode->i_mapping);
> +       ret = filemap_write_and_wait_range(inode->i_mapping, off,
> +                                               off + iocb->ki_left);
>         if (ret < 0)
> -               goto done;
> -
> -       ret = striped_read(inode, off, len, pages, num_pages, checkeof,
> -                          file->f_flags & O_DIRECT,
> -                          (unsigned long)data & ~PAGE_MASK);
> +               return ret;
>
> -       if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
> -               ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
> -       if (ret >= 0)
> -               *poff = off + ret;
> +       if (file->f_flags & O_DIRECT) {
> +               while (iov_iter_count(i)) {
> +                       void __user *data = i->iov[0].iov_base + i->iov_offset;
> +                       size_t len = i->iov[0].iov_len - i->iov_offset;
> +
> +                       num_pages = calc_pages_for((unsigned long)data, len);
> +                       pages = ceph_get_direct_page_vector(data,
> +                                                           num_pages, true);
> +                       if (IS_ERR(pages))
> +                               return PTR_ERR(pages);
> +
> +                       ret = striped_read(inode, off, len,
> +                                          pages, num_pages, checkeof,
> +                                          1, (unsigned long)data & ~PAGE_MASK);
> +                       ceph_put_page_vector(pages, num_pages, true);
> +
> +                       if (ret <= 0)
> +                               break;
> +                       off += ret;
> +                       iov_iter_advance(i, ret);
> +                       if (ret < len)
> +                               break;
> +               }
> +       } else {
> +               size_t len = iocb->ki_left;
>
> -done:
> -       if (file->f_flags & O_DIRECT)
> -               ceph_put_page_vector(pages, num_pages, true);
> -       else
> +               num_pages = calc_pages_for(off, len);
> +               pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
> +               if (IS_ERR(pages))
> +                       return PTR_ERR(pages);
> +               ret = striped_read(inode, off, len, pages,
> +                                       num_pages, checkeof, 0, 0);
> +               if (ret > 0) {
> +                       int l, k = 0;
> +                       size_t left = len = ret;
> +
> +                       while (left) {
> +                               void __user *data = i->iov[0].iov_base
> +                                                       + i->iov_offset;
> +                               l = min(i->iov[0].iov_len - i->iov_offset,
> +                                       left);
> +
> +                               ret = ceph_copy_page_vector_to_user(&pages[k],
> +                                                                   data, off,
> +                                                                   l);
> +                               if (ret > 0) {
> +                                       iov_iter_advance(i, ret);
> +                                       left -= ret;
> +                                       off += ret;
> +                                       k = calc_pages_for(iocb->ki_pos,
> +                                                          len - left + 1) - 1;
> +                                       BUG_ON(k >= num_pages && left);
> +                               } else
> +                                       break;
> +                       }
> +               }
>                 ceph_release_page_vector(pages, num_pages);
> +       }
> +
> +       if (off > iocb->ki_pos) {
> +               ret = off - iocb->ki_pos;
> +               iocb->ki_pos = off;
> +               iocb->ki_left -= ret;
> +       }
> +
>         dout("sync_read result %d\n", ret);
>         return ret;
>  }
> @@ -647,55 +690,67 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
>  {
>         struct file *filp = iocb->ki_filp;
>         struct ceph_file_info *fi = filp->private_data;
> -       loff_t *ppos = &iocb->ki_pos;
> -       size_t len = iov->iov_len;
> +       size_t len = 0;
>         struct inode *inode = file_inode(filp);
>         struct ceph_inode_info *ci = ceph_inode(inode);
> -       void __user *base = iov->iov_base;
>         ssize_t ret;
>         int want, got = 0;
>         int checkeof = 0, read = 0;
>
>         dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
>              inode, ceph_vinop(inode), pos, (unsigned)len, inode);
> -again:
> +
> +       ret = generic_segment_checks(iov, &nr_segs, &len, VERIFY_WRITE);
> +       if (ret)
> +               return ret;
> +
>         if (fi->fmode & CEPH_FILE_MODE_LAZY)
>                 want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
>         else
>                 want = CEPH_CAP_FILE_CACHE;
>         ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
>         if (ret < 0)
> -               goto out;
> +               return ret;
> +
>         dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
>              inode, ceph_vinop(inode), pos, (unsigned)len,
>              ceph_cap_string(got));
>
>         if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
>             (iocb->ki_filp->f_flags & O_DIRECT) ||
> -           (fi->flags & CEPH_F_SYNC))
> +           (fi->flags & CEPH_F_SYNC)) {
> +               struct iov_iter i;
> +
> +               iocb->ki_left = len;
> +               iov_iter_init(&i, iov, nr_segs, len, 0);
> +again:
>                 /* hmm, this isn't really async... */
> -               ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
> -       else
> +               ret = ceph_sync_read(iocb, &i, &checkeof);
> +
> +               if (checkeof && ret >= 0) {
> +                       int statret = ceph_do_getattr(inode,
> +                                                     CEPH_STAT_CAP_SIZE);

It's wrong to move the getattr call here, because calling getattr while
holding the Fr cap can cause a hang.

Regards
Yan, Zheng

> +
> +                       /* hit EOF or hole? */
> +                       if (statret == 0 && iocb->ki_pos < inode->i_size &&
> +                               iocb->ki_left) {
> +                               dout("sync_read hit hole, ppos %lld < size %lld"
> +                                    ", reading more\n", iocb->ki_pos,
> +                                    inode->i_size);
> +
> +                               read += ret;
> +                               checkeof = 0;
> +                               goto again;
> +                       }
> +               }
> +
> +       } else
>                 ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
>
> -out:
>         dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
>              inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
>         ceph_put_cap_refs(ci, got);
>
> -       if (checkeof && ret >= 0) {
> -               int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
> -
> -               /* hit EOF or hole? */
> -               if (statret == 0 && *ppos < inode->i_size) {
> -                       dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size);
> -                       read += ret;
> -                       base += ret;
> -                       len -= ret;
> -                       checkeof = 0;
> -                       goto again;
> -               }
> -       }
>         if (ret >= 0)
>                 ret += read;
>
> --
> 1.8.1.2
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
majianpeng Sept. 22, 2013, 3 a.m. UTC | #3
>On Thu, Sep 12, 2013 at 1:25 PM, majianpeng <majianpeng@gmail.com> wrote:
>> For readv/preadv sync-operatoin, ceph only do the first iov.
>> It don't think other iovs.Now implement this.
>>
>> V4:
>>         modify one bug.
>> V3:
>>    modify some bug.
>> V2:
>>   -add generic_segment_checks
>>   -using struct iov_iter replace cloning the iovs.
>>   -return previous successfully copied if ceph_copy_page_vector_to_user
>>    met error.
>>
>>
>> Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
>> Reviewed-by: Yan, Zheng <zheng.z.yan@intel.com>
>> ---
>>   fs/ceph/file.c | 157 ++++++++++++++++++++++++++++++++++++++-------------------
>>  1 file changed, 106 insertions(+), 51 deletions(-)
>>
>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>> index 3de8982..bc7fa52 100644
>> --- a/fs/ceph/file.c
>> +++ b/fs/ceph/file.c
>> @@ -408,51 +408,94 @@ more:
>>   *
>>   * If the read spans object boundary, just do multiple reads.
>>   */
>> -static ssize_t ceph_sync_read(struct file *file, char __user *data,
>> -                             unsigned len, loff_t *poff, int *checkeof)
>> +static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
>> +                               int *checkeof)
>>  {
>> +       struct file *file = iocb->ki_filp;
>>         struct inode *inode = file_inode(file);
>>         struct page **pages;
>> -       u64 off = *poff;
>> +       u64 off = iocb->ki_pos;
>>         int num_pages, ret;
>>
>> -       dout("sync_read on file %p %llu~%u %s\n", file, off, len,
>> +       dout("sync_read on file %p %llu~%u %s\n", file, off,
>> +            (unsigned)iocb->ki_left,
>>              (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
>> -
>> -       if (file->f_flags & O_DIRECT) {
>> -               num_pages = calc_pages_for((unsigned long)data, len);
>> -               pages = ceph_get_direct_page_vector(data, num_pages, true);
>> -       } else {
>> -               num_pages = calc_pages_for(off, len);
>> -               pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
>> -       }
>> -       if (IS_ERR(pages))
>> -               return PTR_ERR(pages);
>> -
>>         /*
>>          * flush any page cache pages in this range.  this
>>          * will make concurrent normal and sync io slow,
>>          * but it will at least behave sensibly when they are
>>          * in sequence.
>>          */
>> -       ret = filemap_write_and_wait(inode->i_mapping);
>> +       ret = filemap_write_and_wait_range(inode->i_mapping, off,
>> +                                               off + iocb->ki_left);
>>         if (ret < 0)
>> -               goto done;
>> -
>> -       ret = striped_read(inode, off, len, pages, num_pages, checkeof,
>> -                          file->f_flags & O_DIRECT,
>> -                          (unsigned long)data & ~PAGE_MASK);
>> +               return ret;
>>
>> -       if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
>> -               ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
>> -       if (ret >= 0)
>> -               *poff = off + ret;
>> +       if (file->f_flags & O_DIRECT) {
>> +               while (iov_iter_count(i)) {
>> +                       void __user *data = i->iov[0].iov_base + i->iov_offset;
>> +                       size_t len = i->iov[0].iov_len - i->iov_offset;
>> +
>> +                       num_pages = calc_pages_for((unsigned long)data, len);
>> +                       pages = ceph_get_direct_page_vector(data,
>> +                                                           num_pages, true);
>> +                       if (IS_ERR(pages))
>> +                               return PTR_ERR(pages);
>> +
>> +                       ret = striped_read(inode, off, len,
>> +                                          pages, num_pages, checkeof,
>> +                                          1, (unsigned long)data & ~PAGE_MASK);
>> +                       ceph_put_page_vector(pages, num_pages, true);
>> +
>> +                       if (ret <= 0)
>> +                               break;
>> +                       off += ret;
>> +                       iov_iter_advance(i, ret);
>> +                       if (ret < len)
>> +                               break;
>> +               }
>> +       } else {
>> +               size_t len = iocb->ki_left;
>>
>> -done:
>> -       if (file->f_flags & O_DIRECT)
>> -               ceph_put_page_vector(pages, num_pages, true);
>> -       else
>> +               num_pages = calc_pages_for(off, len);
>> +               pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
>> +               if (IS_ERR(pages))
>> +                       return PTR_ERR(pages);
>> +               ret = striped_read(inode, off, len, pages,
>> +                                       num_pages, checkeof, 0, 0);
>> +               if (ret > 0) {
>> +                       int l, k = 0;
>> +                       size_t left = len = ret;
>> +
>> +                       while (left) {
>> +                               void __user *data = i->iov[0].iov_base
>> +                                                       + i->iov_offset;
>> +                               l = min(i->iov[0].iov_len - i->iov_offset,
>> +                                       left);
>> +
>> +                               ret = ceph_copy_page_vector_to_user(&pages[k],
>> +                                                                   data, off,
>> +                                                                   l);
>> +                               if (ret > 0) {
>> +                                       iov_iter_advance(i, ret);
>> +                                       left -= ret;
>> +                                       off += ret;
>> +                                       k = calc_pages_for(iocb->ki_pos,
>> +                                                          len - left + 1) - 1;
>> +                                       BUG_ON(k >= num_pages && left);
>> +                               } else
>> +                                       break;
>> +                       }
>> +               }
>>                 ceph_release_page_vector(pages, num_pages);
>> +       }
>> +
>> +       if (off > iocb->ki_pos) {
>> +               ret = off - iocb->ki_pos;
>> +               iocb->ki_pos = off;
>> +               iocb->ki_left -= ret;
>> +       }
>> +
>>         dout("sync_read result %d\n", ret);
>>         return ret;
>>  }
>> @@ -647,55 +690,67 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
>>  {
>>         struct file *filp = iocb->ki_filp;
>>         struct ceph_file_info *fi = filp->private_data;
>> -       loff_t *ppos = &iocb->ki_pos;
>> -       size_t len = iov->iov_len;
>> +       size_t len = 0;
>>         struct inode *inode = file_inode(filp);
>>         struct ceph_inode_info *ci = ceph_inode(inode);
>> -       void __user *base = iov->iov_base;
>>         ssize_t ret;
>>         int want, got = 0;
>>         int checkeof = 0, read = 0;
>>
>>         dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
>>              inode, ceph_vinop(inode), pos, (unsigned)len, inode);
>> -again:
>> +
>> +       ret = generic_segment_checks(iov, &nr_segs, &len, VERIFY_WRITE);
>> +       if (ret)
>> +               return ret;
>> +
>>         if (fi->fmode & CEPH_FILE_MODE_LAZY)
>>                 want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
>>         else
>>                 want = CEPH_CAP_FILE_CACHE;
>>         ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
>>         if (ret < 0)
>> -               goto out;
>> +               return ret;
>> +
>>         dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
>>              inode, ceph_vinop(inode), pos, (unsigned)len,
>>              ceph_cap_string(got));
>>
>>         if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
>>             (iocb->ki_filp->f_flags & O_DIRECT) ||
>> -           (fi->flags & CEPH_F_SYNC))
>> +           (fi->flags & CEPH_F_SYNC)) {
>> +               struct iov_iter i;
>> +
>> +               iocb->ki_left = len;
>> +               iov_iter_init(&i, iov, nr_segs, len, 0);
>> +again:
>>                 /* hmm, this isn't really async... */
>> -               ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
>> -       else
>> +               ret = ceph_sync_read(iocb, &i, &checkeof);
>> +
>> +               if (checkeof && ret >= 0) {
>> +                       int statret = ceph_do_getattr(inode,
>> +                                                     CEPH_STAT_CAP_SIZE);
>
>It's wrong to move getattr to here. because getattr while holding Fr
>cap can cause hang.
>
>Regards
>Yan, Zheng
>
Hi,
	Can you explain in detail?

Thanks!
Jianpeng Ma

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yan, Zheng Sept. 22, 2013, 4:17 a.m. UTC | #4
On 09/22/2013 11:00 AM, majianpeng wrote:
>> On Thu, Sep 12, 2013 at 1:25 PM, majianpeng <majianpeng@gmail.com> wrote:
>>> For readv/preadv sync-operatoin, ceph only do the first iov.
>>> It don't think other iovs.Now implement this.
>>>
>>> V4:
>>>         modify one bug.
>>> V3:
>>>    modify some bug.
>>> V2:
>>>   -add generic_segment_checks
>>>   -using struct iov_iter replace cloning the iovs.
>>>   -return previous successfully copied if ceph_copy_page_vector_to_user
>>>    met error.
>>>
>>>
>>> Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
>>> Reviewed-by: Yan, Zheng <zheng.z.yan@intel.com>
>>> ---
>>>   fs/ceph/file.c | 157 ++++++++++++++++++++++++++++++++++++++-------------------
>>>  1 file changed, 106 insertions(+), 51 deletions(-)
>>>
>>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>>> index 3de8982..bc7fa52 100644
>>> --- a/fs/ceph/file.c
>>> +++ b/fs/ceph/file.c
>>> @@ -408,51 +408,94 @@ more:
>>>   *
>>>   * If the read spans object boundary, just do multiple reads.
>>>   */
>>> -static ssize_t ceph_sync_read(struct file *file, char __user *data,
>>> -                             unsigned len, loff_t *poff, int *checkeof)
>>> +static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
>>> +                               int *checkeof)
>>>  {
>>> +       struct file *file = iocb->ki_filp;
>>>         struct inode *inode = file_inode(file);
>>>         struct page **pages;
>>> -       u64 off = *poff;
>>> +       u64 off = iocb->ki_pos;
>>>         int num_pages, ret;
>>>
>>> -       dout("sync_read on file %p %llu~%u %s\n", file, off, len,
>>> +       dout("sync_read on file %p %llu~%u %s\n", file, off,
>>> +            (unsigned)iocb->ki_left,
>>>              (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
>>> -
>>> -       if (file->f_flags & O_DIRECT) {
>>> -               num_pages = calc_pages_for((unsigned long)data, len);
>>> -               pages = ceph_get_direct_page_vector(data, num_pages, true);
>>> -       } else {
>>> -               num_pages = calc_pages_for(off, len);
>>> -               pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
>>> -       }
>>> -       if (IS_ERR(pages))
>>> -               return PTR_ERR(pages);
>>> -
>>>         /*
>>>          * flush any page cache pages in this range.  this
>>>          * will make concurrent normal and sync io slow,
>>>          * but it will at least behave sensibly when they are
>>>          * in sequence.
>>>          */
>>> -       ret = filemap_write_and_wait(inode->i_mapping);
>>> +       ret = filemap_write_and_wait_range(inode->i_mapping, off,
>>> +                                               off + iocb->ki_left);
>>>         if (ret < 0)
>>> -               goto done;
>>> -
>>> -       ret = striped_read(inode, off, len, pages, num_pages, checkeof,
>>> -                          file->f_flags & O_DIRECT,
>>> -                          (unsigned long)data & ~PAGE_MASK);
>>> +               return ret;
>>>
>>> -       if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
>>> -               ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
>>> -       if (ret >= 0)
>>> -               *poff = off + ret;
>>> +       if (file->f_flags & O_DIRECT) {
>>> +               while (iov_iter_count(i)) {
>>> +                       void __user *data = i->iov[0].iov_base + i->iov_offset;
>>> +                       size_t len = i->iov[0].iov_len - i->iov_offset;
>>> +
>>> +                       num_pages = calc_pages_for((unsigned long)data, len);
>>> +                       pages = ceph_get_direct_page_vector(data,
>>> +                                                           num_pages, true);
>>> +                       if (IS_ERR(pages))
>>> +                               return PTR_ERR(pages);
>>> +
>>> +                       ret = striped_read(inode, off, len,
>>> +                                          pages, num_pages, checkeof,
>>> +                                          1, (unsigned long)data & ~PAGE_MASK);
>>> +                       ceph_put_page_vector(pages, num_pages, true);
>>> +
>>> +                       if (ret <= 0)
>>> +                               break;
>>> +                       off += ret;
>>> +                       iov_iter_advance(i, ret);
>>> +                       if (ret < len)
>>> +                               break;
>>> +               }
>>> +       } else {
>>> +               size_t len = iocb->ki_left;
>>>
>>> -done:
>>> -       if (file->f_flags & O_DIRECT)
>>> -               ceph_put_page_vector(pages, num_pages, true);
>>> -       else
>>> +               num_pages = calc_pages_for(off, len);
>>> +               pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
>>> +               if (IS_ERR(pages))
>>> +                       return PTR_ERR(pages);
>>> +               ret = striped_read(inode, off, len, pages,
>>> +                                       num_pages, checkeof, 0, 0);
>>> +               if (ret > 0) {
>>> +                       int l, k = 0;
>>> +                       size_t left = len = ret;
>>> +
>>> +                       while (left) {
>>> +                               void __user *data = i->iov[0].iov_base
>>> +                                                       + i->iov_offset;
>>> +                               l = min(i->iov[0].iov_len - i->iov_offset,
>>> +                                       left);
>>> +
>>> +                               ret = ceph_copy_page_vector_to_user(&pages[k],
>>> +                                                                   data, off,
>>> +                                                                   l);
>>> +                               if (ret > 0) {
>>> +                                       iov_iter_advance(i, ret);
>>> +                                       left -= ret;
>>> +                                       off += ret;
>>> +                                       k = calc_pages_for(iocb->ki_pos,
>>> +                                                          len - left + 1) - 1;
>>> +                                       BUG_ON(k >= num_pages && left);
>>> +                               } else
>>> +                                       break;
>>> +                       }
>>> +               }
>>>                 ceph_release_page_vector(pages, num_pages);
>>> +       }
>>> +
>>> +       if (off > iocb->ki_pos) {
>>> +               ret = off - iocb->ki_pos;
>>> +               iocb->ki_pos = off;
>>> +               iocb->ki_left -= ret;
>>> +       }
>>> +
>>>         dout("sync_read result %d\n", ret);
>>>         return ret;
>>>  }
>>> @@ -647,55 +690,67 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
>>>  {
>>>         struct file *filp = iocb->ki_filp;
>>>         struct ceph_file_info *fi = filp->private_data;
>>> -       loff_t *ppos = &iocb->ki_pos;
>>> -       size_t len = iov->iov_len;
>>> +       size_t len = 0;
>>>         struct inode *inode = file_inode(filp);
>>>         struct ceph_inode_info *ci = ceph_inode(inode);
>>> -       void __user *base = iov->iov_base;
>>>         ssize_t ret;
>>>         int want, got = 0;
>>>         int checkeof = 0, read = 0;
>>>
>>>         dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
>>>              inode, ceph_vinop(inode), pos, (unsigned)len, inode);
>>> -again:
>>> +
>>> +       ret = generic_segment_checks(iov, &nr_segs, &len, VERIFY_WRITE);
>>> +       if (ret)
>>> +               return ret;
>>> +
>>>         if (fi->fmode & CEPH_FILE_MODE_LAZY)
>>>                 want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
>>>         else
>>>                 want = CEPH_CAP_FILE_CACHE;
>>>         ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
>>>         if (ret < 0)
>>> -               goto out;
>>> +               return ret;
>>> +
>>>         dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
>>>              inode, ceph_vinop(inode), pos, (unsigned)len,
>>>              ceph_cap_string(got));
>>>
>>>         if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
>>>             (iocb->ki_filp->f_flags & O_DIRECT) ||
>>> -           (fi->flags & CEPH_F_SYNC))
>>> +           (fi->flags & CEPH_F_SYNC)) {
>>> +               struct iov_iter i;
>>> +
>>> +               iocb->ki_left = len;
>>> +               iov_iter_init(&i, iov, nr_segs, len, 0);
>>> +again:
>>>                 /* hmm, this isn't really async... */
>>> -               ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
>>> -       else
>>> +               ret = ceph_sync_read(iocb, &i, &checkeof);
>>> +
>>> +               if (checkeof && ret >= 0) {
>>> +                       int statret = ceph_do_getattr(inode,
>>> +                                                     CEPH_STAT_CAP_SIZE);
>>
>> It's wrong to move getattr here, because calling getattr while holding
>> the Fr cap can cause a hang.
>>
>> Regards
>> Yan, Zheng
>>
> Hi,
> 	Can you explain in detail?


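getattr needs to "read lock" the inode's filelock, but that lock can be in an
unstable state. The getattr request then waits for the lock's state to become
stable, while the lock waits for the client to release the Fr cap. Since the
client is still holding the Fr cap while it waits for getattr, neither side can
make progress and the read hangs.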

Your patches are already in the master branch of ceph-client, so please send an
incremental patch to fix the issue.

Regards
Yan, Zheng.


> 
> Thanks!
> Jianpeng Ma
> 

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 3de8982..bc7fa52 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -408,51 +408,94 @@  more:
  *
  * If the read spans object boundary, just do multiple reads.
  */
-static ssize_t ceph_sync_read(struct file *file, char __user *data,
-			      unsigned len, loff_t *poff, int *checkeof)
+static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
+				int *checkeof)
 {
+	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	struct page **pages;
-	u64 off = *poff;
+	u64 off = iocb->ki_pos;
 	int num_pages, ret;
 
-	dout("sync_read on file %p %llu~%u %s\n", file, off, len,
+	dout("sync_read on file %p %llu~%u %s\n", file, off,
+	     (unsigned)iocb->ki_left,
 	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
-
-	if (file->f_flags & O_DIRECT) {
-		num_pages = calc_pages_for((unsigned long)data, len);
-		pages = ceph_get_direct_page_vector(data, num_pages, true);
-	} else {
-		num_pages = calc_pages_for(off, len);
-		pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
-	}
-	if (IS_ERR(pages))
-		return PTR_ERR(pages);
-
 	/*
 	 * flush any page cache pages in this range.  this
 	 * will make concurrent normal and sync io slow,
 	 * but it will at least behave sensibly when they are
 	 * in sequence.
 	 */
-	ret = filemap_write_and_wait(inode->i_mapping);
+	ret = filemap_write_and_wait_range(inode->i_mapping, off,
+						off + iocb->ki_left);
 	if (ret < 0)
-		goto done;
-
-	ret = striped_read(inode, off, len, pages, num_pages, checkeof,
-			   file->f_flags & O_DIRECT,
-			   (unsigned long)data & ~PAGE_MASK);
+		return ret;
 
-	if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
-		ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
-	if (ret >= 0)
-		*poff = off + ret;
+	if (file->f_flags & O_DIRECT) {
+		while (iov_iter_count(i)) {
+			void __user *data = i->iov[0].iov_base + i->iov_offset;
+			size_t len = i->iov[0].iov_len - i->iov_offset;
+
+			num_pages = calc_pages_for((unsigned long)data, len);
+			pages = ceph_get_direct_page_vector(data,
+							    num_pages, true);
+			if (IS_ERR(pages))
+				return PTR_ERR(pages);
+
+			ret = striped_read(inode, off, len,
+					   pages, num_pages, checkeof,
+					   1, (unsigned long)data & ~PAGE_MASK);
+			ceph_put_page_vector(pages, num_pages, true);
+
+			if (ret <= 0)
+				break;
+			off += ret;
+			iov_iter_advance(i, ret);
+			if (ret < len)
+				break;
+		}
+	} else {
+		size_t len = iocb->ki_left;
 
-done:
-	if (file->f_flags & O_DIRECT)
-		ceph_put_page_vector(pages, num_pages, true);
-	else
+		num_pages = calc_pages_for(off, len);
+		pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
+		if (IS_ERR(pages))
+			return PTR_ERR(pages);
+		ret = striped_read(inode, off, len, pages,
+					num_pages, checkeof, 0, 0);
+		if (ret > 0) {
+			int l, k = 0;
+			size_t left = len = ret;
+
+			while (left) {
+				void __user *data = i->iov[0].iov_base
+							+ i->iov_offset;
+				l = min(i->iov[0].iov_len - i->iov_offset,
+					left);
+
+				ret = ceph_copy_page_vector_to_user(&pages[k],
+								    data, off,
+								    l);
+				if (ret > 0) {
+					iov_iter_advance(i, ret);
+					left -= ret;
+					off += ret;
+					k = calc_pages_for(iocb->ki_pos,
+							   len - left + 1) - 1;
+					BUG_ON(k >= num_pages && left);
+				} else
+					break;
+			}
+		}
 		ceph_release_page_vector(pages, num_pages);
+	}
+
+	if (off > iocb->ki_pos) {
+		ret = off - iocb->ki_pos;
+		iocb->ki_pos = off;
+		iocb->ki_left -= ret;
+	}
+
 	dout("sync_read result %d\n", ret);
 	return ret;
 }
@@ -647,55 +690,67 @@  static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct file *filp = iocb->ki_filp;
 	struct ceph_file_info *fi = filp->private_data;
-	loff_t *ppos = &iocb->ki_pos;
-	size_t len = iov->iov_len;
+	size_t len = 0;
 	struct inode *inode = file_inode(filp);
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	void __user *base = iov->iov_base;
 	ssize_t ret;
 	int want, got = 0;
 	int checkeof = 0, read = 0;
 
 	dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
 	     inode, ceph_vinop(inode), pos, (unsigned)len, inode);
-again:
+
+	ret = generic_segment_checks(iov, &nr_segs, &len, VERIFY_WRITE);
+	if (ret)
+		return ret;
+
 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
 		want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
 	else
 		want = CEPH_CAP_FILE_CACHE;
 	ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
 	if (ret < 0)
-		goto out;
+		return ret;
+
 	dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
 	     inode, ceph_vinop(inode), pos, (unsigned)len,
 	     ceph_cap_string(got));
 
 	if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
 	    (iocb->ki_filp->f_flags & O_DIRECT) ||
-	    (fi->flags & CEPH_F_SYNC))
+	    (fi->flags & CEPH_F_SYNC)) {
+		struct iov_iter i;
+
+		iocb->ki_left = len;
+		iov_iter_init(&i, iov, nr_segs, len, 0);
+again:
 		/* hmm, this isn't really async... */
-		ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
-	else
+		ret = ceph_sync_read(iocb, &i, &checkeof);
+
+		if (checkeof && ret >= 0) {
+			int statret = ceph_do_getattr(inode,
+						      CEPH_STAT_CAP_SIZE);
+
+			/* hit EOF or hole? */
+			if (statret == 0 && iocb->ki_pos < inode->i_size &&
+				iocb->ki_left) {
+				dout("sync_read hit hole, ppos %lld < size %lld"
+				     ", reading more\n", iocb->ki_pos,
+				     inode->i_size);
+
+				read += ret;
+				checkeof = 0;
+				goto again;
+			}
+		}
+
+	} else
 		ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
 
-out:
 	dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
 	     inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
 	ceph_put_cap_refs(ci, got);
 
-	if (checkeof && ret >= 0) {
-		int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
-
-		/* hit EOF or hole? */
-		if (statret == 0 && *ppos < inode->i_size) {
-			dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size);
-			read += ret;
-			base += ret;
-			len -= ret;
-			checkeof = 0;
-			goto again;
-		}
-	}
 	if (ret >= 0)
 		ret += read;