diff mbox

[03/34] pnfs: let layoutcommit code handle multiple segments

Message ID 9f228c643de1f0378835d6ec9a493c1320bf001d.1307921137.git.rees@umich.edu (mailing list archive)
State New, archived
Headers show

Commit Message

Jim Rees June 12, 2011, 11:43 p.m. UTC
From: Peng Tao <bergwolf@gmail.com>

Some layout driver like block will have multiple segments.
Generic code should be able to handle it.

Signed-off-by: Peng Tao <peng_tao@emc.com>
---
 fs/nfs/pnfs.c |   17 +++++++++++++----
 fs/nfs/pnfs.h |    1 +
 2 files changed, 14 insertions(+), 4 deletions(-)

Comments

Fred Isaman June 13, 2011, 2:36 p.m. UTC | #1
On Sun, Jun 12, 2011 at 7:43 PM, Jim Rees <rees@umich.edu> wrote:
> From: Peng Tao <bergwolf@gmail.com>
>
> Some layout driver like block will have multiple segments.
> Generic code should be able to handle it.
>
> Signed-off-by: Peng Tao <peng_tao@emc.com>
> ---
>  fs/nfs/pnfs.c |   17 +++++++++++++----
>  fs/nfs/pnfs.h |    1 +
>  2 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index e3d618b..f03a5e0 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -892,7 +892,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
>        dprintk("%s:Begin\n", __func__);
>
>        assert_spin_locked(&lo->plh_inode->i_lock);
> -       list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
> +       list_for_each_entry_reverse(lseg, &lo->plh_segs, pls_list) {

This is a sortred list, and the order of search matters.  You can't
just reverse it here.

>                if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
>                    is_matching_lseg(&lseg->pls_range, range)) {
>                        ret = get_lseg(lseg);
> @@ -1193,10 +1193,18 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
>  static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode)
>  {
>        struct pnfs_layout_segment *lseg, *rv = NULL;
> +       loff_t max_pos = 0;
> +
> +       list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
> +               if (lseg->pls_range.iomode == IOMODE_RW) {
> +                       if (max_pos < lseg->pls_end_pos)
> +                               max_pos = lseg->pls_end_pos;
> +                       if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
> +                               rv = lseg;
> +               }
> +       }
> +       rv->pls_end_pos = max_pos;
>

The idea here was that it could be extended to use segment by
returning a list of affected lsegs,
not so,e random one.  Because otherwise you have problems with the
fact that relevant but not
returned lsegs are going to get there refcounts messed up.

Fred

> -       list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
> -               if (lseg->pls_range.iomode == IOMODE_RW)
> -                       rv = lseg;
>        return rv;
>  }
>
> @@ -1211,6 +1219,7 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)
>        if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
>                /* references matched in nfs4_layoutcommit_release */
>                get_lseg(wdata->lseg);
> +               set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags);
>                wdata->lseg->pls_lc_cred =
>                        get_rpccred(wdata->args.context->state->owner->so_cred);
>                mark_as_dirty = true;
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index b071b56..a3fc0f2 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -36,6 +36,7 @@
>  enum {
>        NFS_LSEG_VALID = 0,     /* cleared when lseg is recalled/returned */
>        NFS_LSEG_ROC,           /* roc bit received from server */
> +       NFS_LSEG_LAYOUTCOMMIT,  /* layoutcommit bit set for layoutcommit */
>  };
>
>  struct pnfs_layout_segment {
> --
> 1.7.4.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
tao.peng@emc.com June 14, 2011, 10:40 a.m. UTC | #2
Hi, Fred,

> -----Original Message-----
> From: linux-nfs-owner@vger.kernel.org [mailto:linux-nfs-owner@vger.kernel.org]
> On Behalf Of Fred Isaman
> Sent: Monday, June 13, 2011 10:37 PM
> To: Jim Rees
> Cc: linux-nfs@vger.kernel.org; peter honeyman
> Subject: Re: [PATCH 03/34] pnfs: let layoutcommit code handle multiple segments
> 
> On Sun, Jun 12, 2011 at 7:43 PM, Jim Rees <rees@umich.edu> wrote:
> > From: Peng Tao <bergwolf@gmail.com>
> >
> > Some layout driver like block will have multiple segments.
> > Generic code should be able to handle it.
> >
> > Signed-off-by: Peng Tao <peng_tao@emc.com>
> > ---
> >  fs/nfs/pnfs.c |   17 +++++++++++++----
> >  fs/nfs/pnfs.h |    1 +
> >  2 files changed, 14 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> > index e3d618b..f03a5e0 100644
> > --- a/fs/nfs/pnfs.c
> > +++ b/fs/nfs/pnfs.c
> > @@ -892,7 +892,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
> >        dprintk("%s:Begin\n", __func__);
> >
> >        assert_spin_locked(&lo->plh_inode->i_lock);
> > -       list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
> > +       list_for_each_entry_reverse(lseg, &lo->plh_segs, pls_list) {
> 
> This is a sortred list, and the order of search matters.  You can't
> just reverse it here.
The layout segment list is in offset increasing order. But the lookup code here assumes it's a decreasing ordered list.
To fix it, we should either reverse lookup the list, or change the break condition test. Otherwise lookup always fails if not matching the first one.

> 
> >                if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
> >                    is_matching_lseg(&lseg->pls_range, range)) {
> >                        ret = get_lseg(lseg);
> > @@ -1193,10 +1193,18 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
> >  static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode)
> >  {
> >        struct pnfs_layout_segment *lseg, *rv = NULL;
> > +       loff_t max_pos = 0;
> > +
> > +       list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
> > +               if (lseg->pls_range.iomode == IOMODE_RW) {
> > +                       if (max_pos < lseg->pls_end_pos)
> > +                               max_pos = lseg->pls_end_pos;
> > +                       if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
> &lseg->pls_flags))
> > +                               rv = lseg;
> > +               }
> > +       }
> > +       rv->pls_end_pos = max_pos;
> >
> 
> The idea here was that it could be extended to use segment by
> returning a list of affected lsegs,
> not so,e random one.  Because otherwise you have problems with the
> fact that relevant but not
> returned lsegs are going to get there refcounts messed up.
The above code relies on NFS_INO_LAYOUTCOMMIT bit to ensure that only one inode lseg has NFS_LSEG_LAYOUTCOMMIT set. But, you are right. The layoutcommit code needs a second thought.
How about making it return a list of affected lsegs and pass them around layoutcommit_procs?

Thanks,
Tao

> 
> Fred
> 
> > -       list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
> > -               if (lseg->pls_range.iomode == IOMODE_RW)
> > -                       rv = lseg;
> >        return rv;
> >  }
> >
> > @@ -1211,6 +1219,7 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)
> >        if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
> >                /* references matched in nfs4_layoutcommit_release */
> >                get_lseg(wdata->lseg);
> > +               set_bit(NFS_LSEG_LAYOUTCOMMIT,
> &wdata->lseg->pls_flags);
> >                wdata->lseg->pls_lc_cred =
> >                        get_rpccred(wdata->args.context->state->owner-
> >so_cred);
> >                mark_as_dirty = true;
> > diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> > index b071b56..a3fc0f2 100644
> > --- a/fs/nfs/pnfs.h
> > +++ b/fs/nfs/pnfs.h
> > @@ -36,6 +36,7 @@
> >  enum {
> >        NFS_LSEG_VALID = 0,     /* cleared when lseg is recalled/returned */
> >        NFS_LSEG_ROC,           /* roc bit received from server */
> > +       NFS_LSEG_LAYOUTCOMMIT,  /* layoutcommit bit set for
> layoutcommit */
> >  };
> >
> >  struct pnfs_layout_segment {
> > --
> > 1.7.4.1
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Fred Isaman June 14, 2011, 1:58 p.m. UTC | #3
On Tue, Jun 14, 2011 at 6:40 AM,  <tao.peng@emc.com> wrote:
> Hi, Fred,
>
>> -----Original Message-----
>> From: linux-nfs-owner@vger.kernel.org [mailto:linux-nfs-owner@vger.kernel.org]
>> On Behalf Of Fred Isaman
>> Sent: Monday, June 13, 2011 10:37 PM
>> To: Jim Rees
>> Cc: linux-nfs@vger.kernel.org; peter honeyman
>> Subject: Re: [PATCH 03/34] pnfs: let layoutcommit code handle multiple segments
>>
>> On Sun, Jun 12, 2011 at 7:43 PM, Jim Rees <rees@umich.edu> wrote:
>> > From: Peng Tao <bergwolf@gmail.com>
>> >
>> > Some layout driver like block will have multiple segments.
>> > Generic code should be able to handle it.
>> >
>> > Signed-off-by: Peng Tao <peng_tao@emc.com>
>> > ---
>> >  fs/nfs/pnfs.c |   17 +++++++++++++----
>> >  fs/nfs/pnfs.h |    1 +
>> >  2 files changed, 14 insertions(+), 4 deletions(-)
>> >
>> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> > index e3d618b..f03a5e0 100644
>> > --- a/fs/nfs/pnfs.c
>> > +++ b/fs/nfs/pnfs.c
>> > @@ -892,7 +892,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
>> >        dprintk("%s:Begin\n", __func__);
>> >
>> >        assert_spin_locked(&lo->plh_inode->i_lock);
>> > -       list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
>> > +       list_for_each_entry_reverse(lseg, &lo->plh_segs, pls_list) {
>>
>> This is a sortred list, and the order of search matters.  You can't
>> just reverse it here.
> The layout segment list is in offset increasing order. But the lookup code here assumes it's a decreasing ordered list.
> To fix it, we should either reverse lookup the list, or change the break condition test. Otherwise lookup always fails if not matching the first one.

I agree there is a problem here that affects the generic code.  I've
just sent a separate patch that deals with that.

Fred

>
>>
>> >                if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
>> >                    is_matching_lseg(&lseg->pls_range, range)) {
>> >                        ret = get_lseg(lseg);
>> > @@ -1193,10 +1193,18 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
>> >  static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode)
>> >  {
>> >        struct pnfs_layout_segment *lseg, *rv = NULL;
>> > +       loff_t max_pos = 0;
>> > +
>> > +       list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
>> > +               if (lseg->pls_range.iomode == IOMODE_RW) {
>> > +                       if (max_pos < lseg->pls_end_pos)
>> > +                               max_pos = lseg->pls_end_pos;
>> > +                       if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
>> &lseg->pls_flags))
>> > +                               rv = lseg;
>> > +               }
>> > +       }
>> > +       rv->pls_end_pos = max_pos;
>> >
>>
>> The idea here was that it could be extended to use segment by
>> returning a list of affected lsegs,
>> not so,e random one.  Because otherwise you have problems with the
>> fact that relevant but not
>> returned lsegs are going to get there refcounts messed up.
> The above code relies on NFS_INO_LAYOUTCOMMIT bit to ensure that only one inode lseg has NFS_LSEG_LAYOUTCOMMIT set. But, you are right. The layoutcommit code needs a second thought.
> How about making it return a list of affected lsegs and pass them around layoutcommit_procs?
>
> Thanks,
> Tao
>
>>
>> Fred
>>
>> > -       list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
>> > -               if (lseg->pls_range.iomode == IOMODE_RW)
>> > -                       rv = lseg;
>> >        return rv;
>> >  }
>> >
>> > @@ -1211,6 +1219,7 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)
>> >        if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
>> >                /* references matched in nfs4_layoutcommit_release */
>> >                get_lseg(wdata->lseg);
>> > +               set_bit(NFS_LSEG_LAYOUTCOMMIT,
>> &wdata->lseg->pls_flags);
>> >                wdata->lseg->pls_lc_cred =
>> >                        get_rpccred(wdata->args.context->state->owner-
>> >so_cred);
>> >                mark_as_dirty = true;
>> > diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>> > index b071b56..a3fc0f2 100644
>> > --- a/fs/nfs/pnfs.h
>> > +++ b/fs/nfs/pnfs.h
>> > @@ -36,6 +36,7 @@
>> >  enum {
>> >        NFS_LSEG_VALID = 0,     /* cleared when lseg is recalled/returned */
>> >        NFS_LSEG_ROC,           /* roc bit received from server */
>> > +       NFS_LSEG_LAYOUTCOMMIT,  /* layoutcommit bit set for
>> layoutcommit */
>> >  };
>> >
>> >  struct pnfs_layout_segment {
>> > --
>> > 1.7.4.1
>> >
>> > --
>> > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> > the body of a message to majordomo@vger.kernel.org
>> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> >
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benny Halevy June 14, 2011, 2:28 p.m. UTC | #4
On 2011-06-14 06:40, tao.peng@emc.com wrote:
> Hi, Fred,
> 
>> -----Original Message-----
>> From: linux-nfs-owner@vger.kernel.org [mailto:linux-nfs-owner@vger.kernel.org]
>> On Behalf Of Fred Isaman
>> Sent: Monday, June 13, 2011 10:37 PM
>> To: Jim Rees
>> Cc: linux-nfs@vger.kernel.org; peter honeyman
>> Subject: Re: [PATCH 03/34] pnfs: let layoutcommit code handle multiple segments
>>
>> On Sun, Jun 12, 2011 at 7:43 PM, Jim Rees <rees@umich.edu> wrote:
>>> From: Peng Tao <bergwolf@gmail.com>
>>>
>>> Some layout driver like block will have multiple segments.
>>> Generic code should be able to handle it.
>>>
>>> Signed-off-by: Peng Tao <peng_tao@emc.com>
>>> ---
>>>  fs/nfs/pnfs.c |   17 +++++++++++++----
>>>  fs/nfs/pnfs.h |    1 +
>>>  2 files changed, 14 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>>> index e3d618b..f03a5e0 100644
>>> --- a/fs/nfs/pnfs.c
>>> +++ b/fs/nfs/pnfs.c
>>> @@ -892,7 +892,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
>>>        dprintk("%s:Begin\n", __func__);
>>>
>>>        assert_spin_locked(&lo->plh_inode->i_lock);
>>> -       list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
>>> +       list_for_each_entry_reverse(lseg, &lo->plh_segs, pls_list) {
>>
>> This is a sortred list, and the order of search matters.  You can't
>> just reverse it here.
> The layout segment list is in offset increasing order. But the lookup code here assumes it's a decreasing ordered list.
> To fix it, we should either reverse lookup the list, or change the break condition test. Otherwise lookup always fails if not matching the first one.
> 

We shouldn't scan the list in reverse.
I'll send a fix upstream to fix the break condition.
This got broken when I last changed cmp_layout.
Basically we want to break out of the loop once we can't find a layout covering the first
byte of the range we're looking up.

Benny

>>
>>>                if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
>>>                    is_matching_lseg(&lseg->pls_range, range)) {
>>>                        ret = get_lseg(lseg);
>>> @@ -1193,10 +1193,18 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
>>>  static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode)
>>>  {
>>>        struct pnfs_layout_segment *lseg, *rv = NULL;
>>> +       loff_t max_pos = 0;
>>> +
>>> +       list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
>>> +               if (lseg->pls_range.iomode == IOMODE_RW) {
>>> +                       if (max_pos < lseg->pls_end_pos)
>>> +                               max_pos = lseg->pls_end_pos;
>>> +                       if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
>> &lseg->pls_flags))
>>> +                               rv = lseg;
>>> +               }
>>> +       }
>>> +       rv->pls_end_pos = max_pos;
>>>
>>
>> The idea here was that it could be extended to use segment by
>> returning a list of affected lsegs,
>> not so,e random one.  Because otherwise you have problems with the
>> fact that relevant but not
>> returned lsegs are going to get there refcounts messed up.
> The above code relies on NFS_INO_LAYOUTCOMMIT bit to ensure that only one inode lseg has NFS_LSEG_LAYOUTCOMMIT set. But, you are right. The layoutcommit code needs a second thought.
> How about making it return a list of affected lsegs and pass them around layoutcommit_procs?
> 
> Thanks,
> Tao
> 
>>
>> Fred
>>
>>> -       list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
>>> -               if (lseg->pls_range.iomode == IOMODE_RW)
>>> -                       rv = lseg;
>>>        return rv;
>>>  }
>>>
>>> @@ -1211,6 +1219,7 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)
>>>        if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
>>>                /* references matched in nfs4_layoutcommit_release */
>>>                get_lseg(wdata->lseg);
>>> +               set_bit(NFS_LSEG_LAYOUTCOMMIT,
>> &wdata->lseg->pls_flags);
>>>                wdata->lseg->pls_lc_cred =
>>>                        get_rpccred(wdata->args.context->state->owner-
>>> so_cred);
>>>                mark_as_dirty = true;
>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>> index b071b56..a3fc0f2 100644
>>> --- a/fs/nfs/pnfs.h
>>> +++ b/fs/nfs/pnfs.h
>>> @@ -36,6 +36,7 @@
>>>  enum {
>>>        NFS_LSEG_VALID = 0,     /* cleared when lseg is recalled/returned */
>>>        NFS_LSEG_ROC,           /* roc bit received from server */
>>> +       NFS_LSEG_LAYOUTCOMMIT,  /* layoutcommit bit set for
>> layoutcommit */
>>>  };
>>>
>>>  struct pnfs_layout_segment {
>>> --
>>> 1.7.4.1
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e3d618b..f03a5e0 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -892,7 +892,7 @@  pnfs_find_lseg(struct pnfs_layout_hdr *lo,
 	dprintk("%s:Begin\n", __func__);
 
 	assert_spin_locked(&lo->plh_inode->i_lock);
-	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
+	list_for_each_entry_reverse(lseg, &lo->plh_segs, pls_list) {
 		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
 		    is_matching_lseg(&lseg->pls_range, range)) {
 			ret = get_lseg(lseg);
@@ -1193,10 +1193,18 @@  pnfs_try_to_read_data(struct nfs_read_data *rdata,
 static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode)
 {
 	struct pnfs_layout_segment *lseg, *rv = NULL;
+	loff_t max_pos = 0;
+
+	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
+		if (lseg->pls_range.iomode == IOMODE_RW) {
+			if (max_pos < lseg->pls_end_pos)
+				max_pos = lseg->pls_end_pos;
+			if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+				rv = lseg;
+		}
+	}
+	rv->pls_end_pos = max_pos;
 
-	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
-		if (lseg->pls_range.iomode == IOMODE_RW)
-			rv = lseg;
 	return rv;
 }
 
@@ -1211,6 +1219,7 @@  pnfs_set_layoutcommit(struct nfs_write_data *wdata)
 	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
 		/* references matched in nfs4_layoutcommit_release */
 		get_lseg(wdata->lseg);
+		set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags);
 		wdata->lseg->pls_lc_cred =
 			get_rpccred(wdata->args.context->state->owner->so_cred);
 		mark_as_dirty = true;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index b071b56..a3fc0f2 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -36,6 +36,7 @@ 
 enum {
 	NFS_LSEG_VALID = 0,	/* cleared when lseg is recalled/returned */
 	NFS_LSEG_ROC,		/* roc bit received from server */
+	NFS_LSEG_LAYOUTCOMMIT,	/* layoutcommit bit set for layoutcommit */
 };
 
 struct pnfs_layout_segment {