diff mbox

[v2,1/3] pNFS: recoalesce when ld write pagelist fails

Message ID 1313197213-1651-2-git-send-email-bergwolf@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Peng Tao Aug. 13, 2011, 1 a.m. UTC
For pnfs pagelist write failure, we need to pg_recoalesce and resend
IO to mds.

Signed-off-by: Peng Tao <peng_tao@emc.com>
---
 fs/nfs/pnfs.c  |   20 +++++++-------------
 fs/nfs/pnfs.h  |    2 +-
 fs/nfs/write.c |   25 ++++++++++++++++++++++++-
 3 files changed, 32 insertions(+), 15 deletions(-)

Comments

Peng Tao Aug. 23, 2011, 2:45 p.m. UTC | #1
Hi, Benny and Boaz,

On Sat, Aug 13, 2011 at 9:00 AM, Peng Tao <bergwolf@gmail.com> wrote:
> For pnfs pagelist write failure, we need to pg_recoalesce and resend
> IO to mds.
>
> Signed-off-by: Peng Tao <peng_tao@emc.com>
> ---
>  fs/nfs/pnfs.c  |   20 +++++++-------------
>  fs/nfs/pnfs.h  |    2 +-
>  fs/nfs/write.c |   25 ++++++++++++++++++++++++-
>  3 files changed, 32 insertions(+), 15 deletions(-)
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index e550e88..ad0579a 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1168,23 +1168,17 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
>  /*
>  * Called by non rpc-based layout drivers
>  */
> -int
> -pnfs_ld_write_done(struct nfs_write_data *data)
> +void pnfs_ld_write_done(struct nfs_write_data *data)
>  {
> -       int status;
> -
> -       if (!data->pnfs_error) {
> +       if (likely(!data->pnfs_error)) {
>                pnfs_set_layoutcommit(data);
>                data->mds_ops->rpc_call_done(&data->task, data);
> -               data->mds_ops->rpc_release(data);
> -               return 0;
> +       } else {
> +               put_lseg(data->lseg);
> +               data->lseg = NULL;
> +               dprintk("pnfs write error = %d\n", data->pnfs_error);
>        }
> -
> -       dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
> -               data->pnfs_error);
> -       status = nfs_initiate_write(data, NFS_CLIENT(data->inode),
> -                                   data->mds_ops, NFS_FILE_SYNC);
> -       return status ? : -EAGAIN;
> +       data->mds_ops->rpc_release(data);
>  }
>  EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
>
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 01cbfd5..0fb0a41 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -200,7 +200,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
>  void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
>  int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
>  int _pnfs_return_layout(struct inode *);
> -int pnfs_ld_write_done(struct nfs_write_data *);
> +void pnfs_ld_write_done(struct nfs_write_data *);
>  int pnfs_ld_read_done(struct nfs_read_data *);
>  struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
>                                               struct nfs_open_context *ctx,
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index b39b37f..26b8d90 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -1165,7 +1165,13 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
>  static void nfs_writeback_release_full(void *calldata)
>  {
>        struct nfs_write_data   *data = calldata;
> -       int status = data->task.tk_status;
> +       int ret, status = data->task.tk_status;
> +       struct nfs_pageio_descriptor pgio;
> +
> +       if (data->pnfs_error) {
> +               nfs_pageio_init_write_mds(&pgio, data->inode, FLUSH_STABLE);
> +               pgio.pg_recoalesce = 1;
> +       }
>
>        /* Update attributes as result of writeback. */
>        while (!list_empty(&data->pages)) {
> @@ -1181,6 +1187,11 @@ static void nfs_writeback_release_full(void *calldata)
>                        req->wb_bytes,
>                        (long long)req_offset(req));
>
> +               if (data->pnfs_error) {
> +                       dprintk(", pnfs error = %d\n", data->pnfs_error);
> +                       goto next;
> +               }
> +
>                if (status < 0) {
>                        nfs_set_pageerror(page);
>                        nfs_context_set_write_error(req->wb_context, status);
> @@ -1200,7 +1211,19 @@ remove_request:
>        next:
>                nfs_clear_page_tag_locked(req);
>                nfs_end_page_writeback(page);
> +               if (data->pnfs_error) {
> +                       lock_page(page);
> +                       nfs_pageio_cond_complete(&pgio, page->index);
> +                       ret = nfs_page_async_flush(&pgio, page, 0);
> +                       if (ret) {
> +                               nfs_set_pageerror(page);
My only concern about the patch is here. If we fail at
nfs_page_async_flush(), is it enough to just call nfs_set_pageerror()
on the page? Do we need to redirty the page as well, and maybe some
other things?

Thanks,
Tao
> +                               dprintk(", error = %d\n", ret);
> +                       }
> +                       unlock_page(page);
> +               }
>        }
> +       if (data->pnfs_error)
> +               nfs_pageio_complete(&pgio);
>        nfs_writedata_release(calldata);
>  }
>
> --
> 1.7.1.262.g5ef3d
>
>
Boaz Harrosh Aug. 27, 2011, 2:43 a.m. UTC | #2
On 08/23/2011 07:45 AM, Peng Tao wrote:
> Hi, Benny and Boaz,
> 
> On Sat, Aug 13, 2011 at 9:00 AM, Peng Tao <bergwolf@gmail.com> wrote:
>> For pnfs pagelist write failure, we need to pg_recoalesce and resend
>> IO to mds.
>>
>> Signed-off-by: Peng Tao <peng_tao@emc.com>
>> ---
>>  fs/nfs/pnfs.c  |   20 +++++++-------------
>>  fs/nfs/pnfs.h  |    2 +-
>>  fs/nfs/write.c |   25 ++++++++++++++++++++++++-
>>  3 files changed, 32 insertions(+), 15 deletions(-)
>>
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index e550e88..ad0579a 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -1168,23 +1168,17 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
>>  /*
>>  * Called by non rpc-based layout drivers
>>  */
>> -int
>> -pnfs_ld_write_done(struct nfs_write_data *data)
>> +void pnfs_ld_write_done(struct nfs_write_data *data)
>>  {
>> -       int status;
>> -
>> -       if (!data->pnfs_error) {
>> +       if (likely(!data->pnfs_error)) {
>>                pnfs_set_layoutcommit(data);
>>                data->mds_ops->rpc_call_done(&data->task, data);
>> -               data->mds_ops->rpc_release(data);
>> -               return 0;
>> +       } else {
>> +               put_lseg(data->lseg);
>> +               data->lseg = NULL;
>> +               dprintk("pnfs write error = %d\n", data->pnfs_error);
>>        }
>> -
>> -       dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
>> -               data->pnfs_error);
>> -       status = nfs_initiate_write(data, NFS_CLIENT(data->inode),
>> -                                   data->mds_ops, NFS_FILE_SYNC);
>> -       return status ? : -EAGAIN;
>> +       data->mds_ops->rpc_release(data);
>>  }
>>  EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
>>
>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>> index 01cbfd5..0fb0a41 100644
>> --- a/fs/nfs/pnfs.h
>> +++ b/fs/nfs/pnfs.h
>> @@ -200,7 +200,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
>>  void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
>>  int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
>>  int _pnfs_return_layout(struct inode *);
>> -int pnfs_ld_write_done(struct nfs_write_data *);
>> +void pnfs_ld_write_done(struct nfs_write_data *);
>>  int pnfs_ld_read_done(struct nfs_read_data *);
>>  struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
>>                                               struct nfs_open_context *ctx,
>> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
>> index b39b37f..26b8d90 100644
>> --- a/fs/nfs/write.c
>> +++ b/fs/nfs/write.c
>> @@ -1165,7 +1165,13 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
>>  static void nfs_writeback_release_full(void *calldata)
>>  {
>>        struct nfs_write_data   *data = calldata;
>> -       int status = data->task.tk_status;
>> +       int ret, status = data->task.tk_status;
>> +       struct nfs_pageio_descriptor pgio;
>> +
>> +       if (data->pnfs_error) {
>> +               nfs_pageio_init_write_mds(&pgio, data->inode, FLUSH_STABLE);
>> +               pgio.pg_recoalesce = 1;
>> +       }
>>
>>        /* Update attributes as result of writeback. */
>>        while (!list_empty(&data->pages)) {
>> @@ -1181,6 +1187,11 @@ static void nfs_writeback_release_full(void *calldata)
>>                        req->wb_bytes,
>>                        (long long)req_offset(req));
>>
>> +               if (data->pnfs_error) {
>> +                       dprintk(", pnfs error = %d\n", data->pnfs_error);
>> +                       goto next;
>> +               }
>> +
>>                if (status < 0) {
>>                        nfs_set_pageerror(page);
>>                        nfs_context_set_write_error(req->wb_context, status);
>> @@ -1200,7 +1211,19 @@ remove_request:
>>        next:
>>                nfs_clear_page_tag_locked(req);
>>                nfs_end_page_writeback(page);
>> +               if (data->pnfs_error) {
>> +                       lock_page(page);
>> +                       nfs_pageio_cond_complete(&pgio, page->index);
>> +                       ret = nfs_page_async_flush(&pgio, page, 0);
>> +                       if (ret) {
>> +                               nfs_set_pageerror(page);
> My only concern about the patch is here. If we fail at
> nfs_page_async_flush(), is it enough to just call nfs_set_pageerror()
> on the page? Do we need to redirty the page as well, and maybe some
> other things?
> 

Hum? That's a very good question. nfs_end_page_writeback above does
an end_page_writeback(), which does test_clear_page_writeback().
Now in nfs_page_async_flush() you are adding the page back to the nfs writeback
queue. So the question is when does test_set_page_writeback() is called?
If it is called as part of the re-issue of write by NFS then you are
fine because you clear it here but it will be set later on. But if
it was done on entry into the queue. At the original sight of the call to
nfs_page_async_flush(). Then it will not be set again and it might confuse
the VFS since we will be re-clearing a cleared bit.

So in short we should investigate if nfs_clear_page_tag_locked() &&
nfs_end_page_writeback() sould only be called at the *else* clause
of the "if (unlikely(data->pnfs_error))" above.

I'll be testing this next week and we'll see.

Trond do you know?

Boaz


> Thanks,
> Tao
>> +                               dprintk(", error = %d\n", ret);
>> +                       }
>> +                       unlock_page(page);
>> +               }
>>        }
>> +       if (data->pnfs_error)
>> +               nfs_pageio_complete(&pgio);
>>        nfs_writedata_release(calldata);
>>  }
>>
>> --
>> 1.7.1.262.g5ef3d
>>
>>
> 
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peng Tao Aug. 27, 2011, 10:47 a.m. UTC | #3
Hi, Boaz,

On Sat, Aug 27, 2011 at 10:43 AM, Boaz Harrosh <bharrosh@panasas.com> wrote:
> On 08/23/2011 07:45 AM, Peng Tao wrote:
>> Hi, Benny and Boaz,
>>
>> On Sat, Aug 13, 2011 at 9:00 AM, Peng Tao <bergwolf@gmail.com> wrote:
>>> For pnfs pagelist write failure, we need to pg_recoalesce and resend
>>> IO to mds.
>>>
>>> Signed-off-by: Peng Tao <peng_tao@emc.com>
>>> ---
>>>  fs/nfs/pnfs.c  |   20 +++++++-------------
>>>  fs/nfs/pnfs.h  |    2 +-
>>>  fs/nfs/write.c |   25 ++++++++++++++++++++++++-
>>>  3 files changed, 32 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>>> index e550e88..ad0579a 100644
>>> --- a/fs/nfs/pnfs.c
>>> +++ b/fs/nfs/pnfs.c
>>> @@ -1168,23 +1168,17 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
>>>  /*
>>>  * Called by non rpc-based layout drivers
>>>  */
>>> -int
>>> -pnfs_ld_write_done(struct nfs_write_data *data)
>>> +void pnfs_ld_write_done(struct nfs_write_data *data)
>>>  {
>>> -       int status;
>>> -
>>> -       if (!data->pnfs_error) {
>>> +       if (likely(!data->pnfs_error)) {
>>>                pnfs_set_layoutcommit(data);
>>>                data->mds_ops->rpc_call_done(&data->task, data);
>>> -               data->mds_ops->rpc_release(data);
>>> -               return 0;
>>> +       } else {
>>> +               put_lseg(data->lseg);
>>> +               data->lseg = NULL;
>>> +               dprintk("pnfs write error = %d\n", data->pnfs_error);
>>>        }
>>> -
>>> -       dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
>>> -               data->pnfs_error);
>>> -       status = nfs_initiate_write(data, NFS_CLIENT(data->inode),
>>> -                                   data->mds_ops, NFS_FILE_SYNC);
>>> -       return status ? : -EAGAIN;
>>> +       data->mds_ops->rpc_release(data);
>>>  }
>>>  EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
>>>
>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>> index 01cbfd5..0fb0a41 100644
>>> --- a/fs/nfs/pnfs.h
>>> +++ b/fs/nfs/pnfs.h
>>> @@ -200,7 +200,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
>>>  void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
>>>  int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
>>>  int _pnfs_return_layout(struct inode *);
>>> -int pnfs_ld_write_done(struct nfs_write_data *);
>>> +void pnfs_ld_write_done(struct nfs_write_data *);
>>>  int pnfs_ld_read_done(struct nfs_read_data *);
>>>  struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
>>>                                               struct nfs_open_context *ctx,
>>> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
>>> index b39b37f..26b8d90 100644
>>> --- a/fs/nfs/write.c
>>> +++ b/fs/nfs/write.c
>>> @@ -1165,7 +1165,13 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
>>>  static void nfs_writeback_release_full(void *calldata)
>>>  {
>>>        struct nfs_write_data   *data = calldata;
>>> -       int status = data->task.tk_status;
>>> +       int ret, status = data->task.tk_status;
>>> +       struct nfs_pageio_descriptor pgio;
>>> +
>>> +       if (data->pnfs_error) {
>>> +               nfs_pageio_init_write_mds(&pgio, data->inode, FLUSH_STABLE);
>>> +               pgio.pg_recoalesce = 1;
>>> +       }
>>>
>>>        /* Update attributes as result of writeback. */
>>>        while (!list_empty(&data->pages)) {
>>> @@ -1181,6 +1187,11 @@ static void nfs_writeback_release_full(void *calldata)
>>>                        req->wb_bytes,
>>>                        (long long)req_offset(req));
>>>
>>> +               if (data->pnfs_error) {
>>> +                       dprintk(", pnfs error = %d\n", data->pnfs_error);
>>> +                       goto next;
>>> +               }
>>> +
>>>                if (status < 0) {
>>>                        nfs_set_pageerror(page);
>>>                        nfs_context_set_write_error(req->wb_context, status);
>>> @@ -1200,7 +1211,19 @@ remove_request:
>>>        next:
>>>                nfs_clear_page_tag_locked(req);
>>>                nfs_end_page_writeback(page);
>>> +               if (data->pnfs_error) {
>>> +                       lock_page(page);
>>> +                       nfs_pageio_cond_complete(&pgio, page->index);
>>> +                       ret = nfs_page_async_flush(&pgio, page, 0);
>>> +                       if (ret) {
>>> +                               nfs_set_pageerror(page);
>> My only concern about the patch is here. If we fail at
>> nfs_page_async_flush(), is it enough to just call nfs_set_pageerror()
>> on the page? Do we need to redirty the page as well, and maybe some
>> other things?
>>
>
> Hum? That's a very good question. nfs_end_page_writeback above does
> an end_page_writeback(), which does test_clear_page_writeback().
> Now in nfs_page_async_flush() you are adding the page back to the nfs writeback
> queue. So the question is when does test_set_page_writeback() is called?
> If it is called as part of the re-issue of write by NFS then you are
> fine because you clear it here but it will be set later on. But if
> it was done on entry into the queue. At the original sight of the call to
> nfs_page_async_flush(). Then it will not be set again and it might confuse
> the VFS since we will be re-clearing a cleared bit.
nfs_set_page_tag_locked() and nfs_set_page_writeback() are called
inside nfs_page_async_flush(), so we need to call
nfs_clear_page_tag_locked() and nfs_end_page_writeback() before
invoking nfs_page_async_flush(). But I am not quite sure about how to
handle nfs_page_async_flush() failure properly here (e.g., OOM). If we
don't re-dirty the page, will it break NFS writeback assumptions?

Thanks,
Tao
>
> So in short we should investigate if nfs_clear_page_tag_locked() &&
> nfs_end_page_writeback() sould only be called at the *else* clause
> of the "if (unlikely(data->pnfs_error))" above.
>
> I'll be testing this next week and we'll see.
>
> Trond do you know?
>
> Boaz
>
>
>> Thanks,
>> Tao
>>> +                               dprintk(", error = %d\n", ret);
>>> +                       }
>>> +                       unlock_page(page);
>>> +               }
>>>        }
>>> +       if (data->pnfs_error)
>>> +               nfs_pageio_complete(&pgio);
>>>        nfs_writedata_release(calldata);
>>>  }
>>>
>>> --
>>> 1.7.1.262.g5ef3d
>>>
>>>
>>
>>
>>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Boaz Harrosh Aug. 29, 2011, 9:16 p.m. UTC | #4
On 08/27/2011 03:47 AM, Peng Tao wrote:
> Hi, Boaz,
> 
> On Sat, Aug 27, 2011 at 10:43 AM, Boaz Harrosh <bharrosh@panasas.com> wrote:
>>> My only concern about the patch is here. If we fail at
>>> nfs_page_async_flush(), is it enough to just call nfs_set_pageerror()
>>> on the page? Do we need to redirty the page as well, and maybe some
>>> other things?
>>>
>>
>> Hum? That's a very good question. nfs_end_page_writeback above does
>> an end_page_writeback(), which does test_clear_page_writeback().
>> Now in nfs_page_async_flush() you are adding the page back to the nfs writeback
>> queue. So the question is when does test_set_page_writeback() is called?
>> If it is called as part of the re-issue of write by NFS then you are
>> fine because you clear it here but it will be set later on. But if
>> it was done on entry into the queue. At the original sight of the call to
>> nfs_page_async_flush(). Then it will not be set again and it might confuse
>> the VFS since we will be re-clearing a cleared bit.
>
> nfs_set_page_tag_locked() and nfs_set_page_writeback() are called
> inside nfs_page_async_flush(), so we need to call
> nfs_clear_page_tag_locked() and nfs_end_page_writeback() before
> invoking nfs_page_async_flush(). 

OK grate, so you answered my question, that's good then.

> But I am not quite sure about how to
> handle nfs_page_async_flush() failure properly here (e.g., OOM). If we
> don't re-dirty the page, will it break NFS writeback assumptions?
> 

Hmm, that one is tough. (From the frying-pan into the fire).

Lets add a big FIXME: for now. And later after lots of testing and error
injection we might just check nfs_page_async_flush() return code and
manually re-dirty the page on failure.

> Thanks,
> Tao
>>

Thanks
Boaz
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e550e88..ad0579a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1168,23 +1168,17 @@  EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
 /*
  * Called by non rpc-based layout drivers
  */
-int
-pnfs_ld_write_done(struct nfs_write_data *data)
+void pnfs_ld_write_done(struct nfs_write_data *data)
 {
-	int status;
-
-	if (!data->pnfs_error) {
+	if (likely(!data->pnfs_error)) {
 		pnfs_set_layoutcommit(data);
 		data->mds_ops->rpc_call_done(&data->task, data);
-		data->mds_ops->rpc_release(data);
-		return 0;
+	} else {
+		put_lseg(data->lseg);
+		data->lseg = NULL;
+		dprintk("pnfs write error = %d\n", data->pnfs_error);
 	}
-
-	dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
-		data->pnfs_error);
-	status = nfs_initiate_write(data, NFS_CLIENT(data->inode),
-				    data->mds_ops, NFS_FILE_SYNC);
-	return status ? : -EAGAIN;
+	data->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
 
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 01cbfd5..0fb0a41 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -200,7 +200,7 @@  void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
 int _pnfs_return_layout(struct inode *);
-int pnfs_ld_write_done(struct nfs_write_data *);
+void pnfs_ld_write_done(struct nfs_write_data *);
 int pnfs_ld_read_done(struct nfs_read_data *);
 struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
 					       struct nfs_open_context *ctx,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b39b37f..26b8d90 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1165,7 +1165,13 @@  static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 static void nfs_writeback_release_full(void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
-	int status = data->task.tk_status;
+	int ret, status = data->task.tk_status;
+	struct nfs_pageio_descriptor pgio;
+
+	if (data->pnfs_error) {
+		nfs_pageio_init_write_mds(&pgio, data->inode, FLUSH_STABLE);
+		pgio.pg_recoalesce = 1;
+	}
 
 	/* Update attributes as result of writeback. */
 	while (!list_empty(&data->pages)) {
@@ -1181,6 +1187,11 @@  static void nfs_writeback_release_full(void *calldata)
 			req->wb_bytes,
 			(long long)req_offset(req));
 
+		if (data->pnfs_error) {
+			dprintk(", pnfs error = %d\n", data->pnfs_error);
+			goto next;
+		}
+
 		if (status < 0) {
 			nfs_set_pageerror(page);
 			nfs_context_set_write_error(req->wb_context, status);
@@ -1200,7 +1211,19 @@  remove_request:
 	next:
 		nfs_clear_page_tag_locked(req);
 		nfs_end_page_writeback(page);
+		if (data->pnfs_error) {
+			lock_page(page);
+			nfs_pageio_cond_complete(&pgio, page->index);
+			ret = nfs_page_async_flush(&pgio, page, 0);
+			if (ret) {
+				nfs_set_pageerror(page);
+				dprintk(", error = %d\n", ret);
+			}
+			unlock_page(page);
+		}
 	}
+	if (data->pnfs_error)
+		nfs_pageio_complete(&pgio);
 	nfs_writedata_release(calldata);
 }