
[v2] NFSv4.1: Fix up replays of interrupted requests

Message ID 20171011170705.45533-1-trond.myklebust@primarydata.com (mailing list archive)
State New, archived

Commit Message

Trond Myklebust Oct. 11, 2017, 5:07 p.m. UTC
If the previous request on a slot was interrupted before it was
processed by the server, then our slot sequence number may be out of whack,
and so we try the next operation using the old sequence number.

The problem with this is that not all servers check to see that the
client is replaying the same operations as previously when they decide
to go to the replay cache, and so instead of the expected error of
NFS4ERR_SEQ_FALSE_RETRY, we get a replay of the old reply, which could
(if the operations match up) be mistaken by the client for a new reply.

To fix this, we attempt to send a COMPOUND containing only the SEQUENCE op
in order to resync our slot sequence number.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4_fs.h  |   2 +-
 fs/nfs/nfs4proc.c | 146 +++++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 101 insertions(+), 47 deletions(-)
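
In outline, the patched nfs4_setup_sequence() loops over slot allocation
and refuses to reuse a slot whose previous request was interrupted until
a lone SEQUENCE has resynced it. A simplified sketch of that control
flow (condensed from the diff quoted in the comments below; locking,
draining and error paths elided):

    /* Sketch only; see the real hunk in nfs4_setup_sequence() below. */
    for (;;) {
        slot = nfs4_alloc_slot(tbl);
        if (IS_ERR(slot))
            goto out_sleep;         /* wait, then retry allocation */
        if (likely(!slot->interrupted))
            break;                  /* clean slot: use it */
        /* The previous request on this slot was interrupted: fire a
         * privileged SEQUENCE-only compound at the server to resync
         * the slot's sequence number, then pick another slot. */
        nfs4_sequence_process_interrupted(client, slot,
                task->tk_msg.rpc_cred);
    }
    nfs4_sequence_attach_slot(args, res, slot);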

Comments

Olga Kornievskaia Oct. 16, 2017, 4:37 p.m. UTC | #1
On Wed, Oct 11, 2017 at 1:07 PM, Trond Myklebust
<trond.myklebust@primarydata.com> wrote:
> If the previous request on a slot was interrupted before it was
> processed by the server, then our slot sequence number may be out of whack,
> and so we try the next operation using the old sequence number.
>
> The problem with this, is that not all servers check to see that the
> client is replaying the same operations as previously when they decide
> to go to the replay cache, and so instead of the expected error of
> NFS4ERR_SEQ_FALSE_RETRY, we get a replay of the old reply, which could
> (if the operations match up) be mistaken by the client for a new reply.
>
> To fix this, we attempt to send a COMPOUND containing only the SEQUENCE op
> in order to resync our slot sequence number.
>
> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
>  fs/nfs/nfs4_fs.h  |   2 +-
>  fs/nfs/nfs4proc.c | 146 +++++++++++++++++++++++++++++++++++++-----------------
>  2 files changed, 101 insertions(+), 47 deletions(-)
>
> diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
> index ac4f10b7f6c1..b547d935aaf0 100644
> --- a/fs/nfs/nfs4_fs.h
> +++ b/fs/nfs/nfs4_fs.h
> @@ -464,7 +464,7 @@ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
>  extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
>  extern void nfs_release_seqid(struct nfs_seqid *seqid);
>  extern void nfs_free_seqid(struct nfs_seqid *seqid);
> -extern int nfs4_setup_sequence(const struct nfs_client *client,
> +extern int nfs4_setup_sequence(struct nfs_client *client,
>                                 struct nfs4_sequence_args *args,
>                                 struct nfs4_sequence_res *res,
>                                 struct rpc_task *task);
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index f90090e8c959..caa72efe02c9 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -96,6 +96,10 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
>                             struct nfs_open_context *ctx, struct nfs4_label *ilabel,
>                             struct nfs4_label *olabel);
>  #ifdef CONFIG_NFS_V4_1
> +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
> +               struct rpc_cred *cred,
> +               struct nfs4_slot *slot,
> +               bool is_privileged);
>  static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
>                 struct rpc_cred *);
>  static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
> @@ -644,13 +648,14 @@ static int nfs40_sequence_done(struct rpc_task *task,
>
>  #if defined(CONFIG_NFS_V4_1)
>
> -static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
> +static void nfs41_release_slot(struct nfs4_slot *slot)
>  {
>         struct nfs4_session *session;
>         struct nfs4_slot_table *tbl;
> -       struct nfs4_slot *slot = res->sr_slot;
>         bool send_new_highest_used_slotid = false;
>
> +       if (!slot)
> +               return;
>         tbl = slot->table;
>         session = tbl->session;
>
> @@ -676,13 +681,18 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
>                 send_new_highest_used_slotid = false;
>  out_unlock:
>         spin_unlock(&tbl->slot_tbl_lock);
> -       res->sr_slot = NULL;
>         if (send_new_highest_used_slotid)
>                 nfs41_notify_server(session->clp);
>         if (waitqueue_active(&tbl->slot_waitq))
>                 wake_up_all(&tbl->slot_waitq);
>  }
>
> +static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
> +{
> +       nfs41_release_slot(res->sr_slot);
> +       res->sr_slot = NULL;
> +}
> +
>  static int nfs41_sequence_process(struct rpc_task *task,
>                 struct nfs4_sequence_res *res)
>  {
> @@ -710,13 +720,6 @@ static int nfs41_sequence_process(struct rpc_task *task,
>         /* Check the SEQUENCE operation status */
>         switch (res->sr_status) {
>         case 0:
> -               /* If previous op on slot was interrupted and we reused
> -                * the seq# and got a reply from the cache, then retry
> -                */
> -               if (task->tk_status == -EREMOTEIO && interrupted) {
> -                       ++slot->seq_nr;
> -                       goto retry_nowait;
> -               }
>                 /* Update the slot's sequence and clientid lease timer */
>                 slot->seq_done = 1;
>                 clp = session->clp;
> @@ -750,16 +753,16 @@ static int nfs41_sequence_process(struct rpc_task *task,
>                  * The slot id we used was probably retired. Try again
>                  * using a different slot id.
>                  */
> +               if (slot->seq_nr < slot->table->target_highest_slotid)
> +                       goto session_recover;
>                 goto retry_nowait;
>         case -NFS4ERR_SEQ_MISORDERED:
>                 /*
>                  * Was the last operation on this sequence interrupted?
>                  * If so, retry after bumping the sequence number.
>                  */
> -               if (interrupted) {
> -                       ++slot->seq_nr;
> -                       goto retry_nowait;
> -               }
> +               if (interrupted)
> +                       goto retry_new_seq;
>                 /*
>                  * Could this slot have been previously retired?
>                  * If so, then the server may be expecting seq_nr = 1!
> @@ -768,10 +771,11 @@ static int nfs41_sequence_process(struct rpc_task *task,
>                         slot->seq_nr = 1;
>                         goto retry_nowait;
>                 }
> -               break;
> +               goto session_recover;
>         case -NFS4ERR_SEQ_FALSE_RETRY:
> -               ++slot->seq_nr;
> -               goto retry_nowait;
> +               if (interrupted)
> +                       goto retry_new_seq;
> +               goto session_recover;
>         default:
>                 /* Just update the slot sequence no. */
>                 slot->seq_done = 1;
> @@ -781,6 +785,11 @@ static int nfs41_sequence_process(struct rpc_task *task,
>         dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
>  out_noaction:
>         return ret;
> +session_recover:
> +       nfs4_schedule_session_recovery(session, res->sr_status);
> +       goto retry_nowait;
> +retry_new_seq:
> +       ++slot->seq_nr;
>  retry_nowait:
>         if (rpc_restart_call_prepare(task)) {
>                 nfs41_sequence_free_slot(res);
> @@ -857,6 +866,17 @@ static const struct rpc_call_ops nfs41_call_sync_ops = {
>         .rpc_call_done = nfs41_call_sync_done,
>  };
>
> +static void
> +nfs4_sequence_process_interrupted(struct nfs_client *client,
> +               struct nfs4_slot *slot, struct rpc_cred *cred)
> +{
> +       struct rpc_task *task;
> +
> +       task = _nfs41_proc_sequence(client, cred, slot, true);
> +       if (!IS_ERR(task))
> +               rpc_put_task_async(task);
> +}
> +
>  #else  /* !CONFIG_NFS_V4_1 */
>
>  static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
> @@ -877,9 +897,32 @@ int nfs4_sequence_done(struct rpc_task *task,
>  }
>  EXPORT_SYMBOL_GPL(nfs4_sequence_done);
>
> +static void
> +nfs4_sequence_process_interrupted(struct nfs_client *client,
> +               struct nfs4_slot *slot, struct rpc_cred *cred)
> +{
> +       WARN_ON_ONCE(1);
> +       slot->interrupted = 0;
> +}
> +
>  #endif /* !CONFIG_NFS_V4_1 */
>
> -int nfs4_setup_sequence(const struct nfs_client *client,
> +static
> +void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args,
> +               struct nfs4_sequence_res *res,
> +               struct nfs4_slot *slot)
> +{
> +       slot->privileged = args->sa_privileged ? 1 : 0;
> +       args->sa_slot = slot;
> +
> +       res->sr_slot = slot;
> +       res->sr_timestamp = jiffies;
> +       res->sr_status_flags = 0;
> +       res->sr_status = 1;
> +
> +}
> +
> +int nfs4_setup_sequence(struct nfs_client *client,
>                         struct nfs4_sequence_args *args,
>                         struct nfs4_sequence_res *res,
>                         struct rpc_task *task)
> @@ -897,29 +940,28 @@ int nfs4_setup_sequence(const struct nfs_client *client,
>                 task->tk_timeout = 0;
>         }
>
> -       spin_lock(&tbl->slot_tbl_lock);
> -       /* The state manager will wait until the slot table is empty */
> -       if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
> -               goto out_sleep;
> +       for (;;) {
> +               spin_lock(&tbl->slot_tbl_lock);
> +               /* The state manager will wait until the slot table is empty */
> +               if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
> +                       goto out_sleep;
> +
> +               slot = nfs4_alloc_slot(tbl);
> +               if (IS_ERR(slot)) {
> +                       /* Try again in 1/4 second */
> +                       if (slot == ERR_PTR(-ENOMEM))
> +                               task->tk_timeout = HZ >> 2;
> +                       goto out_sleep;
> +               }
> +               spin_unlock(&tbl->slot_tbl_lock);
>
> -       slot = nfs4_alloc_slot(tbl);
> -       if (IS_ERR(slot)) {
> -               /* Try again in 1/4 second */
> -               if (slot == ERR_PTR(-ENOMEM))
> -                       task->tk_timeout = HZ >> 2;
> -               goto out_sleep;
> +               if (likely(!slot->interrupted))
> +                       break;
> +               nfs4_sequence_process_interrupted(client,
> +                               slot, task->tk_msg.rpc_cred);
>         }
> -       spin_unlock(&tbl->slot_tbl_lock);
>
> -       slot->privileged = args->sa_privileged ? 1 : 0;
> -       args->sa_slot = slot;
> -
> -       res->sr_slot = slot;
> -       if (session) {
> -               res->sr_timestamp = jiffies;
> -               res->sr_status_flags = 0;
> -               res->sr_status = 1;
> -       }
> +       nfs4_sequence_attach_slot(args, res, slot);
>
>         trace_nfs4_setup_sequence(session, args);
>  out_start:
> @@ -8135,6 +8177,7 @@ static const struct rpc_call_ops nfs41_sequence_ops = {
>
>  static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
>                 struct rpc_cred *cred,
> +               struct nfs4_slot *slot,
>                 bool is_privileged)
>  {
>         struct nfs4_sequence_data *calldata;
> @@ -8148,15 +8191,18 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
>                 .callback_ops = &nfs41_sequence_ops,
>                 .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
>         };
> +       struct rpc_task *ret;
>
> +       ret = ERR_PTR(-EIO);
>         if (!atomic_inc_not_zero(&clp->cl_count))
> -               return ERR_PTR(-EIO);
> +               goto out_err;
> +
> +       ret = ERR_PTR(-ENOMEM);
>         calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
> -       if (calldata == NULL) {
> -               nfs_put_client(clp);
> -               return ERR_PTR(-ENOMEM);
> -       }
> +       if (calldata == NULL)
> +               goto out_put_clp;
>         nfs4_init_sequence(&calldata->args, &calldata->res, 0);
> +       nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot);
>         if (is_privileged)
>                 nfs4_set_sequence_privileged(&calldata->args);
>         msg.rpc_argp = &calldata->args;
> @@ -8164,7 +8210,15 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
>         calldata->clp = clp;
>         task_setup_data.callback_data = calldata;
>
> -       return rpc_run_task(&task_setup_data);
> +       ret = rpc_run_task(&task_setup_data);
> +       if (IS_ERR(ret))
> +               goto out_err;
> +       return ret;
> +out_put_clp:
> +       nfs_put_client(clp);
> +out_err:
> +       nfs41_release_slot(slot);
> +       return ret;
>  }
>
>  static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags)
> @@ -8174,7 +8228,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
>
>         if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
>                 return -EAGAIN;
> -       task = _nfs41_proc_sequence(clp, cred, false);
> +       task = _nfs41_proc_sequence(clp, cred, NULL, false);
>         if (IS_ERR(task))
>                 ret = PTR_ERR(task);
>         else
> @@ -8188,7 +8242,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
>         struct rpc_task *task;
>         int ret;
>
> -       task = _nfs41_proc_sequence(clp, cred, true);
> +       task = _nfs41_proc_sequence(clp, cred, NULL, true);
>         if (IS_ERR(task)) {
>                 ret = PTR_ERR(task);
>                 goto out;
> --

Hi Trond,

I get the following oops with this patch when triggering ctrl-c:

[  177.057878] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
[  177.062500] IP: _nfs41_proc_sequence+0xdd/0x1a0 [nfsv4]
[  177.064119] PGD 0 P4D 0
[  177.064896] Oops: 0002 [#1] SMP
[  177.065765] Modules linked in: nfsv4 dns_resolver nfs rfcomm fuse
xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun ip6t_rpfilter
ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 xt_conntrack
ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc
ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6
ip6table_mangle ip6table_security ip6table_raw iptable_nat
nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack
libcrc32c iptable_mangle iptable_security iptable_raw ebtable_filter
ebtables ip6table_filter ip6_tables iptable_filter
vmw_vsock_vmci_transport vsock bnep dm_mirror dm_region_hash dm_log
dm_mod snd_seq_midi snd_seq_midi_event coretemp crct10dif_pclmul
crc32_pclmul ghash_clmulni_intel pcbc snd_ens1371 snd_ac97_codec
aesni_intel ac97_bus crypto_simd snd_seq cryptd
[  177.083060]  glue_helper snd_pcm uvcvideo ppdev videobuf2_vmalloc
vmw_balloon videobuf2_memops videobuf2_v4l2 videobuf2_core btusb btrtl
btbcm pcspkr btintel videodev bluetooth snd_rawmidi snd_timer
snd_seq_device rfkill nfit sg snd ecdh_generic soundcore i2c_piix4
libnvdimm shpchp vmw_vmci parport_pc parport nfsd auth_rpcgss nfs_acl
lockd grace sunrpc ip_tables ext4 mbcache jbd2 sr_mod cdrom
ata_generic sd_mod pata_acpi vmwgfx drm_kms_helper syscopyarea
sysfillrect sysimgblt fb_sys_fops ttm ahci drm mptspi
scsi_transport_spi mptscsih mptbase crc32c_intel libahci ata_piix
libata serio_raw e1000 i2c_core
[  177.094284] CPU: 3 PID: 57 Comm: kworker/3:1 Not tainted 4.14.0-rc2+ #40
[  177.095712] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015
[  177.097932] Workqueue: events nfs4_renew_state [nfsv4]
[  177.099013] task: ffff8800718aae80 task.stack: ffffc90000aa8000
[  177.100240] RIP: 0010:_nfs41_proc_sequence+0xdd/0x1a0 [nfsv4]
[  177.101428] RSP: 0018:ffffc90000aabd68 EFLAGS: 00010246
[  177.102577] RAX: ffff8800748e8340 RBX: ffff88003f5bd000 RCX: 0000000000000000
[  177.104042] RDX: 00000000fffdf000 RSI: 0000000000000000 RDI: ffff8800748e8380
[  177.105496] RBP: ffffc90000aabdf8 R08: 000000000001ee00 R09: ffff8800748e8340
[  177.106944] R10: ffff8800748e8340 R11: 00000000000002bd R12: ffffc90000aabd90
[  177.108510] R13: 0000000000000000 R14: 0000000000000000 R15: ffffffffa086b4d0
[  177.109938] FS:  0000000000000000(0000) GS:ffff88007b6c0000(0000) knlGS:0000000000000000
[  177.111644] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  177.113498] CR2: 0000000000000020 CR3: 0000000072ba6001 CR4: 00000000001606e0
[  177.115938] Call Trace:
[  177.116701]  nfs41_proc_async_sequence+0x1d/0x60 [nfsv4]
[  177.118266]  nfs4_renew_state+0x10b/0x1a0 [nfsv4]
[  177.119628]  process_one_work+0x149/0x360
[  177.120684]  worker_thread+0x4d/0x3c0
[  177.121651]  kthread+0x109/0x140
[  177.122505]  ? rescuer_thread+0x380/0x380
[  177.123461]  ? kthread_park+0x60/0x60
[  177.124338]  ret_from_fork+0x25/0x30
[  177.125173] Code: e0 48 85 c0 0f 84 8e 00 00 00 0f b6 50 10 48 c7 40 08 00 00 00 00 48 c7 40 18 00 00 00 00 83 e2 fc 88 50 10 48 8b 15 93 0c 3d e1 <41> 80 66 20 fd 45 84 ed 4c 89 70 08 4c 89 70 18 c7 40 2c 00 00
[  177.129700] RIP: _nfs41_proc_sequence+0xdd/0x1a0 [nfsv4] RSP: ffffc90000aabd68
[  177.131830] CR2: 0000000000000020
[  177.132779] ---[ end trace 221b5aa4b7a47014 ]---
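
A plausible reading of this trace, going by the diff above:
nfs4_renew_state() reaches _nfs41_proc_sequence() via
nfs41_proc_async_sequence(clp, cred, NULL, false), so slot is NULL, and
the new nfs4_sequence_attach_slot() dereferences it unconditionally
(the write fault at offset 0x20 with R14 == 0 is consistent with a NULL
struct nfs4_slot pointer). A minimal sketch of a guard that would avoid
the crash (one possible fix, assumed here, not taken from the patch):

    static void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args,
            struct nfs4_sequence_res *res,
            struct nfs4_slot *slot)
    {
        if (!slot)  /* e.g. lease renewal, which passes slot == NULL */
            return;
        slot->privileged = args->sa_privileged ? 1 : 0;
        args->sa_slot = slot;

        res->sr_slot = slot;
        res->sr_timestamp = jiffies;
        res->sr_status_flags = 0;
        res->sr_status = 1;
    }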

> 2.13.6
>
Olga Kornievskaia Oct. 16, 2017, 5:07 p.m. UTC | #2
On Mon, Oct 16, 2017 at 12:37 PM, Olga Kornievskaia <aglo@umich.edu> wrote:
> On Wed, Oct 11, 2017 at 1:07 PM, Trond Myklebust
> <trond.myklebust@primarydata.com> wrote:
>> [... full v2 patch trimmed; quoted in full in #1 above ...]
>
> Hi Trond,
>
> I get the following oops with this patch and triggering ctrl-c
>
> [... rc2 oops trace trimmed; quoted in full in #1 above ...]
>
>> 2.13.6

Sorry, I posted the rc2 oops rather than the rc5 one, but it's the same.

A network trace reveals that the server is not working properly (thus
getting Bruce's attention here).

Skipping ahead, the server replies to a SEQUENCE call with a reply that
claims count=5 operations but contains only a SEQUENCE op.

The flow of steps is as follows.

Client sends
call COPY seq=16 slot=0 highslot=1(application at this point receives
a ctrl-c so it'll go ahead and close 2files it has opened)
call CLOSE  seq=1 slot=1 highslot=1
call SEQUENCE seq=16 slot=0 highslot=1
reply CLOSE OK
reply SEQUENCE ERR_DELAY
another call CLOSE seq=2 slot=1 and successful reply
reply COPY ..
call SEQUENCE seq=16 slot=0 highslot=0
reply SEQUENCE opcount=5

So I'm assuming the server is replying from the reply cache for the
COPY seq=16 slot=0... but it's only sending part of it back? Is that
legit?

In any case, I think the client shouldn't be oops-ing.

[  138.136387] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
[  138.140134] IP: _nfs41_proc_sequence+0xdd/0x1a0 [nfsv4]
[  138.141687] PGD 0 P4D 0
[  138.142462] Oops: 0002 [#1] SMP
[  138.143413] Modules linked in: nfsv4 dns_resolver nfs rfcomm fuse
xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun ip6t_rpfilter
ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 xt_conntrack
ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc
ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6
ip6table_mangle ip6table_security ip6table_raw iptable_nat
nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack
libcrc32c iptable_mangle iptable_security iptable_raw ebtable_filter
ebtables ip6table_filter ip6_tables iptable_filter
vmw_vsock_vmci_transport vsock bnep dm_mirror dm_region_hash dm_log
dm_mod snd_seq_midi snd_seq_midi_event coretemp crct10dif_pclmul
crc32_pclmul ghash_clmulni_intel pcbc uvcvideo snd_ens1371
snd_ac97_codec ac97_bus snd_seq ppdev videobuf2_vmalloc
[  138.158839]  btusb videobuf2_memops videobuf2_v4l2 videobuf2_core
aesni_intel btrtl nfit btbcm crypto_simd cryptd videodev snd_pcm
btintel glue_helper vmw_balloon libnvdimm bluetooth snd_rawmidi
snd_timer pcspkr snd_seq_device snd shpchp rfkill vmw_vmci sg
ecdh_generic soundcore i2c_piix4 parport_pc parport nfsd auth_rpcgss
nfs_acl lockd grace sunrpc ip_tables ext4 mbcache jbd2 sr_mod cdrom
sd_mod ata_generic pata_acpi vmwgfx drm_kms_helper syscopyarea
sysfillrect sysimgblt fb_sys_fops ttm drm ahci libahci crc32c_intel
ata_piix mptspi scsi_transport_spi serio_raw libata mptscsih e1000
mptbase i2c_core
[  138.169453] CPU: 3 PID: 541 Comm: kworker/3:3 Not tainted 4.14.0-rc5+ #41
[  138.170829] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015
[  138.172960] Workqueue: events nfs4_renew_state [nfsv4]
[  138.174020] task: ffff880033c80000 task.stack: ffffc90000d80000
[  138.175232] RIP: 0010:_nfs41_proc_sequence+0xdd/0x1a0 [nfsv4]
[  138.176392] RSP: 0018:ffffc90000d83d68 EFLAGS: 00010246
[  138.177444] RAX: ffff880073646200 RBX: ffff88002c944800 RCX: 0000000000000000
[  138.178932] RDX: 00000000fffd7000 RSI: 0000000000000000 RDI: ffff880073646240
[  138.180357] RBP: ffffc90000d83df8 R08: 000000000001ee40 R09: ffff880073646200
[  138.181955] R10: ffff880073646200 R11: 0000000000000139 R12: ffffc90000d83d90
[  138.184014] R13: 0000000000000000 R14: 0000000000000000 R15: ffffffffa08784d0
[  138.185439] FS:  0000000000000000(0000) GS:ffff88007b6c0000(0000) knlGS:0000000000000000
[  138.187144] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  138.188469] CR2: 0000000000000020 CR3: 0000000001c09003 CR4: 00000000001606e0
[  138.189952] Call Trace:
[  138.190478]  nfs41_proc_async_sequence+0x1d/0x60 [nfsv4]
[  138.191549]  nfs4_renew_state+0x10b/0x1a0 [nfsv4]
[  138.192555]  process_one_work+0x149/0x360
[  138.193367]  worker_thread+0x4d/0x3c0
[  138.194157]  kthread+0x109/0x140
[  138.194816]  ? rescuer_thread+0x380/0x380
[  138.195673]  ? kthread_park+0x60/0x60
[  138.196426]  ret_from_fork+0x25/0x30
[  138.197153] Code: e0 48 85 c0 0f 84 8e 00 00 00 0f b6 50 10 48 c7 40 08 00 00 00 00 48 c7 40 18 00 00 00 00 83 e2 fc 88 50 10 48 8b 15 b3 0e 3c e1 <41> 80 66 20 fd 45 84 ed 4c 89 70 08 4c 89 70 18 c7 40 2c 00 00
[  138.200991] RIP: _nfs41_proc_sequence+0xdd/0x1a0 [nfsv4] RSP: ffffc90000d83d68
[  138.202431] CR2: 0000000000000020
[  138.203200] ---[ end trace b25c7be5ead1a406 ]---

J. Bruce Fields Oct. 16, 2017, 6:36 p.m. UTC | #3
On Mon, Oct 16, 2017 at 01:07:57PM -0400, Olga Kornievskaia wrote:
> Network trace reveals that server is not working properly (thus
> getting Bruce's attention here).
> 
> Skipping ahead, the server replies to a SEQUENCE call with a reply
> that has a count=5 operations but only has a sequence in it.
> 
> The flow of steps is the following.
> 
> Client sends
> call COPY seq=16 slot=0 highslot=1(application at this point receives
> a ctrl-c so it'll go ahead and close 2files it has opened)

Is cachethis set on the SEQUENCE op in that COPY compound?

> call CLOSE  seq=1 slot=1 highslot=1
> call SEQUENCE seq=16 slot=0 highslot=1
> reply CLOSE OK
> reply SEQUENCE ERR_DELAY
> another call CLOSE seq=2 slot=1 and successful reply
> reply COPY ..
> call SEQUENCE seq=16 slot=0 highslot=0
> reply SEQUENCE opcount=5

And that's the whole reply?

Do you have a binary capture that I could look at?

> So I'm assuming server is replying from the reply cache for the COPY
> seq=16 slot=0.. but it's only sending part of it back? Is that legit?

No. --b.

> 
> In any case, I think the client shouldn't be oops-ing.
> 
> [... rc5 oops trace trimmed; quoted in full in #2 above ...]
> 
Olga Kornievskaia Oct. 16, 2017, 7:20 p.m. UTC | #4
On Mon, Oct 16, 2017 at 2:36 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> On Mon, Oct 16, 2017 at 01:07:57PM -0400, Olga Kornievskaia wrote:
>> Network trace reveals that server is not working properly (thus
>> getting Bruce's attention here).
>>
>> Skipping ahead, the server replies to a SEQUENCE call with a reply
>> that has a count=5 operations but only has a sequence in it.
>>
>> The flow of steps is the following.
>>
>> Client sends
>> call COPY seq=16 slot=0 highslot=1(application at this point receives
>> a ctrl-c so it'll go ahead and close 2files it has opened)
>
> Is cachethis set on that the SEQUENCE op in that copy compound?

Cachethis=no.

>> call CLOSE  seq=1 slot=1 highslot=1
>> call SEQUENCE seq=16 slot=0 highslot=1
>> reply CLOSE OK
>> reply SEQUENCE ERR_DELAY
>> another call CLOSE seq=2 slot=1 and successful reply
>> reply COPY ..
>> call SEQUENCE seq=16 slot=0 highslot=0
>> reply SEQUENCE opcount=5
>
> And that's the whole reply?

Here's the text of the malformed packet:
  14023 2017-10-16 12:32:41.779109  192.168.1.89  192.168.1.94  NFS  150  V4 Reply (Call In 14022) [Malformed Packet]

Frame 14023: 150 bytes on wire (1200 bits), 150 bytes captured (1200 bits) on interface 0
Ethernet II, Src: Vmware_9f:a3:15 (00:0c:29:9f:a3:15), Dst: Vmware_8d:33:b1 (00:0c:29:8d:33:b1)
Internet Protocol Version 4, Src: 192.168.1.89, Dst: 192.168.1.94
Transmission Control Protocol, Src Port: 2049, Dst Port: 938, Seq: 5129, Ack: 6141, Len: 84
Remote Procedure Call, Type:Reply XID:0x3c21001c
Network File System, Ops(5): SEQUENCE
    [Program Version: 4]
    [V4 Procedure: COMPOUND (1)]
    Status: NFS4ERR_BAD_STATEID (10025)
    Tag: <EMPTY>
    Operations (count: 5)
        Opcode: SEQUENCE (53)
            Status: NFS4_OK (0)
            sessionid: cbd8e459928bea510200000000000000
            seqid: 0x00000016
            slot id: 0
            high slot id: 30
            target high slot id: 30
            status flags: 0x00000000
                .... .... .... .... .... .... .... ...0 = SEQ4_STATUS_CB_PATH_DOWN: Not set
                .... .... .... .... .... .... .... ..0. = SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING: Not set
                .... .... .... .... .... .... .... .0.. = SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED: Not set
                .... .... .... .... .... .... .... 0... = SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED: Not set
                .... .... .... .... .... .... ...0 .... = SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED: Not set
                .... .... .... .... .... .... ..0. .... = SEQ4_STATUS_ADMIN_STATE_REVOKED: Not set
                .... .... .... .... .... .... .0.. .... = SEQ4_STATUS_RECALLABLE_STATE_REVOKED: Not set
                .... .... .... .... .... .... 0... .... = SEQ4_STATUS_LEASE_MOVED: Not set
                .... .... .... .... .... ...0 .... .... = SEQ4_STATUS_RESTART_RECLAIM_NEEDED: Not set
                .... .... .... .... .... ..0. .... .... = SEQ4_STATUS_CB_PATH_DOWN_SESSION: Not set
                .... .... .... .... .... .0.. .... .... = SEQ4_STATUS_BACKCHANNEL_FAULT: Not set
                .... .... .... .... .... 0... .... .... = SEQ4_STATUS_DEVID_CHANGED: Not set
                .... .... .... .... ...0 .... .... .... = SEQ4_STATUS_DEVID_DELETED: Not set
[Malformed Packet: NFS]
    [Expert Info (Error/Malformed): Malformed Packet (Exception occurred)]

> Do you have a binary capture that I could look at?

I didn't think the mailing list allowed attachments. I can send mail
just to you with the packet trace. I can also post the text output from
wireshark for those packets to the mailing list.


>> So I'm assuming server is replying from the reply cache for the COPY
>> seq=16 slot=0.. but it's only sending part of it back? Is that legit?
>
> No.--b.
>
>>
>> In any case, I think the client shouldn't be oops-ing.
>>
>> [... rc5 oops trace trimmed; quoted in full in #2 above ...]
J. Bruce Fields Oct. 18, 2017, 9:23 p.m. UTC | #5
On Mon, Oct 16, 2017 at 02:36:23PM -0400, bfields wrote:
> [... quoted context trimmed; see #3 above ...]

Thanks, yes, the client behavior is arguably out of spec (it's sending a
"retry" that doesn't match the original call), but I understand why it's
doing this, and clearly responding with a corrupted reply isn't right.
(And probably the client can deal with any reply short of one that's
actually corrupted.)  Do the following patches help?  (Actually I think
either one on its own should do the job, but I haven't done much
testing.)

--b.
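
For reference, the check being alluded to is RFC 5661, section
2.10.6.1: when the incoming seqid equals the slot's last-seen seqid,
the server may serve the retry from the reply cache only if the request
really is a replay of the original, and must return
NFS4ERR_SEQ_FALSE_RETRY otherwise; replaying a truncated version of the
cached reply is never an option. A hypothetical server-side sketch
(same_request() and replay_cached_reply() are invented stand-ins, not
real nfsd helpers):

    if (seq->seqid == slot->sl_seqid) {
        /* Retransmission on this slot: replay only a true replay. */
        if (!same_request(slot, args))
            return nfserr_seq_false_retry;
        return replay_cached_reply(slot, resp); /* whole cached reply */
    }
    if (seq->seqid != slot->sl_seqid + 1)
        return nfserr_seq_misordered;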
Olga Kornievskaia Oct. 19, 2017, 5:07 p.m. UTC | #6
On Wed, Oct 18, 2017 at 5:23 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> On Mon, Oct 16, 2017 at 02:36:23PM -0400, bfields wrote:
>> [... quoted context trimmed; see #5 above ...]
>
> Thanks, yes, the client behavior is arguably out of spec (it's sending a
> "retry" that doesn't match the original call), but I understand why it's
> doing this, and clearly responding with a corrupted reply isn't right.
> (And probably the client can deal with any reply short of one that's
> actually corrupted.)  Do the following patches help?  (Actually I think
> either one on its own should do the job, but I haven't done much
> testing.)
>

Bruce,

I tested your suggested 2 patches with the same scenario, where the
client ctrl-c's the COPY. Now the SEQUENCE that the client sends on the
reused COPY slot gets a good reply back (SEQ_MISORDERED).

Trond,

The client is still oopsing the same way:

[  267.251995] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
[  267.257917] IP: _nfs41_proc_sequence+0xdd/0x1a0 [nfsv4]
[  267.259651] PGD 0 P4D 0
[  267.260436] Oops: 0002 [#1] SMP
[  267.261396] Modules linked in: nfsv4 dns_resolver nfs rfcomm fuse xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun ip6t_rpfilter ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c iptable_mangle iptable_security iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter bnep vmw_vsock_vmci_transport vsock dm_mirror dm_region_hash dm_log dm_mod snd_seq_midi snd_seq_midi_event coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc ppdev aesni_intel crypto_simd cryptd glue_helper vmw_balloon snd_ens1371 btusb
[  267.276890]  pcspkr snd_ac97_codec btrtl btbcm btintel ac97_bus uvcvideo snd_seq videobuf2_vmalloc bluetooth videobuf2_memops videobuf2_v4l2 videobuf2_core snd_pcm nfit videodev snd_rawmidi snd_timer rfkill snd_seq_device libnvdimm sg snd vmw_vmci ecdh_generic soundcore shpchp i2c_piix4 parport_pc parport nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables ext4 mbcache jbd2 sr_mod cdrom ata_generic sd_mod pata_acpi vmwgfx drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops crc32c_intel ttm drm serio_raw ahci libahci mptspi scsi_transport_spi ata_piix mptscsih e1000 libata mptbase i2c_core
[  267.287534] CPU: 1 PID: 48 Comm: kworker/1:1 Not tainted 4.14.0-rc5+ #43
[  267.288939] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015
[  267.291096] Workqueue: events nfs4_renew_state [nfsv4]
[  267.292159] task: ffff88007a00c5c0 task.stack: ffffc90000b74000
[  267.293352] RIP: 0010:_nfs41_proc_sequence+0xdd/0x1a0 [nfsv4]
[  267.294514] RSP: 0018:ffffc90000b77d68 EFLAGS: 00010246
[  267.295568] RAX: ffff880078165900 RBX: ffff88007807cc00 RCX: 0000000000000000
[  267.296995] RDX: 00000000ffff8001 RSI: 0000000000000000 RDI: ffff880078165940
[  267.298422] RBP: ffffc90000b77df8 R08: 000000000001ee40 R09: ffff880078165900
[  267.299883] R10: ffff880078165900 R11: 0000000000000235 R12: ffffc90000b77d90
[  267.301311] R13: 0000000000000000 R14: 0000000000000000 R15: ffffffffa08744d0
[  267.302788] FS:  0000000000000000(0000) GS:ffff88007b640000(0000) knlGS:0000000000000000
[  267.304493] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  267.305657] CR2: 0000000000000020 CR3: 0000000001c09001 CR4: 00000000001606e0
[  267.307113] Call Trace:
[  267.307633]  nfs41_proc_async_sequence+0x1d/0x60 [nfsv4]
[  267.308725]  nfs4_renew_state+0x10b/0x1a0 [nfsv4]
[  267.309690]  process_one_work+0x149/0x360
[  267.310507]  worker_thread+0x4d/0x3c0
[  267.311255]  kthread+0x109/0x140
[  267.311918]  ? rescuer_thread+0x380/0x380
[  267.312798]  ? kthread_park+0x60/0x60
[  267.313573]  ret_from_fork+0x25/0x30
[  267.314354] Code: e0 48 85 c0 0f 84 8e 00 00 00 0f b6 50 10 48 c7 40 08 00 00 00 00 48 c7 40 18 00 00 00 00 83 e2 fc 88 50 10 48 8b 15 b3 4e 3c e1 <41> 80 66 20 fd 45 84 ed 4c 89 70 08 4c 89 70 18 c7 40 2c 00 00
[  267.318088] RIP: _nfs41_proc_sequence+0xdd/0x1a0 [nfsv4] RSP: ffffc90000b77d68
[  267.319555] CR2: 0000000000000020
[  267.320367] ---[ end trace c6ea9d44a9646e38 ]---

> --b.
Olga Kornievskaia Oct. 19, 2017, 6:33 p.m. UTC | #7
On Wed, Oct 11, 2017 at 1:07 PM, Trond Myklebust
<trond.myklebust@primarydata.com> wrote:
> If the previous request on a slot was interrupted before it was
> processed by the server, then our slot sequence number may be out of whack,
> and so we try the next operation using the old sequence number.
>
> The problem with this, is that not all servers check to see that the
> client is replaying the same operations as previously when they decide
> to go to the replay cache, and so instead of the expected error of
> NFS4ERR_SEQ_FALSE_RETRY, we get a replay of the old reply, which could
> (if the operations match up) be mistaken by the client for a new reply.
>
> To fix this, we attempt to send a COMPOUND containing only the SEQUENCE op
> in order to resync our slot sequence number.
>
> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
>  fs/nfs/nfs4_fs.h  |   2 +-
>  fs/nfs/nfs4proc.c | 146 +++++++++++++++++++++++++++++++++++++-----------------
>  2 files changed, 101 insertions(+), 47 deletions(-)
>
> diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
> index ac4f10b7f6c1..b547d935aaf0 100644
> --- a/fs/nfs/nfs4_fs.h
> +++ b/fs/nfs/nfs4_fs.h
> @@ -464,7 +464,7 @@ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
>  extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
>  extern void nfs_release_seqid(struct nfs_seqid *seqid);
>  extern void nfs_free_seqid(struct nfs_seqid *seqid);
> -extern int nfs4_setup_sequence(const struct nfs_client *client,
> +extern int nfs4_setup_sequence(struct nfs_client *client,
>                                 struct nfs4_sequence_args *args,
>                                 struct nfs4_sequence_res *res,
>                                 struct rpc_task *task);
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index f90090e8c959..caa72efe02c9 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -96,6 +96,10 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
>                             struct nfs_open_context *ctx, struct nfs4_label *ilabel,
>                             struct nfs4_label *olabel);
>  #ifdef CONFIG_NFS_V4_1
> +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
> +               struct rpc_cred *cred,
> +               struct nfs4_slot *slot,
> +               bool is_privileged);
>  static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
>                 struct rpc_cred *);
>  static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
> @@ -644,13 +648,14 @@ static int nfs40_sequence_done(struct rpc_task *task,
>
>  #if defined(CONFIG_NFS_V4_1)
>
> -static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
> +static void nfs41_release_slot(struct nfs4_slot *slot)
>  {
>         struct nfs4_session *session;
>         struct nfs4_slot_table *tbl;
> -       struct nfs4_slot *slot = res->sr_slot;
>         bool send_new_highest_used_slotid = false;
>
> +       if (!slot)
> +               return;
>         tbl = slot->table;
>         session = tbl->session;
>
> @@ -676,13 +681,18 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
>                 send_new_highest_used_slotid = false;
>  out_unlock:
>         spin_unlock(&tbl->slot_tbl_lock);
> -       res->sr_slot = NULL;
>         if (send_new_highest_used_slotid)
>                 nfs41_notify_server(session->clp);
>         if (waitqueue_active(&tbl->slot_waitq))
>                 wake_up_all(&tbl->slot_waitq);
>  }
>
> +static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
> +{
> +       nfs41_release_slot(res->sr_slot);
> +       res->sr_slot = NULL;
> +}
> +
>  static int nfs41_sequence_process(struct rpc_task *task,
>                 struct nfs4_sequence_res *res)
>  {
> @@ -710,13 +720,6 @@ static int nfs41_sequence_process(struct rpc_task *task,
>         /* Check the SEQUENCE operation status */
>         switch (res->sr_status) {
>         case 0:
> -               /* If previous op on slot was interrupted and we reused
> -                * the seq# and got a reply from the cache, then retry
> -                */
> -               if (task->tk_status == -EREMOTEIO && interrupted) {
> -                       ++slot->seq_nr;
> -                       goto retry_nowait;
> -               }
>                 /* Update the slot's sequence and clientid lease timer */
>                 slot->seq_done = 1;
>                 clp = session->clp;
> @@ -750,16 +753,16 @@ static int nfs41_sequence_process(struct rpc_task *task,
>                  * The slot id we used was probably retired. Try again
>                  * using a different slot id.
>                  */
> +               if (slot->seq_nr < slot->table->target_highest_slotid)
> +                       goto session_recover;
>                 goto retry_nowait;
>         case -NFS4ERR_SEQ_MISORDERED:
>                 /*
>                  * Was the last operation on this sequence interrupted?
>                  * If so, retry after bumping the sequence number.
>                  */
> -               if (interrupted) {
> -                       ++slot->seq_nr;
> -                       goto retry_nowait;
> -               }
> +               if (interrupted)
> +                       goto retry_new_seq;
>                 /*
>                  * Could this slot have been previously retired?
>                  * If so, then the server may be expecting seq_nr = 1!
> @@ -768,10 +771,11 @@ static int nfs41_sequence_process(struct rpc_task *task,
>                         slot->seq_nr = 1;
>                         goto retry_nowait;
>                 }
> -               break;
> +               goto session_recover;
>         case -NFS4ERR_SEQ_FALSE_RETRY:
> -               ++slot->seq_nr;
> -               goto retry_nowait;
> +               if (interrupted)
> +                       goto retry_new_seq;
> +               goto session_recover;
>         default:
>                 /* Just update the slot sequence no. */
>                 slot->seq_done = 1;
> @@ -781,6 +785,11 @@ static int nfs41_sequence_process(struct rpc_task *task,
>         dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
>  out_noaction:
>         return ret;
> +session_recover:
> +       nfs4_schedule_session_recovery(session, res->sr_status);
> +       goto retry_nowait;
> +retry_new_seq:
> +       ++slot->seq_nr;
>  retry_nowait:
>         if (rpc_restart_call_prepare(task)) {
>                 nfs41_sequence_free_slot(res);
> @@ -857,6 +866,17 @@ static const struct rpc_call_ops nfs41_call_sync_ops = {
>         .rpc_call_done = nfs41_call_sync_done,
>  };
>
> +static void
> +nfs4_sequence_process_interrupted(struct nfs_client *client,
> +               struct nfs4_slot *slot, struct rpc_cred *cred)
> +{
> +       struct rpc_task *task;
> +
> +       task = _nfs41_proc_sequence(client, cred, slot, true);
> +       if (!IS_ERR(task))
> +               rpc_put_task_async(task);
> +}
> +
>  #else  /* !CONFIG_NFS_V4_1 */
>
>  static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
> @@ -877,9 +897,32 @@ int nfs4_sequence_done(struct rpc_task *task,
>  }
>  EXPORT_SYMBOL_GPL(nfs4_sequence_done);
>
> +static void
> +nfs4_sequence_process_interrupted(struct nfs_client *client,
> +               struct nfs4_slot *slot, struct rpc_cred *cred)
> +{
> +       WARN_ON_ONCE(1);
> +       slot->interrupted = 0;
> +}
> +
>  #endif /* !CONFIG_NFS_V4_1 */
>
> -int nfs4_setup_sequence(const struct nfs_client *client,
> +static
> +void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args,
> +               struct nfs4_sequence_res *res,
> +               struct nfs4_slot *slot)
> +{
> +       slot->privileged = args->sa_privileged ? 1 : 0;

The oops is because the async sequence path passes NULL as the slot,
which ends up calling this function and dereferencing "slot".

Btw, no operations are needed to actually trigger this. Just do the
mount and wait for the first renew.

Don't know if perhaps the fix is to add a check like:

	if (slot)
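
For what it's worth, a minimal sketch of what that guard might look
like (untested, and assuming nfs4_init_sequence() has already cleared
args->sa_slot and res->sr_slot, so returning early is safe):

	static
	void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args,
			struct nfs4_sequence_res *res,
			struct nfs4_slot *slot)
	{
		/* nfs41_proc_async_sequence() passes slot == NULL; bail
		 * out before dereferencing it. */
		if (!slot)
			return;
		slot->privileged = args->sa_privileged ? 1 : 0;
		args->sa_slot = slot;

		res->sr_slot = slot;
		res->sr_timestamp = jiffies;
		res->sr_status_flags = 0;
		res->sr_status = 1;
	}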


> +       args->sa_slot = slot;
> +
> +       res->sr_slot = slot;
> +       res->sr_timestamp = jiffies;
> +       res->sr_status_flags = 0;
> +       res->sr_status = 1;
> +
> +}
> +
> +int nfs4_setup_sequence(struct nfs_client *client,
>                         struct nfs4_sequence_args *args,
>                         struct nfs4_sequence_res *res,
>                         struct rpc_task *task)
> @@ -897,29 +940,28 @@ int nfs4_setup_sequence(const struct nfs_client *client,
>                 task->tk_timeout = 0;
>         }
>
> -       spin_lock(&tbl->slot_tbl_lock);
> -       /* The state manager will wait until the slot table is empty */
> -       if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
> -               goto out_sleep;
> +       for (;;) {
> +               spin_lock(&tbl->slot_tbl_lock);
> +               /* The state manager will wait until the slot table is empty */
> +               if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
> +                       goto out_sleep;
> +
> +               slot = nfs4_alloc_slot(tbl);
> +               if (IS_ERR(slot)) {
> +                       /* Try again in 1/4 second */
> +                       if (slot == ERR_PTR(-ENOMEM))
> +                               task->tk_timeout = HZ >> 2;
> +                       goto out_sleep;
> +               }
> +               spin_unlock(&tbl->slot_tbl_lock);
>
> -       slot = nfs4_alloc_slot(tbl);
> -       if (IS_ERR(slot)) {
> -               /* Try again in 1/4 second */
> -               if (slot == ERR_PTR(-ENOMEM))
> -                       task->tk_timeout = HZ >> 2;
> -               goto out_sleep;
> +               if (likely(!slot->interrupted))
> +                       break;
> +               nfs4_sequence_process_interrupted(client,
> +                               slot, task->tk_msg.rpc_cred);
>         }
> -       spin_unlock(&tbl->slot_tbl_lock);
>
> -       slot->privileged = args->sa_privileged ? 1 : 0;
> -       args->sa_slot = slot;
> -
> -       res->sr_slot = slot;
> -       if (session) {
> -               res->sr_timestamp = jiffies;
> -               res->sr_status_flags = 0;
> -               res->sr_status = 1;
> -       }
> +       nfs4_sequence_attach_slot(args, res, slot);
>
>         trace_nfs4_setup_sequence(session, args);
>  out_start:
> @@ -8135,6 +8177,7 @@ static const struct rpc_call_ops nfs41_sequence_ops = {
>
>  static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
>                 struct rpc_cred *cred,
> +               struct nfs4_slot *slot,
>                 bool is_privileged)
>  {
>         struct nfs4_sequence_data *calldata;
> @@ -8148,15 +8191,18 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
>                 .callback_ops = &nfs41_sequence_ops,
>                 .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
>         };
> +       struct rpc_task *ret;
>
> +       ret = ERR_PTR(-EIO);
>         if (!atomic_inc_not_zero(&clp->cl_count))
> -               return ERR_PTR(-EIO);
> +               goto out_err;
> +
> +       ret = ERR_PTR(-ENOMEM);
>         calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
> -       if (calldata == NULL) {
> -               nfs_put_client(clp);
> -               return ERR_PTR(-ENOMEM);
> -       }
> +       if (calldata == NULL)
> +               goto out_put_clp;
>         nfs4_init_sequence(&calldata->args, &calldata->res, 0);
> +       nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot);
>         if (is_privileged)
>                 nfs4_set_sequence_privileged(&calldata->args);
>         msg.rpc_argp = &calldata->args;
> @@ -8164,7 +8210,15 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
>         calldata->clp = clp;
>         task_setup_data.callback_data = calldata;
>
> -       return rpc_run_task(&task_setup_data);
> +       ret = rpc_run_task(&task_setup_data);
> +       if (IS_ERR(ret))
> +               goto out_err;
> +       return ret;
> +out_put_clp:
> +       nfs_put_client(clp);
> +out_err:
> +       nfs41_release_slot(slot);
> +       return ret;
>  }
>
>  static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags)
> @@ -8174,7 +8228,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
>
>         if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
>                 return -EAGAIN;
> -       task = _nfs41_proc_sequence(clp, cred, false);
> +       task = _nfs41_proc_sequence(clp, cred, NULL, false);
>         if (IS_ERR(task))
>                 ret = PTR_ERR(task);
>         else
> @@ -8188,7 +8242,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
>         struct rpc_task *task;
>         int ret;
>
> -       task = _nfs41_proc_sequence(clp, cred, true);
> +       task = _nfs41_proc_sequence(clp, cred, NULL, true);
>         if (IS_ERR(task)) {
>                 ret = PTR_ERR(task);
>                 goto out;
> --
> 2.13.6
>
Trond Myklebust Oct. 19, 2017, 6:52 p.m. UTC | #8
On Thu, 2017-10-19 at 14:33 -0400, Olga Kornievskaia wrote:
> On Wed, Oct 11, 2017 at 1:07 PM, Trond Myklebust
> <trond.myklebust@primarydata.com> wrote:
> > [...]
> > +static
> > +void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args,
> > +               struct nfs4_sequence_res *res,
> > +               struct nfs4_slot *slot)
> > +{
> > +       slot->privileged = args->sa_privileged ? 1 : 0;
> 
> The oops is because the async sequence path passes NULL as the slot,
> which ends up calling this function and dereferencing "slot".
> 
> Btw, no operations are needed to actually trigger this. Just do the
> mount and wait for the first renew.
> 
> Don't know if perhaps the fix is to add a check like:
> 
> 	if (slot)

Doh! Yes, that was a last minute cleanup. I thought I did the same for
both nfs4_sequence_attach_slot() and nfs41_release_slot(), but it looks
as if I only fixed up the latter.

Thanks!
  Trond
-- 
Trond Myklebust
Linux NFS client maintainer, PrimaryData
trond.myklebust@primarydata.com

Olga Kornievskaia May 22, 2018, 9:28 p.m. UTC | #9
Hi Trond/Anna,

Who can request that this patch be included in -stable, and how?

This patch addresses a possible data corruption issue: if the
interrupted slot is re-used by an operation of the same type (one WRITE
replayed in place of a different WRITE), the client can mistake the
cached reply for a new one.

Thank you.
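
To spell out the failure mode the commit message describes, here is a
hypothetical sketch of the lenient server-side replay-cache lookup
(illustrative pseudocode only; the struct and function names are
invented, not taken from any real server):

	/* Hypothetical replay-cache lookup keyed only on slot + seq_nr. */
	struct cached_reply *session_lookup_reply(struct session *s,
						  u32 slotid, u32 seq_nr,
						  const struct compound *req)
	{
		struct slot *slot = &s->slots[slotid];

		if (seq_nr != slot->seq_nr)
			return NULL;	/* not a retry; process normally */
		/*
		 * A strict server would also verify that 'req' matches the
		 * cached request and return NFS4ERR_SEQ_FALSE_RETRY when it
		 * does not.  A server that skips that check replays the old
		 * reply, which the client can mistake for a reply to its
		 * new, different request (e.g. one WRITE for another).
		 */
		return slot->cached_reply;
	}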

On Wed, Oct 11, 2017 at 1:07 PM, Trond Myklebust
<trond.myklebust@primarydata.com> wrote:
> [...]

diff mbox

Patch

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ac4f10b7f6c1..b547d935aaf0 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -464,7 +464,7 @@  extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
 extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
 extern void nfs_release_seqid(struct nfs_seqid *seqid);
 extern void nfs_free_seqid(struct nfs_seqid *seqid);
-extern int nfs4_setup_sequence(const struct nfs_client *client,
+extern int nfs4_setup_sequence(struct nfs_client *client,
 				struct nfs4_sequence_args *args,
 				struct nfs4_sequence_res *res,
 				struct rpc_task *task);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f90090e8c959..caa72efe02c9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -96,6 +96,10 @@  static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
 			    struct nfs_open_context *ctx, struct nfs4_label *ilabel,
 			    struct nfs4_label *olabel);
 #ifdef CONFIG_NFS_V4_1
+static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
+		struct rpc_cred *cred,
+		struct nfs4_slot *slot,
+		bool is_privileged);
 static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
 		struct rpc_cred *);
 static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
@@ -644,13 +648,14 @@  static int nfs40_sequence_done(struct rpc_task *task,
 
 #if defined(CONFIG_NFS_V4_1)
 
-static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
+static void nfs41_release_slot(struct nfs4_slot *slot)
 {
 	struct nfs4_session *session;
 	struct nfs4_slot_table *tbl;
-	struct nfs4_slot *slot = res->sr_slot;
 	bool send_new_highest_used_slotid = false;
 
+	if (!slot)
+		return;
 	tbl = slot->table;
 	session = tbl->session;
 
@@ -676,13 +681,18 @@  static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
 		send_new_highest_used_slotid = false;
 out_unlock:
 	spin_unlock(&tbl->slot_tbl_lock);
-	res->sr_slot = NULL;
 	if (send_new_highest_used_slotid)
 		nfs41_notify_server(session->clp);
 	if (waitqueue_active(&tbl->slot_waitq))
 		wake_up_all(&tbl->slot_waitq);
 }
 
+static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
+{
+	nfs41_release_slot(res->sr_slot);
+	res->sr_slot = NULL;
+}
+
 static int nfs41_sequence_process(struct rpc_task *task,
 		struct nfs4_sequence_res *res)
 {
@@ -710,13 +720,6 @@  static int nfs41_sequence_process(struct rpc_task *task,
 	/* Check the SEQUENCE operation status */
 	switch (res->sr_status) {
 	case 0:
-		/* If previous op on slot was interrupted and we reused
-		 * the seq# and got a reply from the cache, then retry
-		 */
-		if (task->tk_status == -EREMOTEIO && interrupted) {
-			++slot->seq_nr;
-			goto retry_nowait;
-		}
 		/* Update the slot's sequence and clientid lease timer */
 		slot->seq_done = 1;
 		clp = session->clp;
@@ -750,16 +753,16 @@  static int nfs41_sequence_process(struct rpc_task *task,
 		 * The slot id we used was probably retired. Try again
 		 * using a different slot id.
 		 */
+		if (slot->seq_nr < slot->table->target_highest_slotid)
+			goto session_recover;
 		goto retry_nowait;
 	case -NFS4ERR_SEQ_MISORDERED:
 		/*
 		 * Was the last operation on this sequence interrupted?
 		 * If so, retry after bumping the sequence number.
 		 */
-		if (interrupted) {
-			++slot->seq_nr;
-			goto retry_nowait;
-		}
+		if (interrupted)
+			goto retry_new_seq;
 		/*
 		 * Could this slot have been previously retired?
 		 * If so, then the server may be expecting seq_nr = 1!
@@ -768,10 +771,11 @@  static int nfs41_sequence_process(struct rpc_task *task,
 			slot->seq_nr = 1;
 			goto retry_nowait;
 		}
-		break;
+		goto session_recover;
 	case -NFS4ERR_SEQ_FALSE_RETRY:
-		++slot->seq_nr;
-		goto retry_nowait;
+		if (interrupted)
+			goto retry_new_seq;
+		goto session_recover;
 	default:
 		/* Just update the slot sequence no. */
 		slot->seq_done = 1;
@@ -781,6 +785,11 @@  static int nfs41_sequence_process(struct rpc_task *task,
 	dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
 out_noaction:
 	return ret;
+session_recover:
+	nfs4_schedule_session_recovery(session, res->sr_status);
+	goto retry_nowait;
+retry_new_seq:
+	++slot->seq_nr;
 retry_nowait:
 	if (rpc_restart_call_prepare(task)) {
 		nfs41_sequence_free_slot(res);
@@ -857,6 +866,17 @@  static const struct rpc_call_ops nfs41_call_sync_ops = {
 	.rpc_call_done = nfs41_call_sync_done,
 };
 
+static void
+nfs4_sequence_process_interrupted(struct nfs_client *client,
+		struct nfs4_slot *slot, struct rpc_cred *cred)
+{
+	struct rpc_task *task;
+
+	task = _nfs41_proc_sequence(client, cred, slot, true);
+	if (!IS_ERR(task))
+		rpc_put_task_async(task);
+}
+
 #else	/* !CONFIG_NFS_V4_1 */
 
 static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
@@ -877,9 +897,32 @@  int nfs4_sequence_done(struct rpc_task *task,
 }
 EXPORT_SYMBOL_GPL(nfs4_sequence_done);
 
+static void
+nfs4_sequence_process_interrupted(struct nfs_client *client,
+		struct nfs4_slot *slot, struct rpc_cred *cred)
+{
+	WARN_ON_ONCE(1);
+	slot->interrupted = 0;
+}
+
 #endif	/* !CONFIG_NFS_V4_1 */
 
-int nfs4_setup_sequence(const struct nfs_client *client,
+static
+void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args,
+		struct nfs4_sequence_res *res,
+		struct nfs4_slot *slot)
+{
+	slot->privileged = args->sa_privileged ? 1 : 0;
+	args->sa_slot = slot;
+
+	res->sr_slot = slot;
+	res->sr_timestamp = jiffies;
+	res->sr_status_flags = 0;
+	res->sr_status = 1;
+
+}
+
+int nfs4_setup_sequence(struct nfs_client *client,
 			struct nfs4_sequence_args *args,
 			struct nfs4_sequence_res *res,
 			struct rpc_task *task)
@@ -897,29 +940,28 @@  int nfs4_setup_sequence(const struct nfs_client *client,
 		task->tk_timeout = 0;
 	}
 
-	spin_lock(&tbl->slot_tbl_lock);
-	/* The state manager will wait until the slot table is empty */
-	if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
-		goto out_sleep;
+	for (;;) {
+		spin_lock(&tbl->slot_tbl_lock);
+		/* The state manager will wait until the slot table is empty */
+		if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
+			goto out_sleep;
+
+		slot = nfs4_alloc_slot(tbl);
+		if (IS_ERR(slot)) {
+			/* Try again in 1/4 second */
+			if (slot == ERR_PTR(-ENOMEM))
+				task->tk_timeout = HZ >> 2;
+			goto out_sleep;
+		}
+		spin_unlock(&tbl->slot_tbl_lock);
 
-	slot = nfs4_alloc_slot(tbl);
-	if (IS_ERR(slot)) {
-		/* Try again in 1/4 second */
-		if (slot == ERR_PTR(-ENOMEM))
-			task->tk_timeout = HZ >> 2;
-		goto out_sleep;
+		if (likely(!slot->interrupted))
+			break;
+		nfs4_sequence_process_interrupted(client,
+				slot, task->tk_msg.rpc_cred);
 	}
-	spin_unlock(&tbl->slot_tbl_lock);
 
-	slot->privileged = args->sa_privileged ? 1 : 0;
-	args->sa_slot = slot;
-
-	res->sr_slot = slot;
-	if (session) {
-		res->sr_timestamp = jiffies;
-		res->sr_status_flags = 0;
-		res->sr_status = 1;
-	}
+	nfs4_sequence_attach_slot(args, res, slot);
 
 	trace_nfs4_setup_sequence(session, args);
 out_start:
@@ -8135,6 +8177,7 @@  static const struct rpc_call_ops nfs41_sequence_ops = {
 
 static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
 		struct rpc_cred *cred,
+		struct nfs4_slot *slot,
 		bool is_privileged)
 {
 	struct nfs4_sequence_data *calldata;
@@ -8148,15 +8191,18 @@  static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
 		.callback_ops = &nfs41_sequence_ops,
 		.flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
 	};
+	struct rpc_task *ret;
 
+	ret = ERR_PTR(-EIO);
 	if (!atomic_inc_not_zero(&clp->cl_count))
-		return ERR_PTR(-EIO);
+		goto out_err;
+
+	ret = ERR_PTR(-ENOMEM);
 	calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
-	if (calldata == NULL) {
-		nfs_put_client(clp);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (calldata == NULL)
+		goto out_put_clp;
 	nfs4_init_sequence(&calldata->args, &calldata->res, 0);
+	nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot);
 	if (is_privileged)
 		nfs4_set_sequence_privileged(&calldata->args);
 	msg.rpc_argp = &calldata->args;
@@ -8164,7 +8210,15 @@  static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
 	calldata->clp = clp;
 	task_setup_data.callback_data = calldata;
 
-	return rpc_run_task(&task_setup_data);
+	ret = rpc_run_task(&task_setup_data);
+	if (IS_ERR(ret))
+		goto out_err;
+	return ret;
+out_put_clp:
+	nfs_put_client(clp);
+out_err:
+	nfs41_release_slot(slot);
+	return ret;
 }
 
 static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags)
@@ -8174,7 +8228,7 @@  static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
 
 	if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
 		return -EAGAIN;
-	task = _nfs41_proc_sequence(clp, cred, false);
+	task = _nfs41_proc_sequence(clp, cred, NULL, false);
 	if (IS_ERR(task))
 		ret = PTR_ERR(task);
 	else
@@ -8188,7 +8242,7 @@  static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
 	struct rpc_task *task;
 	int ret;
 
-	task = _nfs41_proc_sequence(clp, cred, true);
+	task = _nfs41_proc_sequence(clp, cred, NULL, true);
 	if (IS_ERR(task)) {
 		ret = PTR_ERR(task);
 		goto out;