diff mbox series

[6/8] lustre: fld: fld client lookup should retry

Message ID 1564022647-17351-7-git-send-email-jsimmons@infradead.org (mailing list archive)
State New, archived
Headers show
Series lustre: some old patches from whamcloud tree | expand

Commit Message

James Simmons July 25, 2019, 2:44 a.m. UTC
From: wang di <di.wang@intel.com>

If an FLD client lookup fails because the remote target
has been shut down (or deactivated), it should retry on another
target; otherwise the application will fail.

The FLD client should also stop retrying if the import
has been deactivated.

WC-bug-id: https://jira.whamcloud.com/browse/LU-6419
Lustre-commit: 3ededde903c92f8485cae0dc9f958f194ff0b140
Signed-off-by: wang di <di.wang@intel.com>
Reviewed-on: http://review.whamcloud.com/14313
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Fan Yong <fan.yong@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
---
 fs/lustre/fld/fld_request.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

Comments

Andreas Dilger Aug. 14, 2019, 4:58 p.m. UTC | #1
This is definitely client code. 

Cheers, Andreas

> On Jul 24, 2019, at 19:44, James Simmons <jsimmons@infradead.org> wrote:
> 
> From: wang di <di.wang@intel.com>
> 
> If FLD client lookup fails because of the remote target
> is shutdown (or deactive), it should retry another target,
> otherwise it will cause the application failure.
> 
> And FLD client should stop retry if the import has
> been deactive.
> 
> WC-bug-id: https://jira.whamcloud.com/browse/LU-6419
> Lustre-commit: 3ededde903c92f8485cae0dc9f958f194ff0b140
> Signed-off-by: wang di <di.wang@intel.com>
> Reviewed-on: http://review.whamcloud.com/14313
> Reviewed-by: Lai Siyao <lai.siyao@intel.com>
> Reviewed-by: Fan Yong <fan.yong@intel.com>
> Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
> Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
> ---
> fs/lustre/fld/fld_request.c | 23 +++++++++++++++++++++--
> 1 file changed, 21 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/lustre/fld/fld_request.c b/fs/lustre/fld/fld_request.c
> index 60e7105..dfd4ae9 100644
> --- a/fs/lustre/fld/fld_request.c
> +++ b/fs/lustre/fld/fld_request.c
> @@ -367,7 +367,7 @@ int fld_client_rpc(struct obd_export *exp,
>    rc = ptlrpc_queue_wait(req);
>    obd_put_request_slot(&exp->exp_obd->u.cli);
>    if (rc != 0) {
> -        if (imp->imp_state != LUSTRE_IMP_CLOSED) {
> +        if (imp->imp_state != LUSTRE_IMP_CLOSED && !imp->imp_deactive) {
>            /* Since LWP is not replayable, so it will keep
>             * trying unless umount happens, otherwise it would
>             * cause unecessary failure of the application.
> @@ -404,6 +404,7 @@ int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds,
> {
>    struct lu_seq_range res = { 0 };
>    struct lu_fld_target *target;
> +    struct lu_fld_target *origin;
>    int rc;
> 
>    rc = fld_cache_lookup(fld->lcf_cache, seq, &res);
> @@ -415,7 +416,8 @@ int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds,
>    /* Can not find it in the cache */
>    target = fld_client_get_target(fld, seq);
>    LASSERT(target);
> -
> +    origin = target;
> +again:
>    CDEBUG(D_INFO,
>           "%s: Lookup fld entry (seq: %#llx) on target %s (idx %llu)\n",
>           fld->lcf_name, seq, fld_target_name(target), target->ft_idx);
> @@ -424,6 +426,23 @@ int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds,
>    fld_range_set_type(&res, flags);
>    rc = fld_client_rpc(target->ft_exp, &res, FLD_QUERY, NULL);
> 
> +    if (rc == -ESHUTDOWN) {
> +        /* If fld lookup failed because the target has been shutdown,
> +         * then try next target in the list, until trying all targets
> +         * or fld lookup succeeds
> +         */
> +        spin_lock(&fld->lcf_lock);
> +        if (target->ft_chain.next == fld->lcf_targets.prev)
> +            target = list_entry(fld->lcf_targets.next,
> +                        struct lu_fld_target, ft_chain);
> +        else
> +            target = list_entry(target->ft_chain.next,
> +                         struct lu_fld_target,
> +                         ft_chain);
> +        spin_unlock(&fld->lcf_lock);
> +        if (target != origin)
> +            goto again;
> +    }
>    if (rc == 0) {
>        *mds = res.lsr_index;
> 
> -- 
> 1.8.3.1
>
diff mbox series

Patch

diff --git a/fs/lustre/fld/fld_request.c b/fs/lustre/fld/fld_request.c
index 60e7105..dfd4ae9 100644
--- a/fs/lustre/fld/fld_request.c
+++ b/fs/lustre/fld/fld_request.c
@@ -367,7 +367,7 @@  int fld_client_rpc(struct obd_export *exp,
 	rc = ptlrpc_queue_wait(req);
 	obd_put_request_slot(&exp->exp_obd->u.cli);
 	if (rc != 0) {
-		if (imp->imp_state != LUSTRE_IMP_CLOSED) {
+		if (imp->imp_state != LUSTRE_IMP_CLOSED && !imp->imp_deactive) {
 			/* Since LWP is not replayable, so it will keep
 			 * trying unless umount happens, otherwise it would
 			 * cause unecessary failure of the application.
@@ -404,6 +404,7 @@  int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds,
 {
 	struct lu_seq_range res = { 0 };
 	struct lu_fld_target *target;
+	struct lu_fld_target *origin;
 	int rc;
 
 	rc = fld_cache_lookup(fld->lcf_cache, seq, &res);
@@ -415,7 +416,8 @@  int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds,
 	/* Can not find it in the cache */
 	target = fld_client_get_target(fld, seq);
 	LASSERT(target);
-
+	origin = target;
+again:
 	CDEBUG(D_INFO,
 	       "%s: Lookup fld entry (seq: %#llx) on target %s (idx %llu)\n",
 	       fld->lcf_name, seq, fld_target_name(target), target->ft_idx);
@@ -424,6 +426,23 @@  int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds,
 	fld_range_set_type(&res, flags);
 	rc = fld_client_rpc(target->ft_exp, &res, FLD_QUERY, NULL);
 
+	if (rc == -ESHUTDOWN) {
+		/* If fld lookup failed because the target has been shutdown,
+		 * then try next target in the list, until trying all targets
+		 * or fld lookup succeeds
+		 */
+		spin_lock(&fld->lcf_lock);
+		if (target->ft_chain.next == fld->lcf_targets.prev)
+			target = list_entry(fld->lcf_targets.next,
+					    struct lu_fld_target, ft_chain);
+		else
+			target = list_entry(target->ft_chain.next,
+						 struct lu_fld_target,
+						 ft_chain);
+		spin_unlock(&fld->lcf_lock);
+		if (target != origin)
+			goto again;
+	}
 	if (rc == 0) {
 		*mds = res.lsr_index;