diff mbox series

[3/3] xfs_repair: add post-phase error injection points

Message ID 161319522176.422860.4620061453225202229.stgit@magnolia (mailing list archive)
State Superseded, archived
Headers show
Series xfs_repair: set needsrepair when dirtying filesystems | expand

Commit Message

Darrick J. Wong Feb. 13, 2021, 5:47 a.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Create an error injection point so that we can simulate repair failing
after a certain phase.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 repair/globals.c    |    3 +++
 repair/globals.h    |    3 +++
 repair/progress.c   |    3 +++
 repair/xfs_repair.c |    4 ++++
 4 files changed, 13 insertions(+)

Comments

Brian Foster Feb. 16, 2021, 11:58 a.m. UTC | #1
On Fri, Feb 12, 2021 at 09:47:01PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Create an error injection point so that we can simulate repair failing
> after a certain phase.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  repair/globals.c    |    3 +++
>  repair/globals.h    |    3 +++
>  repair/progress.c   |    3 +++
>  repair/xfs_repair.c |    4 ++++
>  4 files changed, 13 insertions(+)
> 
> 
...
> diff --git a/repair/progress.c b/repair/progress.c
> index e5a9c1ef..5bbe58ec 100644
> --- a/repair/progress.c
> +++ b/repair/progress.c
> @@ -410,6 +410,9 @@ timestamp(int end, int phase, char *buf)
>  		current_phase = phase;
>  	}
>  
> +	if (fail_after_phase && phase == fail_after_phase)
> +		kill(getpid(), SIGKILL);
> +

It seems a little hacky to bury this in timestamp(). Perhaps we should
at least check for end == PHASE_END (even though PHASE_START is
currently only used in one place). Otherwise seems reasonable..

Brian

>  	if (buf) {
>  		tmp = localtime((const time_t *)&now);
>  		sprintf(buf, _("%02d:%02d:%02d"), tmp->tm_hour, tmp->tm_min, tmp->tm_sec);
> diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> index 12e319ae..6b60b8f4 100644
> --- a/repair/xfs_repair.c
> +++ b/repair/xfs_repair.c
> @@ -362,6 +362,10 @@ process_args(int argc, char **argv)
>  
>  	if (report_corrected && no_modify)
>  		usage();
> +
> +	p = getenv("XFS_REPAIR_FAIL_AFTER_PHASE");
> +	if (p)
> +		fail_after_phase = (int)strtol(p, NULL, 0);
>  }
>  
>  void __attribute__((noreturn))
>
Darrick J. Wong Feb. 18, 2021, 4:47 a.m. UTC | #2
On Tue, Feb 16, 2021 at 06:58:39AM -0500, Brian Foster wrote:
> On Fri, Feb 12, 2021 at 09:47:01PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Create an error injection point so that we can simulate repair failing
> > after a certain phase.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  repair/globals.c    |    3 +++
> >  repair/globals.h    |    3 +++
> >  repair/progress.c   |    3 +++
> >  repair/xfs_repair.c |    4 ++++
> >  4 files changed, 13 insertions(+)
> > 
> > 
> ...
> > diff --git a/repair/progress.c b/repair/progress.c
> > index e5a9c1ef..5bbe58ec 100644
> > --- a/repair/progress.c
> > +++ b/repair/progress.c
> > @@ -410,6 +410,9 @@ timestamp(int end, int phase, char *buf)
> >  		current_phase = phase;
> >  	}
> >  
> > +	if (fail_after_phase && phase == fail_after_phase)
> > +		kill(getpid(), SIGKILL);
> > +
> 
> It seems a little hacky to bury this in timestamp(). Perhaps we should
> at least check for end == PHASE_END (even though PHASE_START is
> currently only used in one place). Otherwise seems reasonable..

Yeah, I don't know of a better place to put it -- adding another call
after every timestamp() just seems like clutter.

Will fix it to check for PHASE_END, though.

Thanks for reading. :)

--D

> Brian
> 
> >  	if (buf) {
> >  		tmp = localtime((const time_t *)&now);
> >  		sprintf(buf, _("%02d:%02d:%02d"), tmp->tm_hour, tmp->tm_min, tmp->tm_sec);
> > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > index 12e319ae..6b60b8f4 100644
> > --- a/repair/xfs_repair.c
> > +++ b/repair/xfs_repair.c
> > @@ -362,6 +362,10 @@ process_args(int argc, char **argv)
> >  
> >  	if (report_corrected && no_modify)
> >  		usage();
> > +
> > +	p = getenv("XFS_REPAIR_FAIL_AFTER_PHASE");
> > +	if (p)
> > +		fail_after_phase = (int)strtol(p, NULL, 0);
> >  }
> >  
> >  void __attribute__((noreturn))
> > 
>
Brian Foster Feb. 18, 2021, 1:02 p.m. UTC | #3
On Wed, Feb 17, 2021 at 08:47:29PM -0800, Darrick J. Wong wrote:
> On Tue, Feb 16, 2021 at 06:58:39AM -0500, Brian Foster wrote:
> > On Fri, Feb 12, 2021 at 09:47:01PM -0800, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <djwong@kernel.org>
> > > 
> > > Create an error injection point so that we can simulate repair failing
> > > after a certain phase.
> > > 
> > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > ---
> > >  repair/globals.c    |    3 +++
> > >  repair/globals.h    |    3 +++
> > >  repair/progress.c   |    3 +++
> > >  repair/xfs_repair.c |    4 ++++
> > >  4 files changed, 13 insertions(+)
> > > 
> > > 
> > ...
> > > diff --git a/repair/progress.c b/repair/progress.c
> > > index e5a9c1ef..5bbe58ec 100644
> > > --- a/repair/progress.c
> > > +++ b/repair/progress.c
> > > @@ -410,6 +410,9 @@ timestamp(int end, int phase, char *buf)
> > >  		current_phase = phase;
> > >  	}
> > >  
> > > +	if (fail_after_phase && phase == fail_after_phase)
> > > +		kill(getpid(), SIGKILL);
> > > +
> > 
> > It seems a little hacky to bury this in timestamp(). Perhaps we should
> > at least check for end == PHASE_END (even though PHASE_START is
> > currently only used in one place). Otherwise seems reasonable..
> 
> Yeah, I don't know of a better place to put it -- adding another call
> after every timestamp() just seems like clutter.
> 

I was thinking that factoring timestamp() into a new post-phase helper
seemed a relatively simple cleanup.

Brian

> Will fix it to check for PHASE_END, though.
> 
> Thanks for reading. :)
> 
> --D
> 
> > Brian
> > 
> > >  	if (buf) {
> > >  		tmp = localtime((const time_t *)&now);
> > >  		sprintf(buf, _("%02d:%02d:%02d"), tmp->tm_hour, tmp->tm_min, tmp->tm_sec);
> > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > > index 12e319ae..6b60b8f4 100644
> > > --- a/repair/xfs_repair.c
> > > +++ b/repair/xfs_repair.c
> > > @@ -362,6 +362,10 @@ process_args(int argc, char **argv)
> > >  
> > >  	if (report_corrected && no_modify)
> > >  		usage();
> > > +
> > > +	p = getenv("XFS_REPAIR_FAIL_AFTER_PHASE");
> > > +	if (p)
> > > +		fail_after_phase = (int)strtol(p, NULL, 0);
> > >  }
> > >  
> > >  void __attribute__((noreturn))
> > > 
> > 
>
Darrick J. Wong Feb. 18, 2021, 6:01 p.m. UTC | #4
On Thu, Feb 18, 2021 at 08:02:28AM -0500, Brian Foster wrote:
> On Wed, Feb 17, 2021 at 08:47:29PM -0800, Darrick J. Wong wrote:
> > On Tue, Feb 16, 2021 at 06:58:39AM -0500, Brian Foster wrote:
> > > On Fri, Feb 12, 2021 at 09:47:01PM -0800, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <djwong@kernel.org>
> > > > 
> > > > Create an error injection point so that we can simulate repair failing
> > > > after a certain phase.
> > > > 
> > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > > ---
> > > >  repair/globals.c    |    3 +++
> > > >  repair/globals.h    |    3 +++
> > > >  repair/progress.c   |    3 +++
> > > >  repair/xfs_repair.c |    4 ++++
> > > >  4 files changed, 13 insertions(+)
> > > > 
> > > > 
> > > ...
> > > > diff --git a/repair/progress.c b/repair/progress.c
> > > > index e5a9c1ef..5bbe58ec 100644
> > > > --- a/repair/progress.c
> > > > +++ b/repair/progress.c
> > > > @@ -410,6 +410,9 @@ timestamp(int end, int phase, char *buf)
> > > >  		current_phase = phase;
> > > >  	}
> > > >  
> > > > +	if (fail_after_phase && phase == fail_after_phase)
> > > > +		kill(getpid(), SIGKILL);
> > > > +
> > > 
> > > It seems a little hacky to bury this in timestamp(). Perhaps we should
> > > at least check for end == PHASE_END (even though PHASE_START is
> > > currently only used in one place). Otherwise seems reasonable..
> > 
> > Yeah, I don't know of a better place to put it -- adding another call
> > after every timestamp() just seems like clutter.
> > 
> 
> I was thinking that factoring timestamp() into a new post-phase helper
> seemed a relatively simple cleanup.

Ok, will do.

--D

> Brian
> 
> > Will fix it to check for PHASE_END, though.
> > 
> > Thanks for reading. :)
> > 
> > --D
> > 
> > > Brian
> > > 
> > > >  	if (buf) {
> > > >  		tmp = localtime((const time_t *)&now);
> > > >  		sprintf(buf, _("%02d:%02d:%02d"), tmp->tm_hour, tmp->tm_min, tmp->tm_sec);
> > > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > > > index 12e319ae..6b60b8f4 100644
> > > > --- a/repair/xfs_repair.c
> > > > +++ b/repair/xfs_repair.c
> > > > @@ -362,6 +362,10 @@ process_args(int argc, char **argv)
> > > >  
> > > >  	if (report_corrected && no_modify)
> > > >  		usage();
> > > > +
> > > > +	p = getenv("XFS_REPAIR_FAIL_AFTER_PHASE");
> > > > +	if (p)
> > > > +		fail_after_phase = (int)strtol(p, NULL, 0);
> > > >  }
> > > >  
> > > >  void __attribute__((noreturn))
> > > > 
> > > 
> > 
>
diff mbox series

Patch

diff --git a/repair/globals.c b/repair/globals.c
index 110d98b6..537d068b 100644
--- a/repair/globals.c
+++ b/repair/globals.c
@@ -117,3 +117,6 @@  uint64_t	*prog_rpt_done;
 
 int		ag_stride;
 int		thread_count;
+
+/* If nonzero, simulate failure after this phase. */
+int		fail_after_phase;
diff --git a/repair/globals.h b/repair/globals.h
index 1d397b35..a9287320 100644
--- a/repair/globals.h
+++ b/repair/globals.h
@@ -162,4 +162,7 @@  extern uint64_t		*prog_rpt_done;
 extern int		ag_stride;
 extern int		thread_count;
 
+/* If nonzero, simulate failure after this phase. */
+extern int		fail_after_phase;
+
 #endif /* _XFS_REPAIR_GLOBAL_H */
diff --git a/repair/progress.c b/repair/progress.c
index e5a9c1ef..5bbe58ec 100644
--- a/repair/progress.c
+++ b/repair/progress.c
@@ -410,6 +410,9 @@  timestamp(int end, int phase, char *buf)
 		current_phase = phase;
 	}
 
+	if (fail_after_phase && phase == fail_after_phase)
+		kill(getpid(), SIGKILL);
+
 	if (buf) {
 		tmp = localtime((const time_t *)&now);
 		sprintf(buf, _("%02d:%02d:%02d"), tmp->tm_hour, tmp->tm_min, tmp->tm_sec);
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 12e319ae..6b60b8f4 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -362,6 +362,10 @@  process_args(int argc, char **argv)
 
 	if (report_corrected && no_modify)
 		usage();
+
+	p = getenv("XFS_REPAIR_FAIL_AFTER_PHASE");
+	if (p)
+		fail_after_phase = (int)strtol(p, NULL, 0);
 }
 
 void __attribute__((noreturn))