From patchwork Tue Jul 2 01:08:05 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13718814 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B444D9449 for ; Tue, 2 Jul 2024 01:08:05 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1719882485; cv=none; b=IPp90m06zv3noaE6mbR5yKNX54XkJgUZMYjk4Foi61ewNobD0YW8DrvaXIE4aZWrVXfnIOBaTxCVGsvVmCLAPl73xopwVesNk/TKlSdCu05Ln23u0naQ3MyFyhldWgHxcFOsc0mqY/m02jtgEc8wIvoMw22IlT+72KcFRx0OnaU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1719882485; c=relaxed/simple; bh=n11NG2CVVQ+P1caIP+uXp/q/qRWmoN6RakwN8CfseUI=; h=Date:Subject:From:To:Cc:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=HIcftevnkgGrR7qHSPZPej+8o41niiDSu85v5j1cywgRS7rjWgOz52lV0S1uoPfJAa5erMWV12hWBZpAMcU9pjB89L0ctiwWnWwqYwpKOIzHtfskZVdJ3Tqvc3+e5hy0avDFV4DHCcomh6Yr5XGs158uKU0Mf2Ifkk2hww6lyZ0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=qsq42Zh8; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="qsq42Zh8" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 8541AC116B1; Tue, 2 Jul 2024 01:08:05 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1719882485; bh=n11NG2CVVQ+P1caIP+uXp/q/qRWmoN6RakwN8CfseUI=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=qsq42Zh8chH2IhuqxniN39Gmxp1WoMkIt+DXHtIIpSznpU5lSrOI0Zy6jRbzITeX8 
7cYLG7u2m8+BkjPirriCb/tuTGpanjWseoX2TikUe5lmkxnBsA1o8dMyS5Z/sr4csn UcLBMVwc3XSizWvnu/78/QmYEexlQeXyLiXtlQ1JClzFOXvnAS6/pc/hzuT974AXGX mGd6YQmYYQdAoWgzk6HhpJ/COm2dQwVuB8UUv3EsFwLIH8aUExWKNm0GomNiIIRNhX 3aqW33YEFxlKlA0bYwbDt44anY0t0t9GErDNNETfSL2VPMsFpX0pB9Bx0ng+rvu+xD 1uiArLkwiaYEA== Date: Mon, 01 Jul 2024 18:08:05 -0700 Subject: [PATCH 1/5] xfs_scrub_all: encapsulate all the subprocess code in an object From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org, hch@lst.de Message-ID: <171988119829.2008718.8789883453476961638.stgit@frogsfrogsfrogs> In-Reply-To: <171988119806.2008718.11057954097670233571.stgit@frogsfrogsfrogs> References: <171988119806.2008718.11057954097670233571.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Move all the xfs_scrub subprocess handling code to an object so that we can contain all the details in a single place. This also simplifies the background state management. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- scrub/xfs_scrub_all.in | 68 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/scrub/xfs_scrub_all.in b/scrub/xfs_scrub_all.in index 9d5cbd2a6487..001c49a70128 100644 --- a/scrub/xfs_scrub_all.in +++ b/scrub/xfs_scrub_all.in @@ -78,15 +78,62 @@ def remove_killfunc(killfuncs, fn): except: pass -def run_killable(cmd, stdout, killfuncs): +class scrub_control(object): + '''Control object for xfs_scrub.''' + def __init__(self): + pass + + def start(self): + '''Start scrub and wait for it to complete. 
Returns -1 if the + service was not started, 0 if it succeeded, or 1 if it + failed.''' + assert False + + def stop(self): + '''Stop scrub.''' + assert False + +class scrub_subprocess(scrub_control): + '''Control object for xfs_scrub subprocesses.''' + def __init__(self, mnt, scrub_media): + cmd = ['@sbindir@/xfs_scrub'] + if 'SERVICE_MODE' in os.environ: + cmd += '@scrub_service_args@'.split() + cmd += '@scrub_args@'.split() + if scrub_media: + cmd += '-x' + cmd += [mnt] + self.cmdline = cmd + self.proc = None + + def start(self): + '''Start xfs_scrub and wait for it to complete. Returns -1 if + the service was not started, 0 if it succeeded, or 1 if it + failed.''' + try: + self.proc = subprocess.Popen(self.cmdline) + self.proc.wait() + except: + return -1 + + proc = self.proc + self.proc = None + return proc.returncode + + def stop(self): + '''Stop xfs_scrub.''' + if self.proc is not None: + self.proc.terminate() + +def run_subprocess(mnt, scrub_media, killfuncs): '''Run a killable program. Returns program retcode or -1 if we can't start it.''' try: - proc = subprocess.Popen(cmd, stdout = stdout) - killfuncs.add(proc.terminate) - proc.wait() - remove_killfunc(killfuncs, proc.terminate) - return proc.returncode + p = scrub_subprocess(mnt, scrub_media) + killfuncs.add(p.stop) + ret = p.start() + remove_killfunc(killfuncs, p.stop) + return ret except: return -1 @@ -188,14 +235,7 @@ def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs): # Invoke xfs_scrub manually if we're running in the foreground. # We also permit this if we're running as a cronjob where # systemd services are unavailable. 
- cmd = ['@sbindir@/xfs_scrub'] - if 'SERVICE_MODE' in os.environ: - cmd += '@scrub_service_args@'.split() - cmd += '@scrub_args@'.split() - if scrub_media: - cmd += '-x' - cmd += [mnt] - ret = run_killable(cmd, None, killfuncs) + ret = run_subprocess(mnt, scrub_media, killfuncs) if ret >= 0: print("Scrubbing %s done, (err=%d)" % (mnt, ret)) sys.stdout.flush() From patchwork Tue Jul 2 01:08:20 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13718815 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 998989449 for ; Tue, 2 Jul 2024 01:08:21 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1719882501; cv=none; b=Ho3drTvg7hB/889EvPpI+oC+dx++EPMkG6Mk+qnPzdmACbVoyrrlmYdvvUfPvEVX4sT5yhOCEXWHKUPWRGlRyLlGv9HlPibxJo46MbtTFDVj/EW2BFwWvKEaMjvCWegFDfxW0YNqFQfLdo7JhY7GUa1gT4YmxmoTl8Ulc+XdLOM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1719882501; c=relaxed/simple; bh=VteR6AAXsaSg/2sn6ntpPDot/EyOk5hjzMM8Iz/MnGg=; h=Date:Subject:From:To:Cc:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=LsGlDdGFpVxS0v+wxBujJEcU2UV4CCOipAMVYMEg5qmN6gpPCETZw1D1vJkAL2Z+Unbj3RHB+7u9XcWwSJWZc5w4w+omJtuHgmgnkEee5r0Z9oPRXu2lZ74fcGCw8DuVz8sygEahWKVCu9kDSveT58g8uVgIuVB10qgJg/wW23c= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=nIuTvDE8; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="nIuTvDE8" Received: by smtp.kernel.org 
(Postfix) with ESMTPSA id 27176C116B1; Tue, 2 Jul 2024 01:08:21 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1719882501; bh=VteR6AAXsaSg/2sn6ntpPDot/EyOk5hjzMM8Iz/MnGg=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=nIuTvDE8Ep8Gc9k6LQ1nMfxZaVpWhfX93LCVcyOSOa1gZhR1p7xJdJe9rdtar8aAL KeR+mL+P3v1A0GK8aNLE3SiyEcqzKh0UvAgaZryBzFYkSpfP4WCuzOLG5IU+uk+n/m yOIr5cJl5iX8tPuTInklpVVEUeEafHTJpyw6vd9IbWflEK7MjNzN/2tD0vgFDTPv1z 7bh1jvjWj8paBDqT4teu5FmImDRh44rF4CgeCRO5AK4DyJuVh4DwCVYl4+/Z+27kWO //1JNm4qcD+TbJF1P4n0gqTrSzJCIlBI36vib3u8LHn38bGcLzUbASKQ06MGCKRt1w JZCVB0AryY0xA== Date: Mon, 01 Jul 2024 18:08:20 -0700 Subject: [PATCH 2/5] xfs_scrub_all: encapsulate all the systemctl code in an object From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org, hch@lst.de Message-ID: <171988119844.2008718.7103597112377556797.stgit@frogsfrogsfrogs> In-Reply-To: <171988119806.2008718.11057954097670233571.stgit@frogsfrogsfrogs> References: <171988119806.2008718.11057954097670233571.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Move all the systemd service handling code to an object so that we can contain all the insanity^Wdetails in a single place. This also makes the killfuncs handling similar to starting background processes. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- scrub/xfs_scrub_all.in | 113 ++++++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 52 deletions(-) diff --git a/scrub/xfs_scrub_all.in b/scrub/xfs_scrub_all.in index 001c49a70128..09fedff9d965 100644 --- a/scrub/xfs_scrub_all.in +++ b/scrub/xfs_scrub_all.in @@ -149,63 +149,73 @@ def path_to_serviceunit(path, scrub_media): svcname = '@scrub_svcname@' cmd = ['systemd-escape', '--template', svcname, '--path', path] - try: - proc = subprocess.Popen(cmd, stdout = subprocess.PIPE) - proc.wait() - for line in proc.stdout: - return line.decode(sys.stdout.encoding).strip() - except: - return None + proc = subprocess.Popen(cmd, stdout = subprocess.PIPE) + proc.wait() + for line in proc.stdout: + return line.decode(sys.stdout.encoding).strip() -def systemctl_stop(unitname): - '''Stop a systemd unit.''' - cmd = ['systemctl', 'stop', unitname] - x = subprocess.Popen(cmd) - x.wait() +class scrub_service(scrub_control): + '''Control object for xfs_scrub systemd service.''' + def __init__(self, mnt, scrub_media): + self.unitname = path_to_serviceunit(mnt, scrub_media) -def systemctl_start(unitname, killfuncs): - '''Start a systemd unit and wait for it to complete.''' - stop_fn = None - cmd = ['systemctl', 'start', unitname] - try: - proc = subprocess.Popen(cmd, stdout = DEVNULL()) - stop_fn = lambda: systemctl_stop(unitname) - killfuncs.add(stop_fn) - proc.wait() - ret = proc.returncode - except: - if stop_fn is not None: - remove_killfunc(killfuncs, stop_fn) - return -1 + def wait(self, interval = 1): + '''Wait until the service finishes.''' - if ret != 1: - remove_killfunc(killfuncs, stop_fn) - return ret + # As of systemd 249, the is-active command returns any of the + # following states: active, reloading, inactive, failed, + # activating, deactivating, or maintenance. Apparently these + # strings are not localized. 
+ while True: + try: + for l in backtick(['systemctl', 'is-active', self.unitname]): + if l == 'failed': + return 1 + if l == 'inactive': + return 0 + except: + return -1 - # If systemctl-start returns 1, it's possible that the service failed - # or that dbus/systemd restarted and the client program lost its - # connection -- according to the systemctl man page, 1 means "unit not - # failed". - # - # Either way, we switch to polling the service status to try to wait - # for the service to end. As of systemd 249, the is-active command - # returns any of the following states: active, reloading, inactive, - # failed, activating, deactivating, or maintenance. Apparently these - # strings are not localized. - while True: + time.sleep(interval) + + def start(self): + '''Start the service and wait for it to complete. Returns -1 + if the service was not started, 0 if it succeeded, or 1 if it + failed.''' + cmd = ['systemctl', 'start', self.unitname] try: - for l in backtick(['systemctl', 'is-active', unitname]): - if l == 'failed': - remove_killfunc(killfuncs, stop_fn) - return 1 - if l == 'inactive': - remove_killfunc(killfuncs, stop_fn) - return 0 + proc = subprocess.Popen(cmd, stdout = DEVNULL()) + proc.wait() + ret = proc.returncode except: - remove_killfunc(killfuncs, stop_fn) return -1 - time.sleep(1) + if ret != 1: + return ret + + # If systemctl-start returns 1, it's possible that the service + # failed or that dbus/systemd restarted and the client program + # lost its connection -- according to the systemctl man page, 1 + # means "unit not failed". 
+ return self.wait() + + def stop(self): + '''Stop the service.''' + cmd = ['systemctl', 'stop', self.unitname] + x = subprocess.Popen(cmd) + x.wait() + +def run_service(mnt, scrub_media, killfuncs): + '''Run scrub as a service.''' + try: + svc = scrub_service(mnt, scrub_media) + except: + return -1 + + killfuncs.add(svc.stop) + retcode = svc.start() + remove_killfunc(killfuncs, svc.stop) + return retcode def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs): '''Run a scrub process.''' @@ -220,9 +230,8 @@ def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs): # Run per-mount systemd xfs_scrub service only if we ourselves # are running as a systemd service. - unitname = path_to_serviceunit(path, scrub_media) - if unitname is not None and 'SERVICE_MODE' in os.environ: - ret = systemctl_start(unitname, killfuncs) + if 'SERVICE_MODE' in os.environ: + ret = run_service(mnt, scrub_media, killfuncs) if ret == 0 or ret == 1: print("Scrubbing %s done, (err=%d)" % (mnt, ret)) sys.stdout.flush() From patchwork Tue Jul 2 01:08:36 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13718816 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id EB6819449 for ; Tue, 2 Jul 2024 01:08:36 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1719882517; cv=none; b=bFGJK1yNyBlRNuq6pkEqeBd9psXMd86mode3q21YqmIog+4HS+0RyZQ2f/XtnQOQmRPzMqW3/XRNdGHkAU29QfS8wYt9mKplWVS01fU8FmxVCbPtppwvAzugV0Hu6DnE1CKqjNofhj3E7tIcxVacVP/yeMSmlooF0Ov2RbXpJTM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1719882517; c=relaxed/simple; bh=XaMRSo9Pz7LYMXTUPvlmu4Q5q5M1jtivMOSW50j5xKI=; h=Date:Subject:From:To:Cc:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=LJhyVgT9oa0EyOPH5fYaYBifceBIKqooFE1UVFCE4qZ1gfHKt+pNLA1eSptZcn8lEK4UoFCPtKD46L9pgtKyT3wJneXeuL70uTN91mpN3/+lPuJxwzd476Wa8ooTG+5tg/50vMY1D7RLnKydvxvEOy80/BiDJ4HSk6njEidXUqs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=k7eNk+ze; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="k7eNk+ze" Received: by smtp.kernel.org (Postfix) with ESMTPSA id B7264C116B1; Tue, 2 Jul 2024 01:08:36 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1719882516; bh=XaMRSo9Pz7LYMXTUPvlmu4Q5q5M1jtivMOSW50j5xKI=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=k7eNk+zeJroJSTIqZtx/7mR7baaB4szABPma1W/08mlEVwHVLETxvl0CP7desOy8u MqH4kiFIxIHLxSBqjNCQ4qO4531s1jDSQzMnTDWSG/vzUCZEKRsqVMMtNdw48N7aoN 6qy5ngPlyW967F4blaBi78dR3ARMH7N8w064guu47A01JrzEfCjcvuUMVFppfBryWd 
b6OP2m0/B0nfG5R0WaMTe9s33BCB3B845pQyShzHm6iUAJEpVS1Rvt+vrsPCTTxyAO nkyV1qpZoiLvhtyHnlLBaMf1r3Nfwnqu0j1Hs/27dgBweheN11ia7bv3w8OSJR4Rw0 +i/dE5sHIzu8w== Date: Mon, 01 Jul 2024 18:08:36 -0700 Subject: [PATCH 3/5] xfs_scrub_all: add CLI option for easier debugging From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org, hch@lst.de Message-ID: <171988119860.2008718.3789689429042344225.stgit@frogsfrogsfrogs> In-Reply-To: <171988119806.2008718.11057954097670233571.stgit@frogsfrogsfrogs> References: <171988119806.2008718.11057954097670233571.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Add a new CLI argument to make it easier to figure out what exactly the program is doing. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- scrub/xfs_scrub_all.in | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/scrub/xfs_scrub_all.in b/scrub/xfs_scrub_all.in index 09fedff9d965..d5d1d13a2552 100644 --- a/scrub/xfs_scrub_all.in +++ b/scrub/xfs_scrub_all.in @@ -24,6 +24,7 @@ from datetime import timezone retcode = 0 terminate = False scrub_media = False +debug = False def DEVNULL(): '''Return /dev/null in subprocess writable format.''' @@ -110,6 +111,11 @@ class scrub_subprocess(scrub_control): '''Start xfs_scrub and wait for it to complete. 
Returns -1 if the service was not started, 0 if it succeeded, or 1 if it failed.''' + global debug + + if debug: + print('run ', ' '.join(self.cmdline)) + try: self.proc = subprocess.Popen(self.cmdline) self.proc.wait() @@ -122,6 +128,10 @@ class scrub_subprocess(scrub_control): def stop(self): '''Stop xfs_scrub.''' + global debug + + if debug: + print('kill ', ' '.join(self.cmdline)) if self.proc is not None: self.proc.terminate() @@ -182,8 +192,12 @@ class scrub_service(scrub_control): '''Start the service and wait for it to complete. Returns -1 if the service was not started, 0 if it succeeded, or 1 if it failed.''' + global debug + cmd = ['systemctl', 'start', self.unitname] try: + if debug: + print(' '.join(cmd)) proc = subprocess.Popen(cmd, stdout = DEVNULL()) proc.wait() ret = proc.returncode @@ -201,7 +215,11 @@ class scrub_service(scrub_control): def stop(self): '''Stop the service.''' + global debug + cmd = ['systemctl', 'stop', self.unitname] + if debug: + print(' '.join(cmd)) x = subprocess.Popen(cmd) x.wait() @@ -366,10 +384,12 @@ def main(): a = (mnt, cond, running_devs, devs, killfuncs) thr = threading.Thread(target = run_scrub, args = a) thr.start() - global retcode, terminate, scrub_media + global retcode, terminate, scrub_media, debug parser = argparse.ArgumentParser( \ description = "Scrub all mounted XFS filesystems.") + parser.add_argument("--debug", help = "Enabling debugging messages.", \ + action = "store_true") parser.add_argument("-V", help = "Report version and exit.", \ action = "store_true") parser.add_argument("-x", help = "Scrub file data after filesystem metadata.", \ @@ -384,6 +404,9 @@ def main(): print("xfs_scrub_all version @pkg_version@") sys.exit(0) + if args.debug: + debug = True + if args.auto_media_scan_interval is not None: try: scrub_media = enable_automatic_media_scan(args) From patchwork Tue Jul 2 01:08:51 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit 
X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13718817 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CFC859449 for ; Tue, 2 Jul 2024 01:08:52 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1719882532; cv=none; b=D6LyHzZcS9n7c8t80WHl080CloXc83qigmmgp5ywFse2nfiQQQXdfjowzGhwWJIj2KJoOm65tT6tPV+2uYMSejdsgJa5ExVGgm9IBgld6OxtBmXQn5SYyYXnxiClgg23on36bNAx5XmDa2pxtFK1LHFOgIDwLBooSOJyNQ+6nrA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1719882532; c=relaxed/simple; bh=I2tMiyF5go41YuIuljvw4aoYS9iLoBKsWmqCzX/QuuA=; h=Date:Subject:From:To:Cc:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=UuPZvRXR0v+CrtzSYk7QvrNXCa+00kqeWTVmGfA9e6IqN/7V1xz9bR8jV2urJUZ7gNPRdVWBE6guQb0y5lA0yoOMXSzmKZb/SIENc/KjTFvTGT2G/AvKxmHWXs0pZk76fY+GOs7L0RzO5iCuXXARGl9xyUfZKzmYKLvKWn4HtbE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=pkDZ46dc; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="pkDZ46dc" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5541BC116B1; Tue, 2 Jul 2024 01:08:52 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1719882532; bh=I2tMiyF5go41YuIuljvw4aoYS9iLoBKsWmqCzX/QuuA=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=pkDZ46dcsEvkVs+/UWRvGXa+tilC5NcORvVUM1Abkhmk95tqp/bER94tf2IRP2boN h4Djx6JONuejJpUDGPy+EGeY0aa8Xi6CYYbCvQ4jOKLFrnau9/7ONNphJUuoxKE0a8 svjjGLB5xPQJWpOX2qnbRVA7NrYDRdpGmrbBy6bzUtnUWFveAxKUXYgQEB6oqsrYCb 
olxy3GnPceghDY5pk2Qp4apLbor5cRQq5G0rOhLhSH87q/pUUcJp0BfY3b2uNqOLaD XCuqAr3oiNua7wyg+aHjmmmKUf58JnlHg7sjg65nHiMRwKcYZrV5FIi42+y8T/Gwdd EtUlvyg796Mjg== Date: Mon, 01 Jul 2024 18:08:51 -0700 Subject: [PATCH 4/5] xfs_scrub_all: convert systemctl calls to dbus From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org, hch@lst.de Message-ID: <171988119875.2008718.8969308689929459520.stgit@frogsfrogsfrogs> In-Reply-To: <171988119806.2008718.11057954097670233571.stgit@frogsfrogsfrogs> References: <171988119806.2008718.11057954097670233571.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Convert the systemctl invocations to direct dbus calls, which decouples us from the CLI in favor of direct API calls. This spares us from some of the insanity of divining service state from program outputs. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- debian/control | 2 + scrub/xfs_scrub_all.in | 96 +++++++++++++++++++++++++++++++----------------- 2 files changed, 63 insertions(+), 35 deletions(-) diff --git a/debian/control b/debian/control index 344466de0161..31773e53a19a 100644 --- a/debian/control +++ b/debian/control @@ -8,7 +8,7 @@ Standards-Version: 4.0.0 Homepage: https://xfs.wiki.kernel.org/ Package: xfsprogs -Depends: ${shlibs:Depends}, ${misc:Depends}, python3:any +Depends: ${shlibs:Depends}, ${misc:Depends}, python3-dbus, python3:any Provides: fsck-backend Suggests: xfsdump, acl, attr, quota Breaks: xfsdump (<< 3.0.0) diff --git a/scrub/xfs_scrub_all.in b/scrub/xfs_scrub_all.in index d5d1d13a2552..a09566efdcd8 100644 --- a/scrub/xfs_scrub_all.in +++ b/scrub/xfs_scrub_all.in @@ -15,6 +15,7 @@ import sys import os import argparse import signal +import dbus from io import TextIOWrapper from pathlib import Path from datetime import timedelta @@ -168,25 +169,57 @@ class 
scrub_service(scrub_control): '''Control object for xfs_scrub systemd service.''' def __init__(self, mnt, scrub_media): self.unitname = path_to_serviceunit(mnt, scrub_media) + self.prop = None + self.unit = None + self.bind() + + def bind(self): + '''Bind to the dbus proxy object for this service.''' + sysbus = dbus.SystemBus() + systemd1 = sysbus.get_object('org.freedesktop.systemd1', + '/org/freedesktop/systemd1') + manager = dbus.Interface(systemd1, + 'org.freedesktop.systemd1.Manager') + path = manager.LoadUnit(self.unitname) + + svc_obj = sysbus.get_object('org.freedesktop.systemd1', path) + self.prop = dbus.Interface(svc_obj, + 'org.freedesktop.DBus.Properties') + self.unit = dbus.Interface(svc_obj, + 'org.freedesktop.systemd1.Unit') + + def state(self): + '''Retrieve the active state for a systemd service. As of + systemd 249, this is supposed to be one of the following: + "active", "reloading", "inactive", "failed", "activating", + or "deactivating". These strings are not localized.''' + global debug + + try: + return self.prop.Get('org.freedesktop.systemd1.Unit', 'ActiveState') + except Exception as e: + if debug: + print(e, file = sys.stderr) + return 'failed' def wait(self, interval = 1): '''Wait until the service finishes.''' + global debug - # As of systemd 249, the is-active command returns any of the - # following states: active, reloading, inactive, failed, - # activating, deactivating, or maintenance. Apparently these - # strings are not localized. - while True: - try: - for l in backtick(['systemctl', 'is-active', self.unitname]): - if l == 'failed': - return 1 - if l == 'inactive': - return 0 - except: - return -1 - + # Use a poll/sleep loop to wait for the service to finish. + # Avoid adding a dependency on python3 glib, which is required + # to use an event loop to receive a dbus signal. 
+ s = self.state() + while s not in ['failed', 'inactive']: + if debug: + print('waiting %s %s' % (self.unitname, s)) time.sleep(interval) + s = self.state() + if debug: + print('waited %s %s' % (self.unitname, s)) + if s == 'failed': + return 1 + return 0 def start(self): '''Start the service and wait for it to complete. Returns -1 @@ -194,34 +227,29 @@ class scrub_service(scrub_control): failed.''' global debug - cmd = ['systemctl', 'start', self.unitname] + if debug: + print('starting %s' % self.unitname) + try: - if debug: - print(' '.join(cmd)) - proc = subprocess.Popen(cmd, stdout = DEVNULL()) - proc.wait() - ret = proc.returncode - except: + self.unit.Start('replace') + return self.wait() + except Exception as e: + print(e, file = sys.stderr) return -1 - if ret != 1: - return ret - - # If systemctl-start returns 1, it's possible that the service - # failed or that dbus/systemd restarted and the client program - # lost its connection -- according to the systemctl man page, 1 - # means "unit not failed". - return self.wait() - def stop(self): '''Stop the service.''' global debug - cmd = ['systemctl', 'stop', self.unitname] if debug: - print(' '.join(cmd)) - x = subprocess.Popen(cmd) - x.wait() + print('stopping %s' % self.unitname) + + try: + self.unit.Stop('replace') + return self.wait() + except Exception as e: + print(e, file = sys.stderr) + return -1 def run_service(mnt, scrub_media, killfuncs): '''Run scrub as a service.''' From patchwork Tue Jul 2 01:09:07 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13718818 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 35CC09449 for ; Tue, 2 Jul 2024 01:09:08 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1719882548; cv=none; b=lwMl5JWxbHlMBcrRLWYICFtTZctjujAR8QZNtMPfwtSak7hCi8TsxPXzXYr1whsTJcdkvEcylEqA0AB/TgYDs32m3HVmSssFnRddz319Fav20raXt71GXTOEphRQ1+1w9ZJNVOVwnjzMwtjsjZriYVwzrLCQFGE+ANrGQF8Cnuo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1719882548; c=relaxed/simple; bh=/13cmUZH5qZFXt1bOtFOHM3T3iDqZDEV3kKDJmvf6RE=; h=Date:Subject:From:To:Cc:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=JMfJuFKRPzRruxRpHoXxs7V7lZmrAm8YqCQU9Ax8ce+9Ed9voek1BY6ekIC+cq2PLXscy6ytBAVD7yQxoUm6J6Isz1FSBSkawNqtqT00+Y5DdVnFuJ7sloerLLcuXfeAdgCTXgXz/xnADtnxVP8u3PAYKxLcPx+UkKfhKTzuEWY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=epnl5NEk; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="epnl5NEk" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 09669C116B1; Tue, 2 Jul 2024 01:09:08 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1719882548; bh=/13cmUZH5qZFXt1bOtFOHM3T3iDqZDEV3kKDJmvf6RE=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=epnl5NEk6+6ASLYh8s2RZC2UVPy/2qMLF6sWbfsT1VZsEgleH1OxB7Hu582NeEzc8 avF8wforRbbOSXDhNgQ+P2ig+/48qNoNQA6mjDAEtWE+/EZXENNbiQEaPzm7eABwxT q++9L9XS0PA9Bq4BOByko6EUlUvGvFTSuCtRqLmC6LDLWsJRR18DRmrgauJag7Ql4B 
6ygC0yuJn1b3OMcQzoRSYn8kw9mMthHLiJ0J+jKxw5w695dxuSR8xIik4t+NdXqBBb SwP3WwuR3tDh6GgSXUB2dM2K6y5vEhLprHSS6TlQP+ZbC13E39mfH8zafRnMiNyu8u c+ysgY+Y1qrJg== Date: Mon, 01 Jul 2024 18:09:07 -0700 Subject: [PATCH 5/5] xfs_scrub_all: implement retry and backoff for dbus calls From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org, hch@lst.de Message-ID: <171988119890.2008718.14517701027636602626.stgit@frogsfrogsfrogs> In-Reply-To: <171988119806.2008718.11057954097670233571.stgit@frogsfrogsfrogs> References: <171988119806.2008718.11057954097670233571.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Calls to systemd across dbus are remote procedure calls, which means that they're subject to transitory connection failures (e.g. systemd re-executing itself). We don't want to fail at the *first* sign of what could be temporary trouble, so implement a limited retry with Fibonacci backoff before we resort to invoking xfs_scrub as a subprocess. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- scrub/xfs_scrub_all.in | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/scrub/xfs_scrub_all.in b/scrub/xfs_scrub_all.in index a09566efdcd8..71726cdf36d5 100644 --- a/scrub/xfs_scrub_all.in +++ b/scrub/xfs_scrub_all.in @@ -165,6 +165,22 @@ def path_to_serviceunit(path, scrub_media): for line in proc.stdout: return line.decode(sys.stdout.encoding).strip() +def fibonacci(max_ret): + '''Yield the Fibonacci sequence up to and including max_ret.''' + if max_ret < 1: + return + + x = 0 + y = 1 + yield 1 + + z = x + y + while z <= max_ret: + yield z + x = y + y = z + z = x + y + class scrub_service(scrub_control): '''Control object for xfs_scrub systemd service.''' def __init__(self, mnt, scrub_media): @@ -188,6 +204,25 @@ class scrub_service(scrub_control): self.unit = dbus.Interface(svc_obj, 'org.freedesktop.systemd1.Unit') + def __dbusrun(self, lambda_fn): + '''Call the lambda function to execute something on dbus. dbus + exceptions result in retries with Fibonacci backoff, and the + bindings will be rebuilt every time.''' + global debug + + fatal_ex = None + + for i in fibonacci(30): + try: + return lambda_fn() + except dbus.exceptions.DBusException as e: + if debug: + print(e) + fatal_ex = e + time.sleep(i) + self.bind() + raise fatal_ex + def state(self): '''Retrieve the active state for a systemd service. As of systemd 249, this is supposed to be one of the following: @@ -195,8 +230,10 @@ class scrub_service(scrub_control): or "deactivating". 
These strings are not localized.''' global debug + l = lambda: self.prop.Get('org.freedesktop.systemd1.Unit', + 'ActiveState') try: - return self.prop.Get('org.freedesktop.systemd1.Unit', 'ActiveState') + return self.__dbusrun(l) except Exception as e: if debug: print(e, file = sys.stderr) @@ -231,7 +268,7 @@ class scrub_service(scrub_control): print('starting %s' % self.unitname) try: - self.unit.Start('replace') + self.__dbusrun(lambda: self.unit.Start('replace')) return self.wait() except Exception as e: print(e, file = sys.stderr) @@ -245,7 +282,7 @@ class scrub_service(scrub_control): print('stopping %s' % self.unitname) try: - self.unit.Stop('replace') + self.__dbusrun(lambda: self.unit.Stop('replace')) return self.wait() except Exception as e: print(e, file = sys.stderr)