From patchwork Tue Jul 30 01:16:14 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13746151 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2888928EB for ; Tue, 30 Jul 2024 01:16:14 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722302175; cv=none; b=nITHZ4tofyiyiFBmD5j5P04ERqWBuP/hu3Tjt7gjqHQEvhlEWTNWzk0duE/hr8X5ltmssBV/802jdGpETOPbybqJbEWeBT3KEZPspg9OvZc6jawtDdFP5PTzIsQQbrtRKKBMvTsg0o77XpvBGRRT1wuyKUH2QUF+5zLfsIRBOZ8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722302175; c=relaxed/simple; bh=z9zEh0ewYiSURzD+RYvb6UV2Vh2tVO19xkyd/drxtw4=; h=Date:Subject:From:To:Cc:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=UJ+vDESv4GKtoNnUZjulREX/CdHJ78EZkj+5u7FRAFojsfIPMAhG2Ke13JoTpgI8WgdV+7fQPtaleCnNvT7WIdZKEbF076W1rXzdgkH0V7iTuw5xl4sw84XYoSgjyuaWrH0Vz/t7eb5Ub5hbMW3ZXc7ooSIWoJQHNkxP+MutWJs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=CwrrzDuD; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="CwrrzDuD" Received: by smtp.kernel.org (Postfix) with ESMTPSA id AFF4FC4AF07; Tue, 30 Jul 2024 01:16:14 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1722302174; bh=z9zEh0ewYiSURzD+RYvb6UV2Vh2tVO19xkyd/drxtw4=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=CwrrzDuDZGa8mFmC+Y6K84GaBEt/XySmsQqnwBL0ymulyI/z5d1Ea2Mr2OBhIRSuq 
ASvvG85SRX4uNpKhoxQZ8f1B874hoK8vImC7jV/zk84qyF5NsMzktH0z3xFjCvy04Q /wa53ZvSP/gBiRZifK1C2mEKABxzvuhrqg4BkRbu+NZ2R2Ht6Hv/x1DFwOFlsc6Lg1 XGmYPg1P3KnCf7bybtQmvA+7fjxr58HNSHWYAoXGrlLySJZLVOaqF5btb4iSO6lGls u9HAFS+LOjlnKYZ0Gmca8JOEnN7sh64nth/EByAlfmz4H+tPk/e+h4ln1fCA/RKxHw omJnyR6MWnBag== Date: Mon, 29 Jul 2024 18:16:14 -0700 Subject: [PATCH 1/5] xfs_scrub_all: encapsulate all the subprocess code in an object From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: Christoph Hellwig , linux-xfs@vger.kernel.org Message-ID: <172229849658.1350420.7261977076960290061.stgit@frogsfrogsfrogs> In-Reply-To: <172229849638.1350420.756131243612881227.stgit@frogsfrogsfrogs> References: <172229849638.1350420.756131243612881227.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Move all the xfs_scrub subprocess handling code to an object so that we can contain all the details in a single place. This also simplifies the background state management. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- scrub/xfs_scrub_all.in | 68 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/scrub/xfs_scrub_all.in b/scrub/xfs_scrub_all.in index 9dd6347fb..25286f57c 100644 --- a/scrub/xfs_scrub_all.in +++ b/scrub/xfs_scrub_all.in @@ -78,15 +78,62 @@ def remove_killfunc(killfuncs, fn): except: pass -def run_killable(cmd, stdout, killfuncs): +class scrub_control(object): + '''Control object for xfs_scrub.''' + def __init__(self): + pass + + def start(self): + '''Start scrub and wait for it to complete. 
Returns -1 if the + service was not started, 0 if it succeeded, or 1 if it + failed.''' + assert False + + def stop(self): + '''Stop scrub.''' + assert False + +class scrub_subprocess(scrub_control): + '''Control object for xfs_scrub subprocesses.''' + def __init__(self, mnt, scrub_media): + cmd = ['@sbindir@/xfs_scrub'] + if 'SERVICE_MODE' in os.environ: + cmd += '@scrub_service_args@'.split() + cmd += '@scrub_args@'.split() + if scrub_media: + cmd += '-x' + cmd += [mnt] + self.cmdline = cmd + self.proc = None + + def start(self): + '''Start xfs_scrub and wait for it to complete. Returns -1 if + the service was not started, 0 if it succeeded, or 1 if it + failed.''' + try: + self.proc = subprocess.Popen(self.cmdline) + self.proc.wait() + except: + return -1 + + proc = self.proc + self.proc = None + return proc.returncode + + def stop(self): + '''Stop xfs_scrub.''' + if self.proc is not None: + self.proc.terminate() + +def run_subprocess(mnt, scrub_media, killfuncs): '''Run a killable program. Returns program retcode or -1 if we can't start it.''' try: - proc = subprocess.Popen(cmd, stdout = stdout) - killfuncs.add(proc.terminate) - proc.wait() - remove_killfunc(killfuncs, proc.terminate) - return proc.returncode + p = scrub_subprocess(mnt, scrub_media) + killfuncs.add(p.stop) + ret = p.start() + remove_killfunc(killfuncs, p.stop) + return ret except: return -1 @@ -190,14 +237,7 @@ def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs): # Invoke xfs_scrub manually if we're running in the foreground. # We also permit this if we're running as a cronjob where # systemd services are unavailable. 
- cmd = ['@sbindir@/xfs_scrub'] - if 'SERVICE_MODE' in os.environ: - cmd += '@scrub_service_args@'.split() - cmd += '@scrub_args@'.split() - if scrub_media: - cmd += '-x' - cmd += [mnt] - ret = run_killable(cmd, None, killfuncs) + ret = run_subprocess(mnt, scrub_media, killfuncs) if ret >= 0: print("Scrubbing %s done, (err=%d)" % (mnt, ret)) sys.stdout.flush() From patchwork Tue Jul 30 01:16:29 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13746152 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id F39D1442C for ; Tue, 30 Jul 2024 01:16:30 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722302191; cv=none; b=HYpR+LsqoIR22oxN9/5Ot5eLm+QspB05Ct8hVpHvVMxUi2gW4I6Y/ioHnWHqdnV+wxeKvjwVG6dph1aoPTH6QaVi2JdjPDcGAbCWqKh8tq6Kz0JkZy3LUpr+LuIRpWPT6tWawPG5AfTdnFRsr1joF/Jz5SpqyV5XnvbShsu/fX4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722302191; c=relaxed/simple; bh=egTRFwZF8NBK8I8fU7KpnUUKmbPpjCVrI6RxM+SCekA=; h=Date:Subject:From:To:Cc:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=ADJotcvgrhbzgPg8mKQDNMePJyo/TygAco7kvKpFzkzY2PeRaz3WDHAjEiIX77XRTA3UbMaHN0oDOkZeLcDkFYk6PKNrd7Oq4NsUv2jgXs5L9C4RX77hKKm5syEdkoIkqi5n7RE4slennmHdnNSINU+5FKKnTMAoHJrs8XHJ0VI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=K8Kng4wc; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="K8Kng4wc" Received: by smtp.kernel.org 
(Postfix) with ESMTPSA id 63C2FC32786; Tue, 30 Jul 2024 01:16:30 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1722302190; bh=egTRFwZF8NBK8I8fU7KpnUUKmbPpjCVrI6RxM+SCekA=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=K8Kng4wcqGSMJOymFRdLs9fD9ObmJ3bHPE641fUDTwIYUiRpHpaXFRziBbYJsSdfd /DWicZZJ8svJRG+HFflCfDOdO2tavgyqLl8rhmbvb9XFfNwzTjj2aLvlbtdAv4GXbs 4MHTytIHYiMgMWh4+AvaMjl+EcYu0mfSIMTEQ+BzOdqJYCDwwwmJiBnBHCiyQaOHYf /DWpvRUhO/7ihOn7ATzC4amf23fxYHJM8fmyaviVWD/tOMCQR05+BgBxgdO0fLj9JJ KspssyqVGKFCI+MIzRhlVGIyQntrk201fBXkoX7n5ocTOgpDnWJZl/6jStlzUcfU+N Ja/qfqTKSGJNw== Date: Mon, 29 Jul 2024 18:16:29 -0700 Subject: [PATCH 2/5] xfs_scrub_all: encapsulate all the systemctl code in an object From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: Christoph Hellwig , linux-xfs@vger.kernel.org Message-ID: <172229849673.1350420.5830833626781403630.stgit@frogsfrogsfrogs> In-Reply-To: <172229849638.1350420.756131243612881227.stgit@frogsfrogsfrogs> References: <172229849638.1350420.756131243612881227.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Move all the systemd service handling code to an object so that we can contain all the insanity^Wdetails in a single place. This also makes the killfuncs handling similar to starting background processes. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- scrub/xfs_scrub_all.in | 113 ++++++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 52 deletions(-) diff --git a/scrub/xfs_scrub_all.in b/scrub/xfs_scrub_all.in index 25286f57c..4130a98e9 100644 --- a/scrub/xfs_scrub_all.in +++ b/scrub/xfs_scrub_all.in @@ -149,63 +149,73 @@ def path_to_serviceunit(path, scrub_media): svcname = '@scrub_svcname@' cmd = ['systemd-escape', '--template', svcname, '--path', path] - try: - proc = subprocess.Popen(cmd, stdout = subprocess.PIPE) - proc.wait() - for line in proc.stdout: - return line.decode(sys.stdout.encoding).strip() - except: - return None + proc = subprocess.Popen(cmd, stdout = subprocess.PIPE) + proc.wait() + for line in proc.stdout: + return line.decode(sys.stdout.encoding).strip() -def systemctl_stop(unitname): - '''Stop a systemd unit.''' - cmd = ['systemctl', 'stop', unitname] - x = subprocess.Popen(cmd) - x.wait() +class scrub_service(scrub_control): + '''Control object for xfs_scrub systemd service.''' + def __init__(self, mnt, scrub_media): + self.unitname = path_to_serviceunit(mnt, scrub_media) -def systemctl_start(unitname, killfuncs): - '''Start a systemd unit and wait for it to complete.''' - stop_fn = None - cmd = ['systemctl', 'start', unitname] - try: - proc = subprocess.Popen(cmd, stdout = DEVNULL()) - stop_fn = lambda: systemctl_stop(unitname) - killfuncs.add(stop_fn) - proc.wait() - ret = proc.returncode - except: - if stop_fn is not None: - remove_killfunc(killfuncs, stop_fn) - return -1 + def wait(self, interval = 1): + '''Wait until the service finishes.''' - if ret != 1: - remove_killfunc(killfuncs, stop_fn) - return ret + # As of systemd 249, the is-active command returns any of the + # following states: active, reloading, inactive, failed, + # activating, deactivating, or maintenance. Apparently these + # strings are not localized. 
+ while True: + try: + for l in backtick(['systemctl', 'is-active', self.unitname]): + if l == 'failed': + return 1 + if l == 'inactive': + return 0 + except: + return -1 - # If systemctl-start returns 1, it's possible that the service failed - # or that dbus/systemd restarted and the client program lost its - # connection -- according to the systemctl man page, 1 means "unit not - # failed". - # - # Either way, we switch to polling the service status to try to wait - # for the service to end. As of systemd 249, the is-active command - # returns any of the following states: active, reloading, inactive, - # failed, activating, deactivating, or maintenance. Apparently these - # strings are not localized. - while True: + time.sleep(interval) + + def start(self): + '''Start the service and wait for it to complete. Returns -1 + if the service was not started, 0 if it succeeded, or 1 if it + failed.''' + cmd = ['systemctl', 'start', self.unitname] try: - for l in backtick(['systemctl', 'is-active', unitname]): - if l == 'failed': - remove_killfunc(killfuncs, stop_fn) - return 1 - if l == 'inactive': - remove_killfunc(killfuncs, stop_fn) - return 0 + proc = subprocess.Popen(cmd, stdout = DEVNULL()) + proc.wait() + ret = proc.returncode except: - remove_killfunc(killfuncs, stop_fn) return -1 - time.sleep(1) + if ret != 1: + return ret + + # If systemctl-start returns 1, it's possible that the service + # failed or that dbus/systemd restarted and the client program + # lost its connection -- according to the systemctl man page, 1 + # means "unit not failed". 
+ return self.wait() + + def stop(self): + '''Stop the service.''' + cmd = ['systemctl', 'stop', self.unitname] + x = subprocess.Popen(cmd) + x.wait() + +def run_service(mnt, scrub_media, killfuncs): + '''Run scrub as a service.''' + try: + svc = scrub_service(mnt, scrub_media) + except: + return -1 + + killfuncs.add(svc.stop) + retcode = svc.start() + remove_killfunc(killfuncs, svc.stop) + return retcode def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs): '''Run a scrub process.''' @@ -222,9 +232,8 @@ def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs): # Run per-mount systemd xfs_scrub service only if we ourselves # are running as a systemd service. - unitname = path_to_serviceunit(path, scrub_media) - if unitname is not None and 'SERVICE_MODE' in os.environ: - ret = systemctl_start(unitname, killfuncs) + if 'SERVICE_MODE' in os.environ: + ret = run_service(mnt, scrub_media, killfuncs) if ret == 0 or ret == 1: print("Scrubbing %s done, (err=%d)" % (mnt, ret)) sys.stdout.flush() From patchwork Tue Jul 30 01:16:45 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13746153 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3ED1D2905 for ; Tue, 30 Jul 2024 01:16:46 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722302206; cv=none; b=iV0RGtfeiMQoX+QiyOEN3FyWGLCT6BGr7h/I6QRmWXMijKHyW0tKkyC07Xsgm26w/Y5o4CdKUxOwyHcIcsJCk/bcnsmUdS7eJ1bOmEqKS7XSH50pTpWxvu+I/D/84oLhxYo+oCwanYtOZcjXNwLzYc7SDHeJwB7nWCSqWiQ7i7g= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722302206; c=relaxed/simple; bh=PO4ftwcXfIvkvCYKG2HDTwX3XiEF+gneYkuq5ZrVvyg=; h=Date:Subject:From:To:Cc:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=qTSPXo9sJa+SYhuJvDXDaZTf1B7+Ujs+8scUlDU4nJbyh8Fpv13Rw14x8YuW3rYAxjnqJKJdhxv/deWhVTEhJYVfeL5ZRbI3KPIk2vkm4tdJ46Ro2+LNbG1jM2Vcv31Yu0E6xxYn5q3sj48Vd6SpjT1wKviZGaKbD3R6GrifMpU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Z4/gumw0; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Z4/gumw0" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 0FDC8C32786; Tue, 30 Jul 2024 01:16:46 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1722302206; bh=PO4ftwcXfIvkvCYKG2HDTwX3XiEF+gneYkuq5ZrVvyg=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=Z4/gumw0GudL4oPyO1BD9oFBJHKXXBoqiWG+gubswux6d0v5HZ/9IIvxCuEcwNY6M IpLYXJK46KJpL25P7/eYMBA82oz1nlmmxei9VIeQfo+/SCj0UGYWA70cVKNEbYTCAA MuM9noUOdJTPJ8VfpSduJDowJJsYM29amSe7hNV26TbpJlFyX8KqchxE2Jntn2kdYX 
cHPsPLb8IhgwnZxTbc+8mJpgGtzxTJvG6lgLG7GyaKri0uRF+WLnP7SW+WgvVWBQP2 vfsKtDg4CGhR17E2tdd9aW6Hoe/1HHbJfNYSeEchyop79/xUV8+dCFdmT5YNVefWMS +Y3HqjtuOE4MA== Date: Mon, 29 Jul 2024 18:16:45 -0700 Subject: [PATCH 3/5] xfs_scrub_all: add CLI option for easier debugging From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: Christoph Hellwig , linux-xfs@vger.kernel.org Message-ID: <172229849688.1350420.1760902598558162682.stgit@frogsfrogsfrogs> In-Reply-To: <172229849638.1350420.756131243612881227.stgit@frogsfrogsfrogs> References: <172229849638.1350420.756131243612881227.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Add a new CLI argument to make it easier to figure out what exactly the program is doing. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- scrub/xfs_scrub_all.in | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/scrub/xfs_scrub_all.in b/scrub/xfs_scrub_all.in index 4130a98e9..8954b4740 100644 --- a/scrub/xfs_scrub_all.in +++ b/scrub/xfs_scrub_all.in @@ -24,6 +24,7 @@ from datetime import timezone retcode = 0 terminate = False scrub_media = False +debug = False def DEVNULL(): '''Return /dev/null in subprocess writable format.''' @@ -110,6 +111,11 @@ class scrub_subprocess(scrub_control): '''Start xfs_scrub and wait for it to complete. 
Returns -1 if the service was not started, 0 if it succeeded, or 1 if it failed.''' + global debug + + if debug: + print('run ', ' '.join(self.cmdline)) + try: self.proc = subprocess.Popen(self.cmdline) self.proc.wait() @@ -122,6 +128,10 @@ class scrub_subprocess(scrub_control): def stop(self): '''Stop xfs_scrub.''' + global debug + + if debug: + print('kill ', ' '.join(self.cmdline)) if self.proc is not None: self.proc.terminate() @@ -182,8 +192,12 @@ class scrub_service(scrub_control): '''Start the service and wait for it to complete. Returns -1 if the service was not started, 0 if it succeeded, or 1 if it failed.''' + global debug + cmd = ['systemctl', 'start', self.unitname] try: + if debug: + print(' '.join(cmd)) proc = subprocess.Popen(cmd, stdout = DEVNULL()) proc.wait() ret = proc.returncode @@ -201,7 +215,11 @@ class scrub_service(scrub_control): def stop(self): '''Stop the service.''' + global debug + cmd = ['systemctl', 'stop', self.unitname] + if debug: + print(' '.join(cmd)) x = subprocess.Popen(cmd) x.wait() @@ -266,7 +284,8 @@ def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs): def signal_scrubs(signum, cond): '''Handle termination signals by killing xfs_scrub children.''' - global debug, terminate + global debug + global terminate if debug: print('Signal handler called with signal', signum) @@ -280,7 +299,8 @@ def signal_scrubs(signum, cond): def wait_for_termination(cond, killfuncs): '''Wait for a child thread to terminate. 
Returns True if we should abort the program, False otherwise.''' - global debug, terminate + global debug + global terminate if debug: print('waiting for threads to terminate') @@ -371,9 +391,12 @@ def main(): global retcode global terminate global scrub_media + global debug parser = argparse.ArgumentParser( \ description = "Scrub all mounted XFS filesystems.") + parser.add_argument("--debug", help = "Enabling debugging messages.", \ + action = "store_true") parser.add_argument("-V", help = "Report version and exit.", \ action = "store_true") parser.add_argument("-x", help = "Scrub file data after filesystem metadata.", \ @@ -388,6 +411,9 @@ def main(): print("xfs_scrub_all version @pkg_version@") sys.exit(0) + if args.debug: + debug = True + if args.auto_media_scan_interval is not None: try: scrub_media = enable_automatic_media_scan(args) From patchwork Tue Jul 30 01:17:01 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13746154 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3D89E7464 for ; Tue, 30 Jul 2024 01:17:01 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722302222; cv=none; b=orr8fkmSnAl28erb1n5zMFBhjHqvl5mKiSMho+aWQ/MankRHR70+SbMPY++kl2W//OT/IBYji8lQsV90ZJV+ZtctmFdIhcqz5diQ3zbptBqDzbQFbXLsSljBor9SMYRc3UxN+rS6p2CJtybRkoo3t2FVDEXo3AuMASs1iC//CRY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722302222; c=relaxed/simple; bh=lfjtvYRmkSEbR/T6lQa31y3p8E0z9tE3pSvcLZKrAdU=; h=Date:Subject:From:To:Cc:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=GYHceNT5Yms82SE/8Qq7bhXxky3Z29jrISI07IUTAl2kNKPXw2zGR4YiamKw9PiqC69/2X4iLoDJEdeMG1TyvHZZBcLg9HfGVhIwgC0NHvos9mfWXSveUFfUhTfm7ZacR12NEWeFrseOQ0qVv4ACsSCXyx/fVEr6ndwMEQw4w4A= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=ke7iz06F; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="ke7iz06F" Received: by smtp.kernel.org (Postfix) with ESMTPSA id B6364C32786; Tue, 30 Jul 2024 01:17:01 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1722302221; bh=lfjtvYRmkSEbR/T6lQa31y3p8E0z9tE3pSvcLZKrAdU=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=ke7iz06FuSWjSwUXBvgSa6ffcrhGxBec/A2NIOl0o1c/w5dfN6hn5riDfczw/NDh6 atMho8No0pQb6iJQZrTF84wIODRyJi7bHPbNHVnU+bGzcZM0d8AsZCj8GPij0Uc0bc 6wwzSJbyyzecWdyZq6hEbBdDi267qhnCe4zpn5IKUnPuJMhjzeYEk5dK3jBzDjZs4N 
EF6/vXfk6m5nDyR/f4+PSAut5sU0GO8LPKMYgm2KYrnN7JQO8Yw/GJu9KXCk2H+48X r/WCrBgRMjue+SEMva4YO7DWMqjoqZliYA4Shhb6x+dnU2CKWQSLgrCJ9n0JL5aMT4 Zm/D727AQ2u6g== Date: Mon, 29 Jul 2024 18:17:01 -0700 Subject: [PATCH 4/5] xfs_scrub_all: convert systemctl calls to dbus From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: Christoph Hellwig , linux-xfs@vger.kernel.org Message-ID: <172229849704.1350420.15182762451486145269.stgit@frogsfrogsfrogs> In-Reply-To: <172229849638.1350420.756131243612881227.stgit@frogsfrogsfrogs> References: <172229849638.1350420.756131243612881227.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Convert the systemctl invocations to direct dbus calls, which decouples us from the CLI in favor of direct API calls. This spares us from some of the insanity of divining service state from program outputs. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- debian/control | 2 + scrub/xfs_scrub_all.in | 96 +++++++++++++++++++++++++++++++----------------- 2 files changed, 63 insertions(+), 35 deletions(-) diff --git a/debian/control b/debian/control index 344466de0..31773e53a 100644 --- a/debian/control +++ b/debian/control @@ -8,7 +8,7 @@ Standards-Version: 4.0.0 Homepage: https://xfs.wiki.kernel.org/ Package: xfsprogs -Depends: ${shlibs:Depends}, ${misc:Depends}, python3:any +Depends: ${shlibs:Depends}, ${misc:Depends}, python3-dbus, python3:any Provides: fsck-backend Suggests: xfsdump, acl, attr, quota Breaks: xfsdump (<< 3.0.0) diff --git a/scrub/xfs_scrub_all.in b/scrub/xfs_scrub_all.in index 8954b4740..f2e916513 100644 --- a/scrub/xfs_scrub_all.in +++ b/scrub/xfs_scrub_all.in @@ -15,6 +15,7 @@ import sys import os import argparse import signal +import dbus from io import TextIOWrapper from pathlib import Path from datetime import timedelta @@ -168,25 +169,57 @@ class scrub_service(scrub_control): 
'''Control object for xfs_scrub systemd service.''' def __init__(self, mnt, scrub_media): self.unitname = path_to_serviceunit(mnt, scrub_media) + self.prop = None + self.unit = None + self.bind() + + def bind(self): + '''Bind to the dbus proxy object for this service.''' + sysbus = dbus.SystemBus() + systemd1 = sysbus.get_object('org.freedesktop.systemd1', + '/org/freedesktop/systemd1') + manager = dbus.Interface(systemd1, + 'org.freedesktop.systemd1.Manager') + path = manager.LoadUnit(self.unitname) + + svc_obj = sysbus.get_object('org.freedesktop.systemd1', path) + self.prop = dbus.Interface(svc_obj, + 'org.freedesktop.DBus.Properties') + self.unit = dbus.Interface(svc_obj, + 'org.freedesktop.systemd1.Unit') + + def state(self): + '''Retrieve the active state for a systemd service. As of + systemd 249, this is supposed to be one of the following: + "active", "reloading", "inactive", "failed", "activating", + or "deactivating". These strings are not localized.''' + global debug + + try: + return self.prop.Get('org.freedesktop.systemd1.Unit', 'ActiveState') + except Exception as e: + if debug: + print(e, file = sys.stderr) + return 'failed' def wait(self, interval = 1): '''Wait until the service finishes.''' + global debug - # As of systemd 249, the is-active command returns any of the - # following states: active, reloading, inactive, failed, - # activating, deactivating, or maintenance. Apparently these - # strings are not localized. - while True: - try: - for l in backtick(['systemctl', 'is-active', self.unitname]): - if l == 'failed': - return 1 - if l == 'inactive': - return 0 - except: - return -1 - + # Use a poll/sleep loop to wait for the service to finish. + # Avoid adding a dependency on python3 glib, which is required + # to use an event loop to receive a dbus signal. 
+ s = self.state() + while s not in ['failed', 'inactive']: + if debug: + print('waiting %s %s' % (self.unitname, s)) time.sleep(interval) + s = self.state() + if debug: + print('waited %s %s' % (self.unitname, s)) + if s == 'failed': + return 1 + return 0 def start(self): '''Start the service and wait for it to complete. Returns -1 @@ -194,34 +227,29 @@ class scrub_service(scrub_control): failed.''' global debug - cmd = ['systemctl', 'start', self.unitname] + if debug: + print('starting %s' % self.unitname) + try: - if debug: - print(' '.join(cmd)) - proc = subprocess.Popen(cmd, stdout = DEVNULL()) - proc.wait() - ret = proc.returncode - except: + self.unit.Start('replace') + return self.wait() + except Exception as e: + print(e, file = sys.stderr) return -1 - if ret != 1: - return ret - - # If systemctl-start returns 1, it's possible that the service - # failed or that dbus/systemd restarted and the client program - # lost its connection -- according to the systemctl man page, 1 - # means "unit not failed". - return self.wait() - def stop(self): '''Stop the service.''' global debug - cmd = ['systemctl', 'stop', self.unitname] if debug: - print(' '.join(cmd)) - x = subprocess.Popen(cmd) - x.wait() + print('stopping %s' % self.unitname) + + try: + self.unit.Stop('replace') + return self.wait() + except Exception as e: + print(e, file = sys.stderr) + return -1 def run_service(mnt, scrub_media, killfuncs): '''Run scrub as a service.''' From patchwork Tue Jul 30 01:17:16 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13746155 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E4A7579CC for ; Tue, 30 Jul 2024 01:17:17 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722302238; cv=none; b=sSRuOhAEWmNuLeRqRHGvv6bKQ8auj4yAmV8H2lMuF1aaEqR4VbPS/MTZbykbdF2omiyM0m8F14sLD674YEq+i7NnxpSZvI/ZnsuGgjnBXLiLMZBwmsEeA6M9pBqAYiUEH1NkRIL3GP8sG6u+G15zJxXDvKfdVqAeXeQ1GEiGmB4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722302238; c=relaxed/simple; bh=nmsqq6k6vUS5Cdk+hijbiB7KqE1P3rTPRNYLaQOeLPY=; h=Date:Subject:From:To:Cc:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=SZOT5zt1qkvOFjV+9yuI+yLwjZJn8y6PhFB6LFKVK6iH619R+mXO3PJRxSOsF+IbG0ueHMMsNBXYXqsxm1oU40xEf/1m+n1L9W8NnbsIdbMrQS+TlbZJ7CROoCaRR1q2boYcfZdFDqH1eZxkzl2I0yH97tohREBDbc1HQL0b1Z0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=DQ2CLwvW; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="DQ2CLwvW" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 591AEC32786; Tue, 30 Jul 2024 01:17:17 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1722302237; bh=nmsqq6k6vUS5Cdk+hijbiB7KqE1P3rTPRNYLaQOeLPY=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=DQ2CLwvWgWApgZtPwh20hWaRIbOQay8s3FxwceR3Zw+DEJGcSMYNt5Y3Gpu1EXGls 4unrypLFD4yXGEst5Pt5GjrJ9QB7GxLLqLid+yoHLdfSSIncyKXXfewIoepQf6lRm6 OcwGIH8KOwQf00b9S7F6BDSFn/B2k5AstVMesTnV++RO2FS0JUdYjf1DnoXF6M2SKw 
hd4F1JC/vX+HLZEDIaikwhrwBAlD/eoTDhH2WcbjeVZMhn/1jcd/x2oxlTo4y3nXgJ nBydFlS6Gm35Et9d06OdJBRt5CwMfUJARVMrkWudqriyzUWgSkS1Vc2Ozab3x+kCLh Gty2tAhHJGAYw== Date: Mon, 29 Jul 2024 18:17:16 -0700 Subject: [PATCH 5/5] xfs_scrub_all: implement retry and backoff for dbus calls From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: Christoph Hellwig , linux-xfs@vger.kernel.org Message-ID: <172229849719.1350420.4990697396905953343.stgit@frogsfrogsfrogs> In-Reply-To: <172229849638.1350420.756131243612881227.stgit@frogsfrogsfrogs> References: <172229849638.1350420.756131243612881227.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Calls to systemd across dbus are remote procedure calls, which means that they're subject to transitory connection failures (e.g. systemd re-executing itself). We don't want to fail at the *first* sign of what could be temporary trouble, so implement a limited retry with Fibonacci backoff before we resort to invoking xfs_scrub as a subprocess. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- scrub/xfs_scrub_all.in | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/scrub/xfs_scrub_all.in b/scrub/xfs_scrub_all.in index f2e916513..5440e51c0 100644 --- a/scrub/xfs_scrub_all.in +++ b/scrub/xfs_scrub_all.in @@ -165,6 +165,22 @@ def path_to_serviceunit(path, scrub_media): for line in proc.stdout: return line.decode(sys.stdout.encoding).strip() +def fibonacci(max_ret): + '''Yield the Fibonacci sequence up to and including max_ret.''' + if max_ret < 1: + return + + x = 0 + y = 1 + yield 1 + + z = x + y + while z <= max_ret: + yield z + x = y + y = z + z = x + y + class scrub_service(scrub_control): '''Control object for xfs_scrub systemd service.''' def __init__(self, mnt, scrub_media): @@ -188,6 +204,25 @@ class scrub_service(scrub_control): self.unit = dbus.Interface(svc_obj, 'org.freedesktop.systemd1.Unit') + def __dbusrun(self, lambda_fn): + '''Call the lambda function to execute something on dbus. dbus + exceptions result in retries with Fibonacci backoff, and the + bindings will be rebuilt every time.''' + global debug + + fatal_ex = None + + for i in fibonacci(30): + try: + return lambda_fn() + except dbus.exceptions.DBusException as e: + if debug: + print(e) + fatal_ex = e + time.sleep(i) + self.bind() + raise fatal_ex + def state(self): '''Retrieve the active state for a systemd service. As of systemd 249, this is supposed to be one of the following: @@ -195,8 +230,10 @@ class scrub_service(scrub_control): or "deactivating". 
These strings are not localized.''' global debug + l = lambda: self.prop.Get('org.freedesktop.systemd1.Unit', + 'ActiveState') try: - return self.prop.Get('org.freedesktop.systemd1.Unit', 'ActiveState') + return self.__dbusrun(l) except Exception as e: if debug: print(e, file = sys.stderr) @@ -231,7 +268,7 @@ class scrub_service(scrub_control): print('starting %s' % self.unitname) try: - self.unit.Start('replace') + self.__dbusrun(lambda: self.unit.Start('replace')) return self.wait() except Exception as e: print(e, file = sys.stderr) @@ -245,7 +282,7 @@ class scrub_service(scrub_control): print('stopping %s' % self.unitname) try: - self.unit.Stop('replace') + self.__dbusrun(lambda: self.unit.Stop('replace')) return self.wait() except Exception as e: print(e, file = sys.stderr)