From patchwork Mon Jan 8 12:18:31 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Patrick Steinhardt X-Patchwork-Id: 13513368 Received: from wout4-smtp.messagingengine.com (wout4-smtp.messagingengine.com [64.147.123.20]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0065A3FE59 for ; Mon, 8 Jan 2024 12:18:35 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=pks.im Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=pks.im Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=pks.im header.i=@pks.im header.b="me7ULOhD"; dkim=pass (2048-bit key) header.d=messagingengine.com header.i=@messagingengine.com header.b="qHaOquxK" Received: from compute3.internal (compute3.nyi.internal [10.202.2.43]) by mailout.west.internal (Postfix) with ESMTP id C59E53201202; Mon, 8 Jan 2024 07:18:34 -0500 (EST) Received: from mailfrontend2 ([10.202.2.163]) by compute3.internal (MEProxy); Mon, 08 Jan 2024 07:18:35 -0500 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=pks.im; h=cc:cc :content-type:content-type:date:date:from:from:in-reply-to :in-reply-to:message-id:mime-version:references:reply-to:subject :subject:to:to; s=fm2; t=1704716314; x=1704802714; bh=HJBG+tHJIO nQlbNESF+ioFtJ5tnNG/slUH8eiY2yJek=; b=me7ULOhDop/wHzTVbHxyNgofFJ 6UjAVL4WIk3NXtrk0eeKtz95UN05AXGo7e18Ms1Fa5R8GXQyAafaIArZXxKcm0TB f2LlvO+6IC+oavXWj0UeLHVmXyp20KVZSFrmMHmPgXfTYnG+1C0PYwW8Tdm0zfMr QkfJgyIjVmcau7IUUqW4cXKJcpqFB86+VYKEg1g4o7zhhcpvu/QeVtggY3BnJPFO B0XsjAfcQTyPPvkwuJbTrYMovyqLvQ9z8xxYtWEHkHQSwIlWX2Hd0Fx+IcVCIRAC gvy5ia1cbwLOH6XA2C5/PhJ+RL4WToK/MNFQ0mF1qJv3pQuSbnAkDK4xZ4DQ== DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d= messagingengine.com; h=cc:cc:content-type:content-type:date:date :feedback-id:feedback-id:from:from:in-reply-to:in-reply-to :message-id:mime-version:references:reply-to:subject:subject:to :to:x-me-proxy:x-me-proxy:x-me-sender:x-me-sender:x-sasl-enc; s= fm2; t=1704716314; x=1704802714; bh=HJBG+tHJIOnQlbNESF+ioFtJ5tnN G/slUH8eiY2yJek=; b=qHaOquxKKphwds7USyPX2QtQWxqSdA/0KejXhFENp2G+ XCOwA7aX5sQuhidPEsuKWCuvuEffBGFaYmcnJL8trRMI0WyYOw345jb+eGH91/5J WqotEEKiiddiLoAzSJtEb906Wz3zmJOn1UIKy7ABhwmU3R92BOl3bnOxD043KzNS dvBCdUO0lB/xFVMRFS/eW4VbnyE4CVuPRub5l1yI6HsWvt9DEyqzsT+EzuG1YAX+ 8tLoqdSqhyv4hhi5v6n6zi+zBUmFlqWPqBifXNVOUpPe4BONsdG7U86RVenWnZHU DZ8Vf9hjMHxOJizplMQ4fBxw4pcl79XuGjNfWXZAjw== X-ME-Sender: X-ME-Received: X-ME-Proxy-Cause: gggruggvucftvghtrhhoucdtuddrgedvkedrvdehjedgfeejucetufdoteggodetrfdotf fvucfrrhhofhhilhgvmecuhfgrshhtofgrihhlpdfqfgfvpdfurfetoffkrfgpnffqhgen uceurghilhhouhhtmecufedttdenucenucfjughrpeffhffvvefukfhfgggtuggjsehgtd erredttddvnecuhfhrohhmpefrrghtrhhitghkucfuthgvihhnhhgrrhguthcuoehpshes phhkshdrihhmqeenucggtffrrghtthgvrhhnpeeukedtvedtffevleejtefgheehieegke eluddvfeefgeehgfeltddtheejleffteenucevlhhushhtvghrufhiiigvpedtnecurfgr rhgrmhepmhgrihhlfhhrohhmpehpshesphhkshdrihhm X-ME-Proxy: Feedback-ID: i197146af:Fastmail Received: by mail.messagingengine.com (Postfix) with ESMTPA; Mon, 8 Jan 2024 07:18:33 -0500 (EST) Received: by vm-mail (OpenSMTPD) with ESMTPSA id 457f4ca1 (TLSv1.3:TLS_AES_256_GCM_SHA384:256:NO); Mon, 8 Jan 2024 12:15:56 +0000 (UTC) Date: Mon, 8 Jan 2024 13:18:31 +0100 From: Patrick Steinhardt To: git@vger.kernel.org Cc: Han-Wen Nienhuys Subject: [PATCH 1/4] reftable/stack: refactor stack reloading to have common exit path Message-ID: <01ece2626dd4cb494829e146d99c172fa8428478.1704714575.git.ps@pks.im> References: Precedence: bulk X-Mailing-List: git@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: The `reftable_stack_reload_maybe_reuse()` function is responsible for reloading the reftable list from disk. The function is quite hard to follow though because it has a bunch of different exit paths, many of which have to free the same set of resources. Refactor the function to have a common exit path. While at it, touch up the style of this function a bit to match our usual coding style better. --- reftable/stack.c | 86 +++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/reftable/stack.c b/reftable/stack.c index 16bab82063..bf869a6772 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -304,69 +304,67 @@ static int tv_cmp(struct timeval *a, struct timeval *b) static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, int reuse_open) { - struct timeval deadline = { 0 }; - int err = gettimeofday(&deadline, NULL); + char **names = NULL, **names_after = NULL; + struct timeval deadline; int64_t delay = 0; - int tries = 0; - if (err < 0) - return err; + int tries = 0, err; + err = gettimeofday(&deadline, NULL); + if (err < 0) + goto out; deadline.tv_sec += 3; + while (1) { - char **names = NULL; - char **names_after = NULL; - struct timeval now = { 0 }; - int err = gettimeofday(&now, NULL); - int err2 = 0; - if (err < 0) { - return err; - } + struct timeval now; - /* Only look at deadlines after the first few times. This - simplifies debugging in GDB */ + err = gettimeofday(&now, NULL); + if (err < 0) + goto out; + + /* + * Only look at deadlines after the first few times. This + * simplifies debugging in GDB. + */ tries++; - if (tries > 3 && tv_cmp(&now, &deadline) >= 0) { - break; - } + if (tries > 3 && tv_cmp(&now, &deadline) >= 0) + goto out; err = read_lines(st->list_file, &names); - if (err < 0) { - free_names(names); - return err; - } + if (err < 0) + goto out; + err = reftable_stack_reload_once(st, names, reuse_open); - if (err == 0) { - free_names(names); + if (!err) break; - } - if (err != REFTABLE_NOT_EXIST_ERROR) { - free_names(names); - return err; - } - - /* err == REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent - writer. Check if there was one by checking if the name list - changed. - */ - err2 = read_lines(st->list_file, &names_after); - if (err2 < 0) { - free_names(names); - return err2; - } - + if (err != REFTABLE_NOT_EXIST_ERROR) + goto out; + + /* + * REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent + * writer. Check if there was one by checking if the name list + * changed. + */ + err = read_lines(st->list_file, &names_after); + if (err < 0) + goto out; if (names_equal(names_after, names)) { - free_names(names); - free_names(names_after); - return err; + err = REFTABLE_NOT_EXIST_ERROR; + goto out; } + free_names(names); + names = NULL; free_names(names_after); + names_after = NULL; delay = delay + (delay * rand()) / RAND_MAX + 1; sleep_millisec(delay); } - return 0; +out: + free_names(names); + free_names(names_after); + return err; } /* -1 = error From patchwork Mon Jan 8 12:18:35 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Patrick Steinhardt X-Patchwork-Id: 13513369 Received: from wout4-smtp.messagingengine.com (wout4-smtp.messagingengine.com [64.147.123.20]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CFB86405C3 for ; Mon, 8 Jan 2024 12:18:39 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=pks.im Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=pks.im Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=pks.im header.i=@pks.im header.b="CcIOlCe+"; dkim=pass (2048-bit key) header.d=messagingengine.com header.i=@messagingengine.com header.b="CBKbrS2A" Received: from compute2.internal (compute2.nyi.internal [10.202.2.46]) by mailout.west.internal (Postfix) with ESMTP id B22F53201202; Mon, 8 Jan 2024 07:18:38 -0500 (EST) Received: from mailfrontend2 ([10.202.2.163]) by compute2.internal (MEProxy); Mon, 08 Jan 2024 07:18:38 -0500 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=pks.im; h=cc:cc :content-type:content-type:date:date:from:from:in-reply-to :in-reply-to:message-id:mime-version:references:reply-to:subject :subject:to:to; s=fm2; t=1704716318; x=1704802718; bh=wUGPbTya2H I0qwMWHWvVPKRa6Lo6fizHuck+wBGXDI0=; b=CcIOlCe+npb7dq06NqV5t50vxs yJf607jTUz4I7hNyLP0t+gSk1G8R10RDmXLiNdTpud0gIZHcdRdorWs0l2jjDwao TxTZex8xRMnBrSECadAvFca+r3TUa2yGfybe/CmGg86+UoU+W4ZppxpHxX8vpbD4 Cr5q0UhwsEMu2DNI+1I7Aar8nHe25BETGJZv6cn82iX3UshMJ5p3zfy1z6A1pu1/ bngtcFB0jpMtISemw5IqaOCrH6JlmeLOpMT8QmjbM68hRVxI5LpN+IsAQ69T4KIO nbgCDfOyGDl15PziP+9rY+4ocnNMF0UqkE5T48B5/uyWm9e0f15O9wYBJhUw== DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d= messagingengine.com; h=cc:cc:content-type:content-type:date:date :feedback-id:feedback-id:from:from:in-reply-to:in-reply-to :message-id:mime-version:references:reply-to:subject:subject:to :to:x-me-proxy:x-me-proxy:x-me-sender:x-me-sender:x-sasl-enc; s= fm2; t=1704716318; x=1704802718; bh=wUGPbTya2HI0qwMWHWvVPKRa6Lo6 fizHuck+wBGXDI0=; b=CBKbrS2AxOJ8e8XYNJY8bdu6a8yUmAC2RYH2rvrE1x2U kwKqRXA+vwsHTFEVzyPngC2qmJJuRBWAK0ehcdvaQej19/Hyo/4oJvOI+9zAFLP5 ZTpvhbFJdSCLysvDGLq/tejb+P90Q7nBvhAoI665skR5ZZpxmUIqTv3Xh6ZRmucd hze5wUykByaIJaO22zTvNS1eJhb5Zx5v+gDh/9n6+VuLtPmFderhnTvkd/wlFLtG 7wiNkB18Syn6OrG59BTY6CGf1dcOninVwtqOSSn87u77x4bINX3ijBjkXRTXRosh emNV+P9giAniLcZW1F1FQbXSxF/Bt5xplMfzU4zQFg== X-ME-Sender: X-ME-Received: X-ME-Proxy-Cause: gggruggvucftvghtrhhoucdtuddrgedvkedrvdehjedgfeejucetufdoteggodetrfdotf fvucfrrhhofhhilhgvmecuhfgrshhtofgrihhlpdfqfgfvpdfurfetoffkrfgpnffqhgen uceurghilhhouhhtmecufedttdenucenucfjughrpeffhffvvefukfhfgggtuggjsehgtd erredttddvnecuhfhrohhmpefrrghtrhhitghkucfuthgvihhnhhgrrhguthcuoehpshes phhkshdrihhmqeenucggtffrrghtthgvrhhnpeeukedtvedtffevleejtefgheehieegke eluddvfeefgeehgfeltddtheejleffteenucevlhhushhtvghrufhiiigvpedtnecurfgr rhgrmhepmhgrihhlfhhrohhmpehpshesphhkshdrihhm X-ME-Proxy: Feedback-ID: i197146af:Fastmail Received: by mail.messagingengine.com (Postfix) with ESMTPA; Mon, 8 Jan 2024 07:18:37 -0500 (EST) Received: by vm-mail (OpenSMTPD) with ESMTPSA id 9de32cc2 (TLSv1.3:TLS_AES_256_GCM_SHA384:256:NO); Mon, 8 Jan 2024 12:16:01 +0000 (UTC) Date: Mon, 8 Jan 2024 13:18:35 +0100 From: Patrick Steinhardt To: git@vger.kernel.org Cc: Han-Wen Nienhuys Subject: [PATCH 2/4] reftable/stack: refactor reloading to use file descriptor Message-ID: <726d302d7b21a5b948575492c717cfdb701de5cd.1704714575.git.ps@pks.im> References: Precedence: bulk X-Mailing-List: git@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: We're about to introduce a stat(3P)-based caching mechanism to reload the list of stacks only when it has changed. In order to avoid race conditions this requires us to have a file descriptor available that we can use to call fstat(3P) on. Prepare for this by converting the code to use `fd_read_lines()` so that we have the file descriptor readily available. --- reftable/stack.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/reftable/stack.c b/reftable/stack.c index bf869a6772..b1ee247601 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -308,6 +308,7 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, struct timeval deadline; int64_t delay = 0; int tries = 0, err; + int fd = -1; err = gettimeofday(&deadline, NULL); if (err < 0) @@ -329,9 +330,19 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, if (tries > 3 && tv_cmp(&now, &deadline) >= 0) goto out; - err = read_lines(st->list_file, &names); - if (err < 0) - goto out; + fd = open(st->list_file, O_RDONLY); + if (fd < 0) { + if (errno != ENOENT) { + err = REFTABLE_IO_ERROR; + goto out; + } + + names = reftable_calloc(sizeof(char *)); + } else { + err = fd_read_lines(fd, &names); + if (err < 0) + goto out; + } err = reftable_stack_reload_once(st, names, reuse_open); if (!err) @@ -356,12 +367,16 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, names = NULL; free_names(names_after); names_after = NULL; + close(fd); + fd = -1; delay = delay + (delay * rand()) / RAND_MAX + 1; sleep_millisec(delay); } out: + if (fd >= 0) + close(fd); free_names(names); free_names(names_after); return err; From patchwork Mon Jan 8 12:18:39 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Patrick Steinhardt X-Patchwork-Id: 13513370 Received: from wout4-smtp.messagingengine.com (wout4-smtp.messagingengine.com [64.147.123.20]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 97190405CD for ; Mon, 8 Jan 2024 12:18:43 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=pks.im Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=pks.im Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=pks.im header.i=@pks.im header.b="sYBlQCxq"; dkim=pass (2048-bit key) header.d=messagingengine.com header.i=@messagingengine.com header.b="UM5Jsk8p" Received: from compute5.internal (compute5.nyi.internal [10.202.2.45]) by mailout.west.internal (Postfix) with ESMTP id A1BC83201289; Mon, 8 Jan 2024 07:18:42 -0500 (EST) Received: from mailfrontend2 ([10.202.2.163]) by compute5.internal (MEProxy); Mon, 08 Jan 2024 07:18:42 -0500 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=pks.im; h=cc:cc :content-type:content-type:date:date:from:from:in-reply-to :in-reply-to:message-id:mime-version:references:reply-to:subject :subject:to:to; s=fm2; t=1704716322; x=1704802722; bh=PbmsRiNoE5 9+zE90j2HD0dSEDlpS3Rsze2wAZJpAAoQ=; b=sYBlQCxqYDzgtJeP4uZ/Rw41td fpLlbGoZTqQZaP9sad2pjOzi4vcXVCchD7ftHvxwklgfX6IlKialR7xSthrBtzp8 ZG4JMPUGKnjEUH6e0I8R4YKrdN2A0z2qJ8e7aumohDpx5anyd47SPvyDt5tm0Mcz 51AeVUvMEerGx3mI+6hR9UlFWMRglzn/Pp5efhkC6dm8Q/Mhwtx09d+hC06u2Xtd IQb6bAK5zwQMLpqfkbEnRjj6vOXj+kQ/0cVb/xnR09QOG+INpgPm1UY4Nw9E/ZAu lHG1V9E5nwI0A2kpy3Q7cS3QB8Z5CVuHB/zryHVie/yfKDBcd9E2C7ptwjvw== DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d= messagingengine.com; h=cc:cc:content-type:content-type:date:date :feedback-id:feedback-id:from:from:in-reply-to:in-reply-to :message-id:mime-version:references:reply-to:subject:subject:to :to:x-me-proxy:x-me-proxy:x-me-sender:x-me-sender:x-sasl-enc; s= fm2; t=1704716322; x=1704802722; bh=PbmsRiNoE59+zE90j2HD0dSEDlpS 3Rsze2wAZJpAAoQ=; b=UM5Jsk8puYLQev4pCWd8fsz2M+jsXtKOtPSBgWLssWXg NpyolPtbeyxIZLKj+THTZltXkxEh9dK6x8s8vCMzgrYlbp5LVxKo5p3qAzv/YV1D 4dEhfcAVIgWSEGrnbwzLENQyUe2TUaIDXDCdaFjbc7FOxRCdQS6/eneDfpwcPmHB gCcqeGRLpdfmSn55l7OEzAaRQQh//SbDY0f4fuao895AtzZIpuBU7EtEcLAW65ZG ZBKegFVvlVTcAJvpfyt7ILjRJu1rtpWINIHFbMhD1BXGexWlJTET9Z14HMmTgGGn R091gXUHlg4fvX7WP+3bObJl7vsKJIAeX7UGI90UeQ== X-ME-Sender: X-ME-Received: X-ME-Proxy-Cause: gggruggvucftvghtrhhoucdtuddrgedvkedrvdehjedgfeekucetufdoteggodetrfdotf fvucfrrhhofhhilhgvmecuhfgrshhtofgrihhlpdfqfgfvpdfurfetoffkrfgpnffqhgen uceurghilhhouhhtmecufedttdenucenucfjughrpeffhffvvefukfhfgggtuggjsehgtd erredttdejnecuhfhrohhmpefrrghtrhhitghkucfuthgvihhnhhgrrhguthcuoehpshes phhkshdrihhmqeenucggtffrrghtthgvrhhnpeetueevhffhudefvdegieeuieelgedthf egfedtueevjeejtdfgjeehudejuedtudenucevlhhushhtvghrufhiiigvpedtnecurfgr rhgrmhepmhgrihhlfhhrohhmpehpshesphhkshdrihhm X-ME-Proxy: Feedback-ID: i197146af:Fastmail Received: by mail.messagingengine.com (Postfix) with ESMTPA; Mon, 8 Jan 2024 07:18:41 -0500 (EST) Received: by vm-mail (OpenSMTPD) with ESMTPSA id 39b5d7e8 (TLSv1.3:TLS_AES_256_GCM_SHA384:256:NO); Mon, 8 Jan 2024 12:16:05 +0000 (UTC) Date: Mon, 8 Jan 2024 13:18:39 +0100 From: Patrick Steinhardt To: git@vger.kernel.org Cc: Han-Wen Nienhuys Subject: [PATCH 3/4] reftable/stack: use stat info to avoid re-reading stack list Message-ID: <4fabdc3d8016dbc1e20fbe90058ee7320a5f770b.1704714575.git.ps@pks.im> References: Precedence: bulk X-Mailing-List: git@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: Whenever we call into the refs interfaces we potentially have to reload refs in case they have been concurrently modified, either in-process or externally. While this happens somewhat automatically for loose refs because we simply try to re-read the files, the "packed" backend will reload its snapshot of the packed-refs file in case its stat info has changed since last reading it. In the reftable backend we have a similar mechanism that is provided by `reftable_stack_reload()`. This function will read the list of stacks from "tables.list" and, if they have changed from the currently stored list, reload the stacks. This is heavily inefficient though, as we have to check whether the stack is up-to-date on basically every read and thus keep on re-reading the file all the time even if it didn't change at all. We can do better and use the same stat(3P)-based mechanism that the "packed" backend uses. Instead of reading the file, we will only open the file descriptor, fstat(3P) it, and then compare the info against the cached value from the last time we have updated the stack. This should always work alright because "tables.list" is updated atomically via a rename, so even if the ctime or mtime wasn't granular enough to identify a change, at least the inode number should have changed. This change significantly speeds up operations where many refs are read, like when using git-update-ref(1). The following benchmark creates N refs in an otherwise-empty repository via `git update-ref --stdin`: Benchmark 1: update-ref: create many refs (refcount = 1, revision = HEAD~) Time (mean ± σ): 5.6 ms ± 0.2 ms [User: 2.5 ms, System: 3.0 ms] Range (min … max): 5.2 ms … 6.0 ms 89 runs Benchmark 2: update-ref: create many refs (refcount = 100, revision = HEAD~) Time (mean ± σ): 19.2 ms ± 0.3 ms [User: 8.6 ms, System: 10.4 ms] Range (min … max): 18.4 ms … 19.8 ms 63 runs Benchmark 3: update-ref: create many refs (refcount = 10000, revision = HEAD~) Time (mean ± σ): 1.310 s ± 0.014 s [User: 0.566 s, System: 0.724 s] Range (min … max): 1.291 s … 1.342 s 10 runs Benchmark 4: update-ref: create many refs (refcount = 1, revision = HEAD) Time (mean ± σ): 5.7 ms ± 0.4 ms [User: 2.6 ms, System: 3.1 ms] Range (min … max): 5.1 ms … 9.5 ms 91 runs Benchmark 5: update-ref: create many refs (refcount = 100, revision = HEAD) Time (mean ± σ): 15.2 ms ± 0.3 ms [User: 7.0 ms, System: 8.1 ms] Range (min … max): 14.3 ms … 17.1 ms 71 runs Benchmark 6: update-ref: create many refs (refcount = 10000, revision = HEAD) Time (mean ± σ): 916.1 ms ± 11.0 ms [User: 420.8 ms, System: 495.0 ms] Range (min … max): 906.9 ms … 943.8 ms 10 runs Summary update-ref: create many refs (refcount = 1, revision = HEAD~) ran 1.01 ± 0.09 times faster than update-ref: create many refs (refcount = 1, revision = HEAD) 2.72 ± 0.11 times faster than update-ref: create many refs (refcount = 100, revision = HEAD) 3.42 ± 0.13 times faster than update-ref: create many refs (refcount = 100, revision = HEAD~) 163.59 ± 5.62 times faster than update-ref: create many refs (refcount = 10000, revision = HEAD) 233.91 ± 7.92 times faster than update-ref: create many refs (refcount = 10000, revision = HEAD~) --- reftable/stack.c | 10 +++++++++- reftable/stack.h | 1 + reftable/system.h | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/reftable/stack.c b/reftable/stack.c index b1ee247601..a51a2dbf1f 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -175,6 +175,7 @@ void reftable_stack_destroy(struct reftable_stack *st) st->readers_len = 0; FREE_AND_NULL(st->readers); } + stat_validity_clear(&st->list_validity); FREE_AND_NULL(st->list_file); FREE_AND_NULL(st->reftable_dir); reftable_free(st); @@ -374,6 +375,8 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, sleep_millisec(delay); } + stat_validity_update(&st->list_validity, fd); + out: if (fd >= 0) close(fd); @@ -388,8 +391,13 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, static int stack_uptodate(struct reftable_stack *st) { char **names = NULL; - int err = read_lines(st->list_file, &names); + int err; int i = 0; + + if (stat_validity_check(&st->list_validity, st->list_file)) + return 0; + + err = read_lines(st->list_file, &names); if (err < 0) return err; diff --git a/reftable/stack.h b/reftable/stack.h index f57005846e..3f80cc598a 100644 --- a/reftable/stack.h +++ b/reftable/stack.h @@ -14,6 +14,7 @@ license that can be found in the LICENSE file or at #include "reftable-stack.h" struct reftable_stack { + struct stat_validity list_validity; char *list_file; char *reftable_dir; int disable_auto_compact; diff --git a/reftable/system.h b/reftable/system.h index 6b74a81514..2cc7adf271 100644 --- a/reftable/system.h +++ b/reftable/system.h @@ -12,6 +12,7 @@ license that can be found in the LICENSE file or at /* This header glues the reftable library to the rest of Git */ #include "git-compat-util.h" +#include "statinfo.h" #include "strbuf.h" #include "hash-ll.h" /* hash ID, sizes.*/ #include "dir.h" /* remove_dir_recursively, for tests.*/ From patchwork Mon Jan 8 12:18:43 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Patrick Steinhardt X-Patchwork-Id: 13513371 Received: from wout4-smtp.messagingengine.com (wout4-smtp.messagingengine.com [64.147.123.20]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 923EC405E1 for ; Mon, 8 Jan 2024 12:18:47 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=pks.im Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=pks.im Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=pks.im header.i=@pks.im header.b="TkXplaOx"; dkim=pass (2048-bit key) header.d=messagingengine.com header.i=@messagingengine.com header.b="XG7Sg43d" Received: from compute3.internal (compute3.nyi.internal [10.202.2.43]) by mailout.west.internal (Postfix) with ESMTP id 866E53201289; Mon, 8 Jan 2024 07:18:46 -0500 (EST) Received: from mailfrontend2 ([10.202.2.163]) by compute3.internal (MEProxy); Mon, 08 Jan 2024 07:18:46 -0500 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=pks.im; h=cc:cc :content-type:content-type:date:date:from:from:in-reply-to :in-reply-to:message-id:mime-version:references:reply-to:subject :subject:to:to; s=fm2; t=1704716326; x=1704802726; bh=UF/ry2kAj9 LIC50i1Nw1RkSQaNsR/6vArd7ZxNIQpH8=; b=TkXplaOxgpF+adqWXF+R54Yfg1 9t8Gpi6Lj/J7wgJ33OufiGeqJ9njrVYuJQRhRXlJl4EGqRvjHtppt78xmUp2VUY1 +BwFzUuJ4Nx6qSizbQPAQw/umNgKsIF51+r/o5qwCdMJ7fw2w7zJRDzb5/kOGPeW l/reOk227k9nMFpBWKb8G6eGEk2cURxV1hnyDGYyO1bkP5SvY25GFZa9yoIkBExt dkeK+aqsG5KOMZr3vDT4DDkmmvW0/phQsKq1pMe/cTy4Q+gHoD0UzOI3mC2HN+4J UyfgLTqhQqH/XNIjzh5TA7T5onMhlhIIHxxjc1mH0epkyIXGboB9/jADi+XQ== DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d= messagingengine.com; h=cc:cc:content-type:content-type:date:date :feedback-id:feedback-id:from:from:in-reply-to:in-reply-to :message-id:mime-version:references:reply-to:subject:subject:to :to:x-me-proxy:x-me-proxy:x-me-sender:x-me-sender:x-sasl-enc; s= fm2; t=1704716326; x=1704802726; bh=UF/ry2kAj9LIC50i1Nw1RkSQaNsR /6vArd7ZxNIQpH8=; b=XG7Sg43dVudyHg5XhXcNIZZK9WczrmynqELZ0mVac1KM MhWEQM7tbg3sdcOVyZ8qK6tee2x0rkyBD0Ygq8SMNN2A8jpQZ1k4gcJM90KDmB7/ hgPMVh2C9u8vzPTqWL3nmP6l/ghgwg7Fj1bDsHUYC53L2IUEPKbTJY670cCsGR5+ 58CWOuNNfZAUnelKce9/jzKXwwvpRoEi8uCGN1yDPU5RDlpLC8neFB/ojqPUGk3m uXTs3Mh4fVvxiqk7R1FS4Tw+xc9/EAvTPTVo4fwSSSdqcAXQOju2O3jDoorTmLrW P3XpC54oiXJBdSmi3N1Dmn2icS8tmJLWEmsshLT9hg== X-ME-Sender: X-ME-Received: X-ME-Proxy-Cause: gggruggvucftvghtrhhoucdtuddrgedvkedrvdehjedgfeejucetufdoteggodetrfdotf fvucfrrhhofhhilhgvmecuhfgrshhtofgrihhlpdfqfgfvpdfurfetoffkrfgpnffqhgen uceurghilhhouhhtmecufedttdenucenucfjughrpeffhffvvefukfhfgggtuggjsehgtd erredttdejnecuhfhrohhmpefrrghtrhhitghkucfuthgvihhnhhgrrhguthcuoehpshes phhkshdrihhmqeenucggtffrrghtthgvrhhnpeetueevhffhudefvdegieeuieelgedthf egfedtueevjeejtdfgjeehudejuedtudenucevlhhushhtvghrufhiiigvpedtnecurfgr rhgrmhepmhgrihhlfhhrohhmpehpshesphhkshdrihhm X-ME-Proxy: Feedback-ID: i197146af:Fastmail Received: by mail.messagingengine.com (Postfix) with ESMTPA; Mon, 8 Jan 2024 07:18:45 -0500 (EST) Received: by vm-mail (OpenSMTPD) with ESMTPSA id c775b688 (TLSv1.3:TLS_AES_256_GCM_SHA384:256:NO); Mon, 8 Jan 2024 12:16:09 +0000 (UTC) Date: Mon, 8 Jan 2024 13:18:43 +0100 From: Patrick Steinhardt To: git@vger.kernel.org Cc: Han-Wen Nienhuys Subject: [PATCH 4/4] reftable/blocksource: use mmap to read tables Message-ID: References: Precedence: bulk X-Mailing-List: git@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: The blocksource interface provides an interface to read blocks from a reftable table. This interface is implemented using read(3P) calls on the underlying file descriptor. While this works alright, this pattern is very inefficient when repeatedly querying the reftable stack for one or more refs. This inefficiency can mostly be attributed to the fact that we often need to re-read the same blocks over and over again, and every single time we need to call read(3P) again. A natural fit in this context is to use mmap(3P) instead of read(3P), which has a bunch of benefits: - We do not need to come up with a caching strategy for some of the blocks as this will be handled by the kernel already. - We can avoid the overhead of having to call into the read(3P) syscall repeatedly. - We do not need to allocate returned blocks repeatedly, but can instead hand out pointers into the mmapped region directly. Using mmap comes with a significant drawback on Windows though, because mmapped files cannot be deleted and neither is it possible to rename files onto an mmapped file. But for one, the reftable library gracefully handles the case where auto-compaction cannot delete a still-open stack already and ignores any such errors. Also, `reftable_stack_clean()` will prune stale tables which are not referenced by "tables.list" anymore so that those files can eventually be pruned. And second, we never rewrite already-rewritten stacks, so it does not matter that we cannot rename a file over an mmaped file, either. Another unfortunate property of mmap is that it is not supported by all systems. But given that the size of reftables should typically be rather limited (megabytes at most in the vast majority of repositories), we can provide an easy fallback by just reading the complete table into memory on such platforms. This is the same strategy that the "packed" backend uses in case the platform does not provide mmap. While this change doesn't significantly improve performance in the case where we're seeking through stacks once (like e.g. git-for-each-ref(1) would). But it does speed up usecases where there is lots of random access to refs, e.g. when writing. The following benchmark demonstrates these savings with git-update-ref(1) creating N refs in an otherwise empty repository: Benchmark 1: update-ref: create many refs (refcount = 1, revision = HEAD~) Time (mean ± σ): 5.6 ms ± 0.2 ms [User: 2.7 ms, System: 2.8 ms] Range (min … max): 5.1 ms … 6.0 ms 90 runs Benchmark 2: update-ref: create many refs (refcount = 100, revision = HEAD~) Time (mean ± σ): 15.1 ms ± 0.4 ms [User: 7.1 ms, System: 8.0 ms] Range (min … max): 14.2 ms … 16.5 ms 71 runs Benchmark 3: update-ref: create many refs (refcount = 10000, revision = HEAD~) Time (mean ± σ): 919.4 ms ± 11.2 ms [User: 427.5 ms, System: 491.7 ms] Range (min … max): 908.1 ms … 936.6 ms 10 runs Benchmark 4: update-ref: create many refs (refcount = 1, revision = HEAD) Time (mean ± σ): 5.5 ms ± 0.3 ms [User: 2.4 ms, System: 3.1 ms] Range (min … max): 5.0 ms … 7.3 ms 89 runs Benchmark 5: update-ref: create many refs (refcount = 100, revision = HEAD) Time (mean ± σ): 10.4 ms ± 0.3 ms [User: 5.1 ms, System: 5.3 ms] Range (min … max): 9.7 ms … 11.0 ms 78 runs Benchmark 6: update-ref: create many refs (refcount = 10000, revision = HEAD) Time (mean ± σ): 483.5 ms ± 6.3 ms [User: 227.8 ms, System: 255.6 ms] Range (min … max): 477.5 ms … 498.8 ms 10 runs Summary update-ref: create many refs (refcount = 1, revision = HEAD) ran 1.01 ± 0.06 times faster than update-ref: create many refs (refcount = 1, revision = HEAD~) 1.89 ± 0.11 times faster than update-ref: create many refs (refcount = 100, revision = HEAD) 2.73 ± 0.16 times faster than update-ref: create many refs (refcount = 100, revision = HEAD~) 87.55 ± 4.65 times faster than update-ref: create many refs (refcount = 10000, revision = HEAD) 166.48 ± 8.80 times faster than update-ref: create many refs (refcount = 10000, revision = HEAD~) Theoretically, we could also replicate the strategy of the "packed" backend where small tables are read into memory instead of using mmap. Benchmarks did not confirm that this has a performance benefit though. --- reftable/blocksource.c | 48 ++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/reftable/blocksource.c b/reftable/blocksource.c index a1ea304429..5d3f3d264c 100644 --- a/reftable/blocksource.c +++ b/reftable/blocksource.c @@ -13,6 +13,12 @@ license that can be found in the LICENSE file or at #include "reftable-blocksource.h" #include "reftable-error.h" +#if defined(NO_MMAP) +static int use_mmap = 0; +#else +static int use_mmap = 1; +#endif + static void strbuf_return_block(void *b, struct reftable_block *dest) { if (dest->len) @@ -78,6 +84,7 @@ struct reftable_block_source malloc_block_source(void) struct file_block_source { int fd; uint64_t size; + unsigned char *data; }; static uint64_t file_size(void *b) @@ -87,19 +94,23 @@ static uint64_t file_size(void *b) static void file_return_block(void *b, struct reftable_block *dest) { - if (dest->len) - memset(dest->data, 0xff, dest->len); - reftable_free(dest->data); } -static void file_close(void *b) +static void file_close(void *v) { - int fd = ((struct file_block_source *)b)->fd; - if (fd > 0) { - close(fd); - ((struct file_block_source *)b)->fd = 0; + struct file_block_source *b = v; + + if (b->fd >= 0) { + close(b->fd); + b->fd = -1; } + if (use_mmap) + munmap(b->data, b->size); + else + reftable_free(b->data); + b->data = NULL; + reftable_free(b); } @@ -108,9 +119,7 @@ static int file_read_block(void *v, struct reftable_block *dest, uint64_t off, { struct file_block_source *b = v; assert(off + size <= b->size); - dest->data = reftable_malloc(size); - if (pread_in_full(b->fd, dest->data, size, off) != size) - return -1; + dest->data = b->data + off; dest->len = size; return size; } @@ -127,8 +136,10 @@ int reftable_block_source_from_file(struct reftable_block_source *bs, { struct stat st = { 0 }; int err = 0; - int fd = open(name, O_RDONLY); + int fd; struct file_block_source *p = NULL; + + fd = open(name, O_RDONLY); if (fd < 0) { if (errno == ENOENT) { return REFTABLE_NOT_EXIST_ERROR; @@ -144,7 +155,18 @@ int reftable_block_source_from_file(struct reftable_block_source *bs, p = reftable_calloc(sizeof(struct file_block_source)); p->size = st.st_size; - p->fd = fd; + if (use_mmap) { + p->data = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + p->fd = fd; + } else { + p->data = xmalloc(st.st_size); + if (read_in_full(fd, p->data, st.st_size) != st.st_size) { + close(fd); + return -1; + } + close(fd); + p->fd = -1; + } assert(!bs->ops); bs->ops = &file_vtable;