diff mbox series

[3/3] tests/functional/asset: Add AssetError exception class

Message ID 20250312051739.938441-4-npiggin@gmail.com (mailing list archive)
State New
Headers show
Series tests/functional/asset: improve partial-download handling | expand

Commit Message

Nicholas Piggin March 12, 2025, 5:17 a.m. UTC
Assets are uniquely identified by human-readable-ish url, so make an
AssetError exception class that prints url with error message.

A property 'transient' is used to capture whether the client may retry
or try again later, or if it is a serious and likely permanent error.
This is used to retain the existing behaviour of treating HTTP errors
other than 404 as 'transient' and not causing precache step to fail.
Additionally, partial-downloads and stale asset caches that fail to
resolve after the retry limit are now treated as transient and do not
cause precache step to fail.

For background: The NetBSD archive is, at the time of writing, failing
with short transfer. Retrying the fetch at that position (as wget does)
results in a "503 backend unavailable" error. We would like to get that
error code directly, but I have not found a way to do that with urllib,
so treating the short-copy as a transient failure covers that case (and
seems like a reasonable way to handle it in general).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 tests/functional/qemu_test/asset.py | 41 ++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 15 deletions(-)

Comments

Thomas Huth March 12, 2025, 6:56 a.m. UTC | #1
On 12/03/2025 06.17, Nicholas Piggin wrote:
> Assets are uniquely identified by human-readable-ish url, so make an
> AssetError exception class that prints url with error message.
> 
> A property 'transient' is used to capture whether the client may retry
> or try again later, or if it is a serious and likely permanent error.
> This is used to retain the existing behaviour of treating HTTP errors
> other than 404 as 'transient' and not causing precache step to fail.
> Additionally, partial-downloads and stale asset caches that fail to
> resolve after the retry limit are now treated as transient and do not
> cause precache step to fail.
> 
> For background: The NetBSD archive is, at the time of writing, failing
> with short transfer. Retrying the fetch at that position (as wget does)
> results in a "503 backend unavailable" error. We would like to get that
> error code directly, but I have not found a way to do that with urllib,
> so treating the short-copy as a transient failure covers that case (and
> seems like a reasonable way to handle it in general).
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   tests/functional/qemu_test/asset.py | 41 ++++++++++++++++++-----------
>   1 file changed, 26 insertions(+), 15 deletions(-)

Reviewed-by: Thomas Huth <thuth@redhat.com>
diff mbox series

Patch

diff --git a/tests/functional/qemu_test/asset.py b/tests/functional/qemu_test/asset.py
index d34e8f5e2ad..69b7a5ecb0e 100644
--- a/tests/functional/qemu_test/asset.py
+++ b/tests/functional/qemu_test/asset.py
@@ -17,6 +17,14 @@ 
 from shutil import copyfileobj
 from urllib.error import HTTPError
 
+class AssetError(Exception):  # asset failure, reported together with its URL
+    def __init__(self, asset, msg, transient=False):
+        self.url = asset.url  # identifies the asset in error messages
+        self.msg = msg
+        self.transient = transient  # True: caller may retry or try later
+
+    def __str__(self):
+        return "%s: %s" % (self.url, self.msg)  # rendered as "<url>: <msg>"
 
 # Instances of this class must be declared as class level variables
 # starting with a name "ASSET_". This enables the pre-caching logic
@@ -51,7 +59,7 @@  def _check(self, cache_file):
         elif len(self.hash) == 128:
             hl = hashlib.sha512()
         else:
-            raise Exception("unknown hash type")
+            raise AssetError(self, "unknown hash type")
 
         # Calculate the hash of the file:
         with open(cache_file, 'rb') as file:
@@ -111,14 +119,16 @@  def fetch(self):
             return str(self.cache_file)
 
         if not self.fetchable():
-            raise Exception("Asset cache is invalid and downloads disabled")
+            raise AssetError(self,
+                             "Asset cache is invalid and downloads disabled")
 
         self.log.info("Downloading %s to %s...", self.url, self.cache_file)
         tmp_cache_file = self.cache_file.with_suffix(".download")
 
         for retries in range(4):
             if retries == 3:
-                raise Exception("Retries exceeded downloading %s", self.url)
+                raise AssetError(self, "Download retries exceeded",
+                                 transient=True)
 
             try:
                 with tmp_cache_file.open("xb") as dst:
@@ -152,10 +162,17 @@  def fetch(self):
                                tmp_cache_file)
                 tmp_cache_file.unlink()
                 continue
+            except HTTPError as e:
+                tmp_cache_file.unlink()
+                # Treat 404 as fatal, since it is highly likely to
+                # indicate a broken test rather than a transient
+                # server or networking problem
+                raise AssetError(self, "Unable to download: "
+                                 "HTTP error %d" % e.code,
+                                 transient = e.code != 404)
             except Exception as e:
-                self.log.error("Unable to download %s: %s", self.url, e)
                 tmp_cache_file.unlink()
-                raise
+                raise AssetError(self, "Unable to download: %s" % e)
 
         try:
             # Set these just for informational purposes
@@ -169,8 +186,7 @@  def fetch(self):
 
         if not self._check(tmp_cache_file):
             tmp_cache_file.unlink()
-            raise Exception("Hash of %s does not match %s" %
-                            (self.url, self.hash))
+            raise AssetError(self, "Hash does not match %s" % self.hash)
         tmp_cache_file.replace(self.cache_file)
         # Remove write perms to stop tests accidentally modifying them
         os.chmod(self.cache_file, stat.S_IRUSR | stat.S_IRGRP)
@@ -192,15 +208,10 @@  def precache_test(test):
                 log.info("Attempting to cache '%s'" % asset)
                 try:
                     asset.fetch()
-                except HTTPError as e:
-                    # Treat 404 as fatal, since it is highly likely to
-                    # indicate a broken test rather than a transient
-                    # server or networking problem
-                    if e.code == 404:
+                except AssetError as e:
+                    if not e.transient:
                         raise
-
-                    log.debug(f"HTTP error {e.code} from {asset.url} " +
-                              "skipping asset precache")
+                    log.error("%s: skipping asset precache" % e)
 
         log.removeHandler(handler)