From eac67a908d9b788fb7c4004c87b40b7d99a209ee Mon Sep 17 00:00:00 2001 From: jaimergp Date: Fri, 12 Apr 2024 17:39:35 +0200 Subject: [PATCH 01/33] add content_sha256 hash checks --- conda_build/source.py | 22 +++++++++++++++---- conda_build/utils.py | 17 +++++++++++++- .../metadata/source_url/meta.yaml | 1 + 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/conda_build/source.py b/conda_build/source.py index 984fb239e8..68cc1e8e3d 100644 --- a/conda_build/source.py +++ b/conda_build/source.py @@ -25,6 +25,7 @@ LoggingContext, check_call_env, check_output_env, + compute_content_hash, convert_path_for_cygwin_or_msys2, convert_unix_path_to_win, copy_into, @@ -44,6 +45,8 @@ git_submod_re = re.compile(r"(?:.+)\.(.+)\.(?:.+)\s(.+)") ext_re = re.compile(r"(.*?)(\.(?:tar\.)?[^.]+)$") +HASH_KEYS = ("md5", "sha1", "sha256") +CONTENT_HASH_KEYS = ("content_md5", "content_sha1", "content_sha256") def append_hash_to_fn(fn, hash_value): @@ -64,7 +67,7 @@ def download_to_cache(cache_folder, recipe_path, source_dict, verbose=False): source_dict["fn"] if "fn" in source_dict else basename(source_urls[0]) ) hash_added = False - for hash_type in ("md5", "sha1", "sha256"): + for hash_type in HASH_KEYS: if hash_type in source_dict: if source_dict[hash_type] in (None, ""): raise ValueError(f"Empty {hash_type} hash provided for {fn}") @@ -73,7 +76,7 @@ def download_to_cache(cache_folder, recipe_path, source_dict, verbose=False): break else: log.warn( - f"No hash (md5, sha1, sha256) provided for {unhashed_fn}. Source download forced. " + f"No hash {HASH_KEYS} provided for {unhashed_fn}. Source download forced. " "Add hash to recipe to use source cache." ) path = join(cache_folder, fn) @@ -114,7 +117,7 @@ def download_to_cache(cache_folder, recipe_path, source_dict, verbose=False): raise RuntimeError("Could not download %s" % url) hashed = None - for tp in ("md5", "sha1", "sha256"): + for tp in HASH_KEYS: if tp in source_dict: expected_hash = source_dict[tp] hashed = compute_sum(path, tp) @@ -1026,7 +1029,7 @@ def provide(metadata): git = None try: - for source_dict in metadata.get_section("source"): + for idx, source_dict in enumerate(metadata.get_section("source")): folder = source_dict.get("folder") src_dir = os.path.join(metadata.config.work_dir, folder if folder else "") if any(k in source_dict for k in ("fn", "url")): @@ -1105,6 +1108,17 @@ def provide(metadata): if not isdir(src_dir): os.makedirs(src_dir) + for hash_type in CONTENT_HASH_KEYS: + if hash_type in source_dict: + expected_content_hash = source_dict[hash_type] + if expected_content_hash in (None, ""): + raise ValueError(f"Empty {hash_type} hash provided for source item #{idx}") + obtained_content_hash = compute_content_hash(src_dir, hash_type) + if expected_content_hash != obtained_content_hash: + raise RuntimeError( + f"{hash_type.upper()} mismatch: " + f"'{obtained_content_hash}' != '{expected_content_hash}'" + ) patches = ensure_list(source_dict.get("patches", [])) patch_attributes_output = [] for patch in patches: diff --git a/conda_build/utils.py b/conda_build/utils.py index 05b0d827ff..20dc3015dd 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -21,7 +21,7 @@ import urllib.parse as urlparse import urllib.request as urllib from collections import OrderedDict, defaultdict -from functools import lru_cache +from functools import lru_cache, partial from glob import glob from itertools import filterfalse from json.decoder import JSONDecodeError @@ -2017,6 +2017,21 @@ def sha256_checksum(filename, buffersize=65536): 
return sha256.hexdigest() +def compute_content_hash(directory, algorithm="sha256"): + "Compute the hash of the recursively traversed and sorted contents of a directory." + log = get_logger(__name__) + files = sorted(glob("**", root_dir=directory, recursive=True, include_hidden=True)) + hasher = hashlib.new(algorithm) + for path in files: + try: + with open(path, "rb") as fh: + for chunk in iter(partial(fh.read, 8192), b""): + hasher.update(chunk) + except OSError as exc: + log.debug("Skipping %s for hashing", path, exc_info=exc) + return hasher.hexdigest() + + def write_bat_activation_text(file_handle, m): from .os_utils.external import find_executable diff --git a/tests/test-recipes/metadata/source_url/meta.yaml b/tests/test-recipes/metadata/source_url/meta.yaml index e8fc55dad7..2a6b60c709 100644 --- a/tests/test-recipes/metadata/source_url/meta.yaml +++ b/tests/test-recipes/metadata/source_url/meta.yaml @@ -8,6 +8,7 @@ source: md5: 0bf1f3598a659a0e8fb5ee6bbb3fd9fd sha1: c464a8995ad6bbf0480abd2883876cc9b4913fa7 sha256: f82b0bd5c809c9a7c7256c26364a0065e57732788b7a74c7ea2169135ed2f598 + content_sha256: "pending calculation" requirements: build: From af571afdcf22a9e3f6d3810d25dfb1e85d710423 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Fri, 12 Apr 2024 18:41:46 +0200 Subject: [PATCH 02/33] fix algo id --- conda_build/source.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conda_build/source.py b/conda_build/source.py index 68cc1e8e3d..1c661552d1 100644 --- a/conda_build/source.py +++ b/conda_build/source.py @@ -1113,7 +1113,8 @@ def provide(metadata): expected_content_hash = source_dict[hash_type] if expected_content_hash in (None, ""): raise ValueError(f"Empty {hash_type} hash provided for source item #{idx}") - obtained_content_hash = compute_content_hash(src_dir, hash_type) + algorithm = hash_type[len("content_"): ] + obtained_content_hash = compute_content_hash(src_dir, algorithm) if expected_content_hash != obtained_content_hash: raise RuntimeError( f"{hash_type.upper()} mismatch: " From 08d769107bbf36356d4a4c8fb7fa3749b9dc26b4 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Fri, 12 Apr 2024 18:42:15 +0200 Subject: [PATCH 03/33] pre-commit --- conda_build/source.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/conda_build/source.py b/conda_build/source.py index 1c661552d1..0fdbd430fb 100644 --- a/conda_build/source.py +++ b/conda_build/source.py @@ -1112,8 +1112,10 @@ def provide(metadata): if hash_type in source_dict: expected_content_hash = source_dict[hash_type] if expected_content_hash in (None, ""): - raise ValueError(f"Empty {hash_type} hash provided for source item #{idx}") - algorithm = hash_type[len("content_"): ] + raise ValueError( + f"Empty {hash_type} hash provided for source item #{idx}" + ) + algorithm = hash_type[len("content_") :] obtained_content_hash = compute_content_hash(src_dir, algorithm) if expected_content_hash != obtained_content_hash: raise RuntimeError( From 19235f1a930c91886e5c9f155f68122bd7875b19 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Sat, 13 Apr 2024 14:01:58 +0200 Subject: [PATCH 04/33] extend tests and include path, type and executable bit in the hash --- conda_build/utils.py | 32 +++++++++++++------ .../metadata/source_url/meta.yaml | 23 +++++++++---- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/conda_build/utils.py b/conda_build/utils.py index 20dc3015dd..104e6bb771 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -2018,17 +2018,31 @@ def sha256_checksum(filename, 
buffersize=65536): def compute_content_hash(directory, algorithm="sha256"): - "Compute the hash of the recursively traversed and sorted contents of a directory." + """ + Compute the hash of the recursively traversed and sorted contents of a directory. + The hash will include these elements, in this order: + - Relative path to 'directory' + - Type of file / path + - Executable bit(s) of the file / path + - Contents of the file, if readable. Following symlinks. + """ log = get_logger(__name__) - files = sorted(glob("**", root_dir=directory, recursive=True, include_hidden=True)) hasher = hashlib.new(algorithm) - for path in files: - try: - with open(path, "rb") as fh: - for chunk in iter(partial(fh.read, 8192), b""): - hasher.update(chunk) - except OSError as exc: - log.debug("Skipping %s for hashing", path, exc_info=exc) + for entry in sorted(os.scandir(directory), key=lambda f: f.name): + # encode the relative path to directory, for files, dirs and others + hasher.update(entry.name.encode("utf-8")) + st_mode = entry.stat().st_mode + file_type = stat.S_IFMT(st_mode) + hasher.update(file_type.to_bytes(2, "little")) + executable = file_type & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) + hasher.update(executable.to_bytes(2, "little")) + if entry.is_file(): + try: + with open(entry.path, "rb") as fh: + for chunk in iter(partial(fh.read, 8192), b""): + hasher.update(chunk) + except OSError as exc: + log.debug("Skipping %s for hashing", entry.name, exc_info=exc) return hasher.hexdigest() diff --git a/tests/test-recipes/metadata/source_url/meta.yaml b/tests/test-recipes/metadata/source_url/meta.yaml index 2a6b60c709..9f1dd6d18c 100644 --- a/tests/test-recipes/metadata/source_url/meta.yaml +++ b/tests/test-recipes/metadata/source_url/meta.yaml @@ -3,12 +3,23 @@ package: version: 1.0 source: - fn: conda-build-1.8.1.tar.gz - url: https://github.com/conda/conda-build/archive/1.8.1.tar.gz - md5: 0bf1f3598a659a0e8fb5ee6bbb3fd9fd - sha1: c464a8995ad6bbf0480abd2883876cc9b4913fa7 - sha256: f82b0bd5c809c9a7c7256c26364a0065e57732788b7a74c7ea2169135ed2f598 - content_sha256: "pending calculation" + - fn: conda-build-1.8.1.tar.gz + url: https://github.com/conda/conda-build/archive/1.8.1.tar.gz + md5: 0bf1f3598a659a0e8fb5ee6bbb3fd9fd + sha1: c464a8995ad6bbf0480abd2883876cc9b4913fa7 + sha256: f82b0bd5c809c9a7c7256c26364a0065e57732788b7a74c7ea2169135ed2f598 + content_md5: 1088a8437a8317991713f6442b601c32 + content_sha1: 501514b1b9b6b1558698ac88a15940b2edb6eeef + content_sha256: fed99dc3815124b2fc896c022797dfa3d844b86b1de43581f784b37bd80144a6 + # This is the same tarball but compressed differently. They should have the same content hashes! 
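One way to obtain the `content_*` values shown above is to call the new helper directly on an already-extracted source tree. A minimal sketch (not part of the patch; it assumes a conda-build checkout that includes this change, and the extraction path is a placeholder):

```python
# Print recipe-ready content_* checksums for an extracted source directory.
# Each recipe key maps to the hashlib algorithm named by its suffix,
# e.g. "content_sha256" -> "sha256".
from conda_build.utils import compute_content_hash

extracted = "conda-build-1.8.1"  # placeholder path to the unpacked source
for algorithm in ("md5", "sha1", "sha256"):
    print(f"content_{algorithm}: {compute_content_hash(extracted, algorithm)}")
```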
+ - fn: conda-build-1.8.1.zip + url: https://github.com/conda/conda-build/archive/1.8.1.zip + md5: 25d59bc816f3d1107f063d77ddfcbe76 + sha1: 195104165d395a92c7ecd4c7f98975906950b9dd + sha256: 6d142da3f0f47613d1d0124ec8caf0faf66ec524e6aa356ac49987c3e32b6d95 + content_md5: 1088a8437a8317991713f6442b601c32 + content_sha1: 501514b1b9b6b1558698ac88a15940b2edb6eeef + content_sha256: fed99dc3815124b2fc896c022797dfa3d844b86b1de43581f784b37bd80144a6 requirements: build: From 704ba21c884b55399efd0786ae4d076ad3ad0c13 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Mon, 15 Apr 2024 08:34:42 +0200 Subject: [PATCH 05/33] make it cross-platform --- conda_build/utils.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/conda_build/utils.py b/conda_build/utils.py index 104e6bb771..0228ea7f06 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -2017,32 +2017,36 @@ def sha256_checksum(filename, buffersize=65536): return sha256.hexdigest() -def compute_content_hash(directory, algorithm="sha256"): +def compute_content_hash(directory: str, algorithm="sha256"): """ Compute the hash of the recursively traversed and sorted contents of a directory. The hash will include these elements, in this order: - - Relative path to 'directory' - - Type of file / path - - Executable bit(s) of the file / path - - Contents of the file, if readable. Following symlinks. + - Relative path to 'directory', normalized (backslash as forward slashes). + - Empty string, as a separator. + - "Contents" of the path: + - If the path is directory or symlink, use the bytes for "directory" and "symlink", + respectively. + - If the path is a file cand can be read, the contents of the file. + - Empty string, as a separator. """ log = get_logger(__name__) hasher = hashlib.new(algorithm) for entry in sorted(os.scandir(directory), key=lambda f: f.name): # encode the relative path to directory, for files, dirs and others - hasher.update(entry.name.encode("utf-8")) - st_mode = entry.stat().st_mode - file_type = stat.S_IFMT(st_mode) - hasher.update(file_type.to_bytes(2, "little")) - executable = file_type & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) - hasher.update(executable.to_bytes(2, "little")) - if entry.is_file(): + hasher.update(entry.name.replace("\\", "/").encode("utf-8")) + hasher.update(b"") + if entry.is_dir(follow_symlinks=False): + hasher.update(b"directory") + elif entry.is_symlink(): + hasher.update(b"symlink") + elif entry.is_file(): try: with open(entry.path, "rb") as fh: for chunk in iter(partial(fh.read, 8192), b""): hasher.update(chunk) except OSError as exc: log.debug("Skipping %s for hashing", entry.name, exc_info=exc) + hasher.update(b"") return hasher.hexdigest() From 0426db24831d2410f8f20845bd3061638fe8b8d9 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Mon, 15 Apr 2024 08:34:46 +0200 Subject: [PATCH 06/33] add news --- news/5277-content-hash | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 news/5277-content-hash diff --git a/news/5277-content-hash b/news/5277-content-hash new file mode 100644 index 0000000000..cc6d357dc1 --- /dev/null +++ b/news/5277-content-hash @@ -0,0 +1,20 @@ +### Enhancements + +* Add new hashing methods (`content_md5`, `content_sha1`, `content_sha256`) to calculate the + checksum of the extracted contents of the downloaded artifacts. 
(#4821 via #5277) + +### Bug fixes + +* + +### Deprecations + +* + +### Docs + +* + +### Other + +* From 47fe18d95066aad8b79fc00360d47a487e22d06b Mon Sep 17 00:00:00 2001 From: jaimergp Date: Mon, 15 Apr 2024 08:41:47 +0200 Subject: [PATCH 07/33] use dash separator --- conda_build/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conda_build/utils.py b/conda_build/utils.py index 0228ea7f06..4076ed46f2 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -2027,14 +2027,14 @@ def compute_content_hash(directory: str, algorithm="sha256"): - If the path is directory or symlink, use the bytes for "directory" and "symlink", respectively. - If the path is a file cand can be read, the contents of the file. - - Empty string, as a separator. + - Single dash string, as a separator. """ log = get_logger(__name__) hasher = hashlib.new(algorithm) for entry in sorted(os.scandir(directory), key=lambda f: f.name): # encode the relative path to directory, for files, dirs and others hasher.update(entry.name.replace("\\", "/").encode("utf-8")) - hasher.update(b"") + hasher.update(b"-") if entry.is_dir(follow_symlinks=False): hasher.update(b"directory") elif entry.is_symlink(): @@ -2046,7 +2046,7 @@ def compute_content_hash(directory: str, algorithm="sha256"): hasher.update(chunk) except OSError as exc: log.debug("Skipping %s for hashing", entry.name, exc_info=exc) - hasher.update(b"") + hasher.update(b"-") return hasher.hexdigest() From ab810a4b4b5e9eea030c9ba04739303a940e1ff4 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Mon, 15 Apr 2024 08:41:54 +0200 Subject: [PATCH 08/33] update hashes --- tests/test-recipes/metadata/source_url/meta.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test-recipes/metadata/source_url/meta.yaml b/tests/test-recipes/metadata/source_url/meta.yaml index 9f1dd6d18c..6e74155501 100644 --- a/tests/test-recipes/metadata/source_url/meta.yaml +++ b/tests/test-recipes/metadata/source_url/meta.yaml @@ -8,18 +8,18 @@ source: md5: 0bf1f3598a659a0e8fb5ee6bbb3fd9fd sha1: c464a8995ad6bbf0480abd2883876cc9b4913fa7 sha256: f82b0bd5c809c9a7c7256c26364a0065e57732788b7a74c7ea2169135ed2f598 - content_md5: 1088a8437a8317991713f6442b601c32 - content_sha1: 501514b1b9b6b1558698ac88a15940b2edb6eeef - content_sha256: fed99dc3815124b2fc896c022797dfa3d844b86b1de43581f784b37bd80144a6 + content_md5: 5b9edf6a1a8c9bc02f4562cd4a2ad646 + content_sha1: 2bffd9abf485e0c6abada46e1bdbdc510356cb9b + content_sha256: a30338559d8947599dacab14774c14ec7ee587276bad23374b114c17761d77dd # This is the same tarball but compressed differently. They should have the same content hashes! 
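The comment above is the crux of the feature: `md5`/`sha1`/`sha256` are computed over the compressed archive bytes, which differ between the `.tar.gz` and the `.zip`, while the `content_*` checksums are computed from the extracted files, which are identical. An illustrative comparison (directory and file names are placeholders; it assumes both archives were downloaded and unpacked next to each other):

```python
import hashlib
from pathlib import Path

from conda_build.utils import compute_content_hash

def archive_sha256(path):
    # Hash of the raw archive bytes, as the plain sha256 recipe key would check.
    return hashlib.sha256(Path(path).read_bytes()).hexdigest()

print(archive_sha256("1.8.1.tar.gz") == archive_sha256("1.8.1.zip"))  # False
print(
    compute_content_hash("extracted-from-tar-gz")
    == compute_content_hash("extracted-from-zip")
)  # True: same extracted contents either way
```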
- fn: conda-build-1.8.1.zip url: https://github.com/conda/conda-build/archive/1.8.1.zip md5: 25d59bc816f3d1107f063d77ddfcbe76 sha1: 195104165d395a92c7ecd4c7f98975906950b9dd sha256: 6d142da3f0f47613d1d0124ec8caf0faf66ec524e6aa356ac49987c3e32b6d95 - content_md5: 1088a8437a8317991713f6442b601c32 - content_sha1: 501514b1b9b6b1558698ac88a15940b2edb6eeef - content_sha256: fed99dc3815124b2fc896c022797dfa3d844b86b1de43581f784b37bd80144a6 + content_md5: 5b9edf6a1a8c9bc02f4562cd4a2ad646 + content_sha1: 2bffd9abf485e0c6abada46e1bdbdc510356cb9b + content_sha256: a30338559d8947599dacab14774c14ec7ee587276bad23374b114c17761d77dd requirements: build: From 4e0f6dd110b871a2eeef4ccaa78cdeb13a86e7f7 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Tue, 18 Jun 2024 20:48:19 +0200 Subject: [PATCH 09/33] Update source.py --- conda_build/source.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/conda_build/source.py b/conda_build/source.py index 488dd149df..4f87234d2e 100644 --- a/conda_build/source.py +++ b/conda_build/source.py @@ -77,14 +77,11 @@ def download_to_cache(cache_folder, recipe_path, source_dict, verbose=False): hash_added = True break else: -<<<<<<< content-hash - log.warn( - f"No hash {HASH_KEYS} provided for {unhashed_fn}. Source download forced. " -======= log.warning( - f"No hash (md5, sha1, sha256) provided for {unhashed_fn}. Source download forced. " ->>>>>>> main - "Add hash to recipe to use source cache." + "No hash %s provided for %s. Source download forced. " + "Add hash to recipe to use source cache.", + HASH_KEYS, + unhashed_fn, ) path = join(cache_folder, fn) if isfile(path): From 1439e4eebc5b069a48842625049dcc450d4a299b Mon Sep 17 00:00:00 2001 From: jaimergp Date: Tue, 19 Nov 2024 23:48:30 +0100 Subject: [PATCH 10/33] change algorithm a bit and update tests --- conda_build/source.py | 10 ++-- conda_build/utils.py | 49 +++++++++++++------ .../metadata/source_url/meta.yaml | 21 +++++--- 3 files changed, 55 insertions(+), 25 deletions(-) diff --git a/conda_build/source.py b/conda_build/source.py index f52bdce16f..6728b4cb97 100644 --- a/conda_build/source.py +++ b/conda_build/source.py @@ -1116,17 +1116,21 @@ def provide(metadata): for hash_type in CONTENT_HASH_KEYS: if hash_type in source_dict: + skip_glob = ".git/*" if "git_url" in source_dict else "" expected_content_hash = source_dict[hash_type] if expected_content_hash in (None, ""): raise ValueError( f"Empty {hash_type} hash provided for source item #{idx}" ) algorithm = hash_type[len("content_") :] - obtained_content_hash = compute_content_hash(src_dir, algorithm) + obtained_content_hash = compute_content_hash( + src_dir, algorithm, skip=skip_glob + ) if expected_content_hash != obtained_content_hash: raise RuntimeError( - f"{hash_type.upper()} mismatch: " - f"'{obtained_content_hash}' != '{expected_content_hash}'" + f"{hash_type} mismatch: " + f"obtained '{obtained_content_hash}' != " + f"expected '{expected_content_hash}'" ) patches = ensure_list(source_dict.get("patches", [])) patch_attributes_output = [] diff --git a/conda_build/utils.py b/conda_build/utils.py index 34fb1032be..134bca271c 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -1987,31 +1987,48 @@ def sha256_checksum(filename, buffersize=65536): return sha256.hexdigest() -def compute_content_hash(directory: str, algorithm="sha256") -> str: +def compute_content_hash(directory: str, algorithm="sha256", skip: str = "") -> str: """ - Compute the hash of the recursively traversed and sorted contents of a directory. 
- For each path found in 'directory', the hash will include these elements, in this order: - - - UTF-8 encoded bytes for the relative path to 'directory', - normalized (backslash as forward slashes). - - Single dash string, as a separator. - - "Contents" of the path: - - If the path is directory or symlink, use the bytes for "directory" and "symlink", - respectively. - - If the path is a file and can be read, the byte contents of the file. - - Single dash string, as a separator. + Given a directory, recursively scan all its contents (without following symlinks) and sort them + by their full path. For each entry in the contents table, compute the hash for the concatenated + bytes of: + + - UTF-8 encoded path, relative to the input directory. Backslashes are normalized + to forward slashes before encoding. + - Then, depending on the type: + - For regular files, the UTF-8 bytes of an `F` separator, followed by the bytes of its + contents. + - For a directory, the UTF-8 bytes of a `D` separator, and nothing else. + - For a symlink, the UTF-8 bytes of an `L` separator, followed by the UTF-8 encoded bytes + for the path it points to. Backslashes MUST be normalized to forward slashes before + encoding. + - UTF-8 encoded bytes of the string `-`, as separator. + + Parameters + ---------- + directory: The path whose contents will be hashed + algorithm: Name of the algorithm to be used, as expected by `hashlib.new()` + skip: `fnmatch.fnmatchase`-compatible glob. If matched, that path won't be included in the hash. + + Returns + ------- + str + The hexdigest of the computed hash, as described above. """ log = get_logger(__name__) hasher = hashlib.new(algorithm) for entry in sorted(os.scandir(directory), key=lambda f: f.name): + if skip and fnmatch.fnmatchcase(entry.name, skip): + continue # encode the relative path to directory, for files, dirs and others hasher.update(entry.name.replace("\\", "/").encode("utf-8")) - hasher.update(b"-") if entry.is_dir(follow_symlinks=False): - hasher.update(b"directory") + hasher.update(b"D") elif entry.is_symlink(): - hasher.update(b"symlink") + hasher.update(b"L") + hasher.update(os.readlink(entry).replace("\\", "/").encode("utf-8")) elif entry.is_file(): + hasher.update(b"F") try: with open(entry.path, "rb") as fh: for chunk in iter(partial(fh.read, 8192), b""): @@ -2019,7 +2036,7 @@ def compute_content_hash(directory: str, algorithm="sha256") -> str: except OSError as exc: log.debug("Skipping %s for hashing", entry.name, exc_info=exc) else: - log.debug("Can't detect type for path %s", entry) + log.debug("Can't detect type for path %s. 
Skipping...", entry) hasher.update(b"-") return hasher.hexdigest() diff --git a/tests/test-recipes/metadata/source_url/meta.yaml b/tests/test-recipes/metadata/source_url/meta.yaml index 6e74155501..52f5343f19 100644 --- a/tests/test-recipes/metadata/source_url/meta.yaml +++ b/tests/test-recipes/metadata/source_url/meta.yaml @@ -4,22 +4,31 @@ package: source: - fn: conda-build-1.8.1.tar.gz + folder: conda-build-tar-gz url: https://github.com/conda/conda-build/archive/1.8.1.tar.gz md5: 0bf1f3598a659a0e8fb5ee6bbb3fd9fd sha1: c464a8995ad6bbf0480abd2883876cc9b4913fa7 sha256: f82b0bd5c809c9a7c7256c26364a0065e57732788b7a74c7ea2169135ed2f598 - content_md5: 5b9edf6a1a8c9bc02f4562cd4a2ad646 - content_sha1: 2bffd9abf485e0c6abada46e1bdbdc510356cb9b - content_sha256: a30338559d8947599dacab14774c14ec7ee587276bad23374b114c17761d77dd + content_md5: 393d42eecfec86558640786775299a69 + content_sha1: 2479df3c0df8f462bfa4358717b1c593a8a268de + content_sha256: fc610a55567f605c5d60453058426389ea9baddea098436acfeb4370c180563a # This is the same tarball but compressed differently. They should have the same content hashes! - fn: conda-build-1.8.1.zip + folder: conda-build-zip url: https://github.com/conda/conda-build/archive/1.8.1.zip md5: 25d59bc816f3d1107f063d77ddfcbe76 sha1: 195104165d395a92c7ecd4c7f98975906950b9dd sha256: 6d142da3f0f47613d1d0124ec8caf0faf66ec524e6aa356ac49987c3e32b6d95 - content_md5: 5b9edf6a1a8c9bc02f4562cd4a2ad646 - content_sha1: 2bffd9abf485e0c6abada46e1bdbdc510356cb9b - content_sha256: a30338559d8947599dacab14774c14ec7ee587276bad23374b114c17761d77dd + content_md5: 393d42eecfec86558640786775299a69 + content_sha1: 2479df3c0df8f462bfa4358717b1c593a8a268de + content_sha256: fc610a55567f605c5d60453058426389ea9baddea098436acfeb4370c180563a + # This is the same tag as above, but cloned directly. They should have the same content hashes! 
+ - folder: conda-build-git + git_url: https://github.com/conda/conda-build.git + git_rev: "1.8.1" + content_md5: 393d42eecfec86558640786775299a69 + content_sha1: 2479df3c0df8f462bfa4358717b1c593a8a268de + content_sha256: fc610a55567f605c5d60453058426389ea9baddea098436acfeb4370c180563a requirements: build: From 4f4178b0dafa6534807b74ad130ca917032e3a74 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Wed, 20 Nov 2024 01:28:59 +0100 Subject: [PATCH 11/33] move to Path.rglob() and allow skips --- conda_build/source.py | 7 ++++--- conda_build/utils.py | 42 +++++++++++++++++++++++++++++------------- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/conda_build/source.py b/conda_build/source.py index 6728b4cb97..f6126fabf6 100644 --- a/conda_build/source.py +++ b/conda_build/source.py @@ -1116,7 +1116,6 @@ def provide(metadata): for hash_type in CONTENT_HASH_KEYS: if hash_type in source_dict: - skip_glob = ".git/*" if "git_url" in source_dict else "" expected_content_hash = source_dict[hash_type] if expected_content_hash in (None, ""): raise ValueError( @@ -1124,11 +1123,13 @@ def provide(metadata): ) algorithm = hash_type[len("content_") :] obtained_content_hash = compute_content_hash( - src_dir, algorithm, skip=skip_glob + src_dir, + algorithm, + skip=ensure_list(source_dict.get("content_hash_skip") or ()) ) if expected_content_hash != obtained_content_hash: raise RuntimeError( - f"{hash_type} mismatch: " + f"{hash_type} mismatch in source item #{idx}: " f"obtained '{obtained_content_hash}' != " f"expected '{expected_content_hash}'" ) diff --git a/conda_build/utils.py b/conda_build/utils.py index 134bca271c..fed46d121a 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -1987,7 +1987,9 @@ def sha256_checksum(filename, buffersize=65536): return sha256.hexdigest() -def compute_content_hash(directory: str, algorithm="sha256", skip: str = "") -> str: +def compute_content_hash( + directory: str | Path, algorithm="sha256", skip: Iterable[str] = () +) -> str: """ Given a directory, recursively scan all its contents (without following symlinks) and sort them by their full path. For each entry in the contents table, compute the hash for the concatenated @@ -2008,7 +2010,9 @@ def compute_content_hash(directory: str, algorithm="sha256", skip: str = "") -> ---------- directory: The path whose contents will be hashed algorithm: Name of the algorithm to be used, as expected by `hashlib.new()` - skip: `fnmatch.fnmatchase`-compatible glob. If matched, that path won't be included in the hash. + skip: iterable of paths that should not be checked. If a path ends with a slash, it's + interpreted as a directory that won't be traversed. It matches the relative paths + already slashed-normalized. 
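To make the per-entry records described in the docstring above concrete, consider a flat toy tree with one file and one symlink; at this point in the series the file payload is the raw bytes (line-ending normalization arrives in a later patch). Illustrative only:

```python
# Toy tree:
#   data.txt  -- regular file containing b"hello\n"
#   zlink     -- symlink pointing at data.txt
# Entries are visited in sorted order of their relative path; each contributes
#   <relative path> <type tag> <payload> b"-"
import hashlib

records = (
    b"data.txt" + b"F" + b"hello\n" + b"-"    # file: path, "F", file bytes, "-"
    + b"zlink" + b"L" + b"data.txt" + b"-"    # symlink: path, "L", slash-normalized target, "-"
)
print(hashlib.sha256(records).hexdigest())
```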
Returns ------- @@ -2017,26 +2021,38 @@ def compute_content_hash(directory: str, algorithm="sha256", skip: str = "") -> """ log = get_logger(__name__) hasher = hashlib.new(algorithm) - for entry in sorted(os.scandir(directory), key=lambda f: f.name): - if skip and fnmatch.fnmatchcase(entry.name, skip): + for path in sorted(Path(directory).rglob("*")): + relpath = path.relative_to(directory) + relpathstr = str(relpath).replace("\\", "/") + if skip and any( + ( + # Skip directories like .git/ + skip_item.endswith("/") + and relpathstr.startswith(skip_item) + or f"{relpathstr}/" == skip_item + ) + # Skip full relpath match + or relpathstr == skip_item + for skip_item in skip + ): continue # encode the relative path to directory, for files, dirs and others - hasher.update(entry.name.replace("\\", "/").encode("utf-8")) - if entry.is_dir(follow_symlinks=False): - hasher.update(b"D") - elif entry.is_symlink(): + hasher.update(relpathstr.encode("utf-8")) + if path.is_symlink(): hasher.update(b"L") - hasher.update(os.readlink(entry).replace("\\", "/").encode("utf-8")) - elif entry.is_file(): + hasher.update(str(path.readlink()).replace("\\", "/").encode("utf-8")) + elif path.is_dir(): + hasher.update(b"D") + elif path.is_file(): hasher.update(b"F") try: - with open(entry.path, "rb") as fh: + with open(path, "rb") as fh: for chunk in iter(partial(fh.read, 8192), b""): hasher.update(chunk) except OSError as exc: - log.debug("Skipping %s for hashing", entry.name, exc_info=exc) + log.debug("Skipping %s for hashing", path.name, exc_info=exc) else: - log.debug("Can't detect type for path %s. Skipping...", entry) + log.debug("Can't detect type for path %s. Skipping...", path) hasher.update(b"-") return hasher.hexdigest() From 190e1207d2819a8be07e74da6d98c38fe69b9545 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Wed, 20 Nov 2024 01:29:07 +0100 Subject: [PATCH 12/33] register new keys --- conda_build/metadata.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/conda_build/metadata.py b/conda_build/metadata.py index 5cc5bc129a..bad85cb16e 100644 --- a/conda_build/metadata.py +++ b/conda_build/metadata.py @@ -567,6 +567,10 @@ def parse(data, config, path=None): "md5": str, "sha1": None, "sha256": None, + "content_md5": str, + "content_sha1": None, + "content_sha256": None, + "content_hash_skip": list, "path": str, "path_via_symlink": None, "git_url": str, From 4b3d56d703c28391df5baadf23ed04c066247f0e Mon Sep 17 00:00:00 2001 From: jaimergp Date: Wed, 20 Nov 2024 01:29:15 +0100 Subject: [PATCH 13/33] update recipe --- .../test-recipes/metadata/source_url/bld.bat | 1 + .../test-recipes/metadata/source_url/build.sh | 1 + .../metadata/source_url/meta.yaml | 25 ++++++++++++------- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/tests/test-recipes/metadata/source_url/bld.bat b/tests/test-recipes/metadata/source_url/bld.bat index 18d415c28f..8db8556ec5 100644 --- a/tests/test-recipes/metadata/source_url/bld.bat +++ b/tests/test-recipes/metadata/source_url/bld.bat @@ -1,3 +1,4 @@ +cd conda-build-tar-gz set PYTHONPATH=. python -c "import conda_build; assert conda_build.__version__ == 'tag: 1.8.1'" if errorlevel 1 exit 1 diff --git a/tests/test-recipes/metadata/source_url/build.sh b/tests/test-recipes/metadata/source_url/build.sh index fd284b9f64..da83c6822a 100644 --- a/tests/test-recipes/metadata/source_url/build.sh +++ b/tests/test-recipes/metadata/source_url/build.sh @@ -1,2 +1,3 @@ +cd conda-build-tar-gz # Not sure how versioneer comes up with this version PYTHONPATH=. 
python -c "import conda_build; assert conda_build.__version__ == 'tag: 1.8.1'" diff --git a/tests/test-recipes/metadata/source_url/meta.yaml b/tests/test-recipes/metadata/source_url/meta.yaml index 52f5343f19..8c622f4de1 100644 --- a/tests/test-recipes/metadata/source_url/meta.yaml +++ b/tests/test-recipes/metadata/source_url/meta.yaml @@ -9,9 +9,11 @@ source: md5: 0bf1f3598a659a0e8fb5ee6bbb3fd9fd sha1: c464a8995ad6bbf0480abd2883876cc9b4913fa7 sha256: f82b0bd5c809c9a7c7256c26364a0065e57732788b7a74c7ea2169135ed2f598 - content_md5: 393d42eecfec86558640786775299a69 - content_sha1: 2479df3c0df8f462bfa4358717b1c593a8a268de - content_sha256: fc610a55567f605c5d60453058426389ea9baddea098436acfeb4370c180563a + content_md5: 4d6349dbe4bb1430dc2155a1895e84ce + content_sha1: 9eceebcd86adcf64c2dcd31e36058a377ae2e8cf + content_sha256: 0e3d93b4ba3e6e156a2fc365f5825f0079403cd23f00f89aba21c091e4b0f41a + content_hash_skip: + - conda_build/_version.py # This is the same tarball but compressed differently. They should have the same content hashes! - fn: conda-build-1.8.1.zip folder: conda-build-zip @@ -19,16 +21,21 @@ source: md5: 25d59bc816f3d1107f063d77ddfcbe76 sha1: 195104165d395a92c7ecd4c7f98975906950b9dd sha256: 6d142da3f0f47613d1d0124ec8caf0faf66ec524e6aa356ac49987c3e32b6d95 - content_md5: 393d42eecfec86558640786775299a69 - content_sha1: 2479df3c0df8f462bfa4358717b1c593a8a268de - content_sha256: fc610a55567f605c5d60453058426389ea9baddea098436acfeb4370c180563a + content_md5: 4d6349dbe4bb1430dc2155a1895e84ce + content_sha1: 9eceebcd86adcf64c2dcd31e36058a377ae2e8cf + content_sha256: 0e3d93b4ba3e6e156a2fc365f5825f0079403cd23f00f89aba21c091e4b0f41a + content_hash_skip: + - conda_build/_version.py # This is the same tag as above, but cloned directly. They should have the same content hashes! 
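The `content_hash_skip` entries used above follow the matching rules of the `skip` parameter added in the previous patch: an entry ending in `/` prunes that directory and everything below it, and any other entry must equal the slash-normalized relative path exactly. A condensed, illustrative restatement of that membership test:

```python
def is_skipped(relpathstr, skip):
    # Mirrors the test inside compute_content_hash(); relpathstr is the
    # forward-slash relative path of the entry being considered.
    return any(
        (item.endswith("/") and relpathstr.startswith(item))
        or f"{relpathstr}/" == item
        or relpathstr == item
        for item in skip
    )

skip = (".git/", "conda_build/_version.py")
print(is_skipped(".git/HEAD", skip))                # True: inside a pruned directory
print(is_skipped(".git", skip))                     # True: the directory entry itself
print(is_skipped("conda_build/_version.py", skip))  # True: exact match
print(is_skipped("conda_build/utils.py", skip))     # False: hashed as usual
```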
- folder: conda-build-git git_url: https://github.com/conda/conda-build.git git_rev: "1.8.1" - content_md5: 393d42eecfec86558640786775299a69 - content_sha1: 2479df3c0df8f462bfa4358717b1c593a8a268de - content_sha256: fc610a55567f605c5d60453058426389ea9baddea098436acfeb4370c180563a + content_md5: 4d6349dbe4bb1430dc2155a1895e84ce + content_sha1: 9eceebcd86adcf64c2dcd31e36058a377ae2e8cf + content_sha256: 0e3d93b4ba3e6e156a2fc365f5825f0079403cd23f00f89aba21c091e4b0f41a + content_hash_skip: + - .git/ + - conda_build/_version.py requirements: build: From f513069090d7854094927776e2a2f376fc8ffa70 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Nov 2024 00:34:21 +0000 Subject: [PATCH 14/33] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- conda_build/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda_build/source.py b/conda_build/source.py index f6126fabf6..6dd9f9d4a9 100644 --- a/conda_build/source.py +++ b/conda_build/source.py @@ -1125,7 +1125,7 @@ def provide(metadata): obtained_content_hash = compute_content_hash( src_dir, algorithm, - skip=ensure_list(source_dict.get("content_hash_skip") or ()) + skip=ensure_list(source_dict.get("content_hash_skip") or ()), ) if expected_content_hash != obtained_content_hash: raise RuntimeError( From c409505ad3aedf53dd22d5d11faae2d553b7c92c Mon Sep 17 00:00:00 2001 From: jaimergp Date: Wed, 20 Nov 2024 01:51:58 +0100 Subject: [PATCH 15/33] add docs --- docs/source/resources/define-metadata.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/source/resources/define-metadata.rst b/docs/source/resources/define-metadata.rst index 06cc7234cb..7f3f18f8fa 100644 --- a/docs/source/resources/define-metadata.rst +++ b/docs/source/resources/define-metadata.rst @@ -207,6 +207,20 @@ the repository. Using path allows you to build packages with unstaged and uncommitted changes in the working directory. git_url can build only up to the latest commit. +Hashes +------ + +Conda-build can check the integrity of the provided sources +using different hashing algorithms: + +- ``md5``, ``sha1`` and ``sha256`` will check the provided + hexdigest against the downloaded archive, prior to extraction. +- ``content_md5``, ``content_sha1`` and ``content_sha256`` will + check the provided hexdigest against the contents of the + (extracted) directory. ``content_hash_skip`` can take a list of + relative files and directories to be ignored during the check + (e.g. useful to ignore the ``.git/`` directory when ``git_url`` + is used to clone a repository). Patches ------- From 5327e4aa632e2d2aff6e5c76045f9808533eef5f Mon Sep 17 00:00:00 2001 From: jaimergp Date: Wed, 20 Nov 2024 01:52:55 +0100 Subject: [PATCH 16/33] pre-commit --- docs/source/resources/define-metadata.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/resources/define-metadata.rst b/docs/source/resources/define-metadata.rst index 7f3f18f8fa..0580b55d11 100644 --- a/docs/source/resources/define-metadata.rst +++ b/docs/source/resources/define-metadata.rst @@ -215,7 +215,7 @@ using different hashing algorithms: - ``md5``, ``sha1`` and ``sha256`` will check the provided hexdigest against the downloaded archive, prior to extraction. 
-- ``content_md5``, ``content_sha1`` and ``content_sha256`` will +- ``content_md5``, ``content_sha1`` and ``content_sha256`` will check the provided hexdigest against the contents of the (extracted) directory. ``content_hash_skip`` can take a list of relative files and directories to be ignored during the check From 27b9eafaa2f0fa8ec2fb387fd1d40d7df8df5876 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Wed, 20 Nov 2024 15:56:02 +0100 Subject: [PATCH 17/33] normalize line endings --- conda_build/utils.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/conda_build/utils.py b/conda_build/utils.py index fed46d121a..ea92044dba 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -2045,10 +2045,20 @@ def compute_content_hash( hasher.update(b"D") elif path.is_file(): hasher.update(b"F") + # We need to normalize line endings for Windows-Unix compat + # Attempt normalized line-by-line hashing (text mode). If + # Python fails to open in text mode, then it's binary and we hash + # the raw bytes directly. try: - with open(path, "rb") as fh: - for chunk in iter(partial(fh.read, 8192), b""): - hasher.update(chunk) + try: + with open(path) as fh: + for line in fh: + hasher.update(line.replace("\r\n", "\n").encode("utf-8")) + except UnicodeDecodeError: + # file must be binary + with open(path, "rb") as fh: + for chunk in iter(partial(fh.read, 8192), b""): + hasher.update(chunk) except OSError as exc: log.debug("Skipping %s for hashing", path.name, exc_info=exc) else: From 73a23aec9ae3c6f72d0637bac5f280e3a93e85fd Mon Sep 17 00:00:00 2001 From: jaimergp Date: Wed, 20 Nov 2024 16:28:47 +0100 Subject: [PATCH 18/33] prevent partial hash changes in hybrid text/binary files --- conda_build/utils.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/conda_build/utils.py b/conda_build/utils.py index ea92044dba..eaf22e437d 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -2051,11 +2051,18 @@ def compute_content_hash( # the raw bytes directly. try: try: + lines = [] with open(path) as fh: for line in fh: - hasher.update(line.replace("\r\n", "\n").encode("utf-8")) + # Accumulate all line-ending normalized lines first + # to make sure the whole file is read. This prevents + # partial updates to the hash with hybrid text/binary + # files (e.g. like the constructor shell installers). 
+ lines.append(line.replace("\r\n", "\n")) + for line in lines: + hasher.update(line.encode("utf-8")) except UnicodeDecodeError: - # file must be binary + # file must be binary, read the bytes directly with open(path, "rb") as fh: for chunk in iter(partial(fh.read, 8192), b""): hasher.update(chunk) From 16260bf433e4b8110916c29df0b868873ac0f4c3 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Sat, 23 Nov 2024 16:02:09 +0100 Subject: [PATCH 19/33] sort by str, not Path --- conda_build/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda_build/utils.py b/conda_build/utils.py index eaf22e437d..98f66ca7f9 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -2021,7 +2021,7 @@ def compute_content_hash( """ log = get_logger(__name__) hasher = hashlib.new(algorithm) - for path in sorted(Path(directory).rglob("*")): + for path in sorted(Path(directory).rglob("*"), key=str): relpath = path.relative_to(directory) relpathstr = str(relpath).replace("\\", "/") if skip and any( From b7f59ef07e38e88bb22647f2b6f92a3d9a77c06e Mon Sep 17 00:00:00 2001 From: jaimergp Date: Sun, 24 Nov 2024 22:58:20 +0100 Subject: [PATCH 20/33] use separate git cache for this one --- .github/workflows/tests.yml | 6 +++--- tests/test_api_build.py | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3bd38b9e89..b3d71d00fc 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -147,7 +147,7 @@ jobs: run: > pytest --cov=conda_build - -n auto + -n ${{ env.PYTEST_MARKER == 'serial' && 1 || 'auto' }} -m "${{ env.PYTEST_MARKER }}" - name: Upload Coverage @@ -315,7 +315,7 @@ jobs: pytest --cov=conda_build --basetemp=${{ runner.temp }} - -n auto + -n ${{ env.PYTEST_MARKER == 'serial' && 1 || 'auto' }} -m "${{ env.PYTEST_MARKER }}" - name: Upload Coverage @@ -431,7 +431,7 @@ jobs: run: > pytest --cov=conda_build - -n auto + -n ${{ env.PYTEST_MARKER == 'serial' && 1 || 'auto' }} -m "${{ env.PYTEST_MARKER }}" env: CONDA_BUILD_SYSROOT: ${{ env.MACOSX_SDK_ROOT }} diff --git a/tests/test_api_build.py b/tests/test_api_build.py index d6a519cc5a..db3c5880b6 100644 --- a/tests/test_api_build.py +++ b/tests/test_api_build.py @@ -129,6 +129,7 @@ def test_recipe_builds( testing_config, monkeypatch: pytest.MonkeyPatch, conda_build_test_recipe_envvar: str, + tmp_path_factory: pytest.TempPathFactory, ): # TODO: After we fix #3754 this mark can be removed. This specific test # ``source_setup_py_data_subdir`` reproduces the problem. 
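Stepping back to the line-ending normalization introduced in patches 17–18: the goal is that a text file checked out with CRLF line endings (for example through git's autocrlf on Windows) hashes the same as the LF version shipped in a tarball. A self-contained illustration:

```python
import hashlib

lf = b"first line\nsecond line\n"
crlf = b"first line\r\nsecond line\r\n"

# Raw bytes differ, so the digests differ...
print(hashlib.sha256(lf).hexdigest() == hashlib.sha256(crlf).hexdigest())  # False
# ...but after mapping CRLF to LF the two variants agree.
normalized = crlf.replace(b"\r\n", b"\n")
print(hashlib.sha256(lf).hexdigest() == hashlib.sha256(normalized).hexdigest())  # True
```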
@@ -136,6 +137,9 @@ def test_recipe_builds( pytest.xfail("Issue related to #3754 on conda-build.") elif recipe.name == "unicode_all_over" and context.solver == "libmamba": pytest.xfail("Unicode package names not supported in libmamba.") + elif recipe.name == "source_url": + # Several recipes are cloning conda-build, which causes problems with checkouts + testing_config.git_cache = tmp_path_factory.mktemp("git_cache", numbered=False) # These variables are defined solely for testing purposes, # so they can be checked within build scripts From c9b7e7bd195ee88042e0cbe6af7153851edb0b6c Mon Sep 17 00:00:00 2001 From: jaimergp Date: Sun, 24 Nov 2024 23:58:25 +0100 Subject: [PATCH 21/33] override src_cache_root instead --- tests/test_api_build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_api_build.py b/tests/test_api_build.py index db3c5880b6..d2ff46aba4 100644 --- a/tests/test_api_build.py +++ b/tests/test_api_build.py @@ -139,7 +139,7 @@ def test_recipe_builds( pytest.xfail("Unicode package names not supported in libmamba.") elif recipe.name == "source_url": # Several recipes are cloning conda-build, which causes problems with checkouts - testing_config.git_cache = tmp_path_factory.mktemp("git_cache", numbered=False) + testing_config.src_cache_root = tmp_path_factory.mktemp("src_cache_root", numbered=False) # These variables are defined solely for testing purposes, # so they can be checked within build scripts From b7a45479bd4278902bfdcfa2b4b6d89996a0b2e7 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Mon, 25 Nov 2024 00:14:51 +0100 Subject: [PATCH 22/33] pre-commit --- tests/test_api_build.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_api_build.py b/tests/test_api_build.py index d2ff46aba4..01011aaf16 100644 --- a/tests/test_api_build.py +++ b/tests/test_api_build.py @@ -139,7 +139,9 @@ def test_recipe_builds( pytest.xfail("Unicode package names not supported in libmamba.") elif recipe.name == "source_url": # Several recipes are cloning conda-build, which causes problems with checkouts - testing_config.src_cache_root = tmp_path_factory.mktemp("src_cache_root", numbered=False) + testing_config.src_cache_root = tmp_path_factory.mktemp( + "src_cache_root", numbered=False + ) # These variables are defined solely for testing purposes, # so they can be checked within build scripts From 50b219ecd711b5878ca79229bb00f9b9b6708ce9 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Mon, 25 Nov 2024 10:06:00 +0100 Subject: [PATCH 23/33] revert --- .github/workflows/tests.yml | 6 +++--- tests/test_api_build.py | 6 ------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b3d71d00fc..3bd38b9e89 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -147,7 +147,7 @@ jobs: run: > pytest --cov=conda_build - -n ${{ env.PYTEST_MARKER == 'serial' && 1 || 'auto' }} + -n auto -m "${{ env.PYTEST_MARKER }}" - name: Upload Coverage @@ -315,7 +315,7 @@ jobs: pytest --cov=conda_build --basetemp=${{ runner.temp }} - -n ${{ env.PYTEST_MARKER == 'serial' && 1 || 'auto' }} + -n auto -m "${{ env.PYTEST_MARKER }}" - name: Upload Coverage @@ -431,7 +431,7 @@ jobs: run: > pytest --cov=conda_build - -n ${{ env.PYTEST_MARKER == 'serial' && 1 || 'auto' }} + -n auto -m "${{ env.PYTEST_MARKER }}" env: CONDA_BUILD_SYSROOT: ${{ env.MACOSX_SDK_ROOT }} diff --git a/tests/test_api_build.py b/tests/test_api_build.py index 01011aaf16..d6a519cc5a 100644 --- 
a/tests/test_api_build.py +++ b/tests/test_api_build.py @@ -129,7 +129,6 @@ def test_recipe_builds( testing_config, monkeypatch: pytest.MonkeyPatch, conda_build_test_recipe_envvar: str, - tmp_path_factory: pytest.TempPathFactory, ): # TODO: After we fix #3754 this mark can be removed. This specific test # ``source_setup_py_data_subdir`` reproduces the problem. @@ -137,11 +136,6 @@ def test_recipe_builds( pytest.xfail("Issue related to #3754 on conda-build.") elif recipe.name == "unicode_all_over" and context.solver == "libmamba": pytest.xfail("Unicode package names not supported in libmamba.") - elif recipe.name == "source_url": - # Several recipes are cloning conda-build, which causes problems with checkouts - testing_config.src_cache_root = tmp_path_factory.mktemp( - "src_cache_root", numbered=False - ) # These variables are defined solely for testing purposes, # so they can be checked within build scripts From f60bcdd57f5fbe00210d6f067e3b85a7025d2a92 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Mon, 25 Nov 2024 10:12:00 +0100 Subject: [PATCH 24/33] force checkout --- conda_build/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda_build/source.py b/conda_build/source.py index 6dd9f9d4a9..edb4e92685 100644 --- a/conda_build/source.py +++ b/conda_build/source.py @@ -352,7 +352,7 @@ def git_mirror_checkout_recursive( print(f"checkout: {checkout!r}") if checkout: check_call_env( - [git, "checkout", checkout], + [git, "checkout", "-f", checkout], # UNDO cwd=checkout_dir, stdout=stdout, stderr=stderr, From ffcda69a637242af4a000b32b34f8376193febea Mon Sep 17 00:00:00 2001 From: jaimergp Date: Mon, 25 Nov 2024 17:07:11 +0100 Subject: [PATCH 25/33] try with constructor --- conda_build/source.py | 3 +- .../test-recipes/metadata/source_url/bld.bat | 4 +- .../test-recipes/metadata/source_url/build.sh | 4 +- .../metadata/source_url/meta.yaml | 55 ++++++++++--------- 4 files changed, 34 insertions(+), 32 deletions(-) diff --git a/conda_build/source.py b/conda_build/source.py index edb4e92685..c336d58bfa 100644 --- a/conda_build/source.py +++ b/conda_build/source.py @@ -128,7 +128,8 @@ def download_to_cache(cache_folder, recipe_path, source_dict, verbose=False): if expected_hash != hashed: rm_rf(path) raise RuntimeError( - f"{tp.upper()} mismatch: '{hashed}' != '{expected_hash}'" + f"{tp.upper()} mismatch for {unhashed_fn}: " + f"obtained '{hashed}' != expected '{expected_hash}'" ) break diff --git a/tests/test-recipes/metadata/source_url/bld.bat b/tests/test-recipes/metadata/source_url/bld.bat index 8db8556ec5..9f2a69a731 100644 --- a/tests/test-recipes/metadata/source_url/bld.bat +++ b/tests/test-recipes/metadata/source_url/bld.bat @@ -1,4 +1,4 @@ -cd conda-build-tar-gz +cd constructor-tar-gz set PYTHONPATH=. -python -c "import conda_build; assert conda_build.__version__ == 'tag: 1.8.1'" +python -c "import constructor; assert constructor.__version__ == '3.0.0'" if errorlevel 1 exit 1 diff --git a/tests/test-recipes/metadata/source_url/build.sh b/tests/test-recipes/metadata/source_url/build.sh index da83c6822a..aa3cc8e3a7 100644 --- a/tests/test-recipes/metadata/source_url/build.sh +++ b/tests/test-recipes/metadata/source_url/build.sh @@ -1,3 +1,3 @@ -cd conda-build-tar-gz +cd constructor-tar-gz # Not sure how versioneer comes up with this version -PYTHONPATH=. python -c "import conda_build; assert conda_build.__version__ == 'tag: 1.8.1'" +PYTHONPATH=. 
python -c "import constructor; assert constructor.__version__ == '3.0.0'" diff --git a/tests/test-recipes/metadata/source_url/meta.yaml b/tests/test-recipes/metadata/source_url/meta.yaml index 8c622f4de1..45b29c067c 100644 --- a/tests/test-recipes/metadata/source_url/meta.yaml +++ b/tests/test-recipes/metadata/source_url/meta.yaml @@ -3,39 +3,40 @@ package: version: 1.0 source: - - fn: conda-build-1.8.1.tar.gz - folder: conda-build-tar-gz - url: https://github.com/conda/conda-build/archive/1.8.1.tar.gz - md5: 0bf1f3598a659a0e8fb5ee6bbb3fd9fd - sha1: c464a8995ad6bbf0480abd2883876cc9b4913fa7 - sha256: f82b0bd5c809c9a7c7256c26364a0065e57732788b7a74c7ea2169135ed2f598 - content_md5: 4d6349dbe4bb1430dc2155a1895e84ce - content_sha1: 9eceebcd86adcf64c2dcd31e36058a377ae2e8cf - content_sha256: 0e3d93b4ba3e6e156a2fc365f5825f0079403cd23f00f89aba21c091e4b0f41a + # Same code, but the tarball is generated by GH on the fly + - fn: constructor-3.0.0.tar.gz + folder: constructor-tar-gz + url: https://github.com/conda/constructor/archive/3.0.0.tar.gz + md5: 999ba62a678fd65d2be8c8c9160dff35 + sha1: 0e6df9cfb04e99a8899cc1105d9c09e22811b146 + sha256: a1932d36ac8ea0dcc3a0b7848a090aedc9247d4bcd75fa75e1771c2b2b01f9ff + content_md5: 998ccec2e7912f3fd79f53892114053b + content_sha1: cb021882f5b1e3418fbf1e7942ce2d41a17121f2 + content_sha256: a884ace5aa3a7e7f5a8b5adeb5cbfa7209f2ae88134d362c8bbca9c82ad2bb06 content_hash_skip: - - conda_build/_version.py + - constructor/_version.py # This is the same tarball but compressed differently. They should have the same content hashes! - - fn: conda-build-1.8.1.zip - folder: conda-build-zip - url: https://github.com/conda/conda-build/archive/1.8.1.zip - md5: 25d59bc816f3d1107f063d77ddfcbe76 - sha1: 195104165d395a92c7ecd4c7f98975906950b9dd - sha256: 6d142da3f0f47613d1d0124ec8caf0faf66ec524e6aa356ac49987c3e32b6d95 - content_md5: 4d6349dbe4bb1430dc2155a1895e84ce - content_sha1: 9eceebcd86adcf64c2dcd31e36058a377ae2e8cf - content_sha256: 0e3d93b4ba3e6e156a2fc365f5825f0079403cd23f00f89aba21c091e4b0f41a + - fn: constructor-3.0.0.zip + folder: constructor-zip + url: https://github.com/conda/constructor/archive/3.0.0.zip + md5: e4f6e0dd740e50fdb86bd5dae4d547c2 + sha1: 75138e708ebfe818138dae23b6763890c1972d25 + sha256: 77406614899f5c2e21e2133a774b8470ba75a86e76dda799c2b39bcbce860955 + content_md5: 998ccec2e7912f3fd79f53892114053b + content_sha1: cb021882f5b1e3418fbf1e7942ce2d41a17121f2 + content_sha256: a884ace5aa3a7e7f5a8b5adeb5cbfa7209f2ae88134d362c8bbca9c82ad2bb06 content_hash_skip: - - conda_build/_version.py + - constructor/_version.py # This is the same tag as above, but cloned directly. They should have the same content hashes! 
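The git entry below needs the extra `content_hash_skip` items because a fresh clone is not byte-reproducible: `.git/` differs between clones (pack files, logs, local config), and `constructor/_version.py` is presumably expanded by git export attributes in the GitHub-generated archives, so it would not match a plain checkout. A quick illustrative check, assuming two fresh clones of the same tag sit side by side in hypothetical `clone-a`/`clone-b` directories:

```python
from conda_build.utils import compute_content_hash

skip = (".git/", "constructor/_version.py")
a = compute_content_hash("clone-a", "sha256", skip=skip)
b = compute_content_hash("clone-b", "sha256", skip=skip)
print(a == b)  # True once the non-reproducible paths are excluded
```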
- - folder: conda-build-git - git_url: https://github.com/conda/conda-build.git - git_rev: "1.8.1" - content_md5: 4d6349dbe4bb1430dc2155a1895e84ce - content_sha1: 9eceebcd86adcf64c2dcd31e36058a377ae2e8cf - content_sha256: 0e3d93b4ba3e6e156a2fc365f5825f0079403cd23f00f89aba21c091e4b0f41a + - folder: constructor-git + git_url: https://github.com/conda/constructor.git + git_rev: "3.0.0" + content_md5: 998ccec2e7912f3fd79f53892114053b + content_sha1: cb021882f5b1e3418fbf1e7942ce2d41a17121f2 + content_sha256: a884ace5aa3a7e7f5a8b5adeb5cbfa7209f2ae88134d362c8bbca9c82ad2bb06 content_hash_skip: - .git/ - - conda_build/_version.py + - constructor/_version.py requirements: build: From d434dce3f91f3258eb58d1ae1540a030e863939a Mon Sep 17 00:00:00 2001 From: jaimergp Date: Mon, 25 Nov 2024 17:13:54 +0100 Subject: [PATCH 26/33] stop force --- conda_build/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda_build/source.py b/conda_build/source.py index c336d58bfa..8fd852aad0 100644 --- a/conda_build/source.py +++ b/conda_build/source.py @@ -353,7 +353,7 @@ def git_mirror_checkout_recursive( print(f"checkout: {checkout!r}") if checkout: check_call_env( - [git, "checkout", "-f", checkout], # UNDO + [git, "checkout", checkout], cwd=checkout_dir, stdout=stdout, stderr=stderr, From a95095830a04cc83ce3a8f26f1d31ff1f04e6cb7 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Tue, 26 Nov 2024 13:02:57 +0100 Subject: [PATCH 27/33] add `?` separator for unknown file types --- conda_build/utils.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/conda_build/utils.py b/conda_build/utils.py index 98f66ca7f9..470c6985e3 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -1998,12 +1998,16 @@ def compute_content_hash( - UTF-8 encoded path, relative to the input directory. Backslashes are normalized to forward slashes before encoding. - Then, depending on the type: - - For regular files, the UTF-8 bytes of an `F` separator, followed by the bytes of its - contents. + - For regular files, the UTF-8 bytes of an `F` separator, followed by: + - UTF-8 bytes of the line-ending normalized text (`\r\n` to `\n`), if the file is text. + - The raw bytes of the file contents, if binary. + - Note: If the file can't be opened or read, no contents are hashed; + it's treated as empty. - For a directory, the UTF-8 bytes of a `D` separator, and nothing else. - For a symlink, the UTF-8 bytes of an `L` separator, followed by the UTF-8 encoded bytes for the path it points to. Backslashes MUST be normalized to forward slashes before encoding. + - For any other types, the UTF-8 bytes of a `?` separator, and nothing else. - UTF-8 encoded bytes of the string `-`, as separator. Parameters @@ -2012,7 +2016,7 @@ def compute_content_hash( algorithm: Name of the algorithm to be used, as expected by `hashlib.new()` skip: iterable of paths that should not be checked. If a path ends with a slash, it's interpreted as a directory that won't be traversed. It matches the relative paths - already slashed-normalized. + already slashed-normalized (i.e. backwards slashes replaced with forward slashes). Returns ------- @@ -2067,9 +2071,10 @@ def compute_content_hash( for chunk in iter(partial(fh.read, 8192), b""): hasher.update(chunk) except OSError as exc: - log.debug("Skipping %s for hashing", path.name, exc_info=exc) + log.warning("Can't open file %s. Hashing path only...", path.name, exc_info=exc) else: - log.debug("Can't detect type for path %s. 
Skipping...", path) + log.warning("Can't detect type for path %s. Hashing path only...", path) + hasher.update(b"?") hasher.update(b"-") return hasher.hexdigest() From d97d0811f4a3f49f1daffff0a6121490b667656d Mon Sep 17 00:00:00 2001 From: jaimergp Date: Tue, 26 Nov 2024 13:09:42 +0100 Subject: [PATCH 28/33] pre-commit --- conda_build/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/conda_build/utils.py b/conda_build/utils.py index 470c6985e3..6fcceac7e9 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -2071,7 +2071,9 @@ def compute_content_hash( for chunk in iter(partial(fh.read, 8192), b""): hasher.update(chunk) except OSError as exc: - log.warning("Can't open file %s. Hashing path only...", path.name, exc_info=exc) + log.warning( + "Can't open file %s. Hashing path only...", path.name, exc_info=exc + ) else: log.warning("Can't detect type for path %s. Hashing path only...", path) hasher.update(b"?") From 2afa293a523cd40ba387b0ef3274f271ee7a3173 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Tue, 26 Nov 2024 16:09:06 +0100 Subject: [PATCH 29/33] drop content_{md5,sha1} and add content_{sha384,sha512} --- conda_build/source.py | 5 +---- news/5277-content-hash | 4 ++-- tests/test-recipes/metadata/source_url/meta.yaml | 12 ++++++------ 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/conda_build/source.py b/conda_build/source.py index 37bf98f41b..d8257459a9 100644 --- a/conda_build/source.py +++ b/conda_build/source.py @@ -47,11 +47,8 @@ git_submod_re = re.compile(r"(?:.+)\.(.+)\.(?:.+)\s(.+)") ext_re = re.compile(r"(.*?)(\.(?:tar\.)?[^.]+)$") -HASH_KEYS = ("md5", "sha1", "sha256") -CONTENT_HASH_KEYS = ("content_md5", "content_sha1", "content_sha256") - - ACCEPTED_HASH_TYPES = ("md5", "sha1", "sha224", "sha256", "sha384", "sha512") +CONTENT_HASH_KEYS = ("content_sha256", "content_sha384", "content_sha512") def append_hash_to_fn(fn, hash_value): diff --git a/news/5277-content-hash b/news/5277-content-hash index cc6d357dc1..c1fc09c742 100644 --- a/news/5277-content-hash +++ b/news/5277-content-hash @@ -1,7 +1,7 @@ ### Enhancements -* Add new hashing methods (`content_md5`, `content_sha1`, `content_sha256`) to calculate the - checksum of the extracted contents of the downloaded artifacts. (#4821 via #5277) +* Add new hashing methods (`content_sha256`, `content_sha384`, `content_sha512`) to calculate the + checksum of the extracted contents of the downloaded source artifacts. 
(#4821 via #5277) ### Bug fixes diff --git a/tests/test-recipes/metadata/source_url/meta.yaml b/tests/test-recipes/metadata/source_url/meta.yaml index a4d881dc7b..5dd5bab9b6 100644 --- a/tests/test-recipes/metadata/source_url/meta.yaml +++ b/tests/test-recipes/metadata/source_url/meta.yaml @@ -13,9 +13,9 @@ source: sha256: a1932d36ac8ea0dcc3a0b7848a090aedc9247d4bcd75fa75e1771c2b2b01f9ff sha384: d366de5e995a4ff6ad9266774e483efb91d9c291c0487c5cf0af055a7b48fd58af205c9455a5b2f654d92d7f3f39ef68 sha512: 33d2c8f8189f0fe8528bef0c32e62a3acd4362285e447680e7f0af16137df9ab45bf12b6928bdaaf99b5a53e71db4d385a0c1d91bdc0b2ad1d0b1a7bc6d790f1 - content_md5: 998ccec2e7912f3fd79f53892114053b - content_sha1: cb021882f5b1e3418fbf1e7942ce2d41a17121f2 content_sha256: a884ace5aa3a7e7f5a8b5adeb5cbfa7209f2ae88134d362c8bbca9c82ad2bb06 + content_sha384: 3644cb7e55fb8f6d7328b19da3ec46be6af1e67291cc48948687cf9493d9b2caea3b5a637d1dfc1a19dd2893ddc38d27 + content_sha512: 79a0c5edc29f979b599f0b694c3f0f07cc91e590c2c3fcb9c3f965767bf5a22fe634f0f142c626ef0859249d0242f3d8cc93922cf14e7ba527eedc3e8c8b354e content_hash_skip: - constructor/_version.py # This is the same tarball but compressed differently. They should have the same content hashes! @@ -28,18 +28,18 @@ source: sha256: 77406614899f5c2e21e2133a774b8470ba75a86e76dda799c2b39bcbce860955 sha384: e93d217376c86ab374be93c44fa03b05673e23de78033812a8f0620ce1ca6a4082fedd8b2599341ffd8dcfd201479ff4 sha512: 23e2ef512e43cb3b75637650901d5c86e0edc812a95fe85b19b45feddabe74bd72d6affac30b133c37a69046b3e27635a84107df5f64e403e1b21dc8f56ceedb - content_md5: 998ccec2e7912f3fd79f53892114053b - content_sha1: cb021882f5b1e3418fbf1e7942ce2d41a17121f2 content_sha256: a884ace5aa3a7e7f5a8b5adeb5cbfa7209f2ae88134d362c8bbca9c82ad2bb06 + content_sha384: 3644cb7e55fb8f6d7328b19da3ec46be6af1e67291cc48948687cf9493d9b2caea3b5a637d1dfc1a19dd2893ddc38d27 + content_sha512: 79a0c5edc29f979b599f0b694c3f0f07cc91e590c2c3fcb9c3f965767bf5a22fe634f0f142c626ef0859249d0242f3d8cc93922cf14e7ba527eedc3e8c8b354e content_hash_skip: - constructor/_version.py # This is the same tag as above, but cloned directly. They should have the same content hashes! 
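With the md5/sha1 variants dropped, every remaining `content_*` key maps onto an algorithm in hashlib's guaranteed set, so the `hash_type[len("content_"):]` slice used in `source.py` can be handed straight to `hashlib.new()` on any platform. A quick illustrative check:

```python
import hashlib

for key in ("content_sha256", "content_sha384", "content_sha512"):
    algorithm = key[len("content_"):]
    assert algorithm in hashlib.algorithms_guaranteed
    print(key, "->", algorithm, hashlib.new(algorithm, b"").hexdigest()[:16], "...")
```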
- folder: constructor-git git_url: https://github.com/conda/constructor.git git_rev: "3.0.0" - content_md5: 998ccec2e7912f3fd79f53892114053b - content_sha1: cb021882f5b1e3418fbf1e7942ce2d41a17121f2 content_sha256: a884ace5aa3a7e7f5a8b5adeb5cbfa7209f2ae88134d362c8bbca9c82ad2bb06 + content_sha384: 3644cb7e55fb8f6d7328b19da3ec46be6af1e67291cc48948687cf9493d9b2caea3b5a637d1dfc1a19dd2893ddc38d27 + content_sha512: 79a0c5edc29f979b599f0b694c3f0f07cc91e590c2c3fcb9c3f965767bf5a22fe634f0f142c626ef0859249d0242f3d8cc93922cf14e7ba527eedc3e8c8b354e content_hash_skip: - .git/ - constructor/_version.py From 98d88138b5cd7028762c38f51951e8c8451cd2ad Mon Sep 17 00:00:00 2001 From: jaimergp Date: Tue, 26 Nov 2024 16:09:57 +0100 Subject: [PATCH 30/33] add here too --- conda_build/metadata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda_build/metadata.py b/conda_build/metadata.py index e870765489..ef09b4fba2 100644 --- a/conda_build/metadata.py +++ b/conda_build/metadata.py @@ -570,9 +570,9 @@ def parse(data, config, path=None): "sha256": None, "sha384": None, "sha512": None, - "content_md5": str, - "content_sha1": None, "content_sha256": None, + "content_sha384": None, + "content_sha512": None, "content_hash_skip": list, "path": str, "path_via_symlink": None, From c192799513f83d5715656c8e5030bdaf9234939d Mon Sep 17 00:00:00 2001 From: jaimergp Date: Wed, 27 Nov 2024 23:05:00 +0100 Subject: [PATCH 31/33] use a 10MB SpooledTemporaryFile --- conda_build/utils.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/conda_build/utils.py b/conda_build/utils.py index ba2484ea51..005bf1f11a 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -2023,7 +2023,6 @@ def compute_content_hash( str The hexdigest of the computed hash, as described above. """ - log = get_logger(__name__) hasher = hashlib.new(algorithm) for path in sorted(Path(directory).rglob("*"), key=str): relpath = path.relative_to(directory) @@ -2055,16 +2054,18 @@ def compute_content_hash( # the raw bytes directly. try: try: - lines = [] - with open(path) as fh: - for line in fh: - # Accumulate all line-ending normalized lines first - # to make sure the whole file is read. This prevents - # partial updates to the hash with hybrid text/binary - # files (e.g. like the constructor shell installers). - lines.append(line.replace("\r\n", "\n")) - for line in lines: - hasher.update(line.encode("utf-8")) + with tempfile.SpooledTemporaryFile(max_size=10*1024*1024) as tmp: + with open(path) as fh: + for line in fh: + # Accumulate all line-ending normalized lines first + # to make sure the whole file is read. This prevents + # partial updates to the hash with hybrid text/binary + # files (e.g. like the constructor shell installers). 
+ tmp.write(line.replace("\r\n", "\n").encode("utf-8")) + tmp.flush() + tmp.seek(0) + for chunk in iter(partial(tmp.read, 8192), b""): + hasher.update(chunk) except UnicodeDecodeError: # file must be binary, read the bytes directly with open(path, "rb") as fh: @@ -2233,3 +2234,6 @@ def is_conda_pkg(pkg_path: str) -> bool: def package_record_to_requirement(prec: PackageRecord) -> str: return f"{prec.name} {prec.version} {prec.build}" + + +log = get_logger(__name__) From 5edfb20917366ce10b406725a58cdc981d9462f1 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Wed, 27 Nov 2024 23:07:31 +0100 Subject: [PATCH 32/33] pre-commit --- conda_build/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conda_build/utils.py b/conda_build/utils.py index 005bf1f11a..c508f4c6c0 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -2054,7 +2054,8 @@ def compute_content_hash( # the raw bytes directly. try: try: - with tempfile.SpooledTemporaryFile(max_size=10*1024*1024) as tmp: + ten_mb = 10 * 1024 * 1024 + with tempfile.SpooledTemporaryFile(max_size=ten_mb) as tmp: with open(path) as fh: for line in fh: # Accumulate all line-ending normalized lines first From bcc7ad5d44cc76248a0b5c2d1c7c731478809011 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Wed, 27 Nov 2024 23:29:51 +0100 Subject: [PATCH 33/33] do error on unreadable files and unknown types --- conda_build/utils.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/conda_build/utils.py b/conda_build/utils.py index c508f4c6c0..8cad9ca006 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -2001,13 +2001,12 @@ def compute_content_hash( - For regular files, the UTF-8 bytes of an `F` separator, followed by: - UTF-8 bytes of the line-ending normalized text (`\r\n` to `\n`), if the file is text. - The raw bytes of the file contents, if binary. - - Note: If the file can't be opened or read, no contents are hashed; - it's treated as empty. + - If it can't be read, error out. - For a directory, the UTF-8 bytes of a `D` separator, and nothing else. - For a symlink, the UTF-8 bytes of an `L` separator, followed by the UTF-8 encoded bytes for the path it points to. Backslashes MUST be normalized to forward slashes before encoding. - - For any other types, the UTF-8 bytes of a `?` separator, and nothing else. + - For any other types, error out. - UTF-8 encoded bytes of the string `-`, as separator. Parameters @@ -2073,12 +2072,15 @@ def compute_content_hash( for chunk in iter(partial(fh.read, 8192), b""): hasher.update(chunk) except OSError as exc: - log.warning( - "Can't open file %s. Hashing path only...", path.name, exc_info=exc + raise RuntimeError( + f"Could not read file '{relpath}' in directory '{directory}'. " + f"Content hash verification cannot continue. Error: {exc}" ) else: - log.warning("Can't detect type for path %s. Hashing path only...", path) - hasher.update(b"?") + raise RuntimeError( + f"Can't detect type for path '{relpath}' in directory '{directory}'. " + "Content hash verification cannot continue." + ) hasher.update(b"-") return hasher.hexdigest()
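Taken together, the series ends with `compute_content_hash()` raising on unreadable files or unrecognized path types, and `provide()` validating each source entry against any `content_sha256`/`content_sha384`/`content_sha512` keys while honoring `content_hash_skip`. A condensed sketch of that per-source check (names follow the code above; this mirrors, but is not identical to, the final implementation):

```python
from conda_build.utils import compute_content_hash, ensure_list

CONTENT_HASH_KEYS = ("content_sha256", "content_sha384", "content_sha512")

def verify_content_hashes(src_dir, source_dict, idx=0):
    for key in CONTENT_HASH_KEYS:
        if key not in source_dict:
            continue
        expected = source_dict[key]
        if expected in (None, ""):
            raise ValueError(f"Empty {key} hash provided for source item #{idx}")
        obtained = compute_content_hash(
            src_dir,
            key[len("content_"):],
            skip=ensure_list(source_dict.get("content_hash_skip") or ()),
        )
        if obtained != expected:
            raise RuntimeError(
                f"{key} mismatch in source item #{idx}: "
                f"obtained '{obtained}' != expected '{expected}'"
            )
```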