Skip to content

Commit

Permalink
[build] Apply name change pragzip -> rapidgzip
Browse files Browse the repository at this point in the history
  • Loading branch information
mxmlnkn committed Sep 2, 2023
1 parent 4af9173 commit 55b428e
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ init-hook='import sys; sys.path.append("./core")'
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loaded into the active Python interpreter and may
# run arbitrary code.
extension-pkg-whitelist=indexed_gzip,indexed_bzip2,indexed_zstd,lzmaffi,pragzip
extension-pkg-whitelist=indexed_gzip,indexed_bzip2,indexed_zstd,lzmaffi,rapidgzip

# Specify a score threshold to be exceeded before program exits with error.
fail-under=10.0
Expand Down
4 changes: 2 additions & 2 deletions AppImage/build-ratarmount-appimage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ function installAppImagePythonPackages()
"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir \
'git+https://github.com/mxmlnkn/indexed_bzip2.git@master#egginfo=indexed_bzip2&subdirectory=python/indexed_bzip2'
"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir \
'git+https://github.com/mxmlnkn/indexed_bzip2.git@master#egginfo=pragzip&subdirectory=python/pragzip'
'git+https://github.com/mxmlnkn/indexed_bzip2.git@master#egginfo=rapidgzip&subdirectory=python/rapidgzip'
else
"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir indexed_bzip2
"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir pragzip
"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir rapidgzip
fi
"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir ../core
"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir ..
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ And in contrast to [tarindexer](https://github.com/devsnd/tarindexer), which als
*TAR compressions supported for random access:*

- **BZip2** as provided by [indexed_bzip2](https://github.com/mxmlnkn/indexed_bzip2) as a backend, which is a refactored and extended version of [bzcat](https://github.com/landley/toybox/blob/c77b66455762f42bb824c1aa8cc60e7f4d44bdab/toys/other/bzcat.c) from [toybox](https://landley.net/code/toybox/). See also the [reverse engineered specification](https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf).
- **Gzip** as provided by [pragzip](https://github.com/mxmlnkn/pragzip) or [indexed_gzip](https://github.com/pauldmccarthy/indexed_gzip) by Paul McCarthy. See also [RFC1952](https://tools.ietf.org/html/rfc1952).
- **Gzip** as provided by [rapidgzip](https://github.com/mxmlnkn/rapidgzip) or [indexed_gzip](https://github.com/pauldmccarthy/indexed_gzip) by Paul McCarthy. See also [RFC1952](https://tools.ietf.org/html/rfc1952).
- **Xz** as provided by [python-xz](https://github.com/Rogdham/python-xz) by Rogdham or [lzmaffi](https://github.com/r3m0t/backports.lzma) by Tomer Chachamu. See also [The .xz File Format](https://tukaani.org/xz/xz-file-format.txt).
- **Zstd** as provided by [indexed_zstd](https://github.com/martinellimarco/indexed_zstd) by Marco Martinelli. See also [Zstandard Compression Format](https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md).

Expand Down Expand Up @@ -428,7 +428,7 @@ Advanced Options:
opened with multiple backends. Arguments specified last will have the
highest priority. A comma-separated list may be specified. Possible
backends: ['indexed_bzip2', 'indexed_gzip', 'indexed_zstd', 'lzmaffi',
'pragzip', 'rarfile', 'xz', 'zipfile'] (default: None)
'rapidgzip', 'rarfile', 'xz', 'zipfile'] (default: None)
-d DEBUG, --debug DEBUG
Sets the debugging level. Higher means more output. Currently, 3 is the
highest. (default: 1)
Expand Down
34 changes: 17 additions & 17 deletions core/ratarmountcore/SQLiteIndexedTar.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
pass

try:
import pragzip
import rapidgzip
except ImportError:
pass

Expand Down Expand Up @@ -1536,36 +1536,36 @@ def _loadOrStoreCompressionOffsets(self):
self.index.synchronizeCompressionOffsets(self.tarFileObject, self.compression)

if self.compression == 'gz':
self._reloadWithPragzip()
self._reloadWithRapidgzip()

# Forward to join_threads() of the currently opened tar file object if it offers that method; otherwise do nothing.
def joinThreads(self):
if hasattr(self.tarFileObject, 'join_threads'):
self.tarFileObject.join_threads()

def _reloadWithPragzip(self):
# TODO Currently, only use pragzip when explicitly specified because it is still in development.
def _reloadWithRapidgzip(self):
# TODO Currently, only use rapidgzip when explicitly specified because it is still in development.
# Note that the runaway memory isn't so much an issue when the index has been created with indexed_gzip
# because it splits at roughly equal decompressed chunk sizes! I could also use the single-threaded
# pragzip version to create the index to avoid memory issue but then what would be the point?
# rapidgzip version to create the index to avoid the memory issue, but then what would be the point?
# Getting rid of dependencies?
if self.rawFileObject is None or self.compression != 'gz' or 'pragzip' not in self.prioritizedBackends:
if self.rawFileObject is None or self.compression != 'gz' or 'rapidgzip' not in self.prioritizedBackends:
return

if 'pragzip' not in sys.modules:
print("[Warning] Cannot use pragzip for access to gzip file because it is not installed. Try:")
print("[Warning] python3 -m pip install --user pragzip")
if 'rapidgzip' not in sys.modules:
print("[Warning] Cannot use rapidgzip for access to gzip file because it is not installed. Try:")
print("[Warning] python3 -m pip install --user rapidgzip")
return

# Check whether indexed_gzip might have a higher priority than pragzip if both are listed.
# Check whether indexed_gzip might have a higher priority than rapidgzip if both are listed.
if (
'indexed_gzip' in self.prioritizedBackends
and 'pragzip' in self.prioritizedBackends
and self.prioritizedBackends.index('indexed_gzip') < self.prioritizedBackends.index('pragzip')
and 'rapidgzip' in self.prioritizedBackends
and self.prioritizedBackends.index('indexed_gzip') < self.prioritizedBackends.index('rapidgzip')
):
# Lower indexes have higher priority (because normally the list would be checked starting from the lowest index).
return

# Only allow mounting of real files. Pragzip does work with Python file objects but we don't want to
# Only allow mounting of real files. Rapidgzip does work with Python file objects but we don't want to
# mount recursive archives all with the parallel gzip decoder because then the cores would be oversubscribed!
# Similarly, small files would result in being wholly cached into memory, which probably isn't what the user
# had intended by using ratarmount?
Expand All @@ -1575,7 +1575,7 @@ def _reloadWithPragzip(self):
hasMultipleChunks = os.stat(self.rawFileObject.name).st_size >= 4 * self.gzipSeekPointSpacing
if not isRealFile or not hasMultipleChunks:
if self.printDebug >= 2:
print("[Info] Do not reopen with pragzip backend because:")
print("[Info] Do not reopen with rapidgzip backend because:")
if not isRealFile:
print("[Info] - the file to open is a recursive file, which limits the usability of ")
print("[Info] parallel decompression.")
Expand All @@ -1589,12 +1589,12 @@ def _reloadWithPragzip(self):
gzindex = self.index.openGzipIndex()
if gzindex:
if self.printDebug >= 1:
print("[Info] Reopening the gzip with the pragzip backend...")
print("[Info] Reopening the gzip with the rapidgzip backend...")

self.tarFileObject = pragzip.PragzipFile(
self.tarFileObject = rapidgzip.RapidgzipFile(
self.rawFileObject, parallelization=self.parallelization, verbose=self.printDebug >= 2
)
self.tarFileObject.import_index(gzindex)

if self.printDebug >= 1:
print("[Info] Reopened the gzip with the pragzip backend.")
print("[Info] Reopened the gzip with the rapidgzip backend.")
4 changes: 2 additions & 2 deletions core/ratarmountcore/compressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@
[
CompressionModuleInfo('indexed_gzip', lambda x: indexed_gzip.IndexedGzipFile(fileobj=x)),
# TODO Declare existence of this module but do not provide an open method yet because it
# is still in development. SQLiteIndexedTar has a special case for opening with pragzip.
CompressionModuleInfo('pragzip', None),
# is still in development. SQLiteIndexedTar has a special case for opening with rapidgzip.
CompressionModuleInfo('rapidgzip', None),
],
lambda x: x.read(2) == b'\x1F\x8B',
),
Expand Down

0 comments on commit 55b428e

Please sign in to comment.