Skip to content

Commit

Permalink
[refactor] Use the bzip2 backend that is now integrated with rapidgzip 0.11.0+
Browse files Browse the repository at this point in the history
  • Loading branch information
mxmlnkn committed Dec 24, 2023
1 parent 819bd7a commit 1b4045a
Show file tree
Hide file tree
Showing 9 changed files with 20 additions and 33 deletions.
1 change: 0 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ jobs:
python3 ratarmount.py README.md || [ $? != 139 ]
python3 -m pip uninstall --yes "$module"
done
python3 -m pip install --upgrade 'git+https://github.com/mxmlnkn/indexed_bzip2.git@master#egginfo=indexed_bzip2&subdirectory=python/indexed_bzip2'
python3 -m pip install --upgrade 'git+https://github.com/mxmlnkn/indexed_bzip2.git@master#egginfo=rapidgzip&subdirectory=python/rapidgzip'
- name: Test ratarmountcore Installation From Tarball
Expand Down
3 changes: 0 additions & 3 deletions AppImage/build-ratarmount-appimage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,9 @@ function installAppImagePythonPackages()
# The compilation call will contain this: -I/opt/_internal/cpython-3.9.15/include/python3.9
# Even though it should be like this: -I<path-to-appdir>/opt/_internal/cpython-3.9.15/include/python3.9
if [[ -n "$USE_CUTTING_EDGE_BACKENDS" ]]; then
"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir \
'git+https://github.com/mxmlnkn/indexed_bzip2.git@master#egginfo=indexed_bzip2&subdirectory=python/indexed_bzip2'
"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir \
'git+https://github.com/mxmlnkn/indexed_bzip2.git@master#egginfo=rapidgzip&subdirectory=python/rapidgzip'
else
"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir indexed_bzip2
"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir rapidgzip
fi
"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir ../core
Expand Down
16 changes: 6 additions & 10 deletions core/ratarmountcore/SQLiteIndexedTar.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,6 @@
from timeit import default_timer as timer
from typing import Any, Callable, cast, Dict, Generator, IO, Iterable, List, Optional, Tuple, Union

try:
import indexed_bzip2
except ImportError:
pass

try:
import indexed_gzip
except ImportError:
Expand Down Expand Up @@ -896,9 +891,10 @@ def _updateProgressBar(self, progressBar, fileobj: Any) -> None:
return

try:
if hasattr(fileobj, 'tell_compressed') and (
('indexed_bzip2' in sys.modules and isinstance(fileobj, indexed_bzip2.IndexedBzip2File))
or ('rapidgzip' in sys.modules and isinstance(fileobj, rapidgzip.RapidgzipFile))
if (
hasattr(fileobj, 'tell_compressed')
and 'rapidgzip' in sys.modules
and (isinstance(fileobj, rapidgzip.IndexedBzip2File) or isinstance(fileobj, rapidgzip.RapidgzipFile))
):
# Note that because bz2 works on a bitstream the tell_compressed returns the offset in bits
progressBar.update(fileobj.tell_compressed() // 8)
Expand Down Expand Up @@ -1254,7 +1250,7 @@ def _tryToMarkAsAppended(self, storedStats: Dict[str, Any], archiveStats: os.sta
# of this size should not take more than 10s, so pretty negligible in my opinion.
#
# For compressed archives, detecting appended archives does not help much because the bottleneck is
# the decompression not the indexing of files. And because indexed_bzip2 and indexed_gzip probably
# the decompression not the indexing of files. And because rapidgzip and indexed_gzip probably
# assume that the index is complete once import_index has been called, we have to recreate the full
# block offsets anyway.
if self.compression:
Expand Down Expand Up @@ -1534,7 +1530,7 @@ def _openCompressedFile(
fileobj=fileobj, drop_handles=False, spacing=gzipSeekPointSpacing, buffer_size=bufferSize
)
elif compression == 'bz2':
tar_file = indexed_bzip2.open(fileobj, parallelization=parallelization) # type: ignore
tar_file = rapidgzip.IndexedBzip2File(fileobj, parallelization=parallelization) # type: ignore
elif (
compression == 'xz'
and xz
Expand Down
7 changes: 1 addition & 6 deletions core/ratarmountcore/compressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,6 @@

from .utils import isLatinAlpha, isLatinDigit, isLatinHexAlpha, formatNumber, ALPHA, DIGITS, HEX

try:
import indexed_bzip2
except ImportError:
indexed_bzip2 = None # type: ignore

try:
import indexed_gzip
except ImportError:
Expand Down Expand Up @@ -74,7 +69,7 @@
'bz2': CompressionInfo(
['bz2', 'bzip2'],
['tb2', 'tbz', 'tbz2', 'tz2'],
[CompressionModuleInfo('indexed_bzip2', lambda x: indexed_bzip2.open(x))], # type: ignore
[CompressionModuleInfo('rapidgzip', lambda x: rapidgzip.IndexedBzip2File(x))], # type: ignore
lambda x: (x.read(4)[:3] == b'BZh' and x.read(6) == (0x314159265359).to_bytes(6, 'big')),
),
'gz': CompressionInfo(
Expand Down
5 changes: 2 additions & 3 deletions core/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,14 @@ install_requires =

[options.extras_require]
full =
indexed_bzip2 >= 1.3.1, < 2.0
indexed_gzip >= 1.6.3, < 2.0
indexed_zstd >= 1.3.1, < 2.0; sys_platform=="darwin"
indexed_zstd >= 1.2.2, < 2.0; platform_system!="Windows"
python-xz ~= 0.4.0
rapidgzip >= 0.10.0
rapidgzip >= 0.11.1
rarfile ~= 4.0

bzip2 = indexed_bzip2 >= 1.3.1, < 2.0
bzip2 = rapidgzip >= 0.11.1
gzip = indexed_gzip >= 1.6.3, < 2.0
rar = rarfile ~= 4.0
xz = python-xz ~= 0.4.0
Expand Down
12 changes: 7 additions & 5 deletions core/tests/test_SQLiteIndexedTar.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import tarfile
import tempfile

import indexed_bzip2
import rapidgzip

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

Expand Down Expand Up @@ -48,7 +48,9 @@ def _makeFolder(tarArchive, name):

@staticmethod
def test_context_manager(parallelization):
with SQLiteIndexedTar(findTestFile('single-file.tar'), writeIndex=False, parallelization=parallelization) as indexedTar:
with SQLiteIndexedTar(
findTestFile('single-file.tar'), writeIndex=False, parallelization=parallelization
) as indexedTar:
assert indexedTar.listDir('/')

@staticmethod
Expand Down Expand Up @@ -438,9 +440,9 @@ def test_appending_to_large_archive(parallelization, tmpdir):

# Create a TAR large in size as well as file count
tarPath = os.path.join(tmpdir, "foo.tar")
with indexed_bzip2.open(findTestFile("tar-with-300-folders-with-1000-files-0B-files.tar.bz2")) as file, open(
tarPath, 'wb'
) as extracted:
with rapidgzip.IndexedBzip2File(
findTestFile("tar-with-300-folders-with-1000-files-0B-files.tar.bz2")
) as file, open(tarPath, 'wb') as extracted:
while True:
data = file.read(1024 * 1024)
if not data:
Expand Down
2 changes: 1 addition & 1 deletion ratarmount.py
Original file line number Diff line number Diff line change
Expand Up @@ -949,9 +949,9 @@ def __call__(self, parser, args, values, option_string=None):
("zstd", "/facebook/zstd/dev/LICENSE"),
("zlib", "/madler/zlib/master/LICENSE"),
("ratarmountcore", "/mxmlnkn/ratarmount/master/core/LICENSE"),
("indexed_bzip2", "/mxmlnkn/indexed_bzip2/master/LICENSE"),
("indexed_gzip", "/mxmlnkn/indexed_gzip/master/LICENSE"),
("indexed_zstd", "/martinellimarco/indexed_zstd/master/LICENSE"),
("rapidgzip", "/mxmlnkn/rapidgzip/master/LICENSE-MIT"),
]:
licenseUrl = "https://raw.githubusercontent.com" + githubPath
licenseContents = urllib.request.urlopen(licenseUrl).read().decode()
Expand Down
3 changes: 1 addition & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,12 @@ py_modules = ratarmount
install_requires =
ratarmountcore ~= 0.6.0
fusepy
indexed_bzip2 >= 1.3.1, < 2.0
indexed_gzip >= 1.6.3, < 2.0
indexed_zstd >= 1.3.1, < 2.0; sys_platform=="darwin"
indexed_zstd >= 1.2.2, < 2.0; platform_system!="Windows"
dataclasses; python_version < "3.7.0"
python-xz ~= 0.4.0
rapidgzip ~= 0.10.0
rapidgzip >= 0.11.1
rarfile ~= 4.0

[options.extras_require]
Expand Down
4 changes: 2 additions & 2 deletions tests/runtests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -822,9 +822,9 @@ benchmarkDecoderBackends()
printf '% 5s : ' "$compression"
case "$compression" in
bzip2)
python3 -m timeit 'from indexed_bzip2 import IndexedBzip2File as IBF; IBF( '"'$file'"' ).read();'
python3 -m timeit 'from rapidgzip import IndexedBzip2File as IBF; IBF( '"'$file'"' ).read();'
printf '% 5s : ' "pbz2"
python3 -m timeit 'from indexed_bzip2 import IndexedBzip2File as IBF; IBF( '"'$file'"', parallelization = 0 ).read();'
python3 -m timeit 'from rapidgzip import IndexedBzip2File as IBF; IBF( '"'$file'"', parallelization = 0 ).read();'
;;
gzip)
python3 -m timeit 'from indexed_gzip import IndexedGzipFile as IBF; IBF( '"'$file'"' ).read();'
Expand Down

0 comments on commit 1b4045a

Please sign in to comment.