Skip to content

Commit

Permalink
[wip][feature] Add support for FAT images via PyFatFS
Browse files Browse the repository at this point in the history
  • Loading branch information
mxmlnkn committed Sep 15, 2024
1 parent 8526497 commit 8b3ca1a
Show file tree
Hide file tree
Showing 12 changed files with 192 additions and 4 deletions.
5 changes: 5 additions & 0 deletions AppImage/build-ratarmount-appimage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ function installAppImagePythonPackages()
else
"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir rapidgzip
fi

# https://github.com/nathanhi/pyfatfs/issues/41
"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir \
'git+https://github.com/mxmlnkn/pyfatfs.git@master#egginfo=pyfatfs'

"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir ../core
"$APP_PYTHON_BIN" -I -m pip install --no-cache-dir ..[full]
}
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ And in contrast to [tarindexer](https://github.com/devsnd/tarindexer), which als
- **Rar** as provided by [rarfile](https://github.com/markokr/rarfile) by Marko Kreen. See also the [RAR 5.0 archive format](https://www.rarlab.com/technote.htm).
- **SquashFS, AppImage, Snap** as provided by [PySquashfsImage](https://github.com/matteomattei/PySquashfsImage) by Matteo Mattei. There seems to be no authoritative, open format specification, only [this nicely-done reverse-engineered description](https://dr-emann.github.io/squashfs/squashfs.html), I assume based on the [source code](https://github.com/plougher/squashfs-tools). Note that [Snaps](https://snapcraft.io/docs/the-snap-format) and [Appimages](https://github.com/AppImage/AppImageSpec/blob/master/draft.md#type-2-image-format) are both SquashFS images, with an executable prepended for AppImages.
- **Zip** as provided by [zipfile](https://docs.python.org/3/library/zipfile.html), which is distributed with Python itself. See also the [ZIP File Format Specification](https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT).
- **FAT12/FAT16/FAT32/VFAT** as provided by [PyFatFS](https://github.com/nathanhi/pyfatfs) by Nathan-J. Hirschauer. See also [Microsoft's FAT32 File System Specification](https://download.microsoft.com/download/1/6/1/161ba512-40e2-4cc9-843a-923143f3456c/fatgen103.doc).
- **Many Others** as provided by [libarchive](https://github.com/libarchive/libarchive) via [python-libarchive-c](https://github.com/Changaco/python-libarchive-c).
- Formats with tests:
[7z](https://github.com/ip7z/7zip/blob/main/DOC/7zFormat.txt),
Expand Down
2 changes: 2 additions & 0 deletions core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ full = [
"python-lzo ~= 1.0",
"zstandard ~= 0.22.0",
"isal ~= 1.0",
'pyfatfs ~= 1.0; python_version < "3.12.0"',
]
bzip2 = ["rapidgzip >= 0.13.1"]
gzip = ["indexed_gzip >= 1.6.3, < 2.0"]
Expand All @@ -80,6 +81,7 @@ zstd = [
'indexed_zstd >= 1.2.2, < 2.0; platform_system!="Windows"',
]
7z = ["libarchive-c ~= 5.1, < 6.0"]
fat = ["pyfatfs ~= 1.0"]

[tool.setuptools]
license-files = [
Expand Down
106 changes: 106 additions & 0 deletions core/ratarmountcore/FATMountSource.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import errno
import os
import stat
from typing import Dict, IO, Iterable, Optional, Union

from .MountSource import FileInfo, MountSource
from .utils import overrides

try:
from pyfatfs.FatIO import FatIO
from pyfatfs.PyFat import PyFat
from pyfatfs import PyFATException
except ImportError:
FatIO = None # type: ignore
PyFat = None # type: ignore


class FATMountSource(MountSource):
    """
    Read-only MountSource for FAT file system images, implemented on top of PyFatFS.
    """

    # PyFatFS error numbers that merely mean "there is no usable entry at this path".
    # ENOTDIR is included so that a path leading "through" a regular file is reported
    # as missing instead of raising.
    _MISSING_ENTRY_ERRNOS = (errno.ENOENT, errno.ENOTDIR)

    def __init__(self, fileOrPath: Union[str, IO[bytes]], **options) -> None:
        """
        fileOrPath: Path to a FAT image or an already opened binary file object containing one.
        options: Stored unmodified in self.options. They are not interpreted by this class.
        """
        self.fileSystem = PyFat()
        if isinstance(fileOrPath, str):
            # TODO Probably good idea for performance on Lustre to open the file unbuffered.
            self.fileSystem.open(fileOrPath, read_only=True)
        else:
            self.fileSystem.set_fp(fileOrPath)
        self.options = options

    @staticmethod
    def _convertFATDirectoryEntryToFileInfo(entry, path) -> FileInfo:
        """
        entry: of type pyfatfs.FATDirectoryEntry.FATDirectoryEntry.
        path: The path that was used to look up the entry. It is stored as userdata so that
              open() can later find the file again.
        """
        # All entries are exposed as readable and executable but not writable.
        mode = 0o555 | (stat.S_IFDIR if entry.is_directory() else stat.S_IFREG)

        return FileInfo(
            # fmt: off
            size     = entry.filesize,
            mtime    = entry.get_mtime().timestamp(),
            mode     = mode,
            linkname = "",  # FAT has no support for hard or symbolic links
            uid      = os.getuid(),
            gid      = os.getgid(),
            userdata = [path],
            # fmt: on
        )

    def _findEntry(self, path: str):
        """
        Returns the PyFatFS directory entry for the given path or None if no such entry exists.
        PyFatFS errors other than ENOENT and ENOTDIR are propagated to the caller.
        """
        try:
            return self.fileSystem.root_dir.get_entry(path)
        except PyFATException as exception:
            if exception.errno in self._MISSING_ENTRY_ERRNOS:
                return None
            raise

    @overrides(MountSource)
    def isImmutable(self) -> bool:
        return True

    @overrides(MountSource)
    def exists(self, path: str) -> bool:
        # Uses the same "missing" criteria as getFileInfo, so that both methods always agree.
        return self._findEntry(path) is not None

    def _listDir(self, path: str) -> Optional[Iterable]:
        """
        Returns the names of all directories and files directly inside the given folder
        or None if the path does not exist or is not a directory.
        """
        try:
            directories, files, _ = self.fileSystem.root_dir.get_entry(os.path.normpath(path)).get_entries()
        except PyFATException as exception:
            if exception.errno in self._MISSING_ENTRY_ERRNOS:
                return None
            raise
        return [str(entry) for entry in directories + files]

    @overrides(MountSource)
    def listDir(self, path: str) -> Optional[Union[Iterable[str], Dict[str, FileInfo]]]:
        # TODO I think with the low-level API, we could also get the FileInfos
        return self._listDir(path)

    @overrides(MountSource)
    def listDirModeOnly(self, path: str) -> Optional[Union[Iterable[str], Dict[str, int]]]:
        # TODO I think with the low-level API, we could also get the FileInfos
        return self._listDir(path)

    @overrides(MountSource)
    def getFileInfo(self, path: str, fileVersion: int = 0) -> Optional[FileInfo]:
        # fileVersion is ignored because fileVersions always reports exactly one version.
        entry = self._findEntry(path)
        if entry is None:
            return None
        return self._convertFATDirectoryEntryToFileInfo(entry, path)

    @overrides(MountSource)
    def fileVersions(self, path: str) -> int:
        return 1

    @overrides(MountSource)
    def open(self, fileInfo: FileInfo, buffering=-1) -> IO[bytes]:
        # The path was stored as the last userdata element by _convertFATDirectoryEntryToFileInfo.
        path = fileInfo.userdata[-1]
        assert isinstance(path, str)
        # TODO There is no option in FatIO to configure the buffering yet.
        return FatIO(self.fileSystem, path)

    @overrides(MountSource)
    def __exit__(self, exception_type, exception_value, exception_traceback):
        self.fileSystem.close()
2 changes: 1 addition & 1 deletion core/ratarmountcore/MountSource.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def getMountSource(self, fileInfo: FileInfo):
"""
return '/', self, fileInfo

def exists(self, path: str):
def exists(self, path: str) -> bool:
return self.getFileInfo(path) is not None

def isdir(self, path: str):
Expand Down
37 changes: 37 additions & 0 deletions core/ratarmountcore/compressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@
except ImportError:
PySquashfsImage = None

try:
import pyfatfs
from pyfatfs import PyFat
except ImportError:
pyfatfs = None
PyFat = None


CompressionModuleInfo = collections.namedtuple('CompressionModuleInfo', ['name', 'open'])
# Defining lambdas does not yet check the names of entities used inside the lambda!
Expand Down Expand Up @@ -238,6 +245,36 @@ def findSquashFSOffset(fileObject, maxSkip=1024 * 1024) -> int:
)


def isFATImage(fileObject) -> bool:
    """
    Checks whether the given file object contains a FAT image by letting PyFatFS parse the header.
    The position of fileObject is restored before returning, whatever the outcome.
    """
    startPosition = fileObject.tell()
    try:
        fatParser = PyFat.PyFat()
        # TODO Avoid possibly slow full FAT parsing here. Only do some quick checks such as PyFatFS.PyFat.parse_header
        #      Calling __set_fp instead of set_fp avoids that but it is not part of the public interface per convention!
        fatParser._PyFat__set_fp(fileObject)
        fatParser.is_read_only = True
        try:
            fatParser.parse_header()
        except (pyfatfs.PyFATException, ValueError):
            return False
        finally:
            # Reset file object so that it does not get closed! Cannot be None because that is checked.
            fatParser._PyFat__fp = io.BytesIO()
        return True
    finally:
        fileObject.seek(startPosition)


# Only register the FAT format when the pyfatfs module was actually imported.
# If the import above failed, pyfatfs is None and the isinstance check is False.
if isinstance(pyfatfs, types.ModuleType) and 'pyfatfs' in sys.modules:
    # NOTE(review): The positional arguments are presumably file extensions, an (empty) list,
    #               the backend modules, and the format check function. Confirm against CompressionInfo.
    ARCHIVE_FORMATS['fat'] = CompressionInfo(
        ['fat', 'img', 'dd', 'fat12', 'fat16', 'fat32', 'raw'],
        [],
        [CompressionModuleInfo('pyfatfs', lambda x: x)],
        lambda x: isFATImage(x),
    )


# libarchive support is split into filters (compressors or encoders working on a single file) and (archive) formats.
# For now, only list formats here that are not supported by other backends, because libarchive is slower anyway.
LIBARCHIVE_FILTER_FORMATS: Dict[str, CompressionInfo] = {}
Expand Down
22 changes: 21 additions & 1 deletion core/ratarmountcore/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,18 @@

from typing import IO, Optional, Union

from .compressions import checkForSplitFile, libarchive, PySquashfsImage, rarfile, TAR_COMPRESSION_FORMATS, zipfile
from .compressions import (
checkForSplitFile,
libarchive,
PySquashfsImage,
pyfatfs,
rarfile,
TAR_COMPRESSION_FORMATS,
zipfile,
)
from .utils import CompressionError, RatarmountError
from .MountSource import MountSource
from .FATMountSource import FATMountSource
from .FolderMountSource import FolderMountSource
from .RarMountSource import RarMountSource
from .SingleFileMountSource import SingleFileMountSource
Expand Down Expand Up @@ -94,12 +103,23 @@ def _openPySquashfsImage(fileOrPath: Union[str, IO[bytes]], **options) -> Option
return None


def _openFATImage(fileOrPath: Union[str, IO[bytes]], **options) -> Optional[MountSource]:
    """
    Tries to open the given path or file object as a FAT image.
    Returns None if the pyfatfs module is not available.
    Errors raised while constructing FATMountSource are not caught here.
    """
    try:
        return None if pyfatfs is None else FATMountSource(fileOrPath, **options)
    finally:
        # Always rewind file objects, no matter whether opening succeeded, failed, or was skipped.
        if hasattr(fileOrPath, 'seek'):
            fileOrPath.seek(0)  # type: ignore


# Maps a backend name to the function that tries to open a file or path with that backend.
# NOTE(review): The keys presumably have to match the module names given in the
#               CompressionModuleInfo entries in compressions.py, e.g., 'pyfatfs'. Confirm against the lookup code.
_BACKENDS = {
"rarfile": _openRarMountSource,
"tarfile": _openTarMountSource,
"zipfile": _openZipMountSource,
"pysquashfsimage": _openPySquashfsImage,
"libarchive": _openLibarchiveMountSource,
"pyfatfs": _openFATImage,
}


Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ dependencies = [
# Need >= 4.1 because of https://github.com/markokr/rarfile/issues/73
"rarfile ~= 4.1",
"libarchive-c ~= 5.1, < 6.0",
'pyfatfs ~= 1.0; python_version < "3.12.0"',
]

[project.readme]
Expand Down
8 changes: 8 additions & 0 deletions tests/create-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -388,3 +388,11 @@ true > zeros-32-MiB.txt; for i in $( seq $(( 32 * 1024 )) ); do printf '%01023d\
true > spaces-32-MiB.txt; for i in $( seq $(( 32 * 1024 )) ); do printf '%1023s' $'\n' >> spaces-32-MiB.txt; done
true > zeros-32-MiB.txt; for i in $( seq $(( 32 * 1024 )) ); do printf '%01022d\n' 0 >> zeros-32-MiB.txt; done
7z a two-large-files-32Ki-lines-each-1023B.7z spaces-32-MiB.txt zeros-32-MiB.txt

# Would be nice to have this without sudo, but I don't want to create test cases with the same program being tested.
# Creates an empty 1 MiB image, formats it as FAT, and fills it with the contents of folder-symlink.zip.
# NOTE(review): The committed test file is tests/folder-symlink.fat12.bz2, so the resulting image presumably
#               still has to be renamed and compressed with bzip2 by hand. Confirm.
head -c $(( 1024 * 1024 )) /dev/zero > 'folder-symlink.fat'
mkfs.fat 'folder-symlink.fat'
# -p so that rerunning these commands does not fail when the mount point already exists.
mkdir -p mounted
sudo mount 'folder-symlink.fat' mounted
( cd mounted && sudo unzip ../tests/folder-symlink.zip )
sudo umount mounted
# Do not leave the temporary mount point behind.
rmdir mounted
Binary file added tests/folder-symlink.fat12.bz2
Binary file not shown.
4 changes: 2 additions & 2 deletions tests/ratarmount-help.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions tests/runtests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1964,6 +1964,14 @@ pytestedTests+=(
)
fi

# pyfatfs depends on PyFilesystem2, which only works for Python < 3.12 because of the removed pkg_resources.
# https://github.com/nathanhi/pyfatfs/issues/41
# The FAT test is therefore only registered when the Python minor version is known and lower than 12.
# NOTE(review): Each entry presumably consists of: checksum, archive to mount, path of the file to check
#               inside the mount point. Confirm against the code consuming the tests array.
if [[ -n "$python3MinorVersion" && "$python3MinorVersion" -lt 12 ]]; then
tests+=(
2709a3348eb2c52302a7606ecf5860bc tests/folder-symlink.fat12.bz2 folder-symlink.fat12/foo/fighter/ufo
)
fi

tests+=(
f47c75614087a8dd938ba4acff252494 tests/simple-file-split.001 simple-file-split
f47c75614087a8dd938ba4acff252494 tests/simple-file-split.002 simple-file-split
Expand Down

0 comments on commit 8b3ca1a

Please sign in to comment.