Skip to content

Commit

Permalink
fixup! [wip] Add tests and documentation for IPFS, TODO: SMB, DropBox…
Browse files Browse the repository at this point in the history
…, Hf
  • Loading branch information
mxmlnkn committed Oct 12, 2024
1 parent 3ee588b commit c730b30
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 9 deletions.
26 changes: 20 additions & 6 deletions core/ratarmountcore/FSSpecMountSource.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
except ImportError:
fsspec = None # type: ignore

try:
from webdav4.fsspec import WebdavFileSystem
except ImportError:
WebdavFileSystem = None # type: ignore


class FSSpecMountSource(MountSource):
"""
Expand Down Expand Up @@ -71,12 +76,16 @@ def __init__(self, urlOrFS, prefix: Optional[str] = None, **options) -> None:

# The fsspec filesystems are not uniform! http:// expects the arguments to isdir with prefixed
# protocol while other filesystem implementations are fine with only the path.
# https://github.com/ray-project/ray/issues/26423#issuecomment-1179561181
self._isHTTP = isinstance(self.fileSystem, fsspec.implementations.http.HTTPFileSystem)
# - https://github.com/ray-project/ray/issues/26423#issuecomment-1179561181
# - https://github.com/fsspec/filesystem_spec/issues/1713
# - https://github.com/skshetry/webdav4/issues/198
self._pathsRequireQuoting = isinstance(
self.fileSystem, (fsspec.implementations.http.HTTPFileSystem, WebdavFileSystem)
)
self.prefix = prefix.rstrip("/") if prefix and prefix.strip("/") and self.fileSystem.isdir(prefix) else ""

def _getPath(self, path: str) -> str:
if self._isHTTP:
if self._pathsRequireQuoting:
path = urllib.parse.quote(path)
if self.prefix:
if not path or path == "/":
Expand Down Expand Up @@ -110,9 +119,10 @@ def _convertToFileInfo(entry, path) -> FileInfo:
# They kinda work only like hardlinks.
# https://github.com/fsspec/filesystem_spec/issues/1679
# https://github.com/fsspec/filesystem_spec/issues/1680
size = entry.get('size', 0)
return FileInfo(
# fmt: off
size = entry.get('size', 0),
size = size if size else 0,
mtime = FSSpecMountSource._getModificationTime(entry),
mode = FSSpecMountSource._getMode(entry),
linkname = "",
Expand Down Expand Up @@ -181,10 +191,14 @@ def _listDir(self, path: str, onlyMode: bool) -> Optional[Union[Iterable[str], D
)
for entry in result
}
if self._isHTTP:

# For HTTPFileSystem, we need to filter out the entries for sorting.
# For WebDAV we do not even need to unquote! We get unquoted file names with ls!
if isinstance(self.fileSystem, fsspec.implementations.http.HTTPFileSystem):
return {
urllib.parse.unquote(name): info for name, info in result.items() if not name.startswith(('?', '#'))
}

return result

@overrides(MountSource)
Expand Down Expand Up @@ -214,7 +228,7 @@ def _getFileInfoHTTP(self, path: str) -> Optional[FileInfo]:

@overrides(MountSource)
def getFileInfo(self, path: str, fileVersion: int = 0) -> Optional[FileInfo]:
if self._isHTTP:
if isinstance(self.fileSystem, fsspec.implementations.http.HTTPFileSystem):
return self._getFileInfoHTTP(path)

path = self._getPath(path)
Expand Down
38 changes: 38 additions & 0 deletions core/ratarmountcore/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
# pylint: disable=no-member,abstract-method
# Disable pylint errors. See https://github.com/fsspec/filesystem_spec/issues/1678

import http
import os
import re
import stat
import sys
import traceback
Expand Down Expand Up @@ -255,6 +257,42 @@ def tryOpenURL(url, printDebug: int) -> Union[MountSource, IO[bytes], str]:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
fileSystem, path = fsspec.url_to_fs(url)
elif protocol == 'webdav':
# This needs special handling because we need to decide between HTTP and HTTPS and because of:
# https://github.com/skshetry/webdav4/issues/197
try:
from webdav4.fsspec import WebdavFileSystem
except ImportError:
raise RatarmountError("Install the webdav4 Python package to mount webdav://.")

matchedURI = re.match("(?:([^:/]*):([^@/]*)@)?([^/]*)(.*)", splitURI[1])
if not matchedURI:
raise RatarmountError(
"Failed to match WebDAV URI of the format webdav://[user:password@]host[:port]/path\n"
"If your user name or password contains special characters such as ':/@', then use the environment "
"variables WEBDAV_USER and WEBDAV_PASSWORD to specify them."
)
username, password, baseURL, path = matchedURI.groups()
if path is None:
path = ""
if username is None and 'WEBDAV_USER' in os.environ:
username = os.environ.get('WEBDAV_USER')
if password is None and 'WEBDAV_PASSWORD' in os.environ:
password = os.environ.get('WEBDAV_PASSWORD')
auth = None if username is None or password is None else (username, password)

def checkForHTTPS(url):
    """
    Probe whether the given server answers an HTTPS request.

    url: Passed verbatim as the host argument of http.client.HTTPSConnection, i.e., "host" or "host:port"
         without any scheme prefix.
    Returns True if a HEAD request for "/" yields any response within the 2 s timeout, else False.
    Never raises. Every failure (malformed host, refused connection, TLS error, timeout, ...) is interpreted
    as "HTTPS is not available" and is only reported when printDebug >= 3.
    """
    # "import http" alone does not load the http.client submodule. Import it explicitly so that the probe
    # does not fail with an AttributeError that would be swallowed below and misread as "no HTTPS".
    import http.client

    connection = None
    try:
        connection = http.client.HTTPSConnection(url, timeout=2)
        connection.request("HEAD", "/")
        return bool(connection.getresponse())
    except Exception as exception:
        # Deliberately broad: this is a best-effort probe and any error means falling back to plain HTTP.
        if printDebug >= 3:
            print("[Info] Determined WebDAV URL to use HTTP instead of HTTPS because of:", exception)
        return False
    finally:
        # Do not leak the probing socket.
        if connection is not None:
            connection.close()

transportProtocol = "https" if checkForHTTPS(baseURL) else "http"
fileSystem = WebdavFileSystem(f"{transportProtocol}://{baseURL}", auth=auth)
else:
fileSystem, path = fsspec.url_to_fs(url)

Expand Down
2 changes: 2 additions & 0 deletions tests/requirements-tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,5 @@ pyminizip
pyopenssl>=23
rangehttpserver
boto3
wsgidav
cheroot
83 changes: 80 additions & 3 deletions tests/runtests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2115,7 +2115,7 @@ checkURLProtocolS3()

weedFolder=$( mktemp -d --suffix .test.ratarmount )
TMP_FILES_TO_CLEANUP+=( "$weedFolder" )
./weed server -dir="$weedFolder" -s3 -s3.port "$port" -idleTimeout=30 -ip 127.0.0.1 2>weed.log &
./weed server -dir="$weedFolder" -s3 -s3.port "$port" -idleTimeout=30 -ip 127.0.0.1 &
pid=$!

# Wait for port to open
Expand Down Expand Up @@ -2209,6 +2209,13 @@ checkURLProtocolSamba()

checkURLProtocolIPFS()
{
# TODO ipfsspec still fails to import with Python 3.14
# https://github.com/eigenein/protobuf/issues/177
python3MinorVersion=$( python3 -c 'import sys; print(sys.version_info.minor)' )
if [[ -n "$python3MinorVersion" && "$python3MinorVersion" -ge 14 ]]; then
return 0
fi

# Using impacket/examples/smbserver.py does not work for a multitude of reasons.
# Therefore set up a server with tests/install-smbd.sh from outside and check for its existence here.
local ipfs
Expand All @@ -2222,9 +2229,11 @@ checkURLProtocolIPFS()
ipfs=kubo/ipfs
fi

local pid=
$ipfs init --profile server
if ! pgrep ipfs; then
$ipfs daemon &
pid=$!
sleep 5
fi

Expand All @@ -2240,6 +2249,75 @@ checkURLProtocolIPFS()
d3b07384d113edec49eaa6238ad5ff00 || returnError "$LINENO" 'Failed to read from IPFS'
checkFileInTARForeground "ipfs://QmZwm9gKZaayGWqYtMgj6cd4JaNK1Yp2ChYZhXrERGq4Gi" single-file.tar \
1a28538854d1884e4415cb9bfb7a2ad8 || returnError "$LINENO" 'Failed to read from IPFS'

if [[ -n "$pid" ]]; then kill "$pid"; fi
}


checkURLProtocolWebDAV()
{
    # Checks mounting via webdav:// URLs against a local wsgidav server in three setups:
    #  1. anonymous access,
    #  2. user and password embedded in the URL,
    #  3. user and password given via the WEBDAV_USER and WEBDAV_PASSWORD environment variables.
    # The test is skipped (returns 0) when the wsgidav package is not installed.
    # NOTE(review): if a check fails and returnError leaves this function, the background server started
    #               here is not killed and, in setup 3, the exported variables stay set. TODO confirm.
    if ! pip show wsgidav &>/dev/null; then
        echoerr "Skipping WebDAV test because wsgidav package is not installed."
        return 0
    fi

    local port=8047
    # BEWARE OF LOOP MOUNTS when testing locally!
    # It will time out, when trying to expose PWD via WebDAV while mounting into PWD/mounted.
    wsgidav --host=127.0.0.1 --port="$port" --root="$PWD" --auth=anonymous &
    local pid=$!
    # Give the server time to start listening before the first request.
    sleep 5

    checkFileInTARForeground "webdav://127.0.0.1:$port/single-file.tar" bar \
        d3b07384d113edec49eaa6238ad5ff00 || returnError "$LINENO" 'Failed to read from WebDAV server'
    checkFileInTARForeground "webdav://127.0.0.1:$port" single-file.tar \
        1a28538854d1884e4415cb9bfb7a2ad8 || returnError "$LINENO" 'Failed to read from WebDAV server'

    kill "$pid"

    local user password
    user='pqvfumqbqp'
    password='ioweb123GUIweb'

    # Restart the server with a single configured user so that authentication is required.
    cat <<EOF > wsgidav-config.yaml
http_authenticator:
    domain_controller: null  # Same as wsgidav.dc.simple_dc.SimpleDomainController
    accept_basic: true  # Pass false to prevent sending clear text passwords
    accept_digest: true
    default_to_digest: true

simple_dc:
    user_mapping:
        "*":
            "$user":
                password: "$password"
EOF

    wsgidav --host=127.0.0.1 --port="$port" --root="$PWD" --config=wsgidav-config.yaml &
    pid=$!
    sleep 5

    # Credentials embedded in the URL.
    checkFileInTARForeground "webdav://$user:$password@127.0.0.1:$port/single-file.tar" bar \
        d3b07384d113edec49eaa6238ad5ff00 || returnError "$LINENO" 'Failed to read from WebDAV server'
    checkFileInTARForeground "webdav://$user:$password@127.0.0.1:$port" single-file.tar \
        1a28538854d1884e4415cb9bfb7a2ad8 || returnError "$LINENO" 'Failed to read from WebDAV server'

    # Credentials taken from the environment instead of the URL.
    export WEBDAV_USER=$user
    export WEBDAV_PASSWORD=$password
    checkFileInTARForeground "webdav://127.0.0.1:$port/single-file.tar" bar \
        d3b07384d113edec49eaa6238ad5ff00 || returnError "$LINENO" 'Failed to read from WebDAV server'
    checkFileInTARForeground "webdav://127.0.0.1:$port" single-file.tar \
        1a28538854d1884e4415cb9bfb7a2ad8 || returnError "$LINENO" 'Failed to read from WebDAV server'
    unset WEBDAV_USER
    unset WEBDAV_PASSWORD

    # This server using SSL also works, but do not overload it with regular tests.
    # ratarmount 'webdav://www.dlp-test.com\WebDAV:[email protected]/webdav' mounted
    # checkFileInTARForeground "webdav://www.dlp-test.com\WebDAV:[email protected]/webdav" \
    #     mounted/WebDAV_README.txt 87d13914fe24e486be943cb6b1f4e224 ||
    #     returnError "$LINENO" 'Failed to read from WebDAV server'

    kill "$pid"
}


Expand All @@ -2259,8 +2337,7 @@ checkRemoteSupport()
checkURLProtocolSSH || returnError 'Failed ssh:// check'

checkURLProtocolSamba || returnError 'Failed smb:// check'
# TODO Add and test IPFS
# TODO look for other fsspec implementations in an automated manner
checkURLProtocolWebDAV || returnError 'Failed webdav:// check'
}


Expand Down

0 comments on commit c730b30

Please sign in to comment.