From 79fbf801c5908f4d1d9bc90004b74cfaaeeed2df Mon Sep 17 00:00:00 2001 From: Min RK Date: Fri, 31 May 2024 03:27:40 +0200 Subject: [PATCH] Merge pull request from GHSA-hrw6-wg82-cm62 * filefind: avoid handling absolute paths we don't need or want absolute path support, which we inherited from generic ipython_genutils only supporting relative paths lets us avoid attempting to accessing files we know we won't accept * Apply suggestions from code review Co-authored-by: M Bussonnier * filefind: only accept Sequence[str] we only call it one place, might as well be simple about it * version_info gate for is_relative_to * clarify docstring Co-authored-by: Carol Willing --------- Co-authored-by: M Bussonnier Co-authored-by: Carol Willing --- jupyter_server/utils.py | 99 +++++++++++++++++------------------------ tests/test_utils.py | 39 ++++++++++++++++ 2 files changed, 81 insertions(+), 57 deletions(-) diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index e9225b746d..9a1bd89afb 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -11,6 +11,7 @@ import sys import warnings from contextlib import contextmanager +from pathlib import Path from typing import Any, Generator, NewType, Sequence from urllib.parse import ( SplitResult, @@ -338,81 +339,65 @@ def is_namespace_package(namespace: str) -> bool | None: return isinstance(spec.submodule_search_locations, _NamespacePath) -def filefind(filename: str, path_dirs: Sequence[str] | str | None = None) -> str: +def filefind(filename: str, path_dirs: Sequence[str]) -> str: """Find a file by looking through a sequence of paths. - This iterates through a sequence of paths looking for a file and returns - the full, absolute path of the first occurrence of the file. If no set of - path dirs is given, the filename is tested as is, after running through - :func:`expandvars` and :func:`expanduser`. Thus a simple call:: - filefind("myfile.txt") + For use in FileFindHandler. - will find the file in the current working dir, but:: + Iterates through a sequence of paths looking for a file and returns + the full, absolute path of the first occurrence of the file. - filefind("~/myfile.txt") + Absolute paths are not accepted for inputs. - Will find the file in the users home directory. This function does not - automatically try any paths, such as the cwd or the user's home directory. + This function does not automatically try any paths, + such as the cwd or the user's home directory. Parameters ---------- filename : str - The filename to look for. - path_dirs : str, None or sequence of str - The sequence of paths to look for the file in. If None, the filename - need to be absolute or be in the cwd. If a string, the string is - put into a sequence and the searched. If a sequence, walk through - each element and join with ``filename``, calling :func:`expandvars` - and :func:`expanduser` before testing for existence. + The filename to look for. Must be a relative path. + path_dirs : sequence of str + The sequence of paths to look in for the file. + Walk through each element and join with ``filename``. + Only after ensuring the path resolves within the directory is it checked for existence. Returns ------- - Raises :exc:`IOError` or returns absolute path to file. + Raises :exc:`OSError` or returns absolute path to file. """ - - # If paths are quoted, abspath gets confused, strip them... - filename = filename.strip('"').strip("'") - # If the input is an absolute path, just check it exists - if os.path.isabs(filename) and os.path.isfile(filename): - return filename - - if path_dirs is None: - path_dirs = ("",) - elif isinstance(path_dirs, str): - path_dirs = (path_dirs,) - - for path in path_dirs: - if path == ".": - path = os.getcwd() # noqa: PLW2901 - testname = expand_path(os.path.join(path, filename)) - if os.path.isfile(testname): - return os.path.abspath(testname) + file_path = Path(filename) + + # If the input is an absolute path, reject it + if file_path.is_absolute(): + msg = f"{filename} is absolute, filefind only accepts relative paths." + raise OSError(msg) + + for path_str in path_dirs: + path = Path(path_str).absolute() + test_path = path / file_path + # os.path.abspath resolves '..', but Path.absolute() doesn't + # Path.resolve() does, but traverses symlinks, which we don't want + test_path = Path(os.path.abspath(test_path)) + if sys.version_info >= (3, 9): + if not test_path.is_relative_to(path): + # points outside root, e.g. via `filename='../foo'` + continue + else: + # is_relative_to is new in 3.9 + try: + test_path.relative_to(path) + except ValueError: + # points outside root, e.g. via `filename='../foo'` + continue + # make sure we don't call is_file before we know it's a file within a prefix + # GHSA-hrw6-wg82-cm62 - can leak password hash on windows. + if test_path.is_file(): + return os.path.abspath(test_path) msg = f"File {filename!r} does not exist in any of the search paths: {path_dirs!r}" raise OSError(msg) -def expand_path(s: str) -> str: - """Expand $VARS and ~names in a string, like a shell - - :Examples: - In [2]: os.environ['FOO']='test' - In [3]: expand_path('variable FOO is $FOO') - Out[3]: 'variable FOO is test' - """ - # This is a pretty subtle hack. When expand user is given a UNC path - # on Windows (\\server\share$\%username%), os.path.expandvars, removes - # the $ to get (\\server\share\%username%). I think it considered $ - # alone an empty var. But, we need the $ to remains there (it indicates - # a hidden share). - if os.name == "nt": - s = s.replace("$\\", "IPYTHON_TEMP") - s = os.path.expandvars(os.path.expanduser(s)) - if os.name == "nt": - s = s.replace("IPYTHON_TEMP", "$\\") - return s - - def import_item(name: str) -> Any: """Import and return ``bar`` given the string ``foo.bar``. Calling ``bar = import_item("foo.bar")`` is the functional equivalent of diff --git a/tests/test_utils.py b/tests/test_utils.py index 83d2a1d926..fdbfd60587 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -13,6 +13,7 @@ from jupyter_server.utils import ( check_pid, check_version, + filefind, is_namespace_package, path2url, run_sync_in_loop, @@ -125,3 +126,41 @@ def test_unix_socket_in_use(tmp_path): sock.listen(0) assert unix_socket_in_use(server_address) sock.close() + + +@pytest.mark.parametrize( + "filename, result", + [ + ("/foo", OSError), + ("../c/in-c", OSError), + ("in-a", "a/in-a"), + ("in-b", "b/in-b"), + ("in-both", "a/in-both"), + (r"\in-a", OSError), + ("not-found", OSError), + ], +) +def test_filefind(tmp_path, filename, result): + a = tmp_path / "a" + a.mkdir() + b = tmp_path / "b" + b.mkdir() + c = tmp_path / "c" + c.mkdir() + for parent in (a, b): + with parent.joinpath("in-both").open("w"): + pass + with a.joinpath("in-a").open("w"): + pass + with b.joinpath("in-b").open("w"): + pass + with c.joinpath("in-c").open("w"): + pass + + if isinstance(result, str): + found = filefind(filename, [str(a), str(b)]) + found_relative = Path(found).relative_to(tmp_path) + assert str(found_relative) == result + else: + with pytest.raises(result): + filefind(filename, [str(a), str(b)])