diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 02350a36..fefb5448 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -124,7 +124,18 @@ jobs: # https://github.com/libarchive/libarchive/blob/ad5a0b542c027883d7069f6844045e6788c7d70c/libarchive/ # archive_read_support_filter_lrzip.c#L68 sudo apt-get -y install libfuse2 fuse3 bzip2 pbzip2 pixz zstd unar lrzip lzop gcc liblzo2-dev - set -x + + - name: Install Dependencies For Unreleased Python Versions (Linux) + if: > + startsWith( matrix.os, 'ubuntu' ) && ( + matrix.python-version == '3.13.0-rc.2' || + matrix.python-version == '3.14.0-alpha.0') + run: | + #libgit2-dev is too old on Ubuntu 22.04. Leads to error about missing git2/sys/errors.h + #sudo apt-get -y install libgit2-dev + sudo apt-get -y install cmake + git clone --branch v1.8.1 --depth 1 https://github.com/libgit2/libgit2.git + ( cd libgit2 && mkdir build && cd build && cmake .. && cmake --build . && sudo cmake --build . -- install ) - name: Install Dependencies (MacOS) if: startsWith( matrix.os, 'macos' ) @@ -139,6 +150,14 @@ jobs: # Add brew installation binary folder to PATH so that command line tools like zstd can be found export PATH="$PATH:/usr/local/bin" + - name: Install Dependencies For Unreleased Python Versions (MacOS) + if: > + startsWith( matrix.os, 'macos' ) && ( + matrix.python-version == '3.13.0-rc.2' || + matrix.python-version == '3.14.0-alpha.0') + run: | + brew install libgit2 + - name: Install pip Dependencies run: | python3 -m pip install --upgrade pip diff --git a/AppImage/build-ratarmount-appimage.sh b/AppImage/build-ratarmount-appimage.sh index 362fe2a4..e712db1d 100644 --- a/AppImage/build-ratarmount-appimage.sh +++ b/AppImage/build-ratarmount-appimage.sh @@ -62,6 +62,18 @@ function installAppImagePythonPackages() fi "$APP_PYTHON_BIN" -I -m pip install --no-cache-dir ../core "$APP_PYTHON_BIN" -I -m pip install --no-cache-dir ..[full] + + # 
ratarmount-0.10.0-manylinux2014_x86_64.AppImage (the first one!) was 13.6 MB + # ratarmount-v0.11.3-manylinux2014_x86_64.AppImage was 13.6 MB + # ratarmount-0.12.0-manylinux2014_x86_64.AppImage was 26.3 MB thanks to an error with the trim-down script. + # ratarmount-0.15.0-x86_64.AppImage was 14.8 MB + # ratarmount-0.15.1-x86_64.AppImage was 13.3 MB (manylinux_2014) + # ratarmount-0.15.2-x86_64.AppImage was 11.7 MB (manylinux_2_28) + # At this point, with pyfatfs, the AppImage is/was 13.0 MB. Extracts to 45.1 MB + # This bloats the AppImage to 23.7 MB, which is still ok, I guess. Extracts to 83.1 MB + "$APP_PYTHON_BIN" -I -m pip install --no-cache-dir requests aiohttp sshfs smbprotocol 'pygit2<1.15' fsspec + # This bloats the AppImage to 38.5 MB :/. Extracts to 121.0 MB + "$APP_PYTHON_BIN" -I -m pip install --no-cache-dir s3fs gcsfs adlfs dropboxdrivefs } function installAppImageSystemLibraries() diff --git a/README.md b/README.md index 257494f9..4107acc4 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,10 @@ And in contrast to [tarindexer](https://github.com/devsnd/tarindexer), which als - **Union Mounting:** Multiple TARs, compressed files, and bind mounted folders can be mounted under the same mountpoint. - **Write Overlay:** A folder can be specified as write overlay. All changes below the mountpoint will be redirected to this folder and deletions are tracked so that all changes can be applied back to the archive. + - **Remote Files and Folders:** A remote archive or whole folder structure can be mounted similar to tools like [sshfs](https://github.com/libfuse/sshfs) thanks to the [filesystem_spec](https://github.com/fsspec/filesystem_spec) project. + These can be specified with URIs as explained in the section ["Remote Files"](#remote-files). + Supported remote protocols include: FTP, SFTP, HTTP, HTTPS, SSH, Git, Github, S3, Samba, Azure Datalake, Dropbox, Google Cloud Storage, ... 
+ *TAR compressions supported for random access:* @@ -102,7 +106,9 @@ And in contrast to [tarindexer](https://github.com/devsnd/tarindexer), which als 4. [File versions](#file-versions) 5. [Compressed non-TAR files](#compressed-non-tar-files) 6. [Xz and Zst Files](#xz-and-zst-files) - 7. [As a Library](#as-a-library) + 7. [Remote Files](#remote-files) + 8. [Writable Mounting](#writable-mounting) + 9. [As a Library](#as-a-library) # Installation @@ -506,6 +512,29 @@ lbzip2 -cd well-compressed-file.bz2 | createMultiFrameZstd $(( 4*1024*1024 )) > +# Remote Files + +The [fsspec](https://github.com/fsspec/filesystem_spec) API backend adds support for mounting many remote archives or folders: + + - `git://[path-to-repo:][ref@]path/to/file` + Uses the current path if no repository path is specified. + - `github://org:repo@[sha]/path-to/file-or-folder` + E.g. github://mxmlnkn:ratarmount@v0.15.2/tests/single-file.tar + - `http[s]://hostname[:port]/path-to/archive.rar` + - `s3://[endpoint-hostname[:port]]/bucket/single-file.tar` + Will default to AWS according to the Boto3 library defaults + when no endpoint is specified. Boto3 will check these environment + variables for credentials: + - `AWS_ACCESS_KEY_ID` + - `AWS_SECRET_ACCESS_KEY` + - `AWS_SESSION_TOKEN` + - `[s]ftp://[user[:password]@]hostname[:port]/path-to/archive.rar` + - `ssh://[user[:password]@]hostname[:port]/path-to/archive.rar` + - `smb://[workgroup;][user:password@]server[:port]/share/folder/file.tar` + +Many other fsspec-based projects may also work when installed. + + # Writable Mounting The `--write-overlay <folder>` option can be used to create a writable mount point. 
diff --git a/core/pyproject.toml b/core/pyproject.toml index 821bd38c..7e118ab4 100644 --- a/core/pyproject.toml +++ b/core/pyproject.toml @@ -72,9 +72,51 @@ full = [ # With Python 3.14, when building the wheel, I get: # /usr/bin/ld: cannot find /tmp/tmpcuw21d78/bin/isa-l.a: No such file or directory 'isal ~= 1.0; python_version < "3.14.0"', + # Pin to < 3.12 because of https://github.com/nathanhi/pyfatfs/issues/41 + 'pyfatfs ~= 1.0; python_version < "3.12.0"', + # fsspec: + "requests", + "aiohttp", + "sshfs", + # Need newer pyopenssl than comes with Ubuntu 22.04. + # https://github.com/ronf/asyncssh/issues/690 + "pyopenssl>=23", + "smbprotocol", + # pygit2 1.15 introduced many breaking changes! + # https://github.com/libgit2/pygit2/issues/1316 + # https://github.com/fsspec/filesystem_spec/pull/1703 + "pygit2<1.15", + "fsspec", + "s3fs", + "gcsfs", + "adlfs", + "dropboxdrivefs", ] bzip2 = ["rapidgzip >= 0.13.1"] gzip = ["indexed_gzip >= 1.6.3, < 2.0"] +fsspec = [ + # Copy-pasted from fsspec[full] list. Some were excluded because they are too unproportionally large. + "requests", + "aiohttp", + "sshfs", + # Need newer pyopenssl than comes with Ubuntu 22.04. + # https://github.com/ronf/asyncssh/issues/690 + "pyopenssl>=23", + "smbprotocol", # build error in Python 3.13 + # pygit2 1.15 introduced many breaking changes! 
+ # https://github.com/libgit2/pygit2/issues/1316 + # https://github.com/fsspec/filesystem_spec/pull/1703 + "pygit2<1.15", # build error in Python 3.13 because it requires libgit2 1.8.1 + "fsspec", + "s3fs", + "gcsfs", + "adlfs", # build error in Python 3.13 + "dropboxdrivefs", + # "dask", "distributed" : ~34 MB, ~10 MB gzip-compressed + # "pyarrow >= 1" : ~196 MB, ~60 MB gzip-compressed, build error in Python 3.13 + # "ocifs" : ~350 MB + # "panel" : only for fsspec GUI +] # Need >= 4.1 because of https://github.com/markokr/rarfile/issues/73 rar = ["rarfile ~= 4.1"] # For now, only optional (and installed in the AppImage) because it is unstable and depends on many other packages diff --git a/core/ratarmountcore/SQLiteIndex.py b/core/ratarmountcore/SQLiteIndex.py index c94ad1e4..40e7f3e3 100644 --- a/core/ratarmountcore/SQLiteIndex.py +++ b/core/ratarmountcore/SQLiteIndex.py @@ -183,6 +183,7 @@ def __init__( preferMemory: bool = False, indexMinimumFileCount: int = 0, backendName: str = '', + ignoreCurrentFolder: bool = False, ): """ indexFilePath @@ -206,6 +207,9 @@ def __init__( exceeded. It may also be written to a file if a gzip index is stored. backendName The backend name to be stored as metadata and to determine compatibility of found indexes. + ignoreCurrentFolder + If true, then do not store the index into the current path. This was introduced for URL + opened as file objects but may be useful for any archive given via a file object. 
""" if not backendName: @@ -217,7 +221,7 @@ def __init__( self.indexFilePath: Optional[str] = None self.encoding = encoding self.possibleIndexFilePaths = SQLiteIndex.getPossibleIndexFilePaths( - indexFilePath, indexFolders, archiveFilePath + indexFilePath, indexFolders, archiveFilePath, ignoreCurrentFolder ) # stores which parent folders were last tried to add to database and therefore do exist self.parentFolderCache: List[Tuple[str, str]] = [] @@ -247,7 +251,10 @@ def __init__( @staticmethod def getPossibleIndexFilePaths( - indexFilePath: Optional[str], indexFolders: Optional[List[str]] = None, archiveFilePath: Optional[str] = None + indexFilePath: Optional[str], + indexFolders: Optional[List[str]] = None, + archiveFilePath: Optional[str] = None, + ignoreCurrentFolder: bool = False, ) -> List[str]: if indexFilePath: return [] if indexFilePath == ':memory:' else [os.path.abspath(os.path.expanduser(indexFilePath))] @@ -265,7 +272,7 @@ def getPossibleIndexFilePaths( if folder: indexPath = os.path.join(folder, indexPathAsName) possibleIndexFilePaths.append(os.path.abspath(os.path.expanduser(indexPath))) - else: + elif not ignoreCurrentFolder: possibleIndexFilePaths.append(defaultIndexFilePath) return possibleIndexFilePaths @@ -563,6 +570,9 @@ def reloadIndexReadOnly(self): self.sqlConnection = SQLiteIndex._openSqlDb(f"file:{uriPath}?mode=ro", uri=True, check_same_thread=False) def _reloadIndexOnDisk(self): + if self.printDebug >= 2: + print("[Info] Try to reopen SQLite database on disk at:", self.indexFilePath) + print("other index paths:", self.possibleIndexFilePaths) if not self.indexFilePath or self.indexFilePath != ':memory:' or not self.sqlConnection: return diff --git a/core/ratarmountcore/SQLiteIndexedTar.py b/core/ratarmountcore/SQLiteIndexedTar.py index 45bb2dc9..3a776893 100644 --- a/core/ratarmountcore/SQLiteIndexedTar.py +++ b/core/ratarmountcore/SQLiteIndexedTar.py @@ -704,6 +704,7 @@ def __init__( self.tarFileName = tarFileName else: raise 
RatarmountError("At least one of tarFileName and fileObject arguments should be set!") + self._fileNameIsURL = re.match('[A-Za-z0-9]*://', self.tarFileName) is not None # If no fileObject given, then self.tarFileName is the path to the archive to open. if not fileObject: @@ -771,16 +772,19 @@ def __init__( if indexFolders and isinstance(indexFolders, str): indexFolders = [indexFolders] + archiveFilePath = self.tarFileName if not self.isFileObject or self._fileNameIsURL else None + super().__init__( SQLiteIndex( indexFilePath, indexFolders=indexFolders, - archiveFilePath=None if self.isFileObject else self.tarFileName, + archiveFilePath=archiveFilePath, encoding=self.encoding, checkMetadata=self._checkMetadata, printDebug=self.printDebug, indexMinimumFileCount=indexMinimumFileCount, backendName='SQLiteIndexedTar', + ignoreCurrentFolder=self.isFileObject and self._fileNameIsURL, ), clearIndexCache=clearIndexCache, ) @@ -829,9 +833,9 @@ def __init__( # Open new database when we didn't find an existing one. if not self.index.indexIsLoaded(): - # Simply open in memory without an error even if writeIndex is True but when not indication - # for a index file location has been given. - if writeIndex and (indexFilePath or not self.isFileObject): + # Simply open in memory without an error even if writeIndex is True but when no indication + # for an index file location has been given. 
+ if writeIndex and (indexFilePath or self._getArchivePath() or not self.isFileObject): self.index.openWritable() else: self.index.openInMemory() @@ -890,6 +894,9 @@ def __exit__(self, exception_type, exception_value, exception_traceback): if not self.isFileObject and self.rawFileObject: self.rawFileObject.close() + def _getArchivePath(self) -> Optional[str]: + return None if self.tarFileName == '' else self.tarFileName + def _storeMetadata(self) -> None: argumentsToSave = [ 'mountRecursively', @@ -902,6 +909,7 @@ def _storeMetadata(self) -> None: ] argumentsMetadata = json.dumps({argument: getattr(self, argument) for argument in argumentsToSave}) + # The second argument must be a path to a file to call os.stat with, not simply a file name. self.index.storeMetadata(argumentsMetadata, None if self.isFileObject else self.tarFileName) self.index.storeMetadataKeyValue('isGnuIncremental', '1' if self._isGnuIncremental else '0') diff --git a/core/ratarmountcore/compressions.py b/core/ratarmountcore/compressions.py index dbbfb44a..63aba74f 100644 --- a/core/ratarmountcore/compressions.py +++ b/core/ratarmountcore/compressions.py @@ -99,7 +99,12 @@ def checkZlibHeader(file): 'bz2': CompressionInfo( ['bz2', 'bzip2'], ['tb2', 'tbz', 'tbz2', 'tz2'], - [CompressionModuleInfo('rapidgzip', lambda x, parallelization=0: rapidgzip.IndexedBzip2File(x, parallelization=parallelization))], # type: ignore + [ + CompressionModuleInfo( + 'rapidgzip', + (lambda x, parallelization=0: rapidgzip.IndexedBzip2File(x, parallelization=parallelization)), + ) + ], lambda x: (x.read(4)[:3] == b'BZh' and x.read(6) == (0x314159265359).to_bytes(6, 'big')), ), 'gz': CompressionInfo( @@ -532,9 +537,21 @@ def detectCompression( ) -> Optional[str]: # isinstance(fileobj, io.IOBase) does not work for everything, e.g., for paramiko.sftp_file.SFTPFile # because it does not inherit from io.IOBase. Therefore, do duck-typing and test for required methods. 
- if any(not hasattr(fileobj, method) for method in ['seekable', 'seek', 'read', 'tell']) or not fileobj.seekable(): + expectedMethods = ['seekable', 'seek', 'read', 'tell'] + isFileObject = all(hasattr(fileobj, method) for method in expectedMethods) + if not isFileObject or not fileobj.seekable(): + if printDebug >= 2: + seekable = fileobj.seekable() if isFileObject else None + print( + f"[Warning] Cannot detect compression for given Python object {fileobj} " + f"because it does not look like a file object or is not seekable ({seekable})." + ) if printDebug >= 3: - print("[Warning] Cannot detect compression for give Python object that does not look like a file object.") + print(dir(fileobj)) + for name in ['readable', 'seekable', 'writable', 'closed', 'tell']: + method = getattr(fileobj, name, None) + if method is not None: + print(f" fileobj.{name}:", method() if callable(method) else method) traceback.print_exc() return None diff --git a/core/ratarmountcore/factory.py b/core/ratarmountcore/factory.py index 81d65d97..255ffae2 100644 --- a/core/ratarmountcore/factory.py +++ b/core/ratarmountcore/factory.py @@ -18,6 +18,11 @@ from .ZipMountSource import ZipMountSource from .LibarchiveMountSource import LibarchiveMountSource +try: + import fsspec +except ImportError: + fsspec = None # type: ignore + def _openRarMountSource(fileOrPath: Union[str, IO[bytes]], **options) -> Optional[MountSource]: try: @@ -105,6 +110,68 @@ def _openPySquashfsImage(fileOrPath: Union[str, IO[bytes]], **options) -> Option def openMountSource(fileOrPath: Union[str, IO[bytes]], **options) -> MountSource: printDebug = int(options.get("printDebug", 0)) if isinstance(options.get("printDebug", 0), int) else 0 + closeFileOnError = False + + if isinstance(fileOrPath, str): + + splitURI = fileOrPath.split('://', 1) + protocol = splitURI[0] if len(splitURI) > 1 else '' + + if fsspec and protocol: + if protocol == 'file': + fileOrPath = splitURI[1] + else: + name = 'fsspec' + try: + if printDebug >= 
3: + print(f"[Info] Try to open with {name}") + + openFile = fsspec.open(fileOrPath) + assert isinstance(openFile, fsspec.core.OpenFile) + + if printDebug >= 3: + print(f"[Info] Opened file {name}") + + # Note that http:// URLs are always files. Folders are only regex-parsed HTML files! + # By checking with isdir instead of isfile, we give isdir a higher precedence. + # TODO the filesystems are not uniform! http:// expects the arguments to isdir with prefixed + # protocol while other filesystem implementations are fine with only the path. + # https://github.com/ray-project/ray/issues/26423#issuecomment-1179561181 + # Disable pylint errors. See https://github.com/fsspec/filesystem_spec/issues/1678 + if openFile.fs.isdir(openFile.path): # pylint: disable=no-member + # TODO check if it is a filesystem or a file and create a MountSource if necessary. + raise Exception("Expected file.") + + # This open call can fail with FileNotFoundError, IsADirectoryError, and probably others. + result = openFile.open() # pylint: disable=no-member + closeFileOnError = True + + # Check that seeking works. May fail when, e.g., the HTTP server does not support range requests. + # Use https://github.com/danvk/RangeHTTPServer for testing purposes because + # "python3 -m http.server 9000" does not have range support. Use "python3 -m RangeHTTPServer 9000". + result.seek(1) + result.read(1) + result.seek(0) + + if 'tarFileName' not in options: + options['tarFileName'] = fileOrPath + fileOrPath = result + except Exception as exception: + if closeFileOnError: + result.close() + if printDebug >= 1: + print(f"[Warning] Trying to open with {name} raised an exception:", exception) + if printDebug >= 3: + traceback.print_exc() + + # Note that asycnssh SSHFile does not implement seekable correctly! 
+ # https://github.com/fsspec/sshfs/pull/50 + if 'sshfs.file.SSHFile' in str(type(fileOrPath)): + fileOrPath.seekable = lambda: True + + if not isinstance(fileOrPath, str) and printDebug >= 3: + print(f"[Info] Opened remote file with fsspec.") + joinedFileName = '' if isinstance(fileOrPath, str): @@ -158,4 +225,9 @@ def openMountSource(fileOrPath: Union[str, IO[bytes]], **options) -> MountSource if joinedFileName and not isinstance(fileOrPath, str): return SingleFileMountSource(joinedFileName, fileOrPath) + if closeFileOnError: + close = getattr(fileOrPath, 'close', None) + if close: + close() + raise CompressionError(f"Archive to open ({str(fileOrPath)}) has unrecognized format!") diff --git a/pyproject.toml b/pyproject.toml index c4dd1154..7f2391d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ xz = ["ratarmountcore[xz]"] zip = ["ratarmountcore[zip]"] zstd = ["ratarmountcore[zstd]"] squashfs = ["ratarmountcore[squashfs]"] +fsspec = ["ratarmountcore[fsspec]"] [project.scripts] ratarmount = "ratarmount:cli" diff --git a/ratarmount.py b/ratarmount.py index bf65a52d..d03ffa37 100755 --- a/ratarmount.py +++ b/ratarmount.py @@ -51,6 +51,12 @@ except ImportError: pass +try: + import fsspec +except ImportError: + fsspec = None # type: ignore + + import ratarmountcore as core from ratarmountcore import ( AutoMountLayer, @@ -535,7 +541,7 @@ def __init__(self, pathToMount: Union[str, List[str]], mountPoint: str, **option pass hadPathsToMount = bool(pathToMount) - pathToMount = list(filter(os.path.exists, pathToMount)) + pathToMount = list(filter(lambda x: os.path.exists(x) or '://' in x, pathToMount)) if hadPathsToMount and not pathToMount: raise ValueError("No paths to mount left over after filtering!") @@ -679,8 +685,9 @@ def __del__(self) -> None: pass try: - if self.mountPointFd is not None: - os.close(self.mountPointFd) + mountPointFd = getattr(self, 'mountPointFd', None) + if mountPointFd is not None: + os.close(mountPointFd) except Exception as 
exception: if self.printDebug >= 1: print("[Warning] Failed to close mount point folder descriptor because of:", exception) @@ -908,6 +915,18 @@ def checkInputFileType( ) -> Tuple[str, Optional[str]]: """Raises an exception if it is not an accepted archive format else returns the real path and compression type.""" + splitURI = tarFile.split('://') + if len(splitURI) > 1: + protocol = splitURI[0] + if fsspec is None: + raise argparse.ArgumentTypeError("Detected an URI, but fsspec was not found. Try: pip install fsspec.") + if protocol not in fsspec.available_protocols(): + raise argparse.ArgumentTypeError( + f"URI: {tarFile} uses an unknown protocol. Protocols known by fsspec are: " + + ', '.join(fsspec.available_protocols()) + ) + return tarFile, None + if not os.path.isfile(tarFile): raise argparse.ArgumentTypeError(f"File '{tarFile}' is not a file!") tarFile = os.path.realpath(tarFile) @@ -1810,7 +1829,7 @@ def main(): try: cli(args) - except (FileNotFoundError, RatarmountError, argparse.ArgumentTypeError) as exception: + except (FileNotFoundError, RatarmountError, argparse.ArgumentTypeError, ValueError) as exception: print("[Error]", exception) if debug >= 3: traceback.print_exc() diff --git a/tests/requirements-tests.txt b/tests/requirements-tests.txt index a4293897..2fa3d47f 100644 --- a/tests/requirements-tests.txt +++ b/tests/requirements-tests.txt @@ -3,16 +3,20 @@ build codespell fusepy flake8 +impacket indexed_bzip2 indexed_gzip indexed_zstd mypy pandas +pyftpdlib pylint +pyopenssl>=23 pytest pytest-xdist python-xz pytype +rangehttpserver twine wheel zstandard diff --git a/tests/runtests.sh b/tests/runtests.sh index 77c566e0..56bd261d 100755 --- a/tests/runtests.sh +++ b/tests/runtests.sh @@ -1778,6 +1778,240 @@ checkStatfsWriteOverlay() } +checkURLProtocolFile() +{ + checkFileInTAR 'file://tests/single-file.tar' bar d3b07384d113edec49eaa6238ad5ff00 || + returnError "$LINENO" 'Failed to read via file:// protocol' +} + + +checkURLProtocolHTTP() +{ + local 
pid + # Failed alternatives to set up a test HTTP server: + # python3 -m http.server -b 127.0.0.1 8000 & # Does not support range requests + # python3 -m RangeHTTPServer -b 127.0.0.1 8000 & # Client has spurious errors every 5th test or so with this. + # TODO Debug this... Bug could be in fsspec/implementations/http.py, aiohttp, RangeHTTPServer, ... + # sudo apt install busybox-static + # busybox httpd -f -p 8000 & # Does not support range requests. + # sudo apt install ruby-webrick + ruby -run -e httpd . --port 8000 --bind-address=127.0.0.1 &>/dev/null & + pid=$! + sleep 1 + checkFileInTAR 'http://127.0.0.1:8000/tests/single-file.tar' bar d3b07384d113edec49eaa6238ad5ff00 || + returnError "$LINENO" 'Failed to read from HTTP server' + kill $pid +} + + +checkURLProtocolFTP() +{ + local pid user password + # python3 -m pip install pyftpdlib pyopenssl>=23 + user='pqvFUMqbqp' + password='ioweb123GUIweb' + port=8021 + python3 -m pyftpdlib --user="$user" --password="$password" --port "$port" --interface 127.0.0.1 2>/dev/null & + pid=$! + sleep 1 + checkFileInTAR "ftp://$user:$password@127.0.0.1:8021/tests/single-file.tar" bar d3b07384d113edec49eaa6238ad5ff00 || + returnError "$LINENO" 'Failed to read from FTP server' + kill $pid +} + + +checkURLProtocolSSH() +{ + local pid fingerprint + # rm -f ssh_host_key; ssh-keygen -q -N "" -C "" -t ed25519 -f ssh_host_key + cat < ssh_host_key +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW +QyNTUxOQAAACA6luxe0F9n0zBbFW6DExxYAMz2tinaHPb9IwLmreJMzgAAAIhe3ftsXt37 +bAAAAAtzc2gtZWQyNTUxOQAAACA6luxe0F9n0zBbFW6DExxYAMz2tinaHPb9IwLmreJMzg +AAAECRurZq3m4qFnBUpJG3+SwhdL410zFoUODgRIU4aLTbpjqW7F7QX2fTMFsVboMTHFgA +zPa2Kdoc9v0jAuat4kzOAAAAAAECAwQF +-----END OPENSSH PRIVATE KEY----- +EOF + # Only works on server. Also not hashed in not in known_hosts format. 
+ #fingerprint=$( ssh-keygen -lf ssh_host_key ) + [[ -f ~/.ssh/id_ed25519 ]] || ssh-keygen -q -N "" -t ed25519 -f ~/.ssh/id_ed25519 + cat ~/.ssh/id_ed25519.pub >> ssh_user_ca + + python3 tests/start-asyncssh-server.py & + pid=$! + sleep 1 + fingerprint=$( ssh-keyscan -H -p 8022 127.0.0.1 2>/dev/null ) # Requires the server started above. + 'grep' -q -F "$fingerprint" ~/.ssh/known_hosts || echo "$fingerprint" >> ~/.ssh/known_hosts + + checkFileInTAR 'ssh://127.0.0.1:8022/tests/single-file.tar' bar d3b07384d113edec49eaa6238ad5ff00 || + returnError "$LINENO" 'Failed to read from SSH server' + kill $pid +} + + +checkURLProtocolGit() +{ + # https://github.com/fsspec/filesystem_spec/blob/360e46d13069b0426565429f9f610bf704cfa062/ + # fsspec/implementations/git.py#L28C14-L28C58 + # > "git://[path-to-repo[:]][ref@]path/to/file" (but the actual + # > file path should not contain "@" or ":"). + checkFileInTAR 'git://v0.15.2@tests/single-file.tar' bar d3b07384d113edec49eaa6238ad5ff00 || + returnError "$LINENO" 'Failed to read from Git repository' + + # https://github.com/fsspec/filesystem_spec/blob/360e46d13069b0426565429f9f610bf704cfa062/ + # fsspec/implementations/github.py#L26 + # https://github.com/fsspec/filesystem_spec/blob/360e46d13069b0426565429f9f610bf704cfa062/ + # fsspec/implementations/github.py#L202 + # https://github.com/fsspec/filesystem_spec/blob/360e46d13069b0426565429f9f610bf704cfa062/fsspec/utils.py#L37 + # + # - "github://path/file", in which case you must specify org, repo and + # may specify sha in the extra args + # - 'github://org:repo@/precip/catalog.yml', where the org and repo are + # part of the URI + # - 'github://org:repo@sha/precip/catalog.yml', where the sha is also included + # + # ``sha`` can be the full or abbreviated hex of the commit you want to fetch + # from, or a branch or tag name (so long as it doesn't contain special characters + # like "/", "?", which would have to be HTTP-encoded). 
+ checkFileInTAR 'github://mxmlnkn:ratarmount@v0.15.2/tests/single-file.tar' bar d3b07384d113edec49eaa6238ad5ff00 || + returnError "$LINENO" 'Failed to read from GitHub' +} + + +checkURLProtocolS3() +{ + local pid weedFolder + + if [[ ! -f weed ]]; then + wget 'https://github.com/seaweedfs/seaweedfs/releases/download/3.74/linux_amd64_large_disk.tar.gz' + tar -xf 'linux_amd64_large_disk.tar.gz' + fi + [[ -x weed ]] || chmod u+x weed + + weedFolder=$( mktemp -d ) + TMP_FILES_TO_CLEANUP+=( "$weedFolder" ) + ./weed server -dir="$weedFolder" -s3 -s3.port 8053 -idleTimeout=30 -ip 127.0.0.1 2>weed.log & + pid=$! + + # Wait for port to open + python3 -c ' +import socket +import time +from contextlib import closing + +t0 = time.time() +with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: + for i in range(10): + if sock.connect_ex(("127.0.0.1", 8053)) == 0: + print(f"Weed port opened after {time.time() - t0:.1f} s.") + break + time.sleep(5) +' + kill -0 "$pid" || returnError "$LINENO" 'SeaweedFS server is not running' + + # Create bucket and upload test file + python3 -c " +import boto3 + +def list_buckets(client): + result = client.list_buckets() + return [x['Name'] for x in result['Buckets']] if 'Buckets' in result else [] + +def list_bucket_files(client, bucket_name): + result = client.list_objects_v2(Bucket=bucket_name) + return [x['Key'] for x in result['Contents']] if 'Contents' in result else [] + +client = boto3.client( + 's3', endpoint_url='http://127.0.0.1:8053', + aws_access_key_id = 'aaaaaaaaaaaaaaaaaaaa', + aws_secret_access_key = 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +) +bucket_name = 'bucket' +if bucket_name not in list_buckets(client): + client.create_bucket(Bucket=bucket_name) +client.upload_file('tests/single-file.tar', bucket_name, 'single-file.tar') +" + + # At last, test ratarmount. + # TODO No way to specify an endpoint currently! Need to extend the URL format for that... 
+ checkFileInTAR 's3://127.0.0.1:8053/bucket/single-file.tar' bar d3b07384d113edec49eaa6238ad5ff00 || + returnError "$LINENO" 'Failed to read from S3 server' + kill $pid + + 'rm' -r "$weedFolder" +} + + +checkURLProtocolSamba() +{ + exit 1 # Cannot automate because of the myriad of bugs and issues explained below. + + local pid user password + + user='pqvFUMqbqp' + password='ioweb123GUIweb' + + # Unusable because tests should not be run as root. + if false; then + sudo apt install samba + cat <&dev/null & + pid=$! + + checkFileInTAR "smb://$user:$password@127.0.0.1:8445/test-share/single-file.tar" bar d3b07384d113edec49eaa6238ad5ff00 || + returnError "$LINENO" 'Failed to read from Samba server' +} + + +checkRemoteSupport() +{ + checkURLProtocolFile + checkURLProtocolSamba + checkURLProtocolGit + checkURLProtocolHTTP + checkURLProtocolFTP + checkURLProtocolSSH + checkURLProtocolS3 + + # TODO Add and test IPFS? + + exit 1 +} + + rm -f ratarmount.{stdout,stderr}.log # Linting only to be done locally because in CI it is in separate steps @@ -2100,6 +2334,7 @@ if [[ ! -f tests/2k-recursive-tars.tar ]]; then bzip2 -q -d -k tests/2k-recursive-tars.tar.bz2 fi +checkRemoteSupport checkStatfs || returnError "$LINENO" 'Statfs failed!' checkStatfsWriteOverlay || returnError "$LINENO" 'Statfs with write overlay failed!' checkSymbolicLinkRecursion || returnError "$LINENO" 'Symbolic link recursion failed!' diff --git a/tests/start-asyncssh-server.py b/tests/start-asyncssh-server.py new file mode 100644 index 00000000..d5e43482 --- /dev/null +++ b/tests/start-asyncssh-server.py @@ -0,0 +1,19 @@ +import asyncio + +import asyncssh + + +async def start_server(): + await asyncssh.listen( + "127.0.0.1", + 8022, + server_host_keys=["ssh_host_key"], + authorized_client_keys="ssh_user_ca", + sftp_factory=True, + allow_scp=True, + ) + + +loop = asyncio.new_event_loop() +loop.run_until_complete(start_server()) +loop.run_forever()