Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce bind mounting (and thus --bind args) in containers. #1568

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions cwltool/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,9 @@ def add_volumes(
any_path_okay: bool = False,
) -> None:
"""Append volume mappings to the runtime option list."""
stage_source_dir = os.environ.get(
"STAGE_SRC_DIR", os.path.join(tempfile.gettempdir(), "cwl-stg-src-dir")
)
container_outdir = self.builder.outdir
for key, vol in (itm for itm in pathmapper.items() if itm[1].staged):
host_outdir_tgt = None # type: Optional[str]
Expand All @@ -706,6 +709,8 @@ def add_volumes(
"the designated output directory, also know as "
"$(runtime.outdir): {}".format(vol)
)
if stage_source_dir and vol.resolved.startswith(stage_source_dir):
continue # path is already staged; only mount the host directory (at the end of this function)
if vol.type in ("File", "Directory"):
self.add_file_or_directory_volume(runtime, vol, host_outdir_tgt)
elif vol.type == "WritableFile":
Expand All @@ -721,6 +726,11 @@ def add_volumes(
runtime, vol, host_outdir_tgt, secret_store, tmpdir_prefix
)
pathmapper.update(key, new_path, vol.target, vol.type, vol.staged)
if stage_source_dir and pathmapper.stagedir != container_outdir:
# mount a single host directory for all staged input files
self.append_volume(
runtime, stage_source_dir, pathmapper.stagedir, writable=True
)

def run(
self,
Expand Down
31 changes: 28 additions & 3 deletions cwltool/pathmapper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import collections
import logging
import os
import tempfile
import shutil
import stat
import urllib
import uuid
Expand Down Expand Up @@ -93,7 +95,10 @@ def visit(
basedir: str,
copy: bool = False,
staged: bool = False,
stage_source_dir: Optional[str] = None,
) -> None:
if stage_source_dir:
os.makedirs(stage_source_dir, exist_ok=True)
stagedir = cast(Optional[str], obj.get("dirname")) or stagedir
tgt = os.path.join(
stagedir,
Expand Down Expand Up @@ -150,7 +155,15 @@ def visit(
else os.path.join(os.path.dirname(deref), rl)
)
st = os.lstat(deref)

if stage_source_dir:
staged_source_file = os.path.join(
stage_source_dir, os.path.basename(deref)
)
try:
os.link(deref, staged_source_file)
except OSError:
shutil.copyfile(deref, staged_source_file)
deref = staged_source_file
self._pathmap[path] = MapperEnt(
deref, tgt, "WritableFile" if copy else "File", staged
)
Expand All @@ -163,19 +176,31 @@ def visit(
)

def setup(self, referenced_files: List[CWLObjectType], basedir: str) -> None:

# Go through each file and set the target to its own directory along
# with any secondary files.
stagedir = self.stagedir
stage_source_dir = os.environ.get(
"STAGE_SRC_DIR", os.path.join(tempfile.gettempdir(), "cwl-stg-src-dir")
)
for fob in referenced_files:
staging_uuid = str(uuid.uuid4())
if self.separateDirs:
stagedir = os.path.join(self.stagedir, "stg%s" % uuid.uuid4())
# this is what the path will be inside of the container environment
stagedir = os.path.join(self.stagedir, "stg%s" % staging_uuid)
# if STAGE_SRC_DIR is set, this is where input paths will be linked/staged at
unique_stage_source_dir = None
if stage_source_dir:
unique_stage_source_dir = os.path.join(
stage_source_dir, "stg%s" % staging_uuid
)
os.makedirs(stage_source_dir, exist_ok=True)
self.visit(
fob,
stagedir,
basedir,
copy=cast(bool, fob.get("writable", False)),
staged=True,
stage_source_dir=unique_stage_source_dir,
)

def mapper(self, src: str) -> MapperEnt:
Expand Down