Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

288 yaml dephi input #290

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
11 changes: 7 additions & 4 deletions imagedephi/gui/api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,15 @@ def get_redaction_plan(
if not input_path.is_dir():
raise HTTPException(status_code=404, detail="Input directory not found")

# TODO: Add support for multiple input directories in the UI
if rules_path:
if not Path(rules_path).exists():
raise HTTPException(status_code=404, detail="Rules file not found")
return show_redaction_plan(
input_path, override_rules=Path(rules_path), limit=limit, offset=offset, update=update
[input_path], override_rules=Path(rules_path), limit=limit, offset=offset, update=update
)._asdict()

return show_redaction_plan(input_path, limit=limit, offset=offset, update=update)._asdict()
return show_redaction_plan([input_path], limit=limit, offset=offset, update=update)._asdict()


@router.post("/redact/")
Expand All @@ -163,10 +164,12 @@ def redact(
raise HTTPException(status_code=404, detail="Input directory not found")
if not output_path.is_dir():
raise HTTPException(status_code=404, detail="Output directory not found")

# TODO: Add support for multiple input directories in the UI
if rules_path:
redact_images(input_path, output_path, override_rules=Path(rules_path))
redact_images([input_path], output_path, override_rules=Path(rules_path))
else:
redact_images(input_path, output_path)
redact_images([input_path], output_path)


@router.websocket("/ws")
Expand Down
4 changes: 2 additions & 2 deletions imagedephi/gui/utils/directory.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
from pathlib import Path

from imagedephi.redact import iter_image_files
from imagedephi.redact import iter_image_dirs


class DirectoryData:
Expand All @@ -26,7 +26,7 @@ def __init__(self, directory: Path):
]

self.child_images = [
{"name": image.name, "path": image} for image in list(iter_image_files(directory))
{"name": image.name, "path": image} for image in list(iter_image_dirs([directory]))
]
self.child_yaml_files = [
{"name": yaml_file.name, "path": yaml_file} for yaml_file in _iter_yaml_files(directory)
Expand Down
54 changes: 43 additions & 11 deletions imagedephi/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import pooch
from tqdm import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm
import yaml

from imagedephi.gui.app import app
from imagedephi.redact import ProfileChoice, redact_images, show_redaction_plan
Expand Down Expand Up @@ -129,8 +130,20 @@ def imagedephi(

@imagedephi.command(no_args_is_help=True)
@global_options
@click.argument("input-path", type=click.Path(exists=True, readable=True, path_type=Path))
@click.argument(
"input-path",
type=click.Path(exists=True, readable=True, path_type=Path),
required=False,
nargs=-1,
)
@click.option("-i", "--index", default=1, help="Starting index of the images to redact.", type=int)
@click.option(
"-c",
"--command-file",
type=click.Path(exists=True, readable=True, path_type=Path),
help="File containing redaction command. "
"[INPUT_PATH] must be provided as an argument or in the command file.",
)
@click.option(
"-o",
"--output-dir",
Expand All @@ -143,7 +156,7 @@ def imagedephi(
@click.pass_context
def run(
ctx,
input_path: Path,
input_path: list[Path],
output_dir: Path,
override_rules: Path | None,
profile: str,
Expand All @@ -153,29 +166,48 @@ def run(
verbose,
log_file,
index,
command_file: Path,
):
"""Perform the redaction of images."""
params = _check_parent_params(ctx, profile, override_rules, recursive, quiet, verbose, log_file)
if params["verbose"] or params["quiet"] or params["log_file"]:
set_logging_config(params["verbose"], params["quiet"], params["log_file"])
command_params = {}
if command_file:
with command_file.open() as f:
command_params = yaml.safe_load(f)
command_input: list[Path] = [Path(path) for path in command_params["input_path"]]
command_output = Path(command_params["output_dir"])
if input_path is None and command_params.get("input_path") is None:
raise click.BadParameter(
"Input path must be provided either in the command file or as an argument."
)
for path in command_input:
if not Path(path).exists():
raise click.BadParameter(f"Path {path} does not exist.")

redact_images(
input_path,
output_dir,
override_rules=params["override_rules"],
rename=rename,
recursive=params["recursive"],
profile=params["profile"],
index=index,
input_path or command_input,
output_dir or command_output,
override_rules=params["override_rules"] or command_params.get("override_rules"),
rename=rename if "rename" not in command_params else command_params["rename"],
recursive=(
params["recursive"]
if "recursive" not in command_params
else command_params["recursive"]
),
profile=params["profile"] if "profile" not in command_params else command_params["profile"],
index=index if "index" not in command_params else command_params["index"],
)


@imagedephi.command(no_args_is_help=True)
@global_options
@click.argument("input-path", type=click.Path(exists=True, readable=True, path_type=Path))
@click.argument("input-path", type=click.Path(exists=True, readable=True, path_type=Path), nargs=-1)
@click.pass_context
def plan(
ctx,
input_path: Path,
input_path: list[Path],
profile: str,
override_rules: Path | None,
recursive: bool,
Expand Down
4 changes: 2 additions & 2 deletions imagedephi/redact/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .redact import ProfileChoice, iter_image_files, redact_images, show_redaction_plan
from .redact import ProfileChoice, iter_image_dirs, redact_images, show_redaction_plan

__all__ = ["iter_image_files", "redact_images", "show_redaction_plan", "ProfileChoice"]
__all__ = ["iter_image_dirs", "redact_images", "show_redaction_plan", "ProfileChoice"]
141 changes: 82 additions & 59 deletions imagedephi/redact/redact.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,21 +77,27 @@ def get_base_rules(profile: str = "") -> Ruleset:
return base_rule_set


def iter_image_files(directory: Path, recursive: bool = False) -> Generator[Path, None, None]:
"""Given a directory return an iterable of available images."""
for child in sorted(directory.iterdir()):
# Use first four bits to check if its a tiff file
if child.is_file():
file_format = None
try:
file_format = get_file_format_from_path(child)
except PermissionError:
# Don't attempt to redact inaccessible files
pass
if file_format:
yield child
elif child.is_dir() and recursive:
yield from iter_image_files(child, recursive)
def iter_image_dirs(paths: list[Path], recursive: bool = False) -> Generator[Path, None, None]:
    """
    Yield the image files found among the given paths.

    Each entry of ``paths`` may be a file or a directory. Files are passed to
    ``iter_image_files`` directly. Directories are listed in sorted order; their
    immediate files are always checked, and their subdirectories are only
    descended into when ``recursive`` is true. Entries that are neither a file
    nor a directory are skipped.
    """
    for path in paths:
        if path.is_file():
            yield from iter_image_files(path)
        elif path.is_dir():
            # Path.iterdir() yields entries in arbitrary order; sort in both modes so
            # the resulting file order (and anything numbered from it) is deterministic.
            children = sorted(path.iterdir())
            if recursive:
                yield from iter_image_dirs(children, recursive)
            else:
                for child in children:
                    if child.is_file():
                        yield from iter_image_files(child)

def iter_image_files(path: Path) -> Generator[Path, None, None]:
    """
    Yield ``path`` when ``get_file_format_from_path`` returns a truthy format for it.

    Nothing is yielded when no format is detected or when reading the file
    raises ``PermissionError``.
    """
    try:
        detected_format = get_file_format_from_path(path)
    except PermissionError:
        # Don't attempt to redact inaccessible files
        return
    if detected_format:
        yield path


def generator_to_list_with_progress(
Expand Down Expand Up @@ -125,7 +131,7 @@ def create_redact_dir_and_manifest(base_output_dir: Path, time_stamp: str) -> tu


def redact_images(
input_path: Path,
input_paths: list[Path],
output_dir: Path,
override_rules: Path | None = None,
rename: bool = True,
Expand All @@ -148,15 +154,13 @@ def redact_images(
output_file_name_base = (
override_ruleset.output_file_name if override_ruleset else base_rules.output_file_name
)
# Convert to a list in order to get the length
if input_path.is_dir():
with logging_redirect_tqdm(loggers=[logger]):
images_to_redact = generator_to_list_with_progress(
iter_image_files(input_path, recursive),
progress_bar_desc="Collecting files to redact...",
)
else:
images_to_redact = [input_path]
images_to_redact = []

with logging_redirect_tqdm(loggers=[logger]):
images_to_redact += generator_to_list_with_progress(
iter_image_dirs(input_paths, recursive),
progress_bar_desc="Collecting files to redact...",
)

output_file_counter = 1
output_file_max = len(images_to_redact)
Expand Down Expand Up @@ -203,8 +207,16 @@ def redact_images(
failed_manifest_file.touch()

if recursive:
if image_file.parent in input_paths:
failed_parent_index = input_paths.index(image_file.parent)
for ancestor in image_file.parents:
if ancestor in input_paths:
failed_parent_index = input_paths.index(ancestor)
break
nested_failed_dir = Path(
str(image_file).replace(str(input_path), str(failed_dir), 1)
str(image_file).replace(
str(input_paths[failed_parent_index]), str(failed_dir), 1
)
).parent
nested_failed_dir.mkdir(parents=True, exist_ok=True)

Expand Down Expand Up @@ -240,8 +252,15 @@ def redact_images(
redaction_plan.execute_plan()
output_parent_dir = redact_dir
if recursive:
if image_file.parent in input_paths:
parent_index = input_paths.index(image_file.parent)
for ancestor in image_file.parents:
if ancestor in input_paths:
parent_index = input_paths.index(ancestor)
break

output_parent_dir = Path(
str(image_file).replace(str(input_path), str(redact_dir), 1)
str(image_file).replace(str(input_paths[parent_index]), str(redact_dir), 1)
).parent
output_parent_dir.mkdir(parents=True, exist_ok=True)
output_path = (
Expand Down Expand Up @@ -350,7 +369,7 @@ def _sort_data(data):


def show_redaction_plan(
input_path: Path,
input_paths: list[Path],
override_rules: Path | None = None,
recursive=False,
profile="",
Expand All @@ -363,16 +382,14 @@ def show_redaction_plan(
if override_rules:
override_ruleset = _get_user_rules(override_rules)
starting_logging_level = logger.getEffectiveLevel()
if input_path.is_dir():
with logging_redirect_tqdm(loggers=[logger]):
image_paths = generator_to_list_with_progress(
iter_image_files(input_path, recursive),
progress_bar_desc="Collecting files to redact...",
)
else:
with logging_redirect_tqdm(loggers=[logger]):
image_paths = generator_to_list_with_progress(
iter_image_dirs(input_paths, recursive),
progress_bar_desc="Collecting files to redact...",
)
if len(image_paths) == 1:
# For a single image, log all details of the plan
logger.setLevel(logging.DEBUG)
image_paths = [input_path]

global tags_used

Expand Down Expand Up @@ -426,31 +443,37 @@ def _create_redaction_plan_report():
sorted_dict = OrderedDict(list(sorted_dict.items())[offset * limit : (offset + 1) * limit])
images_plan = namedtuple("images_plan", ["data", "total", "tags", "missing_rules"])

if input_path.is_dir():
# Provide a summary if the input path is a directory of images
logger.info(f"ImageDePHI summary for {input_path}:")
incomplete = [
file_path
for file_path in redaction_plan_report
if not redaction_plan_report[file_path]["comprehensive"]
]
logger.info(
f"{len(image_paths) - (len(incomplete) + len(unprocessable_image_messages))}"
" images able to be redacted with the provided rule set."
)
if incomplete:
logger.info(f"{len(incomplete)} file(s) could not be redacted with the provided rules.")
logger.info("The following images could not be redacted given the current rule set:")
for file in incomplete:
logger.info(f"\t{file}")
for input_path in input_paths:
if input_path.is_dir():
# Provide a summary if the input path is a directory of images
logger.info(f"ImageDePHI summary for {input_path}:")
incomplete = [
file_path
for file_path in redaction_plan_report
if not redaction_plan_report[file_path]["comprehensive"]
]
logger.info(
"For more details about individual images that couldn't be redacted, run "
"'imagedephi plan <unredactable_file>'"
)
if unprocessable_image_messages:
logger.info(
f"{len(unprocessable_image_messages)} file(s) could not be processed by ImageDePHI."
f"{len(image_paths) - (len(incomplete) + len(unprocessable_image_messages))}"
" images able to be redacted with the provided rule set."
)
if incomplete:
logger.info(
f"{len(incomplete)} file(s) could not be redacted with the provided rules."
)
logger.info(
"The following images could not be redacted given the current rule set:"
)
for file in incomplete:
logger.info(f"\t{file}")
logger.info(
"For more details about individual images that couldn't be redacted, run "
"'imagedephi plan <unredactable_file>'"
)
if unprocessable_image_messages:
logger.info(
f"""{len(unprocessable_image_messages)}
file(s) could not be processed by ImageDePHI."""
)

# Report exceptions outside of the directory level report
for message in unprocessable_image_messages:
Expand Down
17 changes: 11 additions & 6 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@ def retrieve_file(file_name: str, output_path: Path) -> Path:


@pytest.fixture
def secret_metadata_image() -> Path:
return retrieve_file("secret_metadata.tiff", Path(__file__).with_name("data") / "input")
def secret_metadata_image() -> list[Path]:
    """Return a one-element list holding the path from ``retrieve_file`` for the TIFF."""
    input_dir = Path(__file__).with_name("data") / "input"
    return [retrieve_file("secret_metadata.tiff", input_dir)]


@pytest.fixture
Expand All @@ -43,13 +44,17 @@ def test_image_svs() -> Path:


@pytest.fixture
def test_image_dcm() -> Path:
return retrieve_file("test_dcm_image.dcm", Path(__file__).with_name("data") / "input" / "dcm")
def test_image_dcm() -> list[Path]:
    """Return a one-element list holding the path from ``retrieve_file`` for the DICOM."""
    dcm_dir = Path(__file__).with_name("data") / "input" / "dcm"
    return [retrieve_file("test_dcm_image.dcm", dcm_dir)]


@pytest.fixture
def test_image_svs_no_extension() -> Path:
return retrieve_file("test_svs_no_extension", Path(__file__).with_name("data") / "input")
def test_image_svs_no_extension() -> list[Path]:
    """Return a one-element list holding the path from ``retrieve_file`` for the SVS."""
    input_dir = Path(__file__).with_name("data") / "input"
    return [retrieve_file("test_svs_no_extension", input_dir)]


@pytest.fixture
Expand Down
Loading
Loading