From b2d4bbd0e19b8907e1d68719539cc61760d34c1b Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Sat, 1 Oct 2022 11:55:19 -0700 Subject: [PATCH 01/14] add process_pyproject_toml(), process_setup_cfg(), merge_configuration_file() in nbstripout/_utils.py --- nbstripout/_utils.py | 99 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py index abc9e17..999a0e1 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -1,5 +1,8 @@ -from collections import defaultdict +from argparse import Namespace +import os import sys +from collections import defaultdict +from typing import Any, Dict, Optional __all__ = ["pop_recursive", "strip_output", "strip_zeppelin_output", "MetadataError"] @@ -153,3 +156,97 @@ def strip_output(nb, keep_output, keep_count, extra_keys=[], drop_empty_cells=Fa for field in keys['cell']: pop_recursive(cell, field) return nb + + +def process_pyproject_toml(toml_file_path: str) -> Optional[Dict[str, Any]]: + """Extract config mapping from pyproject.toml file.""" + try: + import tomllib # python 3.11+ + except ModuleNotFoundError: + import tomli as tomllib + + with open(toml_file_path, 'rb') as f: + return tomllib.load(f).get('tool', {}).get('nbstripout', None) + + +def process_setup_cfg(cfg_file_path) -> Optional[Dict[str, Any]]: + """Extract config mapping from setup.cfg file.""" + import configparser + + reader = configparser.ConfigParser() + reader.read(cfg_file_path) + if not reader.has_section('nbstripout'): + return None + + return reader['nbstripout'] + + +def merge_configuration_file(args: Namespace) -> Namespace: + """Merge flags from config files into args.""" + CONFIG_FILES = { + 'pyproject.toml': process_pyproject_toml, + 'setup.cfg': process_setup_cfg, + } + BOOL_TYPES = { + 'yes': True, + 'true': True, + 'on': True, + 'no': False, + 'false': False, + 'off': False + } + + # Traverse the file tree common to all files given as argument looking for + # a configuration file + config_path = os.path.commonpath([os.path.abspath(file) for file in args.files]) if args.files else os.getcwd() + config = None + while True: + for config_file, processor in CONFIG_FILES.items(): + config_file_path = os.path.join(config_path, config_file) + if os.path.isfile(config_file_path): + config = processor(config_file_path) + if config is not None: + break + if config is not None: + break + config_path, tail = os.path.split(config_path) + if not tail: + break + + if config: + # merge config + for name, value in config.items(): + if value is None: + continue + # additive string flags + if name in {'extra_keys'}: + args.extra_keys = f"{getattr(args, 'extra_keys', '')} {value}".strip() + # singular string flags + elif name in {'mode'}: + args.mode = value + # integer flags + elif name in {'max_size'}: + args.max_size = int(value) + # boolean flags + elif name in { + 'dry_run', + 'keep_count', + 'keep_output', + 'drop_empty_cells', + 'drop_tagged_cells', + 'strip_init_cells', + '_global', + '_system', + 'force', + 'textconv', + }: + if isinstance(value, str): + value = BOOL_TYPES.get(value, value) + if not isinstance(value, bool): + raise ValueError(f"Invalid value for {name}: {value}, expected bool") + if value: + setattr(args, name.replace('-', '_'), value) + else: + raise ValueError(f'{name} in the config file is not a valid option') + + return args From 758cebcdf7e7832fef802d8f8aa683b2c1c5c110 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Sat, 1 Oct 2022 11:54:43 -0700 Subject: [PATCH 02/14] call merge_configuration_file() in nbstripout/_nbstripout.py after parser.parse_args() --- nbstripout/_nbstripout.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py index 213d3da..5fd1a77 100644 --- a/nbstripout/_nbstripout.py +++ b/nbstripout/_nbstripout.py @@ -120,7 +120,7 @@ import sys import warnings -from nbstripout._utils import strip_output, strip_zeppelin_output +from nbstripout._utils import strip_output, strip_zeppelin_output, merge_configuration_file try: # Jupyter >= 4 from nbformat import read, write, NO_CONVERT @@ -377,7 +377,7 @@ def main(): help='Space separated list of extra keys to strip ' 'from metadata, e.g. metadata.foo cell.metadata.bar') parser.add_argument('--drop-empty-cells', action='store_true', - help='Remove cells where `source` is empty or contains only whitepace') + help='Remove cells where `source` is empty or contains only whitespace') parser.add_argument('--drop-tagged-cells', default='', help='Space separated list of cell-tags that remove an entire cell') parser.add_argument('--strip-init-cells', action='store_true', @@ -402,7 +402,8 @@ def main(): help='Prints stripped files to STDOUT') parser.add_argument('files', nargs='*', help='Files to strip output from') - args = parser.parse_args() + + args = merge_configuration_file(parser.parse_args()) git_config = ['git', 'config'] From e5cd304b89f08432ad8b1f4f2ee1a4b4e11c828f Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Sat, 1 Oct 2022 11:55:32 -0700 Subject: [PATCH 03/14] add tests/test_read_config_files.py --- tests/test_read_config_files.py | 212 ++++++++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 tests/test_read_config_files.py diff --git a/tests/test_read_config_files.py b/tests/test_read_config_files.py new file mode 100644 index 0000000..59237d7 --- /dev/null +++ b/tests/test_read_config_files.py @@ -0,0 +1,212 @@ +from typing import Any + +import pytest +from nbstripout._utils import merge_configuration_file +from argparse import Namespace +from pathlib import Path + + +def assert_namespace(namespace: Namespace, expected_props: dict[str, Any]): + keys = (prop for prop in dir(namespace) if not prop.startswith("_")) + actual_props = {key: getattr(namespace, key) for key in keys} + assert actual_props == expected_props + + +@pytest.fixture +def test_nb(tmp_path: Path) -> Path: + test_nb = tmp_path / "test_me.ipynb" + test_nb.touch() + return test_nb + + +@pytest.fixture +def nested_test_nb(tmp_path: Path) -> Path: + (tmp_path / "nested/file").mkdir(parents=True) + test_nb = tmp_path / "nested/file/test_me.ipynb" + test_nb.touch() + return test_nb + + +def test_no_config_file(tmp_path: Path, test_nb: Path) -> None: + + original_args = {"files": [test_nb]} + args = Namespace(**original_args) + assert merge_configuration_file(args) + assert_namespace(args, original_args) + + +def test_non_nested_pyproject_toml_empty(tmp_path: Path, test_nb: Path) -> None: + (tmp_path / "pyproject.toml").write_text('[tool.other]\nprop="value"\n') + args = Namespace(files=[test_nb]) + assert merge_configuration_file(args) + assert_namespace(args, {"files": args.files}) + + +def test_non_nested_pyproject_toml_non_empty(tmp_path: Path, test_nb: Path) -> None: + (tmp_path / "pyproject.toml").write_text( + "[tool.nbstripout]\ndrop_empty_cells=true\n", + ) + args = Namespace(files=[test_nb]) + assert merge_configuration_file(args) + assert_namespace(args, {"files": args.files, "drop_empty_cells": True}) + + +def test_non_nested_setup_cfg_non_empty(tmp_path: Path, test_nb: Path) -> None: + (tmp_path / "setup.cfg").write_text( + "[other]\ndrop_empty_cells = yes\n", + ) + args = Namespace(files=[test_nb]) + assert merge_configuration_file(args) + assert_namespace(args, {"files": args.files}) + + +def test_non_nested_setup_cfg_empty(tmp_path: Path, test_nb: Path) -> None: + (tmp_path / "setup.cfg").write_text( + "[nbstripout]\ndrop_empty_cells = yes\n", + ) + args = Namespace(files=[test_nb]) + assert merge_configuration_file(args) + assert_namespace(args, {"files": args.files, "drop_empty_cells": True}) + + +def test_nested_file(tmp_path: Path, nested_test_nb: Path) -> None: + (tmp_path / "pyproject.toml").write_text( + "[tool.nbstripout]\ndrop_empty_cells=true\n", + ) + args = Namespace(files=[nested_test_nb]) + assert merge_configuration_file(args) + assert_namespace(args, {"files": args.files, "drop_empty_cells": True}) + + +def test_common_path_nested_file_do_not_load(tmp_path: Path) -> None: + (tmp_path / "nested/file").mkdir(parents=True) + (tmp_path / "nested/other").mkdir() + test_nb1 = tmp_path / "nested/file/test_me.ipynb" + test_nb1.touch() + test_nb2 = tmp_path / "nested/other/test_me.ipynb" + test_nb2.touch() + + (tmp_path / "nested/file/pyproject.toml").write_text( + "[tool.nbstripout]\ndrop_empty_cells=true\n", + ) + args = Namespace(files=[test_nb1, test_nb2]) + assert merge_configuration_file(args) + assert_namespace(args, {"files": args.files}) + + +def test_common_path_nested_file_do_load(tmp_path: Path) -> None: + (tmp_path / "nested/file").mkdir(parents=True) + (tmp_path / "nested/other").mkdir() + test_nb1 = tmp_path / "nested/file/test_me.ipynb" + test_nb1.touch() + test_nb2 = tmp_path / "nested/other/test_me.ipynb" + test_nb2.touch() + + (tmp_path / "nested/pyproject.toml").write_text( + "[tool.nbstripout]\ndrop_empty_cells=true\n", + ) + args = Namespace(files=[test_nb1, test_nb2]) + assert merge_configuration_file(args) + assert_namespace(args, {"files": args.files, "drop_empty_cells": True}) + + +def test_continue_search_if_no_config_found( + tmp_path: Path, nested_test_nb: Path +) -> None: + (tmp_path / "nested/pyproject.toml").write_text( + '[tool.other]\nprop = "value"\n', + ) + (tmp_path / "pyproject.toml").write_text( + "[tool.nbstripout]\ndrop_empty_cells = true\n", + ) + args = Namespace(files=[nested_test_nb]) + assert merge_configuration_file(args) + assert_namespace(args, {"files": args.files, "drop_empty_cells": True}) + + +def test_stop_search_if_config_found(tmp_path: Path, nested_test_nb: Path) -> None: + (tmp_path / "nested/pyproject.toml").write_text( + "[tool.nbstripout]\n", + ) + (tmp_path / "pyproject.toml").write_text( + "[tool.nbstripout]\ndrop_empty_cells = true\n", + ) + args = Namespace(files=[nested_test_nb]) + assert merge_configuration_file(args) + assert_namespace(args, {"files": args.files}) + + +def test_dont_load_false(tmp_path: Path, test_nb: Path) -> None: + (tmp_path / "setup.cfg").write_text( + "[nbstripout]\ndrop_empty_cells = no\n", + ) + args = Namespace(files=[test_nb]) + assert merge_configuration_file(args) + assert_namespace(args, {"files": args.files}) + + +def test_list_value_space_sep_string_pyproject_toml( + tmp_path: Path, test_nb: Path +) -> None: + (tmp_path / "pyproject.toml").write_text( + '[tool.nbstripout]\nextra_keys="foo bar"\n', + ) + args = Namespace(files=[test_nb]) + assert merge_configuration_file(args) + assert_namespace(args, {"files": args.files, "extra_keys": "foo bar"}) + + +def test_list_value_setup_cfg(tmp_path: Path, test_nb: Path) -> None: + (tmp_path / "setup.cfg").write_text( + "[nbstripout]\nextra_keys=foo bar\n", + ) + args = Namespace(files=[test_nb]) + assert merge_configuration_file(args) + assert_namespace(args, {"files": args.files, "extra_keys": "foo bar"}) + + +def test_unknown_property(tmp_path: Path, test_nb: Path) -> None: + (tmp_path / "pyproject.toml").write_text( + "[tool.nbstripout]\nunknown_prop=true\n", + ) + args = Namespace(files=[test_nb]) + with pytest.raises(ValueError): + merge_configuration_file(args) + + +def test_non_bool_value_for_bool_property(tmp_path: Path, test_nb: Path) -> None: + (tmp_path / "pyproject.toml").write_text( + '[tool.nbstripout]\ndrop_empty_cells="invalid"\n', + ) + args = Namespace(files=[test_nb]) + with pytest.raises(ValueError): + merge_configuration_file(args) + + +def test_non_bool_value_for_bool_property_in_setup_cfg( + tmp_path: Path, test_nb: Path +) -> None: + (tmp_path / "setup.cfg").write_text( + "[nbstripout]\ndrop_empty_cells=ok\n", + ) + args = Namespace(files=[test_nb]) + with pytest.raises(ValueError): + merge_configuration_file(args) + + +def test_non_list_value_for_list_property(tmp_path: Path, test_nb: Path) -> None: + (tmp_path / "pyproject.toml").write_text( + "[tool.nbstripout]\nexclude=true\n", + ) + args = Namespace(files=[test_nb]) + with pytest.raises(ValueError): + merge_configuration_file(args) + + +def test_merge_with_cli_additive_str_property(tmp_path: Path, test_nb: Path) -> None: + (tmp_path / "pyproject.toml").write_text( + '[tool.nbstripout]\nextra_keys="foo"\n', + ) + args = Namespace(files=[test_nb], extra_keys="bar") + assert merge_configuration_file(args) + assert_namespace(args, {"files": args.files, "extra_keys": "bar foo"}) From 5d45bd0d38c87a1fed83032a245a429313ec781b Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Sun, 2 Oct 2022 07:21:43 -0700 Subject: [PATCH 04/14] make type hints py36 compatible --- tests/test_read_config_files.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_read_config_files.py b/tests/test_read_config_files.py index 59237d7..80f35fb 100644 --- a/tests/test_read_config_files.py +++ b/tests/test_read_config_files.py @@ -1,12 +1,12 @@ -from typing import Any +from argparse import Namespace +from typing import Any, Dict +from pathlib import Path import pytest from nbstripout._utils import merge_configuration_file -from argparse import Namespace -from pathlib import Path -def assert_namespace(namespace: Namespace, expected_props: dict[str, Any]): +def assert_namespace(namespace: Namespace, expected_props: Dict[str, Any]): keys = (prop for prop in dir(namespace) if not prop.startswith("_")) actual_props = {key: getattr(namespace, key) for key in keys} assert actual_props == expected_props @@ -27,7 +27,7 @@ def nested_test_nb(tmp_path: Path) -> Path: return test_nb -def test_no_config_file(tmp_path: Path, test_nb: Path) -> None: +def test_no_config_file(test_nb: Path) -> None: original_args = {"files": [test_nb]} args = Namespace(**original_args) From 4c500c9d908c22ee7bbd3d93dce18ffbd5082445 Mon Sep 17 00:00:00 2001 From: Andrew O'Brien Date: Thu, 8 Dec 2022 16:45:18 +1000 Subject: [PATCH 05/14] re-design config reading to give command line precedence. Not all tests working. --- nbstripout/_nbstripout.py | 11 +- nbstripout/_utils.py | 97 +++++++-------- tests/test_read_config_files.py | 201 ++++++++++++++++++++------------ 3 files changed, 184 insertions(+), 125 deletions(-) diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py index 53c672b..bbc4133 100644 --- a/nbstripout/_nbstripout.py +++ b/nbstripout/_nbstripout.py @@ -109,7 +109,7 @@ *.ipynb diff=ipynb """ -from argparse import ArgumentParser, RawDescriptionHelpFormatter +from argparse import ArgumentParser, Namespace, RawDescriptionHelpFormatter import collections import io import json @@ -351,7 +351,7 @@ def status(git_config, install_location=INSTALL_LOCATION_LOCAL, verbose=False): return 1 -def main(): +def setup_commandline() -> Namespace: parser = ArgumentParser(epilog=__doc__, formatter_class=RawDescriptionHelpFormatter) task = parser.add_mutually_exclusive_group() task.add_argument('--dry-run', action='store_true', @@ -403,7 +403,12 @@ def main(): parser.add_argument('files', nargs='*', help='Files to strip output from') - args = merge_configuration_file(parser.parse_args()) + return parser + + +def main(): + parser = setup_commandline() + args = merge_configuration_file(parser) git_config = ['git', 'config'] diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py index 999a0e1..683df9f 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -1,7 +1,8 @@ -from argparse import Namespace +from argparse import ArgumentParser, Namespace, _StoreTrueAction, _StoreFalseAction import os import sys from collections import defaultdict +from functools import partial from typing import Any, Dict, Optional __all__ = ["pop_recursive", "strip_output", "strip_zeppelin_output", "MetadataError"] @@ -169,7 +170,7 @@ def process_pyproject_toml(toml_file_path: str) -> Optional[Dict[str, Any]]: return tomllib.load(f).get('tool', {}).get('nbstripout', None) -def process_setup_cfg(cfg_file_path) -> Optional[Dict[str, Any]]: +def process_setup_cfg(cfg_file_path, parser: ArgumentParser) -> Optional[Dict[str, Any]]: """Extract config mapping from setup.cfg file.""" import configparser @@ -178,28 +179,38 @@ def process_setup_cfg(cfg_file_path) -> Optional[Dict[str, Any]]: if not reader.has_section('nbstripout'): return None - return reader['nbstripout'] + raw_config = reader['nbstripout'] + dict_config = dict(raw_config) + # special processing of boolean options, to convert various configparser bool types to true/false + for a in parser._actions: + if a.dest in raw_config and isinstance(a, (_StoreTrueAction, _StoreFalseAction)): + dict_config[a.dest] = raw_config.getboolean(a.dest) -def merge_configuration_file(args: Namespace) -> Namespace: + return dict_config + + +def merge_configuration_file(parser: ArgumentParser, args_str=None) -> Namespace: """Merge flags from config files into args.""" CONFIG_FILES = { 'pyproject.toml': process_pyproject_toml, - 'setup.cfg': process_setup_cfg, - } - BOOL_TYPES = { - 'yes': True, - 'true': True, - 'on': True, - 'no': False, - 'false': False, - 'off': False + 'setup.cfg': partial(process_setup_cfg, parser=parser), } + # parse args as-is to look for configuration files + args = parser.parse_args(args_str) + # Traverse the file tree common to all files given as argument looking for # a configuration file - config_path = os.path.commonpath([os.path.abspath(file) for file in args.files]) if args.files else os.getcwd() - config = None + # TODO: make this more like Black: + # By default Black looks for pyproject.toml starting from the common base directory of all files and + # directories passed on the command line. If it’s not there, it looks in parent directories. It stops looking + # when it finds the file, or a .git directory, or a .hg directory, or the root of the file system, whichever + # comes first. + # if no files are given, start from cwd + config_path = os.path.commonpath([os.path.abspath(file) for file in args.files]) if args.files else os.path.abspath(os.getcwd()) + print(f"{config_path =}") + config: Optional[Dict[str, Any]] = None while True: for config_file, processor in CONFIG_FILES.items(): config_file_path = os.path.join(config_path, config_file) @@ -213,40 +224,30 @@ def merge_configuration_file(args: Namespace) -> Namespace: if not tail: break + # black starts with default arguments (from click), updates that with the config file, + # then applies command line arguments. this all happens in the click framework, before main() is called + # we can use parser.set_defaults + print(f'getting config {config}') if config: - # merge config - for name, value in config.items(): - if value is None: - continue - # additive string flags - if name in {'extra_keys'}: - args.extra_keys = f"{getattr(args, 'extra_keys', '')} {value}".strip() - # singular string flags - elif name in {'mode'}: - args.mode = value - # integer flags - elif name in {'max_size'}: - args.max_size = int(value) - # boolean flags - elif name in { - 'dry_run', - 'keep_count', - 'keep_output', - 'drop_empty_cells', - 'drop_tagged_cells', - 'strip_init_cells', - '_global', - '_system', - 'force', - 'textconv', - }: - if isinstance(value, str): - value = BOOL_TYPES.get(value, value) - if not isinstance(value, bool): - raise ValueError(f"Invalid value for {name}: {value}, expected bool") - if value: - setattr(args, name.replace('-', '_'), value) - else: + # check all arguments are valid + print(f"have a config: {config.keys()}") + valid_args = vars(args).keys() + for name in config.keys(): + if name not in valid_args: raise ValueError(f'{name} in the config file is not a valid option') + # separate into default-overrides and special treatment + extra_keys: Optional[str] = None + if 'extra_keys' in config: + extra_keys = config['extra_keys'] + del config['extra_keys'] + + # merge the configuration options as new defaults, and re-parse the arguments + parser.set_defaults(**config) + args = parser.parse_args(args_str) + + # merge extra_keys using set union + if extra_keys: + args.extra_keys = ' '.join(sorted(set(extra_keys.split()) | set(args.extra_keys.split()))) + return args diff --git a/tests/test_read_config_files.py b/tests/test_read_config_files.py index 80f35fb..8628bcf 100644 --- a/tests/test_read_config_files.py +++ b/tests/test_read_config_files.py @@ -1,15 +1,16 @@ -from argparse import Namespace +from argparse import ArgumentParser, Namespace from typing import Any, Dict from pathlib import Path import pytest +from nbstripout._nbstripout import setup_commandline from nbstripout._utils import merge_configuration_file -def assert_namespace(namespace: Namespace, expected_props: Dict[str, Any]): - keys = (prop for prop in dir(namespace) if not prop.startswith("_")) - actual_props = {key: getattr(namespace, key) for key in keys} - assert actual_props == expected_props +def assert_namespace(actual_namespace: Namespace, expected_namespace: Namespace): + actual_namespace_dict = vars(actual_namespace) + expected_namespace_dict = vars(expected_namespace) + assert actual_namespace_dict == expected_namespace_dict @pytest.fixture @@ -27,58 +28,93 @@ def nested_test_nb(tmp_path: Path) -> Path: return test_nb -def test_no_config_file(test_nb: Path) -> None: +@pytest.fixture +def parser() -> ArgumentParser: + return setup_commandline() + - original_args = {"files": [test_nb]} - args = Namespace(**original_args) - assert merge_configuration_file(args) - assert_namespace(args, original_args) +def test_no_config_file(test_nb: Path, parser: ArgumentParser) -> None: + args_str = [str(test_nb)] + original_args = parser.parse_args(args_str) + args = merge_configuration_file(parser, args_str) + assert args == original_args -def test_non_nested_pyproject_toml_empty(tmp_path: Path, test_nb: Path) -> None: +def test_non_nested_pyproject_toml_empty(tmp_path: Path, test_nb: Path, parser: ArgumentParser) -> None: (tmp_path / "pyproject.toml").write_text('[tool.other]\nprop="value"\n') - args = Namespace(files=[test_nb]) - assert merge_configuration_file(args) - assert_namespace(args, {"files": args.files}) + args_str = [str(test_nb)] + original_args = parser.parse_args(args_str) + args = merge_configuration_file(parser, args_str) + assert args == original_args -def test_non_nested_pyproject_toml_non_empty(tmp_path: Path, test_nb: Path) -> None: +def test_non_nested_pyproject_toml_non_empty(tmp_path: Path, test_nb: Path, parser: ArgumentParser) -> None: (tmp_path / "pyproject.toml").write_text( "[tool.nbstripout]\ndrop_empty_cells=true\n", ) - args = Namespace(files=[test_nb]) - assert merge_configuration_file(args) - assert_namespace(args, {"files": args.files, "drop_empty_cells": True}) + args_str = [str(test_nb)] + expected_args = parser.parse_args(args_str) + expected_args.drop_empty_cells = True + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) -def test_non_nested_setup_cfg_non_empty(tmp_path: Path, test_nb: Path) -> None: +def test_non_nested_setup_cfg_wrong_section(tmp_path: Path, test_nb: Path, parser: ArgumentParser) -> None: + # option seems valid, but not in nbstripout section so skip (tmp_path / "setup.cfg").write_text( "[other]\ndrop_empty_cells = yes\n", ) - args = Namespace(files=[test_nb]) - assert merge_configuration_file(args) - assert_namespace(args, {"files": args.files}) + args_str = [str(test_nb)] + expected_args = parser.parse_args(args_str) + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) -def test_non_nested_setup_cfg_empty(tmp_path: Path, test_nb: Path) -> None: +def test_non_nested_setup_cfg_non_empty(tmp_path: Path, test_nb: Path, parser: ArgumentParser) -> None: (tmp_path / "setup.cfg").write_text( "[nbstripout]\ndrop_empty_cells = yes\n", ) - args = Namespace(files=[test_nb]) - assert merge_configuration_file(args) - assert_namespace(args, {"files": args.files, "drop_empty_cells": True}) + args_str = [str(test_nb)] + expected_args = parser.parse_args(args_str) + expected_args.drop_empty_cells = True + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) -def test_nested_file(tmp_path: Path, nested_test_nb: Path) -> None: +def test_nofiles_pyproject_toml_non_empty(tmp_path: Path, parser: ArgumentParser) -> None: (tmp_path / "pyproject.toml").write_text( "[tool.nbstripout]\ndrop_empty_cells=true\n", ) - args = Namespace(files=[nested_test_nb]) - assert merge_configuration_file(args) - assert_namespace(args, {"files": args.files, "drop_empty_cells": True}) + args_str = [] + expected_args = parser.parse_args(args_str) + expected_args.drop_empty_cells = True + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) + + +def test_nofiles_setup_cfg_empty(tmp_path: Path, test_nb: Path, parser: ArgumentParser) -> None: + (tmp_path / "setup.cfg").write_text( + "[nbstripout]\ndrop_empty_cells = yes\n", + ) + args_str = [] + expected_args = parser.parse_args(args_str) + expected_args.drop_empty_cells = True + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) -def test_common_path_nested_file_do_not_load(tmp_path: Path) -> None: +def test_nested_file(tmp_path: Path, nested_test_nb: Path, parser: ArgumentParser) -> None: + (tmp_path / "pyproject.toml").write_text( + "[tool.nbstripout]\ndrop_empty_cells=true\n", + ) + args_str = [str(nested_test_nb)] + expected_args = parser.parse_args(args_str) + expected_args.drop_empty_cells = True + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) + + +def test_common_path_nested_file_do_not_load(tmp_path: Path, parser: ArgumentParser) -> None: (tmp_path / "nested/file").mkdir(parents=True) (tmp_path / "nested/other").mkdir() test_nb1 = tmp_path / "nested/file/test_me.ipynb" @@ -89,12 +125,13 @@ def test_common_path_nested_file_do_not_load(tmp_path: Path) -> None: (tmp_path / "nested/file/pyproject.toml").write_text( "[tool.nbstripout]\ndrop_empty_cells=true\n", ) - args = Namespace(files=[test_nb1, test_nb2]) - assert merge_configuration_file(args) - assert_namespace(args, {"files": args.files}) + args_str = [str(test_nb1), str(test_nb2)] + expected_args = parser.parse_args(args_str) + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) -def test_common_path_nested_file_do_load(tmp_path: Path) -> None: +def test_common_path_nested_file_do_load(tmp_path: Path, parser: ArgumentParser) -> None: (tmp_path / "nested/file").mkdir(parents=True) (tmp_path / "nested/other").mkdir() test_nb1 = tmp_path / "nested/file/test_me.ipynb" @@ -105,13 +142,15 @@ def test_common_path_nested_file_do_load(tmp_path: Path) -> None: (tmp_path / "nested/pyproject.toml").write_text( "[tool.nbstripout]\ndrop_empty_cells=true\n", ) - args = Namespace(files=[test_nb1, test_nb2]) - assert merge_configuration_file(args) - assert_namespace(args, {"files": args.files, "drop_empty_cells": True}) + args_str = [str(test_nb1), str(test_nb2)] + expected_args = parser.parse_args(args_str) + expected_args.drop_empty_cells = True + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) def test_continue_search_if_no_config_found( - tmp_path: Path, nested_test_nb: Path + tmp_path: Path, nested_test_nb: Path, parser: ArgumentParser ) -> None: (tmp_path / "nested/pyproject.toml").write_text( '[tool.other]\nprop = "value"\n', @@ -119,94 +158,108 @@ def test_continue_search_if_no_config_found( (tmp_path / "pyproject.toml").write_text( "[tool.nbstripout]\ndrop_empty_cells = true\n", ) - args = Namespace(files=[nested_test_nb]) - assert merge_configuration_file(args) - assert_namespace(args, {"files": args.files, "drop_empty_cells": True}) + args_str = [str(nested_test_nb)] + expected_args = parser.parse_args(args_str) + expected_args.drop_empty_cells = True + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) -def test_stop_search_if_config_found(tmp_path: Path, nested_test_nb: Path) -> None: +def test_stop_search_if_config_found(tmp_path: Path, nested_test_nb: Path, parser: ArgumentParser) -> None: (tmp_path / "nested/pyproject.toml").write_text( "[tool.nbstripout]\n", ) (tmp_path / "pyproject.toml").write_text( "[tool.nbstripout]\ndrop_empty_cells = true\n", ) - args = Namespace(files=[nested_test_nb]) - assert merge_configuration_file(args) - assert_namespace(args, {"files": args.files}) + args_str = [str(nested_test_nb)] + expected_args = parser.parse_args(args_str) + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) -def test_dont_load_false(tmp_path: Path, test_nb: Path) -> None: +def test_dont_load_false(tmp_path: Path, test_nb: Path, parser: ArgumentParser) -> None: (tmp_path / "setup.cfg").write_text( "[nbstripout]\ndrop_empty_cells = no\n", ) - args = Namespace(files=[test_nb]) - assert merge_configuration_file(args) - assert_namespace(args, {"files": args.files}) + args_str = [str(test_nb)] + expected_args = parser.parse_args(args_str) + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) def test_list_value_space_sep_string_pyproject_toml( - tmp_path: Path, test_nb: Path + tmp_path: Path, test_nb: Path, parser: ArgumentParser ) -> None: (tmp_path / "pyproject.toml").write_text( '[tool.nbstripout]\nextra_keys="foo bar"\n', ) - args = Namespace(files=[test_nb]) - assert merge_configuration_file(args) - assert_namespace(args, {"files": args.files, "extra_keys": "foo bar"}) + args_str = [str(test_nb)] + expected_args = parser.parse_args(args_str) + # Note: current methodology sorts extra_keys alphabetically + expected_args.extra_keys = "bar foo" + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) -def test_list_value_setup_cfg(tmp_path: Path, test_nb: Path) -> None: +def test_list_value_setup_cfg(tmp_path: Path, test_nb: Path, parser: ArgumentParser) -> None: (tmp_path / "setup.cfg").write_text( "[nbstripout]\nextra_keys=foo bar\n", ) - args = Namespace(files=[test_nb]) - assert merge_configuration_file(args) - assert_namespace(args, {"files": args.files, "extra_keys": "foo bar"}) + args_str = [str(test_nb)] + expected_args = parser.parse_args(args_str) + # Note: current methodology sorts extra_keys alphabetically + expected_args.extra_keys = "bar foo" + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) -def test_unknown_property(tmp_path: Path, test_nb: Path) -> None: +def test_unknown_property(tmp_path: Path, test_nb: Path, parser: ArgumentParser) -> None: (tmp_path / "pyproject.toml").write_text( "[tool.nbstripout]\nunknown_prop=true\n", ) - args = Namespace(files=[test_nb]) + args_str = [str(test_nb)] with pytest.raises(ValueError): - merge_configuration_file(args) + merge_configuration_file(parser, args_str) -def test_non_bool_value_for_bool_property(tmp_path: Path, test_nb: Path) -> None: +def test_non_bool_value_for_bool_property(tmp_path: Path, test_nb: Path, parser: ArgumentParser) -> None: (tmp_path / "pyproject.toml").write_text( '[tool.nbstripout]\ndrop_empty_cells="invalid"\n', ) - args = Namespace(files=[test_nb]) + args_str = [str(test_nb)] with pytest.raises(ValueError): - merge_configuration_file(args) + merge_configuration_file(parser, args_str) def test_non_bool_value_for_bool_property_in_setup_cfg( - tmp_path: Path, test_nb: Path + tmp_path: Path, test_nb: Path, parser: ArgumentParser ) -> None: (tmp_path / "setup.cfg").write_text( "[nbstripout]\ndrop_empty_cells=ok\n", ) - args = Namespace(files=[test_nb]) + args_str = [str(test_nb)] with pytest.raises(ValueError): - merge_configuration_file(args) + merge_configuration_file(parser, args_str) -def test_non_list_value_for_list_property(tmp_path: Path, test_nb: Path) -> None: +def test_non_list_value_for_list_property(tmp_path: Path, test_nb: Path, parser: ArgumentParser) -> None: (tmp_path / "pyproject.toml").write_text( "[tool.nbstripout]\nexclude=true\n", ) - args = Namespace(files=[test_nb]) + + args_str = [str(test_nb)] with pytest.raises(ValueError): - merge_configuration_file(args) + merge_configuration_file(parser, args_str) -def test_merge_with_cli_additive_str_property(tmp_path: Path, test_nb: Path) -> None: +def test_merge_with_cli_additive_str_property(tmp_path: Path, test_nb: Path, parser: ArgumentParser) -> None: (tmp_path / "pyproject.toml").write_text( '[tool.nbstripout]\nextra_keys="foo"\n', ) - args = Namespace(files=[test_nb], extra_keys="bar") - assert merge_configuration_file(args) - assert_namespace(args, {"files": args.files, "extra_keys": "bar foo"}) + args_str = [str(test_nb), '--extra-keys=bar'] + expected_args = parser.parse_args(args_str) + # Note: current methodology sorts extra_keys alphabetically + expected_args.extra_keys = "bar foo" + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) From 92440f1d12605f74584e5d4721e1bca6bb01a869 Mon Sep 17 00:00:00 2001 From: Andrew O'Brien Date: Thu, 8 Dec 2022 20:46:40 +1000 Subject: [PATCH 06/14] fix handling of non_bool values in pyproject.toml --- nbstripout/_utils.py | 18 +++++++++++++++--- tests/test_read_config_files.py | 2 ++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py index 683df9f..1e362b2 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -159,7 +159,7 @@ def strip_output(nb, keep_output, keep_count, extra_keys=[], drop_empty_cells=Fa return nb -def process_pyproject_toml(toml_file_path: str) -> Optional[Dict[str, Any]]: +def process_pyproject_toml(toml_file_path: str, parser: ArgumentParser) -> Optional[Dict[str, Any]]: """Extract config mapping from pyproject.toml file.""" try: import tomllib # python 3.11+ @@ -167,7 +167,19 @@ def process_pyproject_toml(toml_file_path: str) -> Optional[Dict[str, Any]]: import tomli as tomllib with open(toml_file_path, 'rb') as f: - return tomllib.load(f).get('tool', {}).get('nbstripout', None) + dict_config = tomllib.load(f).get('tool', {}).get('nbstripout', None) + + if not dict_config: + # could be {} if 'tool' not found, or None if 'nbstripout' not found + return dict_config + + # special processing of boolean options, make sure we don't have invalid types + for a in parser._actions: + if a.dest in dict_config and isinstance(a, (_StoreTrueAction, _StoreFalseAction)): + if not isinstance(dict_config[a.dest], bool): + raise ValueError(f'Argument {a.dest} in pyproject.toml must be a boolean, not {dict_config[a.dest]}') + + return dict_config def process_setup_cfg(cfg_file_path, parser: ArgumentParser) -> Optional[Dict[str, Any]]: @@ -193,7 +205,7 @@ def process_setup_cfg(cfg_file_path, parser: ArgumentParser) -> Optional[Dict[st def merge_configuration_file(parser: ArgumentParser, args_str=None) -> Namespace: """Merge flags from config files into args.""" CONFIG_FILES = { - 'pyproject.toml': process_pyproject_toml, + 'pyproject.toml': partial(process_pyproject_toml, parser=parser), 'setup.cfg': partial(process_setup_cfg, parser=parser), } diff --git a/tests/test_read_config_files.py b/tests/test_read_config_files.py index 8628bcf..2b27e2d 100644 --- a/tests/test_read_config_files.py +++ b/tests/test_read_config_files.py @@ -82,6 +82,7 @@ def test_non_nested_setup_cfg_non_empty(tmp_path: Path, test_nb: Path, parser: A def test_nofiles_pyproject_toml_non_empty(tmp_path: Path, parser: ArgumentParser) -> None: + pytest.skip("TODO") (tmp_path / "pyproject.toml").write_text( "[tool.nbstripout]\ndrop_empty_cells=true\n", ) @@ -93,6 +94,7 @@ def test_nofiles_pyproject_toml_non_empty(tmp_path: Path, parser: ArgumentParser def test_nofiles_setup_cfg_empty(tmp_path: Path, test_nb: Path, parser: ArgumentParser) -> None: + pytest.skip("TODO") (tmp_path / "setup.cfg").write_text( "[nbstripout]\ndrop_empty_cells = yes\n", ) From dce41634f117e22530c23d0080854b021502ba79 Mon Sep 17 00:00:00 2001 From: Andrew O'Brien Date: Thu, 8 Dec 2022 21:00:01 +1000 Subject: [PATCH 07/14] fix no file arguments tests --- nbstripout/_utils.py | 3 --- tests/conftest.py | 1 + tests/test_read_config_files.py | 12 ++++-------- 3 files changed, 5 insertions(+), 11 deletions(-) create mode 100644 tests/conftest.py diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py index 1e362b2..aa27591 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -221,7 +221,6 @@ def merge_configuration_file(parser: ArgumentParser, args_str=None) -> Namespace # comes first. # if no files are given, start from cwd config_path = os.path.commonpath([os.path.abspath(file) for file in args.files]) if args.files else os.path.abspath(os.getcwd()) - print(f"{config_path =}") config: Optional[Dict[str, Any]] = None while True: for config_file, processor in CONFIG_FILES.items(): @@ -239,10 +238,8 @@ def merge_configuration_file(parser: ArgumentParser, args_str=None) -> Namespace # black starts with default arguments (from click), updates that with the config file, # then applies command line arguments. this all happens in the click framework, before main() is called # we can use parser.set_defaults - print(f'getting config {config}') if config: # check all arguments are valid - print(f"have a config: {config.keys()}") valid_args = vars(args).keys() for name in config.keys(): if name not in valid_args: diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..9a7f867 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1 @@ +pytest_plugins = "pytester" \ No newline at end of file diff --git a/tests/test_read_config_files.py b/tests/test_read_config_files.py index 2b27e2d..147fd5d 100644 --- a/tests/test_read_config_files.py +++ b/tests/test_read_config_files.py @@ -81,11 +81,8 @@ def test_non_nested_setup_cfg_non_empty(tmp_path: Path, test_nb: Path, parser: A assert_namespace(args, expected_args) -def test_nofiles_pyproject_toml_non_empty(tmp_path: Path, parser: ArgumentParser) -> None: - pytest.skip("TODO") - (tmp_path / "pyproject.toml").write_text( - "[tool.nbstripout]\ndrop_empty_cells=true\n", - ) +def test_nofiles_pyproject_toml_non_empty(pytester: pytest.Pytester, parser: ArgumentParser) -> None: + pytester.makepyprojecttoml("[tool.nbstripout]\ndrop_empty_cells=true\n") args_str = [] expected_args = parser.parse_args(args_str) expected_args.drop_empty_cells = True @@ -93,9 +90,8 @@ def test_nofiles_pyproject_toml_non_empty(tmp_path: Path, parser: ArgumentParser assert_namespace(args, expected_args) -def test_nofiles_setup_cfg_empty(tmp_path: Path, test_nb: Path, parser: ArgumentParser) -> None: - pytest.skip("TODO") - (tmp_path / "setup.cfg").write_text( +def test_nofiles_setup_cfg_empty(pytester: pytest.Pytester, parser: ArgumentParser) -> None: + Path("setup.cfg").write_text( "[nbstripout]\ndrop_empty_cells = yes\n", ) args_str = [] From 7d02b4197a8d3619486dc849d55cc8ab9445b897 Mon Sep 17 00:00:00 2001 From: Andrew O'Brien Date: Thu, 8 Dec 2022 23:54:18 +1000 Subject: [PATCH 08/14] test argument and file precedence --- tests/test_read_config_files.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/test_read_config_files.py b/tests/test_read_config_files.py index 147fd5d..f5846d4 100644 --- a/tests/test_read_config_files.py +++ b/tests/test_read_config_files.py @@ -261,3 +261,31 @@ def test_merge_with_cli_additive_str_property(tmp_path: Path, test_nb: Path, par expected_args.extra_keys = "bar foo" args = merge_configuration_file(parser, args_str) assert_namespace(args, expected_args) + + +def test_override_bool_true(pytester: pytest.Pytester, parser: ArgumentParser): + pytester.makepyprojecttoml("[tool.nbstripout]\ndrop_empty_cells = false\n") + args_str = ["--drop-empty-cells"] + expected_args = parser.parse_args(args_str) + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) + + +def test_override_size(pytester: pytest.Pytester, parser: ArgumentParser): + pytester.makepyprojecttoml("[tool.nbstripout]\nmax_size = 30\n") + args_str = ["--max-size=40"] + expected_args = parser.parse_args(args_str) + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) + + +def test_toml_override_settings(pytester: pytest.Pytester, parser: ArgumentParser): + pytester.makepyprojecttoml("[tool.nbstripout]\nmax_size = 30\n") + Path("setup.cfg").write_text( + "[nbstripout]\nmax_size = 50\n", + ) + args_str = [] + expected_args = parser.parse_args(args_str) + expected_args.max_size = 30 + args = merge_configuration_file(parser, args_str) + assert_namespace(args, expected_args) From 0a75bc18d75c6721289d5e0b6b6d4b646c9c9ea6 Mon Sep 17 00:00:00 2001 From: Andrew O'Brien Date: Thu, 8 Dec 2022 23:57:42 +1000 Subject: [PATCH 09/14] flake8 --- tests/conftest.py | 2 +- tests/test_read_config_files.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 9a7f867..694d7d5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1 +1 @@ -pytest_plugins = "pytester" \ No newline at end of file +pytest_plugins = "pytester" diff --git a/tests/test_read_config_files.py b/tests/test_read_config_files.py index f5846d4..07766a5 100644 --- a/tests/test_read_config_files.py +++ b/tests/test_read_config_files.py @@ -1,5 +1,4 @@ from argparse import ArgumentParser, Namespace -from typing import Any, Dict from pathlib import Path import pytest From c7abff05213186311d4620c522ed269132d011ad Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 26 Dec 2022 15:07:11 -0800 Subject: [PATCH 10/14] Update nbstripout/_utils.py Co-authored-by: Florian Rathgeber --- nbstripout/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py index aa27591..7781edb 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -202,7 +202,7 @@ def process_setup_cfg(cfg_file_path, parser: ArgumentParser) -> Optional[Dict[st return dict_config -def merge_configuration_file(parser: ArgumentParser, args_str=None) -> Namespace: +def merge_configuration_files(parser: ArgumentParser, args_str=None) -> Namespace: """Merge flags from config files into args.""" CONFIG_FILES = { 'pyproject.toml': partial(process_pyproject_toml, parser=parser), From e8ced469acfe5446237d0b323ae62d06203d70ad Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 26 Dec 2022 15:07:21 -0800 Subject: [PATCH 11/14] Update tests/test_read_config_files.py Co-authored-by: Florian Rathgeber --- tests/test_read_config_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_read_config_files.py b/tests/test_read_config_files.py index 07766a5..70bb9fb 100644 --- a/tests/test_read_config_files.py +++ b/tests/test_read_config_files.py @@ -89,7 +89,7 @@ def test_nofiles_pyproject_toml_non_empty(pytester: pytest.Pytester, parser: Arg assert_namespace(args, expected_args) -def test_nofiles_setup_cfg_empty(pytester: pytest.Pytester, parser: ArgumentParser) -> None: +def test_nofiles_setup_cfg_non_empty(pytester: pytest.Pytester, parser: ArgumentParser) -> None: Path("setup.cfg").write_text( "[nbstripout]\ndrop_empty_cells = yes\n", ) From 08bfc679ff51b319f4241f02069b44495153fd7a Mon Sep 17 00:00:00 2001 From: JasonJoosteCSIRO <120607685+JasonJoosteCSIRO@users.noreply.github.com> Date: Sat, 6 May 2023 05:23:32 +1000 Subject: [PATCH 12/14] Sequential cell ids (#184) * Add command line argument keep-id, which maintiains randomly generated cell ids. Otherwise cell ids are assigned incrementally (after the removal of cells), which should keep them consistent across runs in version control * Modify test_cell and test_exception in test_keep_output_tags.py to use the new strip_output signature * Fix failed test_end_to_end_nbstripout with test_max_size by passing --keep-id for keeping the existing ids * Add tests for notebooks with and without the --keep-id flag. A new extension expected_id was added for expected output with ordered ids * Modify the readme to include the --include-id flag * Add keyword arguments for None inputs in test_keep_output_tags.py * Rename expected output files to make desired sequential ids more explicit Co-authored-by: Florian Rathgeber --- README.rst | 4 + nbstripout/_nbstripout.py | 8 +- nbstripout/_utils.py | 8 +- ...test_max_size.ipynb.expected_sequential_id | 90 ++++++++++++++++++ tests/e2e_notebooks/test_nbformat45.ipynb | 93 +++++++++++++++++++ .../test_nbformat45.ipynb.expected | 61 ++++++++++++ ...st_nbformat45.ipynb.expected_sequential_id | 61 ++++++++++++ tests/test_end_to_end.py | 5 +- tests/test_keep_output_tags.py | 4 +- 9 files changed, 325 insertions(+), 9 deletions(-) create mode 100644 tests/e2e_notebooks/test_max_size.ipynb.expected_sequential_id create mode 100644 tests/e2e_notebooks/test_nbformat45.ipynb create mode 100644 tests/e2e_notebooks/test_nbformat45.ipynb.expected create mode 100644 tests/e2e_notebooks/test_nbformat45.ipynb.expected_sequential_id diff --git a/README.rst b/README.rst index a161c65..3d97290 100644 --- a/README.rst +++ b/README.rst @@ -274,6 +274,10 @@ Do not strip the output :: nbstripout --keep-output +Do not reassign the cell ids to be sequential :: + + nbstripout --keep-id + To mark special cells so that the output is not stripped, you can either: 1. Set the ``keep_output`` tag on the cell. To do this, enable the tags diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py index 3957395..50f096a 100644 --- a/nbstripout/_nbstripout.py +++ b/nbstripout/_nbstripout.py @@ -373,6 +373,9 @@ def main(): help='Do not strip the execution count/prompt number') parser.add_argument('--keep-output', action='store_true', help='Do not strip output', default=None) + parser.add_argument('--keep-id', action='store_true', + help='Keep the randomly generated cell ids, ' + 'which will be different after each execution.') parser.add_argument('--extra-keys', default='', help='Space separated list of extra keys to strip ' 'from metadata, e.g. metadata.foo cell.metadata.bar') @@ -409,7 +412,6 @@ def main(): parser.add_argument('files', nargs='*', help='Files to strip output from') args = parser.parse_args() - git_config = ['git', 'config'] if args._system: @@ -487,7 +489,7 @@ def main(): warnings.simplefilter("ignore", category=UserWarning) nb = read(f, as_version=NO_CONVERT) - nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys, args.drop_empty_cells, + nb = strip_output(nb, args.keep_output, args.keep_count, args.keep_id, extra_keys, args.drop_empty_cells, args.drop_tagged_cells.split(), args.strip_init_cells, _parse_size(args.max_size)) if args.dry_run: @@ -533,7 +535,7 @@ def main(): warnings.simplefilter("ignore", category=UserWarning) nb = read(input_stream, as_version=NO_CONVERT) - nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys, args.drop_empty_cells, + nb = strip_output(nb, args.keep_output, args.keep_count, args.keep_id, extra_keys, args.drop_empty_cells, args.drop_tagged_cells.split(), args.strip_init_cells, _parse_size(args.max_size)) if args.dry_run: diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py index d54ac91..322edbd 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -94,7 +94,7 @@ def strip_zeppelin_output(nb): return nb -def strip_output(nb, keep_output, keep_count, extra_keys=[], drop_empty_cells=False, drop_tagged_cells=[], +def strip_output(nb, keep_output, keep_count, keep_id, extra_keys=[], drop_empty_cells=False, drop_tagged_cells=[], strip_init_cells=False, max_size=0): """ Strip the outputs, execution count/prompt number and miscellaneous @@ -124,7 +124,7 @@ def strip_output(nb, keep_output, keep_count, extra_keys=[], drop_empty_cells=Fa for tag_to_drop in drop_tagged_cells: conditionals.append(lambda c: tag_to_drop not in c.get("metadata", {}).get("tags", [])) - for cell in _cells(nb, conditionals): + for i, cell in enumerate(_cells(nb, conditionals)): keep_output_this_cell = determine_keep_output(cell, keep_output, strip_init_cells) # Remove the outputs, unless directed otherwise @@ -148,7 +148,9 @@ def strip_output(nb, keep_output, keep_count, extra_keys=[], drop_empty_cells=Fa cell['prompt_number'] = None if 'execution_count' in cell and not keep_count: cell['execution_count'] = None - + # Replace the cell id with an incremental value that will be consistent across runs + if 'id' in cell and not keep_id: + cell['id'] = str(i) for field in keys['cell']: pop_recursive(cell, field) return nb diff --git a/tests/e2e_notebooks/test_max_size.ipynb.expected_sequential_id b/tests/e2e_notebooks/test_max_size.ipynb.expected_sequential_id new file mode 100644 index 0000000..44cddfb --- /dev/null +++ b/tests/e2e_notebooks/test_max_size.ipynb.expected_sequential_id @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "This notebook tests that outputs can be cleared based on size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "aaaaaaaaaa\n" + ] + } + ], + "source": [ + "print(\"a\"*10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"a\"*100)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.4" + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/e2e_notebooks/test_nbformat45.ipynb b/tests/e2e_notebooks/test_nbformat45.ipynb new file mode 100644 index 0000000..f6d3f16 --- /dev/null +++ b/tests/e2e_notebooks/test_nbformat45.ipynb @@ -0,0 +1,93 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "5c42035d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'This is the new Jupyter notebook'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"This is the new Jupyter notebook\"" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "886205fa", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'text2'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"text2\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a183d4e9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "f(3) = 4\n" + ] + } + ], + "source": [ + "def f(x):\n", + " \"\"\"My function\n", + " x : parameter\"\"\"\n", + " \n", + " return x+1\n", + "\n", + "print(\"f(3) = \", f(3))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/e2e_notebooks/test_nbformat45.ipynb.expected b/tests/e2e_notebooks/test_nbformat45.ipynb.expected new file mode 100644 index 0000000..fb78c3f --- /dev/null +++ b/tests/e2e_notebooks/test_nbformat45.ipynb.expected @@ -0,0 +1,61 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "5c42035d", + "metadata": {}, + "outputs": [], + "source": [ + "\"This is the new Jupyter notebook\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "886205fa", + "metadata": {}, + "outputs": [], + "source": [ + "\"text2\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a183d4e9", + "metadata": {}, + "outputs": [], + "source": [ + "def f(x):\n", + " \"\"\"My function\n", + " x : parameter\"\"\"\n", + " \n", + " return x+1\n", + "\n", + "print(\"f(3) = \", f(3))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/e2e_notebooks/test_nbformat45.ipynb.expected_sequential_id b/tests/e2e_notebooks/test_nbformat45.ipynb.expected_sequential_id new file mode 100644 index 0000000..8c499da --- /dev/null +++ b/tests/e2e_notebooks/test_nbformat45.ipynb.expected_sequential_id @@ -0,0 +1,61 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "\"This is the new Jupyter notebook\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "\"text2\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "def f(x):\n", + " \"\"\"My function\n", + " x : parameter\"\"\"\n", + " \n", + " return x+1\n", + "\n", + "print(\"f(3) = \", f(3))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py index 0b96923..575dd9b 100644 --- a/tests/test_end_to_end.py +++ b/tests/test_end_to_end.py @@ -15,7 +15,8 @@ ("test_drop_tagged_cells.ipynb", "test_drop_tagged_cells_dontdrop.ipynb.expected", []), ("test_drop_tagged_cells.ipynb", "test_drop_tagged_cells.ipynb.expected", ['--drop-tagged-cells=test']), ("test_execution_timing.ipynb", "test_execution_timing.ipynb.expected", []), - ("test_max_size.ipynb", "test_max_size.ipynb.expected", ["--max-size", "50"]), + ("test_max_size.ipynb", "test_max_size.ipynb.expected", ["--max-size", "50", "--keep-id"]), + ("test_max_size.ipynb", "test_max_size.ipynb.expected_sequential_id", ["--max-size", "50"]), ("test_metadata.ipynb", "test_metadata.ipynb.expected", []), ("test_metadata.ipynb", "test_metadata_extra_keys.ipynb.expected", ["--extra-keys", "metadata.kernelspec metadata.language_info"]), ("test_metadata.ipynb", "test_metadata_keep_count.ipynb.expected", ["--keep-count"]), @@ -26,6 +27,8 @@ ("test_metadata_period.ipynb", "test_metadata_period.ipynb.expected", ["--extra-keys", "cell.metadata.application/vnd.databricks.v1+cell metadata.application/vnd.databricks.v1+notebook"]), ("test_strip_init_cells.ipynb", "test_strip_init_cells.ipynb.expected", ["--strip-init-cells"]), ("test_nbformat2.ipynb", "test_nbformat2.ipynb.expected", []), + ("test_nbformat45.ipynb", "test_nbformat45.ipynb.expected", ["--keep-id"]), + ("test_nbformat45.ipynb", "test_nbformat45.ipynb.expected_sequential_id", []), ("test_unicode.ipynb", "test_unicode.ipynb.expected", []), ("test_widgets.ipynb", "test_widgets.ipynb.expected", []), ("test_zeppelin.zpln", "test_zeppelin.zpln.expected", ["--mode", "zeppelin"]), diff --git a/tests/test_keep_output_tags.py b/tests/test_keep_output_tags.py index 7ec8567..4243a82 100644 --- a/tests/test_keep_output_tags.py +++ b/tests/test_keep_output_tags.py @@ -24,7 +24,7 @@ def nb_with_exception(): def test_cells(orig_nb): nb_stripped = deepcopy(orig_nb) - nb_stripped = strip_output(nb_stripped, None, None) + nb_stripped = strip_output(nb_stripped, keep_output=None, keep_count=None, keep_id=None) for i, cell in enumerate(nb_stripped.cells): if cell.cell_type == 'code' and cell.source: match = re.match(r"\s*#\s*(output|no_output)", cell.source) @@ -41,4 +41,4 @@ def test_cells(orig_nb): def test_exception(nb_with_exception): with pytest.raises(MetadataError): - strip_output(nb_with_exception, None, None) + strip_output(nb_with_exception, keep_output=None, keep_count=None, keep_id=None) From 749f431366811f26114c15aefcb5149e014b9e97 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Sun, 22 Oct 2023 02:40:32 -0400 Subject: [PATCH 13/14] Add python3.11 classifier to setup.py, remove python3.6 (#186) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index dede8c5..5982ec0 100644 --- a/setup.py +++ b/setup.py @@ -37,10 +37,10 @@ "Intended Audience :: Developers", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Software Development :: Version Control", ]) From 7b065ef1af9bac07b9cd1d6161491fd0ce0f8f45 Mon Sep 17 00:00:00 2001 From: Florian Rathgeber Date: Thu, 7 Dec 2023 21:51:48 +0100 Subject: [PATCH 14/14] Improve documentation for notebook and cell metadata stripping * Clarify that extra keys may _only_ specify notebook and cell metadata to be stripped. * Update metadata stripped by default. Addresses #187 --- README.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 3d97290..5e3161b 100644 --- a/README.rst +++ b/README.rst @@ -315,8 +315,9 @@ Stripping metadata The following metadata is stripped by default: -* Notebook metadata: ``signature``, ``widgets`` -* Cell metadata: ``ExecuteTime``, ``collapsed``, ``execution``, ``scrolled`` +* Notebook metadata: ``signature``, ``widgets`` +* Cell metadata: ``ExecuteTime``, ``collapsed``, ``execution``, + ``heading_collapsed``, ``hidden``, ``scrolled`` Additional metadata to be stripped can be configured via either @@ -341,6 +342,10 @@ Additional metadata to be stripped can be configured via either --extra-keys "metadata.celltoolbar cell.metadata.heading_collapsed" +Note: Only notebook and cell metadata is currently supported and every key +specified via ``filter.nbstripout.extrakeys`` or ``--extra-keys`` must start +with ``metadata.`` for notebook and ``cell.metadata.`` for cell metadata. + You can keep certain metadata with either * ``git config (--global/--system) filter.nbstripout.keepmetadatakeys``, e.g. ::