kynan · janosh · Oct 1, 2022 · Oct 1, 2022 · Oct 1, 2022 · Oct 2, 2022
diff --git a/README.rst b/README.rst
@@ -274,6 +274,10 @@ Do not strip the output ::
 
     nbstripout --keep-output
 
+Do not reassign the cell ids to be sequential ::
+
+    nbstripout --keep-id
+
 To mark special cells so that the output is not stripped, you can either:
 
 1.  Set the ``keep_output`` tag on the cell. To do this, enable the tags
@@ -311,8 +315,9 @@ Stripping metadata
 
 The following metadata is stripped by default:
 
-* Notebook metadata: ``signature``, ``widgets``
-* Cell metadata: ``ExecuteTime``, ``collapsed``, ``execution``, ``scrolled``
+*   Notebook metadata: ``signature``, ``widgets``
+*   Cell metadata: ``ExecuteTime``, ``collapsed``, ``execution``,
+    ``heading_collapsed``, ``hidden``, ``scrolled``
 
 Additional metadata to be stripped can be configured via either
 
@@ -337,6 +342,10 @@ Additional metadata to be stripped can be configured via either
 
         --extra-keys "metadata.celltoolbar cell.metadata.heading_collapsed"
 
+Note: Only notebook and cell metadata is currently supported and every key
+specified via ``filter.nbstripout.extrakeys`` or ``--extra-keys`` must start
+with ``metadata.`` for notebook and ``cell.metadata.`` for cell metadata.
+
 You can keep certain metadata with either
 
 *   ``git config (--global/--system) filter.nbstripout.keepmetadatakeys``, e.g. ::

diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py
@@ -109,7 +109,7 @@
     *.ipynb diff=ipynb
 """
 
-from argparse import ArgumentParser, RawDescriptionHelpFormatter
+from argparse import ArgumentParser, Namespace, RawDescriptionHelpFormatter
 import collections
 import io
 import json
@@ -120,7 +120,7 @@
 import sys
 import warnings
 
-from nbstripout._utils import strip_output, strip_zeppelin_output
+from nbstripout._utils import strip_output, strip_zeppelin_output, merge_configuration_file
-from nbstripout._utils import strip_output, strip_zeppelin_output, merge_configuration_file
+from nbstripout._utils import strip_output, strip_zeppelin_output, merge_configuration_files
-from nbstripout._utils import strip_output, strip_zeppelin_output, merge_configuration_file
+from nbstripout._utils import strip_output, strip_zeppelin_output, merge_configuration_files
 try:
     # Jupyter >= 4
     from nbformat import read, write, NO_CONVERT
@@ -351,7 +351,7 @@ def status(git_config, install_location=INSTALL_LOCATION_LOCAL, verbose=False):
         return 1
 
 
-def main():
+def setup_commandline() -> Namespace:
     parser = ArgumentParser(epilog=__doc__, formatter_class=RawDescriptionHelpFormatter)
     task = parser.add_mutually_exclusive_group()
     task.add_argument('--dry-run', action='store_true',
@@ -373,14 +373,17 @@ def main():
                         help='Do not strip the execution count/prompt number')
     parser.add_argument('--keep-output', action='store_true',
                         help='Do not strip output', default=None)
+    parser.add_argument('--keep-id', action='store_true',
+                        help='Keep the randomly generated cell ids, '
+                        'which will be different after each execution.')
     parser.add_argument('--extra-keys', default='',
                         help='Space separated list of extra keys to strip '
                         'from metadata, e.g. metadata.foo cell.metadata.bar')
     parser.add_argument('--keep-metadata-keys', default='',
                         help='Space separated list of metadata keys to keep'
                         ', e.g. metadata.foo cell.metadata.bar')
     parser.add_argument('--drop-empty-cells', action='store_true',
-                        help='Remove cells where `source` is empty or contains only whitepace')
+                        help='Remove cells where `source` is empty or contains only whitespace')
     parser.add_argument('--drop-tagged-cells', default='',
                         help='Space separated list of cell-tags that remove an entire cell')
     parser.add_argument('--strip-init-cells', action='store_true',
@@ -408,7 +411,13 @@ def main():
                         help='Prints stripped files to STDOUT')
 
     parser.add_argument('files', nargs='*', help='Files to strip output from')
-    args = parser.parse_args()
+
+    return parser
+
+
+def main():
+    parser = setup_commandline()
+    args = merge_configuration_file(parser)
-    args = merge_configuration_file(parser)
+    args = merge_configuration_files(parser)
-    args = merge_configuration_file(parser)
+    args = merge_configuration_files(parser)
 
     git_config = ['git', 'config']
 
@@ -487,7 +496,7 @@ def main():
                     warnings.simplefilter("ignore", category=UserWarning)
                     nb = read(f, as_version=NO_CONVERT)
 
-            nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys, args.drop_empty_cells,
+            nb = strip_output(nb, args.keep_output, args.keep_count, args.keep_id, extra_keys, args.drop_empty_cells,
                               args.drop_tagged_cells.split(), args.strip_init_cells, _parse_size(args.max_size))
 
             if args.dry_run:
@@ -533,7 +542,7 @@ def main():
                 warnings.simplefilter("ignore", category=UserWarning)
                 nb = read(input_stream, as_version=NO_CONVERT)
 
-            nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys, args.drop_empty_cells,
+            nb = strip_output(nb, args.keep_output, args.keep_count, args.keep_id, extra_keys, args.drop_empty_cells,
                               args.drop_tagged_cells.split(), args.strip_init_cells, _parse_size(args.max_size))
 
             if args.dry_run:

diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py
@@ -1,5 +1,9 @@
-from collections import defaultdict
+from argparse import ArgumentParser, Namespace, _StoreTrueAction, _StoreFalseAction
+import os
 import sys
+from collections import defaultdict
+from functools import partial
+from typing import Any, Dict, Optional
 
 __all__ = ["pop_recursive", "strip_output", "strip_zeppelin_output", "MetadataError"]
 
@@ -94,7 +98,7 @@ def strip_zeppelin_output(nb):
     return nb
 
 
-def strip_output(nb, keep_output, keep_count, extra_keys=[], drop_empty_cells=False, drop_tagged_cells=[],
+def strip_output(nb, keep_output, keep_count, keep_id, extra_keys=[], drop_empty_cells=False, drop_tagged_cells=[],
                  strip_init_cells=False, max_size=0):
     """
     Strip the outputs, execution count/prompt number and miscellaneous
@@ -124,7 +128,7 @@ def strip_output(nb, keep_output, keep_count, extra_keys=[], drop_empty_cells=Fa
     for tag_to_drop in drop_tagged_cells:
         conditionals.append(lambda c: tag_to_drop not in c.get("metadata", {}).get("tags", []))
 
-    for cell in _cells(nb, conditionals):
+    for i, cell in enumerate(_cells(nb, conditionals)):
         keep_output_this_cell = determine_keep_output(cell, keep_output, strip_init_cells)
 
         # Remove the outputs, unless directed otherwise
@@ -148,7 +152,112 @@ def strip_output(nb, keep_output, keep_count, extra_keys=[], drop_empty_cells=Fa
             cell['prompt_number'] = None
         if 'execution_count' in cell and not keep_count:
             cell['execution_count'] = None
-
+        # Replace the cell id with an incremental value that will be consistent across runs
+        if 'id' in cell and not keep_id:
+            cell['id'] = str(i)
         for field in keys['cell']:
             pop_recursive(cell, field)
     return nb
+
+
+def process_pyproject_toml(toml_file_path: str, parser: ArgumentParser) -> Optional[Dict[str, Any]]:
+    """Extract config mapping from pyproject.toml file."""
+    try:
+        import tomllib  # python 3.11+
+    except ModuleNotFoundError:
+        import tomli as tomllib
+
+    with open(toml_file_path, 'rb') as f:
+        dict_config = tomllib.load(f).get('tool', {}).get('nbstripout', None)
+
+    if not dict_config:
+        # could be {} if 'tool' not found, or None if 'nbstripout' not found
+        return dict_config
+
+    # special processing of boolean options, make sure we don't have invalid types
+    for a in parser._actions:
+        if a.dest in dict_config and isinstance(a, (_StoreTrueAction, _StoreFalseAction)):
+            if not isinstance(dict_config[a.dest], bool):
+                raise ValueError(f'Argument {a.dest} in pyproject.toml must be a boolean, not {dict_config[a.dest]}')
+
+    return dict_config
+
+
+def process_setup_cfg(cfg_file_path, parser: ArgumentParser) -> Optional[Dict[str, Any]]:
+    """Extract config mapping from setup.cfg file."""
+    import configparser
+
+    reader = configparser.ConfigParser()
+    reader.read(cfg_file_path)
+    if not reader.has_section('nbstripout'):
+        return None
+
+    raw_config = reader['nbstripout']
+    dict_config = dict(raw_config)
+
+    # special processing of boolean options, to convert various configparser bool types to true/false
+    for a in parser._actions:
+        if a.dest in raw_config and isinstance(a, (_StoreTrueAction, _StoreFalseAction)):
+            dict_config[a.dest] = raw_config.getboolean(a.dest)
+
+    return dict_config
+
+
+def merge_configuration_files(parser: ArgumentParser, args_str=None) -> Namespace:
+    """Merge flags from config files into args."""
+    CONFIG_FILES = {
+        'pyproject.toml': partial(process_pyproject_toml, parser=parser),
+        'setup.cfg': partial(process_setup_cfg, parser=parser),
+    }
+
+    # parse args as-is to look for configuration files
+    args = parser.parse_args(args_str)
+
+    # Traverse the file tree common to all files given as argument looking for
+    # a configuration file
+    # TODO: make this more like Black:
+    #       By default Black looks for pyproject.toml starting from the common base directory of all files and
+    #       directories passed on the command line. If it’s not there, it looks in parent directories. It stops looking
+    #       when it finds the file, or a .git directory, or a .hg directory, or the root of the file system, whichever
+    #       comes first.
+    # if no files are given, start from cwd
+    config_path = os.path.commonpath([os.path.abspath(file) for file in args.files]) if args.files else os.path.abspath(os.getcwd())
+    config: Optional[Dict[str, Any]] = None
+    while True:
+        for config_file, processor in CONFIG_FILES.items():
+            config_file_path = os.path.join(config_path, config_file)
+            if os.path.isfile(config_file_path):
+                config = processor(config_file_path)
+                if config is not None:
+                    break
+        if config is not None:
+            break
+        config_path, tail = os.path.split(config_path)
+        if not tail:
+            break
+
+    # black starts with default arguments (from click), updates that with the config file,
+    # then applies command line arguments. this all happens in the click framework, before main() is called
+    # we can use parser.set_defaults
+    if config:
+        # check all arguments are valid
+        valid_args = vars(args).keys()
+        for name in config.keys():
+            if name not in valid_args:
+                raise ValueError(f'{name} in the config file is not a valid option')
+
+        # separate into default-overrides and special treatment
+        extra_keys: Optional[str] = None
+        if 'extra_keys' in config:
+            extra_keys = config['extra_keys']
+            del config['extra_keys']
+
+        # merge the configuration options as new defaults, and re-parse the arguments
+        parser.set_defaults(**config)
+        args = parser.parse_args(args_str)
+
+        # merge extra_keys using set union
+        if extra_keys:
+            args.extra_keys = ' '.join(sorted(set(extra_keys.split()) | set(args.extra_keys.split())))
+
+    return args
diff --git a/setup.py b/setup.py
@@ -37,10 +37,10 @@
           "Intended Audience :: Developers",
           "Programming Language :: Python",
           "Programming Language :: Python :: 3",
-          "Programming Language :: Python :: 3.6",
           "Programming Language :: Python :: 3.7",
           "Programming Language :: Python :: 3.8",
           "Programming Language :: Python :: 3.9",
           "Programming Language :: Python :: 3.10",
+          "Programming Language :: Python :: 3.11",
           "Topic :: Software Development :: Version Control",
       ])
diff --git a/tests/e2e_notebooks/test_max_size.ipynb.expected_sequential_id b/tests/e2e_notebooks/test_max_size.ipynb.expected_sequential_id
@@ -0,0 +1,90 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "0",
+   "metadata": {},
+   "source": [
+    "This notebook tests that outputs can be cleared based on size"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "aaaaaaaaaa\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"a\"*10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"a\"*100)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.4"
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}