Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement --remove-tagged-cells #738

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions papermill/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ def print_papermill_version(ctx, param, value):
@click.option(
'--parameters_base64', '-b', multiple=True, help='Base64 encoded YAML string as parameters.'
)
@click.option(
'--remove-tagged-cells', type=str, help='Remove cells with the specified tag before execution.'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about

Suggested change
'--remove-tagged-cells', type=str, help='Remove cells with the specified tag before execution.'
'--remove-cells-tagged', type=str, help='Remove cells with the specified tag before execution.'

Then it'd be pretty fluent on the command line:

--remove-cells-tagged=a-tag

)
@click.option(
'--inject-input-path',
is_flag=True,
Expand Down Expand Up @@ -165,6 +168,7 @@ def papermill(
parameters_file,
parameters_yaml,
parameters_base64,
remove_tagged_cells,
inject_input_path,
inject_output_path,
inject_paths,
Expand Down Expand Up @@ -258,6 +262,7 @@ def papermill(
request_save_on_cell_execute=request_save_on_cell_execute,
autosave_cell_every=autosave_cell_every,
prepare_only=prepare_only,
remove_tagged_cells=remove_tagged_cells,
kernel_name=kernel,
language=language,
progress_bar=progress_bar,
Expand Down
34 changes: 34 additions & 0 deletions papermill/execute.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import nbformat
from pathlib import Path
from copy import deepcopy

from .log import logger
from .exceptions import PapermillExecutionError
Expand All @@ -17,6 +18,7 @@ def execute_notebook(
engine_name=None,
request_save_on_cell_execute=True,
prepare_only=False,
remove_tagged_cells=None,
kernel_name=None,
language=None,
progress_bar=True,
Expand Down Expand Up @@ -46,6 +48,9 @@ def execute_notebook(
How often in seconds to save in the middle of long cell executions
prepare_only : bool, optional
Flag to determine if execution should occur or not
remove_tagged_cells : str, optional
If specified, cells with the specified tag will be removed
before execution and will not be present in the output notebook.
kernel_name : str, optional
Name of kernel to execute the notebook against
language : str, optional
Expand Down Expand Up @@ -104,6 +109,8 @@ def execute_notebook(
)

nb = prepare_notebook_metadata(nb, input_path, output_path, report_mode)
if remove_tagged_cells is not None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if remove_tagged_cells is not None:
if remove_tagged_cells:

should be enough – the empty string isn't a valid tag, right..?

nb = remove_tagged_cells_from_notebook(nb, remove_tagged_cells)
# clear out any existing error markers from previous papermill runs
nb = remove_error_markers(nb)

Expand Down Expand Up @@ -165,6 +172,33 @@ def prepare_notebook_metadata(nb, input_path, output_path, report_mode=False):
return nb


def remove_tagged_cells_from_notebook(nb, tag):
    """
    Remove cells with a matching tag.

    Only cells whose ``metadata['tags']`` contains ``tag`` are removed.
    Cells that have no ``tags`` entry (or no ``metadata`` at all) cannot
    match and are therefore kept.

    Parameters
    ----------
    nb : NotebookNode
        Executable notebook object
    tag : str
        Tag used to identify cells to remove.

    Returns
    -------
    NotebookNode
        A deep copy of ``nb`` without the matching cells. The notebook
        passed in is left unmodified.
    """

    # Copy the notebook to avoid changing the input one.
    # NOTE(review): a reviewer pointed out that e.g. remove_error_markers
    # modifies the notebook in place; the copy is kept here so callers of
    # this helper see no change in behavior.
    nb = deepcopy(nb)

    def _has_tag(cell):
        # A cell without metadata or without a 'tags' entry carries no tags.
        if not hasattr(cell, 'metadata') or 'tags' not in cell.metadata:
            return False
        return tag in cell.metadata['tags']

    # Keep every cell that does not carry the tag, preserving cell order.
    nb.cells = [cell for cell in nb.cells if not _has_tag(cell)]

    return nb


ERROR_MARKER_TAG = "papermill-error-cell-tag"

ERROR_STYLE = (
Expand Down
69 changes: 69 additions & 0 deletions papermill/tests/notebooks/simple_with_tags.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "a0730871-38e0-4f31-9466-1a117965e5a1",
"metadata": {},
"source": [
"### Markdown cell"
]
},
{
"cell_type": "raw",
"id": "00000b67-913e-459a-80dc-2520b2483d7d",
"metadata": {},
"source": [
"Raw cell"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7979d49a-abb1-4815-9534-ad76e4505b56",
"metadata": {
"tags": [
"assigncell"
]
},
"outputs": [],
"source": [
"a = 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "778581dd-f385-4039-be97-4050615fa271",
"metadata": {
"tags": [
"printcell"
]
},
"outputs": [],
"source": [
"print(a)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
43 changes: 43 additions & 0 deletions papermill/tests/test_execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,49 @@ def test_prepare_only(self):
['# Parameters', r'foo = "do\\ not\\ crash"', ''],
)

def test_remove_tagged_cells(self):
"""Check how ``remove_tagged_cells`` affects the cells of the executed output notebook."""
# Fixture has four cells: markdown, raw, a code cell tagged 'assigncell'
# (a = 1) and a code cell tagged 'printcell' (print(a)).
notebook_name = 'simple_with_tags.ipynb'

# Default case, no cells are skipped
# Every scenario below writes to the same output path, overwriting the previous run.
nb_test_executed_fname = os.path.join(self.test_dir, f'output_{notebook_name}')
execute_notebook(get_notebook_path(notebook_name), nb_test_executed_fname, {})
output_nb = load_notebook_node(nb_test_executed_fname)
assert len(output_nb.cells) == 4

# If a nonexistent tag is specified, no cells are skipped
nb_test_executed_fname = os.path.join(self.test_dir, f'output_{notebook_name}')
execute_notebook(
get_notebook_path(notebook_name),
nb_test_executed_fname,
{},
remove_tagged_cells="nonexistent",
)
output_nb = load_notebook_node(nb_test_executed_fname)
assert len(output_nb.cells) == 4

# If cells with the 'printcell' tag are skipped, the output notebook is missing one cell
nb_test_executed_fname = os.path.join(self.test_dir, f'output_{notebook_name}')
execute_notebook(
get_notebook_path(notebook_name),
nb_test_executed_fname,
{},
remove_tagged_cells="printcell",
)
output_nb = load_notebook_node(nb_test_executed_fname)
assert len(output_nb.cells) == 3

# If cells with the 'assigncell' tag are skipped, the execution raises an error
# (the remaining 'printcell' cell runs print(a) without `a` ever being assigned).
nb_test_executed_fname = os.path.join(self.test_dir, f'output_{notebook_name}')
with self.assertRaises(PapermillExecutionError):
execute_notebook(
get_notebook_path(notebook_name),
nb_test_executed_fname,
{},
remove_tagged_cells="assigncell",
)
# NOTE(review): indentation is not visible here; the two lines below only run
# if they sit outside the `with` block — confirm against the real file.
# Index 4 presumably accounts for extra cells papermill inserts on failure
# (three source cells remain after removal) — TODO confirm.
output_nb = load_notebook_node(nb_test_executed_fname)
self.assertEqual(output_nb.cells[4].outputs[0]["evalue"], "name 'a' is not defined")


class TestBrokenNotebook1(unittest.TestCase):
def setUp(self):
Expand Down