Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions papermill/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .exceptions import PapermillException, PapermillExecutionError # noqa: F401
from .execute import execute_notebook # noqa: F401
from .inspection import inspect_notebook # noqa: F401
from .profile import profile_notebook, build_profile, build_sections # noqa: F401
from .version import version as __version__ # noqa: F401
59 changes: 59 additions & 0 deletions papermill/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from .execute import execute_notebook
from .inspection import display_notebook_help
from .iorw import NoDatesSafeLoader, read_yaml_file
from .profile import profile_notebook
from .version import version as papermill_version

click.disable_unicode_literals_warning = True
Expand Down Expand Up @@ -95,6 +96,14 @@ def print_papermill_version(ctx, param, value):
)
@click.option('--cwd', default=None, help='Working directory to run notebook in.')
@click.option('--progress-bar/--no-progress-bar', default=None, help="Flag for turning on the progress bar.")
@click.option(
'--live-tree/--no-live-tree',
default=False,
help=(
"Show a live Rich tree of notebook sections and per-cell timing during execution, "
"replacing the tqdm progress bar. Requires: pip install 'papermill[rich]'."
),
)
@click.option(
'--log-output/--no-log-output',
default=False,
Expand Down Expand Up @@ -158,6 +167,7 @@ def papermill(
language,
cwd,
progress_bar,
live_tree,
log_output,
log_level,
start_timeout,
Expand Down Expand Up @@ -250,13 +260,62 @@ def papermill(
report_mode=report_mode,
cwd=cwd,
execution_timeout=execution_timeout,
live_tree=live_tree,
)
except nbclient.exceptions.DeadKernelError:
# Exiting with a special exit code for dead kernels
traceback.print_exc()
sys.exit(138)


@click.command('profile', context_settings=dict(help_option_names=['-h', '--help']))
@click.argument('notebook_path')
@click.option(
    '--output',
    '-o',
    default=None,
    help='Path to write profile JSON (default: <notebook>.profile.json).',
)
def papermill_profile(notebook_path, output):
    """Profile an already-executed notebook and print a timing summary.

    NOTEBOOK_PATH must be an executed .ipynb file that contains papermill
    timing metadata (i.e. it was run via ``papermill`` or
    ``execute_notebook``).

    Writes a JSON report with per-section and per-cell durations, output
    types, bottleneck identification, and the five slowest cells.
    """
    # Local import keeps pathlib out of the CLI module's import-time cost.
    from pathlib import Path

    # When no --output is given, swap the notebook's final suffix for
    # `.profile.json` (e.g. `run.ipynb` -> `run.profile.json`).
    out_path = output or str(Path(notebook_path).with_suffix('.profile.json'))

    # The output path is handed to profile_notebook; presumably it writes the
    # JSON report there and returns the same data as a dict -- confirm against
    # papermill/profile.py.
    profile = profile_notebook(notebook_path, output=out_path)

    # Only attach the seconds unit when a total is actually available, so a
    # missing or null total prints as a bare placeholder instead of "—s"/"Nones".
    total = profile.get('total_duration_s')
    total_text = '—' if total is None else f"{total}s"

    click.echo(f"\nNotebook : {profile['notebook']}")
    click.echo(f"Total : {total_text}")
    click.echo(f"Cells : {profile['n_code_cells']} code | Errors: {profile['n_errors']}")

    if profile.get('bottleneck'):
        b = profile['bottleneck']
        click.echo(f"Bottleneck: [{b['cell_index']}] in «{b['section']}» — {b['duration_s']}s ({b['pct_of_total']}%)")

    click.echo("\nSections:")
    for s in profile['sections']:
        # Indent each section label by its heading level to show nesting.
        indent = " " * s['level']
        click.echo(f" {indent}{s['label']:<40} {s['duration_s']:.3f}s")

    if profile.get('slowest_cells'):
        click.echo("\nSlowest cells:")
        for c in profile['slowest_cells']:
            # Source preview is capped at 50 chars and padded to a fixed column;
            # cells without outputs show a dash in the output-types column.
            click.echo(
                f" [{c['index']}] {c['source_preview'][:50]:<52} "
                f"{c['duration_s']}s {','.join(c['output_types']) or '—'}"
            )

    click.echo(f"\nProfile written to: {out_path}")


def _resolve_type(value):
if value == "True":
return True
Expand Down
26 changes: 20 additions & 6 deletions papermill/engines.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,9 @@ class NotebookExecutionManager:
COMPLETED = "completed"
FAILED = "failed"

def __init__(self, nb, output_path=None, log_output=False, progress_bar=True, autosave_cell_every=30):
def __init__(
self, nb, output_path=None, log_output=False, progress_bar=True, autosave_cell_every=30, live_display=None
):
self.nb = nb
self.output_path = output_path
self.log_output = log_output
Expand All @@ -105,6 +107,7 @@ def __init__(self, nb, output_path=None, log_output=False, progress_bar=True, au
self.autosave_cell_every = autosave_cell_every
self.max_autosave_pct = 25
self.last_save_time = self.now() # Not exactly true, but simplifies testing logic
self.live_display = live_display # optional LiveTreeDisplay — replaces tqdm when set
self.pbar = None
if progress_bar:
# lazy import due to implicit slow ipython import
Expand Down Expand Up @@ -227,10 +230,14 @@ def cell_start(self, cell, cell_index=None, **kwargs):
cell.metadata.papermill["status"] = self.RUNNING
cell.metadata.papermill['exception'] = False

# inject the optional description of the current cell directly into the tqdm progress bar
cell_description = self.get_cell_description(cell)
if cell_description is not None and hasattr(self, 'pbar') and self.pbar:
self.pbar.set_description(f"Executing {cell_description}")
if self.live_display is not None:
if cell_index is not None:
self.live_display.on_cell_start(cell_index)
else:
# inject the optional description of the current cell directly into the tqdm progress bar
cell_description = self.get_cell_description(cell)
if cell_description is not None and hasattr(self, 'pbar') and self.pbar:
self.pbar.set_description(f"Executing {cell_description}")

self.save()

Expand All @@ -246,6 +253,8 @@ def cell_exception(self, cell, cell_index=None, **kwargs):
cell.metadata.papermill['exception'] = True
cell.metadata.papermill['status'] = self.FAILED
self.nb.metadata.papermill['exception'] = True
if self.live_display is not None and cell_index is not None:
self.live_display.on_cell_exception(cell_index)

@catch_nb_assignment
def cell_complete(self, cell, cell_index=None, **kwargs):
Expand All @@ -272,7 +281,10 @@ def cell_complete(self, cell, cell_index=None, **kwargs):
cell.metadata.papermill['status'] = self.COMPLETED

self.save()
if self.pbar:
if self.live_display is not None:
if cell_index is not None:
self.live_display.on_cell_complete(self.nb.cells[cell_index], cell_index)
elif self.pbar:
self.pbar.update(1)

@catch_nb_assignment
Expand Down Expand Up @@ -348,6 +360,7 @@ def execute_notebook(
progress_bar=True,
log_output=False,
autosave_cell_every=30,
live_display=None,
**kwargs,
):
"""
Expand All @@ -364,6 +377,7 @@ def execute_notebook(
progress_bar=progress_bar,
log_output=log_output,
autosave_cell_every=autosave_cell_every,
live_display=live_display,
)

nb_man.notebook_start()
Expand Down
55 changes: 42 additions & 13 deletions papermill/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def execute_notebook(
start_timeout=60,
report_mode=False,
cwd=None,
live_tree=False,
**engine_kwargs,
):
"""Executes a single notebook locally.
Expand Down Expand Up @@ -61,6 +62,9 @@ def execute_notebook(
Flag for whether or not to hide input.
cwd : str or Path, optional
Working directory to use when executing the notebook
live_tree : bool, optional
Show a Rich live tree of sections and per-cell timing instead of the
default tqdm progress bar. Requires ``pip install 'papermill[rich]'``.
**kwargs
Arbitrary keyword arguments to pass to the notebook engine

Expand Down Expand Up @@ -111,21 +115,46 @@ def execute_notebook(
if not prepare_only:
# Drop down to the engine to fetch the kernel name from the notebook document
kernel_name = papermill_engines.nb_kernel_name(engine_name=engine_name, nb=nb, name=kernel_name)

# Resolve live_tree: if requested, disable tqdm and attach the Rich display
_live_display = None
if live_tree:
from .live_tree import LiveTreeDisplay, is_available as _rich_ok

if _rich_ok():
import os

nb_name = os.path.basename(input_path) if isinstance(input_path, str) else "notebook.ipynb"
_live_display = LiveTreeDisplay(nb, nb_name)
progress_bar = False # Rich tree replaces tqdm
else:
logger.warning(
"live_tree=True requested but 'rich' is not installed. "
"Falling back to tqdm. Install with: pip install 'papermill[rich]'"
)

# Execute the Notebook in `cwd` if it is set
with chdir(cwd):
nb = papermill_engines.execute_notebook_with_engine(
engine_name,
nb,
input_path=input_path,
output_path=output_path if request_save_on_cell_execute else None,
kernel_name=kernel_name,
progress_bar=progress_bar,
log_output=log_output,
start_timeout=start_timeout,
stdout_file=stdout_file,
stderr_file=stderr_file,
**engine_kwargs,
)
if _live_display is not None:
_live_display.start()
try:
nb = papermill_engines.execute_notebook_with_engine(
engine_name,
nb,
input_path=input_path,
output_path=output_path if request_save_on_cell_execute else None,
kernel_name=kernel_name,
progress_bar=progress_bar,
log_output=log_output,
start_timeout=start_timeout,
stdout_file=stdout_file,
stderr_file=stderr_file,
live_display=_live_display,
**engine_kwargs,
)
finally:
if _live_display is not None:
_live_display.stop()

# Check for errors first (it saves on error before raising)
raise_for_execution_errors(nb, output_path)
Expand Down
Loading