Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion backend/packages/harness/deerflow/skills/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,31 @@
logger = logging.getLogger(__name__)


def _format_yaml_error(skill_file: Path, exc: yaml.YAMLError, source: str) -> str:
"""Render a developer-friendly explanation of a YAML front-matter error."""

lines = [f"Invalid YAML front-matter in {skill_file}: {exc}"]

mark = getattr(exc, "problem_mark", None)
source_lines = source.splitlines()
if mark is not None and 0 <= mark.line < len(source_lines):
offending = source_lines[mark.line]
lines.append(f" line {mark.line + 1}: {offending}")

# Targeted hint for the most common authoring mistake: an unquoted
# scalar value whose body contains ``: ``. We only surface the hint
# when we are confident it applies, to avoid misleading authors who
# hit unrelated YAML errors.
if "mapping values are not allowed" in str(exc) and ":" in offending:
key, _, value = offending.partition(":")
value = value.strip()
if value and value[0] not in {'"', "'", "|", ">", "[", "{"}:
escaped = value.replace('"', '\\"')
lines.append(f' hint: values containing ":" must be quoted, e.g. {key.strip()}: "{escaped}"')

return "\n".join(lines)


def parse_allowed_tools(raw: object, skill_file: Path) -> list[str] | None:
"""Parse the optional allowed-tools frontmatter field.

Expand Down Expand Up @@ -60,7 +85,7 @@ def parse_skill_file(skill_file: Path, category: SkillCategory, relative_path: P
try:
metadata = yaml.safe_load(front_matter_text)
except yaml.YAMLError as exc:
logger.error("Invalid YAML front-matter in %s: %s", skill_file, exc)
logger.error("%s", _format_yaml_error(skill_file, exc, front_matter_text))
return None

if not isinstance(metadata, dict):
Expand Down
60 changes: 60 additions & 0 deletions backend/tests/test_skills_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from __future__ import annotations

import logging
from pathlib import Path

from deerflow.skills.parser import parse_skill_file
Expand Down Expand Up @@ -156,3 +157,62 @@ def test_parse_nonexistent_file_returns_none(tmp_path):
"""Non-existent files are handled gracefully."""
skill = parse_skill_file(tmp_path / "ghost" / "SKILL.md", category="custom")
assert skill is None


# ---------------------------------------------------------------------------
# Friendly YAML error reporting
# ---------------------------------------------------------------------------


def test_parse_unquoted_colon_value_logs_line_and_hint(tmp_path, caplog):
"""Unquoted value with ': ' produces a log that includes the offending line and a fix hint.

Regression for issue #3333: SKILL.md authored by an LLM frequently
contains ``description: foo: bar`` which PyYAML rejects with
``mapping values are not allowed here``. The skill is correctly skipped
(the file is not silently accepted), but the error log must point the
author at the exact line and how to quote it.
"""

front_matter = "name: collect-startrun\ndescription: StarRun collector: progress, errors, tables out"
skill_file = _write_skill(tmp_path, front_matter)

with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"):
skill = parse_skill_file(skill_file, category="custom")

assert skill is None
combined = "\n".join(rec.getMessage() for rec in caplog.records)
assert "Invalid YAML front-matter" in combined
# The offending source line is echoed back to the author.
assert "StarRun collector: progress" in combined
# The hint shows the canonical fix (quoted value).
assert 'description: "StarRun collector: progress, errors, tables out"' in combined


def test_parse_unrelated_yaml_error_omits_quoting_hint(tmp_path, caplog):
"""Errors other than 'mapping values are not allowed' must NOT carry the quoting hint."""

# Unclosed flow sequence is a scanner error of a different shape; the
# quoting hint would be misleading and must be suppressed.
skill_file = _write_skill(tmp_path, "name: [unclosed\ndescription: x")

with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"):
skill = parse_skill_file(skill_file, category="custom")

assert skill is None
combined = "\n".join(rec.getMessage() for rec in caplog.records)
assert "Invalid YAML front-matter" in combined
assert "hint:" not in combined


def test_parse_valid_skill_emits_no_error_log(tmp_path, caplog):
"""Sanity check: a valid SKILL.md must not produce any error logs."""

skill_file = _write_skill(tmp_path, 'name: ok-skill\ndescription: "Foo: bar"')

with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"):
skill = parse_skill_file(skill_file, category="custom")

assert skill is not None
assert skill.description == "Foo: bar"
assert not caplog.records, "valid SKILL.md must not log errors"