From 07ad83f4d6dc573e43e553eb05e91c4ba865d134 Mon Sep 17 00:00:00 2001 From: Huixin615 Date: Mon, 1 Jun 2026 16:39:01 +0800 Subject: [PATCH 1/3] fix(skills): surface offending line and quoting hint on SKILL.md YAML errors When a SKILL.md front-matter fails to parse, the existing log only echoes PyYAML's raw message, leaving authors to grep the file for the offending line. This is especially painful for the very common LLM-authored mistake of an unquoted scalar containing ': ' (e.g. 'description: foo: bar'), which fails with 'mapping values are not allowed here' and silently drops the skill. Enrich the error log with: - the source line PyYAML pointed at via problem_mark - a targeted, copy-pasteable quoting hint when (and only when) the error is the well-known 'mapping values are not allowed' scanner error on an unquoted value The skill is still rejected (no semantics are guessed or rewritten); only the diagnostic is improved. Fixes #3333 --- .../harness/deerflow/skills/parser.py | 27 ++++++++- backend/tests/test_skills_parser.py | 60 +++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/backend/packages/harness/deerflow/skills/parser.py b/backend/packages/harness/deerflow/skills/parser.py index 54de8cbac4..249fa017f3 100644 --- a/backend/packages/harness/deerflow/skills/parser.py +++ b/backend/packages/harness/deerflow/skills/parser.py @@ -9,6 +9,31 @@ logger = logging.getLogger(__name__) +def _format_yaml_error(skill_file: Path, exc: yaml.YAMLError, source: str) -> str: + """Render a developer-friendly explanation of a YAML front-matter error.""" + + lines = [f"Invalid YAML front-matter in {skill_file}: {exc}"] + + mark = getattr(exc, "problem_mark", None) + source_lines = source.splitlines() + if mark is not None and 0 <= mark.line < len(source_lines): + offending = source_lines[mark.line] + lines.append(f" line {mark.line + 1}: {offending}") + + # Targeted hint for the most common authoring mistake: an unquoted + # scalar value whose body contains ``: ``. We only surface the hint + # when we are confident it applies, to avoid misleading authors who + # hit unrelated YAML errors. + if "mapping values are not allowed" in str(exc) and ":" in offending: + key, _, value = offending.partition(":") + value = value.strip() + if value and value[0] not in {'"', "'", "|", ">", "[", "{"}: + escaped = value.replace('"', '\\"') + lines.append(f' hint: values containing ":" must be quoted, e.g. {key.strip()}: "{escaped}"') + + return "\n".join(lines) + + def parse_allowed_tools(raw: object, skill_file: Path) -> list[str] | None: """Parse the optional allowed-tools frontmatter field. @@ -60,7 +85,7 @@ def parse_skill_file(skill_file: Path, category: SkillCategory, relative_path: P try: metadata = yaml.safe_load(front_matter_text) except yaml.YAMLError as exc: - logger.error("Invalid YAML front-matter in %s: %s", skill_file, exc) + logger.error("%s", _format_yaml_error(skill_file, exc, front_matter_text)) return None if not isinstance(metadata, dict): diff --git a/backend/tests/test_skills_parser.py b/backend/tests/test_skills_parser.py index 43cec34c91..711c6ec83b 100644 --- a/backend/tests/test_skills_parser.py +++ b/backend/tests/test_skills_parser.py @@ -10,6 +10,7 @@ from __future__ import annotations +import logging from pathlib import Path from deerflow.skills.parser import parse_skill_file @@ -156,3 +157,62 @@ def test_parse_nonexistent_file_returns_none(tmp_path): """Non-existent files are handled gracefully.""" skill = parse_skill_file(tmp_path / "ghost" / "SKILL.md", category="custom") assert skill is None + + +# --------------------------------------------------------------------------- +# Friendly YAML error reporting +# --------------------------------------------------------------------------- + + +def test_parse_unquoted_colon_value_logs_line_and_hint(tmp_path, caplog): + """Unquoted value with ': ' produces a log that includes the offending line and a fix hint. + + Regression for issue #3333: SKILL.md authored by an LLM frequently + contains ``description: foo: bar`` which PyYAML rejects with + ``mapping values are not allowed here``. The skill is correctly skipped + (the file is not silently accepted), but the error log must point the + author at the exact line and how to quote it. + """ + + front_matter = "name: collect-startrun\ndescription: StarRun collector: progress, errors, tables out" + skill_file = _write_skill(tmp_path, front_matter) + + with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"): + skill = parse_skill_file(skill_file, category="custom") + + assert skill is None + combined = "\n".join(rec.getMessage() for rec in caplog.records) + assert "Invalid YAML front-matter" in combined + # The offending source line is echoed back to the author. + assert "StarRun collector: progress" in combined + # The hint shows the canonical fix (quoted value). + assert 'description: "StarRun collector: progress, errors, tables out"' in combined + + +def test_parse_unrelated_yaml_error_omits_quoting_hint(tmp_path, caplog): + """Errors other than 'mapping values are not allowed' must NOT carry the quoting hint.""" + + # Unclosed flow sequence is a scanner error of a different shape; the + # quoting hint would be misleading and must be suppressed. + skill_file = _write_skill(tmp_path, "name: [unclosed\ndescription: x") + + with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"): + skill = parse_skill_file(skill_file, category="custom") + + assert skill is None + combined = "\n".join(rec.getMessage() for rec in caplog.records) + assert "Invalid YAML front-matter" in combined + assert "hint:" not in combined + + +def test_parse_valid_skill_emits_no_error_log(tmp_path, caplog): + """Sanity check: a valid SKILL.md must not produce any error logs.""" + + skill_file = _write_skill(tmp_path, 'name: ok-skill\ndescription: "Foo: bar"') + + with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"): + skill = parse_skill_file(skill_file, category="custom") + + assert skill is not None + assert skill.description == "Foo: bar" + assert not caplog.records, "valid SKILL.md must not log errors" From 5e3d5e7160c28a58b44a9848d8bede81cd711bfb Mon Sep 17 00:00:00 2001 From: Huixin615 Date: Mon, 1 Jun 2026 21:33:31 +0800 Subject: [PATCH 2/3] improve(skills): address CR feedback on SKILL.md YAML error diagnostics Per review on #3335: - Log the file line number (mark.line + 2) instead of the front-matter-internal line number, so authors land on the right row in their editor. - Use exc.problem == "mapping values are not allowed here" for a tighter match than substring-scanning str(exc). - Preserve the offending key's leading whitespace in the quoting hint so nested mappings stay nested when authors paste the fix back. - Rewrite the regression test to actually exercise the new behaviour: PyYAML's own message already echoes the offending line (and truncates it with "..."), so the old assertion passed on main. New assertions pin (a) the file-line number, (b) the full untruncated line, and (c) the copy-pasteable hint. - Add a guard test for nested-key indentation so the partition()/strip() shape cannot regress silently. Refs #3333, #3335 --- .../harness/deerflow/skills/parser.py | 12 +++- backend/tests/test_skills_parser.py | 63 ++++++++++++++++--- 2 files changed, 64 insertions(+), 11 deletions(-) diff --git a/backend/packages/harness/deerflow/skills/parser.py b/backend/packages/harness/deerflow/skills/parser.py index 249fa017f3..18c133e79c 100644 --- a/backend/packages/harness/deerflow/skills/parser.py +++ b/backend/packages/harness/deerflow/skills/parser.py @@ -18,18 +18,24 @@ def _format_yaml_error(skill_file: Path, exc: yaml.YAMLError, source: str) -> st source_lines = source.splitlines() if mark is not None and 0 <= mark.line < len(source_lines): offending = source_lines[mark.line] - lines.append(f" line {mark.line + 1}: {offending}") + + # mark.line is 0-based within the front-matter body; +1 makes it + # 1-based, +1 more accounts for the leading `---` fence that the + # front-matter regex strips before yaml.safe_load sees it. The + # result matches the line number an author sees in their editor. + file_line_number = mark.line + 2 + lines.append(f" line {file_line_number}: {offending}") # Targeted hint for the most common authoring mistake: an unquoted # scalar value whose body contains ``: ``. We only surface the hint # when we are confident it applies, to avoid misleading authors who # hit unrelated YAML errors. - if "mapping values are not allowed" in str(exc) and ":" in offending: + if getattr(exc, "problem", "") == "mapping values are not allowed here" and ":" in offending: key, _, value = offending.partition(":") value = value.strip() if value and value[0] not in {'"', "'", "|", ">", "[", "{"}: escaped = value.replace('"', '\\"') - lines.append(f' hint: values containing ":" must be quoted, e.g. {key.strip()}: "{escaped}"') + lines.append(f' hint: values containing ":" must be quoted, e.g. {key}: "{escaped}"') return "\n".join(lines) diff --git a/backend/tests/test_skills_parser.py b/backend/tests/test_skills_parser.py index 711c6ec83b..8ba69869ee 100644 --- a/backend/tests/test_skills_parser.py +++ b/backend/tests/test_skills_parser.py @@ -165,16 +165,27 @@ def test_parse_nonexistent_file_returns_none(tmp_path): def test_parse_unquoted_colon_value_logs_line_and_hint(tmp_path, caplog): - """Unquoted value with ': ' produces a log that includes the offending line and a fix hint. + """Unquoted value with ': ' produces a log that exposes the full offending line + (PyYAML truncates long lines with `...`) and a copy-pasteable quoting hint. Regression for issue #3333: SKILL.md authored by an LLM frequently contains ``description: foo: bar`` which PyYAML rejects with ``mapping values are not allowed here``. The skill is correctly skipped - (the file is not silently accepted), but the error log must point the - author at the exact line and how to quote it. + (the file is not silently accepted). Before this change the only + diagnostic was PyYAML's own message, which (a) numbers lines within + the front-matter body rather than the file and (b) truncates long + values with '...'. The new behaviour pins: + * the line number an author sees in their editor (file-line, not + front-matter-line), + * the *full* offending line (no '...' truncation), and + * a copy-pasteable `key: "value"` hint. """ - front_matter = "name: collect-startrun\ndescription: StarRun collector: progress, errors, tables out" + # The description value is intentionally long enough to trigger + # PyYAML's own '...' truncation in the rendered str(exc); our hint + # must echo the *full* value regardless. + long_value = "StarRun collector: progress, errors, tables out, plus assorted diagnostic notes" + front_matter = f"name: collect-startrun\ndescription: {long_value}" skill_file = _write_skill(tmp_path, front_matter) with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"): @@ -183,10 +194,46 @@ def test_parse_unquoted_colon_value_logs_line_and_hint(tmp_path, caplog): assert skill is None combined = "\n".join(rec.getMessage() for rec in caplog.records) assert "Invalid YAML front-matter" in combined - # The offending source line is echoed back to the author. - assert "StarRun collector: progress" in combined - # The hint shows the canonical fix (quoted value). - assert 'description: "StarRun collector: progress, errors, tables out"' in combined + + # 1. File-line, not front-matter-line. `description` is the 2nd line + # of the front-matter body, which is line 3 of the file (line 1 + # is the leading `---` fence). Before this PR the log said + # `line 2`, which sent authors to the wrong row. + assert f"line 3: description: {long_value}" in combined + + # 2. The full value is preserved -- PyYAML's own message truncates + # long values with '...', so the presence of the un-truncated tail + # proves we are reading the source line ourselves, not echoing + # PyYAML's snippet. + assert "plus assorted diagnostic notes" in combined + assert "..." not in [line for line in combined.splitlines() if line.startswith(" line ")][0] + + # 3. The copy-pasteable quoting hint is the actually-new diagnostic. + assert f'hint: values containing ":" must be quoted, e.g. description: "{long_value}"' in combined + + +def test_parse_unquoted_colon_value_preserves_nested_key_indent(tmp_path, caplog): + """Nested keys must keep their leading indentation in the quoting hint. + + Regression guard for CR feedback on PR #3335: an earlier version of + the hint called ``key.strip()``, which turned `` author: foo: bar`` + into ``author: "foo: bar"``. Pasting that back under a parent mapping + silently moved the field to the top level. The hint must preserve + the original indentation so authors can copy-paste-fix in place. + """ + + # A two-space-indented nested key triggers the same scanner error, + # but its hint must keep the indentation. + front_matter = "name: nested-skill\nmetadata:\n author: Jane: Doe" + skill_file = _write_skill(tmp_path, front_matter) + + with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"): + skill = parse_skill_file(skill_file, category="custom") + + assert skill is None + combined = "\n".join(rec.getMessage() for rec in caplog.records) + # Two leading spaces in front of `author` are preserved. + assert 'hint: values containing ":" must be quoted, e.g. author: "Jane: Doe"' in combined def test_parse_unrelated_yaml_error_omits_quoting_hint(tmp_path, caplog): From e83af2de5b401531cd8c93f177f46bdef6de0d4b Mon Sep 17 00:00:00 2001 From: Huixin615 Date: Tue, 2 Jun 2026 10:39:26 +0800 Subject: [PATCH 3/3] fix(skills): escape backslashes in YAML quoting hint The hint emitted by _format_yaml_error previously escaped only double quotes, so values containing backslashes (e.g. Windows paths like C:\Temp or regex escapes like \d) produced a suggested scalar that was either invalid YAML or silently re-interpreted by PyYAML's double-quoted escape rules when pasted back. Escape order matters: backslashes first, then double quotes. Adds two regression tests covering Windows-path and regex-style backslashes. Address Copilot CR feedback on PR #3335. --- .../harness/deerflow/skills/parser.py | 2 +- backend/tests/test_skills_parser.py | 47 +++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/backend/packages/harness/deerflow/skills/parser.py b/backend/packages/harness/deerflow/skills/parser.py index 18c133e79c..7571bd9fc3 100644 --- a/backend/packages/harness/deerflow/skills/parser.py +++ b/backend/packages/harness/deerflow/skills/parser.py @@ -34,7 +34,7 @@ def _format_yaml_error(skill_file: Path, exc: yaml.YAMLError, source: str) -> st key, _, value = offending.partition(":") value = value.strip() if value and value[0] not in {'"', "'", "|", ">", "[", "{"}: - escaped = value.replace('"', '\\"') + escaped = value.replace("\\", "\\\\").replace('"', '\\"') lines.append(f' hint: values containing ":" must be quoted, e.g. {key}: "{escaped}"') return "\n".join(lines) diff --git a/backend/tests/test_skills_parser.py b/backend/tests/test_skills_parser.py index 8ba69869ee..52172125f2 100644 --- a/backend/tests/test_skills_parser.py +++ b/backend/tests/test_skills_parser.py @@ -263,3 +263,50 @@ def test_parse_valid_skill_emits_no_error_log(tmp_path, caplog): assert skill is not None assert skill.description == "Foo: bar" assert not caplog.records, "valid SKILL.md must not log errors" + + +def test_parse_unquoted_colon_value_escapes_backslashes_in_hint(tmp_path, caplog): + """Backslashes in the offending value must be doubled in the hint. + + Regression guard for CR feedback on PR #3335: an earlier version of + the hint only escaped ``"`` but left ``\\`` untouched. Pasting the + suggested ``key: "..."`` back into the file would then be reparsed + as an escape sequence by PyYAML's double-quoted scalar rules and + either fail to load or silently change meaning (e.g. ``C:\\Temp`` + becoming ``C:emp``). The hint must double the backslash so the + suggested scalar is valid YAML when pasted back. + """ + + # The second ``: `` (after ``path``) is what trips PyYAML's + # "mapping values are not allowed here"; the ``C:\Temp`` segment + # carries the backslash that the hint must escape. + front_matter = "name: path-skill\ndescription: Windows path: C:\\Temp" + skill_file = _write_skill(tmp_path, front_matter) + + with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"): + skill = parse_skill_file(skill_file, category="custom") + + assert skill is None + combined = "\n".join(rec.getMessage() for rec in caplog.records) + assert r'description: "Windows path: C:\\Temp"' in combined + + +def test_parse_unquoted_colon_value_escapes_regex_in_hint(tmp_path, caplog): + """Regex-style ``\\d`` must also be escaped in the hint. + + Same root cause as the Windows-path guard above, but with a + regex-style escape that is even more likely to appear in + LLM-authored skills (e.g. a ``description`` that quotes a regex). + PyYAML rejects ``\\d`` in double-quoted scalars, so the hint must + emit ``\\\\d`` to remain valid. + """ + + front_matter = "name: regex-skill\ndescription: match: \\d+ digits" + skill_file = _write_skill(tmp_path, front_matter) + + with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"): + skill = parse_skill_file(skill_file, category="custom") + + assert skill is None + combined = "\n".join(rec.getMessage() for rec in caplog.records) + assert r'description: "match: \\d+ digits"' in combined