diff --git a/src/reviewer/cli.py b/src/reviewer/cli.py index c5828d7..021dde5 100644 --- a/src/reviewer/cli.py +++ b/src/reviewer/cli.py @@ -187,14 +187,17 @@ def _build_paper_json( comments = [] for i, c in enumerate(result.comments): - comments.append({ + comment_dict = { "id": f"{key}_{i}", "title": c.title, "quote": c.quote, "explanation": c.explanation, "comment_type": c.comment_type, "paragraph_index": c.paragraph_index, - }) + } + if c.suggested_fix: + comment_dict["suggested_fix"] = c.suggested_fix + comments.append(comment_dict) model_short = _model_short_name(result.model) if result.model else "" label = method.replace("_", " ").title() diff --git a/src/reviewer/method_progressive.py b/src/reviewer/method_progressive.py index e0012b0..e646123 100644 --- a/src/reviewer/method_progressive.py +++ b/src/reviewer/method_progressive.py @@ -162,13 +162,20 @@ def consolidate_comments( try: items = json.loads(arr_match.group(0)) consolidated = parse_comments_from_list(items) - # Preserve paragraph_index from original comments by matching quotes + # Preserve paragraph_index and suggested_fix from original comments + # by matching quotes (safety net if consolidation drops either field) orig_by_quote = {} for c in comments: - orig_by_quote[c.quote[:200]] = c.paragraph_index + orig_by_quote[c.quote[:200]] = (c.paragraph_index, c.suggested_fix) for c in consolidated: + orig = orig_by_quote.get(c.quote[:200]) + if orig is None: + continue + orig_paragraph_index, orig_suggested_fix = orig if c.paragraph_index is None: - c.paragraph_index = orig_by_quote.get(c.quote[:200]) + c.paragraph_index = orig_paragraph_index + if not c.suggested_fix and orig_suggested_fix: + c.suggested_fix = orig_suggested_fix return consolidated except json.JSONDecodeError: pass diff --git a/src/reviewer/models.py b/src/reviewer/models.py index 671bbc0..d6052f7 100644 --- a/src/reviewer/models.py +++ b/src/reviewer/models.py @@ -11,6 +11,7 @@ class Comment: explanation: str # reviewer's explanation comment_type: str # "technical" or "logical" paragraph_index: int | None = None # 0-based index in split paragraphs + suggested_fix: str = "" # optional concrete, grounded correction (empty if none) def to_dict(self) -> dict: d = { @@ -21,6 +22,8 @@ def to_dict(self) -> dict: } if self.paragraph_index is not None: d["paragraph_index"] = self.paragraph_index + if self.suggested_fix: + d["suggested_fix"] = self.suggested_fix return d diff --git a/src/reviewer/prompts.py b/src/reviewer/prompts.py index 6ca28f3..2ca3f6c 100644 --- a/src/reviewer/prompts.py +++ b/src/reviewer/prompts.py @@ -47,6 +47,7 @@ - "quote": the exact verbatim text (preserving LaTeX) - "explanation": deep reasoning — what you initially thought, whether context resolves it, and what specifically remains problematic - "type": "technical" or "logical" +- "suggested_fix": a single concrete, grounded correction the authors could apply, such as the corrected formula, the specific missing assumption to state, or the exact comparison to add. Leave as an empty string "" if you cannot propose a specific, actionable fix. Do not write vague advice like "clarify this," "add more detail," "improve the explanation," or "run more experiments." """ # ── Deep-check prompt (used by local and progressive methods) ─────────────── @@ -106,7 +107,8 @@ "title": "concise title of the issue", "quote": "exact verbatim text from the paper (preserving LaTeX)", "explanation": "precise explanation of what is wrong and why", - "type": "technical" or "logical" + "type": "technical" or "logical", + "suggested_fix": "a single concrete, grounded correction the authors could apply, such as the corrected formula, the specific missing assumption to state, or the exact comparison to add. Leave as an empty string \\"\\" if you cannot propose a specific, actionable fix. Do not write vague advice like 'clarify this,' 'add more detail,' 'improve the explanation,' or 'run more experiments.'" }}}} ] }}}} @@ -141,7 +143,8 @@ "title": "concise title of the issue", "quote": "exact verbatim text from the paper (preserving LaTeX)", "explanation": "precise explanation of what is wrong and why", - "type": "technical" or "logical" + "type": "technical" or "logical", + "suggested_fix": "a single concrete, grounded correction the authors could apply, such as the corrected formula, the specific missing assumption to state, or the exact comparison to add. Leave as an empty string \\"\\" if you cannot propose a specific, actionable fix. Do not write vague advice like 'clarify this,' 'add more detail,' 'improve the explanation,' or 'run more experiments.'" }}}} ] }}}} @@ -197,11 +200,13 @@ CONSOLIDATION_PROMPT = """\ You are reviewing the complete list of issues found in an academic paper. \ -Your job is to consolidate this list by removing duplicates. If multiple issues flag the SAME underlying problem, keep the most detailed and well-explained one and remove the others. +Your job is to consolidate this list by removing duplicates. If multiple issues flag the SAME underlying problem, keep the most detailed and well-explained one and remove the others. ISSUES FOUND: {issues_json} +Preserve each surviving issue's "suggested_fix" field unchanged. When merging duplicates, keep the most concrete suggested_fix among them. Do NOT invent new suggested fixes — if an issue has no suggested_fix, leave it absent or empty. + Return a JSON array containing the consolidated issues (same format as input). \ Return [] if none survive filtering.""" diff --git a/src/reviewer/utils.py b/src/reviewer/utils.py index ec62cce..afb2d92 100644 --- a/src/reviewer/utils.py +++ b/src/reviewer/utils.py @@ -205,12 +205,14 @@ def parse_comments_from_list(items: list[dict]) -> list[Comment]: paragraph_index = item.get("paragraph_index", None) if paragraph_index is not None: paragraph_index = int(paragraph_index) + suggested_fix = item.get("suggested_fix", "") or "" comments.append(Comment( title=title, quote=quote, explanation=explanation, comment_type=comment_type, paragraph_index=paragraph_index, + suggested_fix=suggested_fix, )) return comments diff --git a/src/reviewer/viz/index.html b/src/reviewer/viz/index.html index acd2450..b873539 100644 --- a/src/reviewer/viz/index.html +++ b/src/reviewer/viz/index.html @@ -412,6 +412,23 @@ color: var(--text); margin-top: 8px; } +.comment-card .suggested-fix { + font-size: 13px; + line-height: 1.6; + color: var(--text); + border-left: 2px solid var(--green); + padding-left: 12px; + margin-top: 8px; +} +.comment-card .suggested-fix .sf-label { + display: block; + font-size: 11px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.3px; + color: var(--green); + margin-bottom: 4px; +} .comment-card .para-link { font-size: 11px; font-weight: 500; @@ -474,6 +491,7 @@ } .comment-card.resolved .quote, .comment-card.resolved .explanation, +.comment-card.resolved .suggested-fix, .comment-card.resolved .para-link { display: none; } @@ -1276,6 +1294,7 @@ ${quoteText ? `
${renderMarkdownWithMath(wrapBareLaTeX(quoteText))}
` : ''} ${explanationText ? `
${renderMarkdownWithMath(explanationText)}
` : ''} + ${c.suggested_fix ? `
Suggested fix${renderMarkdownWithMath(c.suggested_fix)}
` : ''} ${paraLink} `; } diff --git a/tests/test_models.py b/tests/test_models.py index 3c0d4e4..d0a76b0 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -16,6 +16,21 @@ def test_comment_to_dict_no_paragraph(): assert "paragraph_index" not in d +def test_comment_to_dict_omits_empty_suggested_fix(): + c = Comment(title="Bug", quote="x", explanation="y", comment_type="logical") + d = c.to_dict() + assert "suggested_fix" not in d + + +def test_comment_to_dict_includes_suggested_fix(): + c = Comment( + title="Bug", quote="x", explanation="y", comment_type="technical", + suggested_fix="Use x = y instead.", + ) + d = c.to_dict() + assert d["suggested_fix"] == "Use x = y instead." + + def test_review_result_num_comments(): r = ReviewResult(method="test", paper_slug="slug") assert r.num_comments == 0 diff --git a/tests/test_utils.py b/tests/test_utils.py index 38decae..0ce539c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -137,6 +137,24 @@ def test_parse_comments_from_list_infers_type(): assert comments[0].comment_type == "technical" +def test_parse_comments_from_list_preserves_suggested_fix(): + items = [{ + "title": "Wrong sign", + "quote": "x = -y", + "explanation": "Should be positive.", + "type": "technical", + "suggested_fix": "Replace with x = y.", + }] + comments = parse_comments_from_list(items) + assert comments[0].suggested_fix == "Replace with x = y." + + +def test_parse_comments_from_list_defaults_missing_suggested_fix(): + items = [{"title": "T", "quote": "q", "explanation": "e", "type": "logical"}] + comments = parse_comments_from_list(items) + assert comments[0].suggested_fix == "" + + def test_parse_review_response_json_object(): response = json.dumps({ "overall_feedback": "Good paper.",