diff --git a/README.md b/README.md index f56cd59..f20c17f 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,8 @@ PR-Review 是一个具备上下文感知能力的 AI 代码审查系统,帮助 ### 已实现 - **GitHub PR 数据获取**:完整获取 PR 元数据、变更文件、提交记录、评论 - **Diff 解析**:统一 diff 格式解析,提取 hunk、行号、变更类型 -- **Diff 语义分析**:函数/类/interface/import/export/async 变更检测(regex,可扩展 Tree-sitter) +- **Diff 语义分析**:函数/类/interface/import/export/async 变更检测 +- **工程风险分析**:auth、数据库、缓存、async、错误处理、并发等规则检测(含 confidence) - **上下文构建**: - 复用 diff-parser 语义层,映射为审查上下文 - 分析 import 依赖关系 @@ -104,6 +105,19 @@ console.log(semantic.imports); // { added: [], removed: [] } console.log(semantic.asyncChanges); ``` +### 工程风险分析 + +```typescript +import { parseUnifiedDiff, analyzeSemantics, analyzeRisk } from '@pr-review/diff-parser'; + +const parsed = parseUnifiedDiff('src/auth.ts', patch); +const semantic = analyzeSemantics(parsed, { language: 'typescript' }); +const risk = analyzeRisk({ filename: 'src/auth.ts', language: 'typescript', semantic, parsed }); + +console.log(risk.riskHints); // 高置信度风险提示 +console.log(risk.findings); // 含 confidence 与 evidence +``` + ### 构建审查上下文 ```typescript diff --git a/docs/architecture.md b/docs/architecture.md index 06cdbbb..61a0fdc 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -52,6 +52,11 @@ packages/diff-parser/src/ extractors/ # functions, classes, imports, exports, async patterns/ # per-language regex rules interfaces/ # SemanticExtractor (Tree-sitter hook) + risk/ + analyze-risk.ts # analyzeRisk, parseAnalyzeAndAssessRisk + detectors/ # auth, db, cache, async, error, concurrency + engine/run-detectors.ts # composable rule pipeline + confidence filter + interfaces/ # RiskDetector ``` ### Public API @@ -59,10 +64,12 @@ packages/diff-parser/src/ ```ts const parsed = parseUnifiedDiff(filename, patch); const semantic = analyzeSemantics(parsed, { language: "typescript" }); -// semantic.functions, semantic.classes, semantic.imports, semantic.asyncChanges, ... +const risk = analyzeRisk({ filename, language, semantic, parsed }); +// risk.riskHints — high-confidence engineering risk messages +// risk.findings — structured { id, message, confidence, evidence } ``` -MVP uses `RegexSemanticExtractor`. No AST or LSP. +MVP uses `RegexSemanticExtractor` and rule-based `RiskDetector`s. No AST or LSP. --- @@ -79,7 +86,7 @@ PullRequestData → extract-imports (mapSemanticToImportEdges) → build-dependency-graph → group-changes - → build-summaries (deterministic) + → build-summaries (deterministic + analyzeRisk riskHints) → compress-context (token budget) → ReviewContext ``` @@ -107,7 +114,8 @@ packages/context-builder/src/ | Interface | MVP | Future | |-----------|-----|--------| -| `SymbolExtractor` | `HeuristicSymbolExtractor` | Tree-sitter / TS compiler | +| `SemanticExtractor` (diff-parser) | `RegexSemanticExtractor` | Tree-sitter | +| `RiskDetector` (diff-parser) | 6 rule-based detectors | Custom rules / ML scoring | | `AstAnalyzer` | `NoopAstAnalyzer` | Full AST analysis, call graphs | | `FileContentResolver` | Not wired | Fetch blobs from GitHub | diff --git a/packages/context-builder/src/pipeline/stages/build-summaries.ts b/packages/context-builder/src/pipeline/stages/build-summaries.ts index 04c9264..c1f5f75 100644 --- a/packages/context-builder/src/pipeline/stages/build-summaries.ts +++ b/packages/context-builder/src/pipeline/stages/build-summaries.ts @@ -1,19 +1,8 @@ import type { ChangeProfile, SemanticSummary } from "@pr-review/shared"; +import { analyzeRisk } from "@pr-review/diff-parser"; import type { PipelineState } from "../types.js"; -const CONCURRENCY_HINTS = [ - "mutex", - "lock", - "atomic", - "concurrent", - "parallel", - "async", - "thread", - "race", - "semaphore", -]; - function directoryArea(filename: string): string { const parts = filename.split("/"); @@ -100,11 +89,14 @@ function buildRiskHints(state: PipelineState): string[] { } for (const entry of state.parsedFiles) { - const lower = entry.changedFile.filename.toLowerCase(); + const risk = analyzeRisk({ + filename: entry.changedFile.filename, + language: entry.language, + semantic: entry.semantic, + parsed: entry.parsedDiff, + }); - if (CONCURRENCY_HINTS.some((hint) => lower.includes(hint))) { - hints.push(`Concurrency-related path: ${entry.changedFile.filename}`); - } + hints.push(...risk.riskHints); const largeHunk = entry.parsedDiff.hunks.some( (h) => h.lines.length > 80, @@ -113,12 +105,6 @@ function buildRiskHints(state: PipelineState): string[] { if (largeHunk) { hints.push(`Large hunk in ${entry.changedFile.filename}`); } - - if (entry.semantic.asyncChanges) { - hints.push( - `Async/sync signature change detected in ${entry.changedFile.filename}`, - ); - } } if (state.skippedFiles.length > 0) { diff --git a/packages/diff-parser/src/index.ts b/packages/diff-parser/src/index.ts index fd11767..5ea23dd 100644 --- a/packages/diff-parser/src/index.ts +++ b/packages/diff-parser/src/index.ts @@ -1,4 +1,13 @@ export { getChangedLines, parseUnifiedDiff } from "./parse-unified-diff.js"; +export { + analyzeRisk, + parseAnalyzeAndAssessRisk, +} from "./risk/analyze-risk.js"; +export type { + AnalyzeRiskOptions, + ParseAnalyzeAndAssessRiskOptions, + ParsedFileDiffWithSemanticAndRisk, +} from "./risk/analyze-risk.js"; export { analyzeSemantics, parseAndAnalyze, @@ -6,7 +15,9 @@ export { export type { AnalyzeSemanticsOptions, ParsedFileDiffWithSemantic } from "./semantic/analyze-semantics.js"; export { detectLanguage } from "./semantic/detect-language.js"; export { RegexSemanticExtractor } from "./semantic/regex-semantic-extractor.js"; +export { createDefaultDetectors, DEFAULT_MIN_CONFIDENCE } from "./risk/engine/run-detectors.js"; export type { SemanticExtractor } from "./semantic/interfaces/semantic-extractor.js"; +export type { RiskDetector } from "./risk/interfaces/risk-detector.js"; export type { DiffHunk, DiffLine, @@ -21,4 +32,11 @@ export type { SemanticFunction, SemanticInterface, } from "./semantic/types.js"; +export type { + RiskAnalysisInput, + RiskAnalysisResult, + RiskCategory, + RiskFinding, +} from "./risk/types.js"; +export { EMPTY_RISK_ANALYSIS } from "./risk/types.js"; export { EMPTY_SEMANTIC_ANALYSIS } from "./semantic/types.js"; diff --git a/packages/diff-parser/src/risk/analyze-risk.test.ts b/packages/diff-parser/src/risk/analyze-risk.test.ts new file mode 100644 index 0000000..1022dac --- /dev/null +++ b/packages/diff-parser/src/risk/analyze-risk.test.ts @@ -0,0 +1,120 @@ +import { describe, expect, it } from "vitest"; + +import { parseUnifiedDiff } from "../parse-unified-diff.js"; +import { analyzeSemantics } from "../semantic/analyze-semantics.js"; +import { analyzeRisk } from "./analyze-risk.js"; + +function assess(filename: string, patch: string, language = "typescript") { + const parsed = parseUnifiedDiff(filename, patch); + const semantic = analyzeSemantics(parsed, { language }); + + return analyzeRisk({ filename, language, semantic, parsed }); +} + +describe("analyzeRisk", () => { + it("detects authLogicChanged for auth path and login symbol", () => { + const patch = `@@ -0,0 +1,3 @@ ++export function login() { ++ return verifyToken(); ++} +`; + const result = assess("src/auth/service.ts", patch); + + const finding = result.findings.find((f) => f.id === "authLogicChanged"); + expect(finding).toBeDefined(); + expect(finding?.confidence).toBeGreaterThanOrEqual(0.7); + expect(result.riskHints.length).toBeGreaterThan(0); + }); + + it("detects databaseOperationModified for prisma query changes", () => { + const patch = `@@ -1,1 +1,1 @@ +-await prisma.user.findMany() ++await prisma.user.findFirst() +`; + const result = assess("src/db/user-repo.ts", patch); + + expect(result.findings.some((f) => f.id === "databaseOperationModified")).toBe( + true, + ); + }); + + it("detects cacheLayerTouched for redis import", () => { + const patch = `@@ -0,0 +1,1 @@ ++import Redis from 'ioredis'; +`; + const result = assess("src/cache/client.ts", patch); + + expect(result.findings.some((f) => f.id === "cacheLayerTouched")).toBe(true); + }); + + it("detects asyncIntroduced for sync to async change", () => { + const patch = `@@ -1,2 +1,2 @@ +-function fetchData() { ++async function fetchData() { +`; + const result = assess("src/api.ts", patch); + + expect(result.findings.some((f) => f.id === "asyncIntroduced")).toBe(true); + expect(result.riskHints.some((hint) => hint.includes("Async"))).toBe(true); + }); + + it("detects errorHandlingRemoved when try/catch is deleted", () => { + const patch = `@@ -1,4 +1,1 @@ +-try { +- doWork(); +-} catch (error) { +-} ++doWork(); +`; + const result = assess("src/worker.ts", patch); + + expect(result.findings.some((f) => f.id === "errorHandlingRemoved")).toBe(true); + }); + + it("detects concurrencyRisk when lock keyword and async change coexist", () => { + const patch = `@@ -1,3 +1,3 @@ +-function acquireLock() { ++async function acquireLock() { + mutex.lock(); +`; + const result = assess("src/concurrent/lock.ts", patch); + + expect(result.findings.some((f) => f.id === "concurrencyRisk")).toBe(true); + }); + + it("returns empty risk for null patch", () => { + const parsed = parseUnifiedDiff("empty.ts", null); + const semantic = analyzeSemantics(parsed); + + const result = analyzeRisk({ + filename: "empty.ts", + language: "typescript", + semantic, + parsed, + }); + + expect(result.riskHints).toHaveLength(0); + expect(result.findings).toHaveLength(0); + }); + + it("filters hints by minConfidence", () => { + const patch = `@@ -0,0 +1,1 @@ ++// minor comment in migrations folder +`; + const parsed = parseUnifiedDiff("src/db/migrations/readme.md", patch); + const semantic = analyzeSemantics(parsed, { language: "typescript" }); + + const lowThreshold = analyzeRisk( + { filename: parsed.filename, language: "typescript", semantic, parsed }, + { minConfidence: 0.6 }, + ); + const highThreshold = analyzeRisk( + { filename: parsed.filename, language: "typescript", semantic, parsed }, + { minConfidence: 0.9 }, + ); + + expect(lowThreshold.riskHints.length).toBeGreaterThanOrEqual( + highThreshold.riskHints.length, + ); + }); +}); diff --git a/packages/diff-parser/src/risk/analyze-risk.ts b/packages/diff-parser/src/risk/analyze-risk.ts new file mode 100644 index 0000000..9a06f8e --- /dev/null +++ b/packages/diff-parser/src/risk/analyze-risk.ts @@ -0,0 +1,65 @@ +import { parseUnifiedDiff } from "../parse-unified-diff.js"; +import type { ParsedFileDiff } from "../types.js"; +import { analyzeSemantics } from "../semantic/analyze-semantics.js"; +import type { AnalyzeSemanticsOptions } from "../semantic/analyze-semantics.js"; +import { detectLanguage } from "../semantic/detect-language.js"; +import { + DEFAULT_MIN_CONFIDENCE, + runRiskDetectors, +} from "./engine/run-detectors.js"; +import type { RiskDetector } from "./interfaces/risk-detector.js"; +import { + EMPTY_RISK_ANALYSIS, + type RiskAnalysisInput, + type RiskAnalysisResult, +} from "./types.js"; + +export interface AnalyzeRiskOptions { + minConfidence?: number; + detectors?: RiskDetector[]; +} + +export function analyzeRisk( + input: RiskAnalysisInput, + options: AnalyzeRiskOptions = {}, +): RiskAnalysisResult { + if (input.parsed.isEmpty) { + return { ...EMPTY_RISK_ANALYSIS }; + } + + return runRiskDetectors(input, { + minConfidence: options.minConfidence ?? DEFAULT_MIN_CONFIDENCE, + detectors: options.detectors, + }); +} + +export interface ParseAnalyzeAndAssessRiskOptions extends AnalyzeSemanticsOptions { + minConfidence?: number; + detectors?: RiskDetector[]; +} + +export interface ParsedFileDiffWithSemanticAndRisk extends ParsedFileDiff { + semantic: ReturnType; + risk: RiskAnalysisResult; +} + +/** Parses patch, runs semantic + risk analysis in one step. */ +export function parseAnalyzeAndAssessRisk( + filename: string, + patch: string | null, + options: ParseAnalyzeAndAssessRiskOptions = {}, +): ParsedFileDiffWithSemanticAndRisk { + const language = options.language ?? detectLanguage(filename); + const parsed = parseUnifiedDiff(filename, patch); + const semantic = analyzeSemantics(parsed, { ...options, language }); + + const risk = analyzeRisk( + { filename, language, semantic, parsed }, + { + minConfidence: options.minConfidence, + detectors: options.detectors, + }, + ); + + return { ...parsed, semantic, risk }; +} diff --git a/packages/diff-parser/src/risk/detectors/async-detector.ts b/packages/diff-parser/src/risk/detectors/async-detector.ts new file mode 100644 index 0000000..e687bd7 --- /dev/null +++ b/packages/diff-parser/src/risk/detectors/async-detector.ts @@ -0,0 +1,39 @@ +import type { RiskDetector } from "../interfaces/risk-detector.js"; +import type { RiskAnalysisInput, RiskFinding } from "../types.js"; +import { truncateEvidence } from "../utils/confidence.js"; +import { collectChangedLines } from "../utils/line-scan.js"; + +export class AsyncDetector implements RiskDetector { + readonly id = "asyncIntroduced" as const; + + detect(input: RiskAnalysisInput): RiskFinding | null { + const evidence: string[] = []; + + if (input.semantic.asyncChanges) { + evidence.push("semantic.asyncChanges=true"); + } + + const asyncAdds = collectChangedLines(input.parsed).filter( + (line) => + line.side === "add" && + (/\basync\s+function\b/.test(line.content) || + /\basync\s+def\b/.test(line.content) || + /\basync\s+\w+\s*\(/.test(line.content)), + ); + + if (asyncAdds.length > 0) { + evidence.push(`async additions: ${asyncAdds.length}`); + } + + if (evidence.length === 0) { + return null; + } + + return { + id: this.id, + message: `Async behavior introduced in ${input.filename}`, + confidence: 0.9, + evidence: evidence.map(truncateEvidence), + }; + } +} diff --git a/packages/diff-parser/src/risk/detectors/auth-logic-detector.ts b/packages/diff-parser/src/risk/detectors/auth-logic-detector.ts new file mode 100644 index 0000000..59ad09e --- /dev/null +++ b/packages/diff-parser/src/risk/detectors/auth-logic-detector.ts @@ -0,0 +1,63 @@ +import type { RiskDetector } from "../interfaces/risk-detector.js"; +import type { RiskAnalysisInput, RiskFinding } from "../types.js"; +import { maxConfidence, truncateEvidence } from "../utils/confidence.js"; +import { + AUTH_IMPORT_KEYWORDS, + AUTH_PATH_KEYWORDS, + AUTH_SYMBOL_KEYWORDS, +} from "../utils/keywords.js"; +import { + importsText, + pathContainsAny, + symbolNames, + textContainsAny, +} from "../utils/line-scan.js"; + +export class AuthLogicDetector implements RiskDetector { + readonly id = "authLogicChanged" as const; + + detect(input: RiskAnalysisInput): RiskFinding | null { + const evidence: string[] = []; + const scores: number[] = []; + + const pathHit = pathContainsAny(input.filename, AUTH_PATH_KEYWORDS); + if (pathHit) { + scores.push(0.65); + evidence.push(`path contains "${pathHit}"`); + } + + const symbols = symbolNames(input.semantic).filter((name) => + AUTH_SYMBOL_KEYWORDS.some((keyword) => + name.toLowerCase().includes(keyword), + ), + ); + + if (symbols.length > 0) { + scores.push(0.75); + evidence.push(`symbols: ${symbols.join(", ")}`); + } + + const importHit = textContainsAny( + importsText(input.semantic), + AUTH_IMPORT_KEYWORDS, + ); + + if (importHit) { + scores.push(0.85); + evidence.push(`import: ${importHit}`); + } + + if (scores.length === 0) { + return null; + } + + const matchedSymbols = symbols.length > 0 ? symbols.join(", ") : "n/a"; + + return { + id: this.id, + message: `Auth-related logic changed in ${input.filename} (symbols: ${matchedSymbols})`, + confidence: maxConfidence(scores), + evidence: evidence.map(truncateEvidence), + }; + } +} diff --git a/packages/diff-parser/src/risk/detectors/cache-detector.ts b/packages/diff-parser/src/risk/detectors/cache-detector.ts new file mode 100644 index 0000000..153d249 --- /dev/null +++ b/packages/diff-parser/src/risk/detectors/cache-detector.ts @@ -0,0 +1,48 @@ +import type { RiskDetector } from "../interfaces/risk-detector.js"; +import type { RiskAnalysisInput, RiskFinding } from "../types.js"; +import { maxConfidence, truncateEvidence } from "../utils/confidence.js"; +import { CACHE_KEYWORDS } from "../utils/keywords.js"; +import { + importsText, + joinChangedText, + pathContainsAny, + textContainsAny, +} from "../utils/line-scan.js"; + +export class CacheDetector implements RiskDetector { + readonly id = "cacheLayerTouched" as const; + + detect(input: RiskAnalysisInput): RiskFinding | null { + const evidence: string[] = []; + const scores: number[] = []; + + const pathHit = pathContainsAny(input.filename, CACHE_KEYWORDS); + if (pathHit) { + scores.push(0.75); + evidence.push(`path contains "${pathHit}"`); + } + + const changedHit = textContainsAny(joinChangedText(input.parsed), CACHE_KEYWORDS); + if (changedHit) { + scores.push(0.75); + evidence.push(`changed line keyword: ${changedHit}`); + } + + const importHit = textContainsAny(importsText(input.semantic), CACHE_KEYWORDS); + if (importHit) { + scores.push(0.75); + evidence.push(`import keyword: ${importHit}`); + } + + if (scores.length === 0) { + return null; + } + + return { + id: this.id, + message: `Cache layer touched in ${input.filename}`, + confidence: maxConfidence(scores), + evidence: evidence.map(truncateEvidence), + }; + } +} diff --git a/packages/diff-parser/src/risk/detectors/concurrency-detector.ts b/packages/diff-parser/src/risk/detectors/concurrency-detector.ts new file mode 100644 index 0000000..9fab924 --- /dev/null +++ b/packages/diff-parser/src/risk/detectors/concurrency-detector.ts @@ -0,0 +1,50 @@ +import type { RiskDetector } from "../interfaces/risk-detector.js"; +import type { RiskAnalysisInput, RiskFinding } from "../types.js"; +import { maxConfidence, truncateEvidence } from "../utils/confidence.js"; +import { CONCURRENCY_KEYWORDS } from "../utils/keywords.js"; +import { + joinChangedText, + pathContainsAny, + textContainsAny, +} from "../utils/line-scan.js"; + +export class ConcurrencyDetector implements RiskDetector { + readonly id = "concurrencyRisk" as const; + + detect(input: RiskAnalysisInput): RiskFinding | null { + const evidence: string[] = []; + const scores: number[] = []; + + const pathHit = pathContainsAny(input.filename, CONCURRENCY_KEYWORDS); + if (pathHit) { + scores.push(0.75); + evidence.push(`path contains "${pathHit}"`); + } + + const changedHit = textContainsAny( + joinChangedText(input.parsed), + CONCURRENCY_KEYWORDS, + ); + + if (changedHit) { + scores.push(0.8); + evidence.push(`changed line keyword: ${changedHit}`); + } + + if (input.semantic.asyncChanges && (pathHit || changedHit)) { + scores.push(0.85); + evidence.push("async change combined with concurrency keyword"); + } + + if (scores.length === 0) { + return null; + } + + return { + id: this.id, + message: `Concurrency-related risk in ${input.filename}`, + confidence: maxConfidence(scores), + evidence: evidence.map(truncateEvidence), + }; + } +} diff --git a/packages/diff-parser/src/risk/detectors/database-detector.ts b/packages/diff-parser/src/risk/detectors/database-detector.ts new file mode 100644 index 0000000..c90f127 --- /dev/null +++ b/packages/diff-parser/src/risk/detectors/database-detector.ts @@ -0,0 +1,52 @@ +import type { RiskDetector } from "../interfaces/risk-detector.js"; +import type { RiskAnalysisInput, RiskFinding } from "../types.js"; +import { maxConfidence, truncateEvidence } from "../utils/confidence.js"; +import { DATABASE_KEYWORDS, DATABASE_PATH_KEYWORDS } from "../utils/keywords.js"; +import { + importsText, + joinChangedText, + pathContainsAny, + textContainsAny, +} from "../utils/line-scan.js"; + +export class DatabaseDetector implements RiskDetector { + readonly id = "databaseOperationModified" as const; + + detect(input: RiskAnalysisInput): RiskFinding | null { + const evidence: string[] = []; + const scores: number[] = []; + + const pathHit = pathContainsAny(input.filename, DATABASE_PATH_KEYWORDS); + if (pathHit) { + scores.push(0.7); + evidence.push(`path contains "${pathHit}"`); + } + + const changedHit = textContainsAny( + joinChangedText(input.parsed), + DATABASE_KEYWORDS, + ); + + if (changedHit) { + scores.push(0.8); + evidence.push(`changed line keyword: ${changedHit}`); + } + + const importHit = textContainsAny(importsText(input.semantic), DATABASE_KEYWORDS); + if (importHit) { + scores.push(0.8); + evidence.push(`import keyword: ${importHit}`); + } + + if (scores.length === 0) { + return null; + } + + return { + id: this.id, + message: `Database-related change detected in ${input.filename}`, + confidence: maxConfidence(scores), + evidence: evidence.map(truncateEvidence), + }; + } +} diff --git a/packages/diff-parser/src/risk/detectors/error-handling-detector.ts b/packages/diff-parser/src/risk/detectors/error-handling-detector.ts new file mode 100644 index 0000000..5a09824 --- /dev/null +++ b/packages/diff-parser/src/risk/detectors/error-handling-detector.ts @@ -0,0 +1,48 @@ +import type { DiffLine } from "../../types.js"; +import type { RiskDetector } from "../interfaces/risk-detector.js"; +import type { RiskAnalysisInput, RiskFinding } from "../types.js"; +import { truncateEvidence } from "../utils/confidence.js"; +import { ERROR_HANDLING_KEYWORDS } from "../utils/keywords.js"; +import { textContainsAny } from "../utils/line-scan.js"; + +export class ErrorHandlingDetector implements RiskDetector { + readonly id = "errorHandlingRemoved" as const; + + detect(input: RiskAnalysisInput): RiskFinding | null { + const evidence: string[] = []; + + for (const hunk of input.parsed.hunks) { + const deleted = hunk.lines.filter( + (line: DiffLine) => line.type === "delete", + ); + const added = hunk.lines.filter((line: DiffLine) => line.type === "add"); + + for (const line of deleted) { + const keyword = textContainsAny(line.content, ERROR_HANDLING_KEYWORDS); + + if (!keyword) { + continue; + } + + const reAdded = added.some((addLine: DiffLine) => + textContainsAny(addLine.content, [keyword]), + ); + + if (!reAdded) { + evidence.push(`removed ${keyword}: ${line.content.trim()}`); + } + } + } + + if (evidence.length === 0) { + return null; + } + + return { + id: this.id, + message: `Error handling removed in ${input.filename}`, + confidence: 0.7, + evidence: evidence.map(truncateEvidence), + }; + } +} diff --git a/packages/diff-parser/src/risk/engine/run-detectors.ts b/packages/diff-parser/src/risk/engine/run-detectors.ts new file mode 100644 index 0000000..557c7df --- /dev/null +++ b/packages/diff-parser/src/risk/engine/run-detectors.ts @@ -0,0 +1,70 @@ +import { AsyncDetector } from "../detectors/async-detector.js"; +import { AuthLogicDetector } from "../detectors/auth-logic-detector.js"; +import { CacheDetector } from "../detectors/cache-detector.js"; +import { ConcurrencyDetector } from "../detectors/concurrency-detector.js"; +import { DatabaseDetector } from "../detectors/database-detector.js"; +import { ErrorHandlingDetector } from "../detectors/error-handling-detector.js"; +import type { RiskDetector } from "../interfaces/risk-detector.js"; +import type { + RiskAnalysisInput, + RiskAnalysisResult, + RiskFinding, +} from "../types.js"; +import { clampConfidence } from "../utils/confidence.js"; + +export const DEFAULT_MIN_CONFIDENCE = 0.6; + +export function createDefaultDetectors(): RiskDetector[] { + return [ + new AuthLogicDetector(), + new DatabaseDetector(), + new CacheDetector(), + new AsyncDetector(), + new ErrorHandlingDetector(), + new ConcurrencyDetector(), + ]; +} + +export interface RunRiskDetectorsOptions { + minConfidence?: number; + detectors?: RiskDetector[]; +} + +function dedupeFindings(findings: RiskFinding[]): RiskFinding[] { + const byId = new Map(); + + for (const finding of findings) { + const existing = byId.get(finding.id); + + if (!existing || finding.confidence > existing.confidence) { + byId.set(finding.id, finding); + } + } + + return [...byId.values()].sort((a, b) => b.confidence - a.confidence); +} + +export function runRiskDetectors( + input: RiskAnalysisInput, + options: RunRiskDetectorsOptions = {}, +): RiskAnalysisResult { + const minConfidence = clampConfidence( + options.minConfidence ?? DEFAULT_MIN_CONFIDENCE, + ); + const detectors = options.detectors ?? createDefaultDetectors(); + + const findings = dedupeFindings( + detectors + .map((detector) => detector.detect(input)) + .filter((finding): finding is RiskFinding => finding !== null), + ); + + const riskHints = findings + .filter((finding) => finding.confidence >= minConfidence) + .map((finding) => finding.message); + + return { + findings, + riskHints: [...new Set(riskHints)], + }; +} diff --git a/packages/diff-parser/src/risk/interfaces/risk-detector.ts b/packages/diff-parser/src/risk/interfaces/risk-detector.ts new file mode 100644 index 0000000..8674a22 --- /dev/null +++ b/packages/diff-parser/src/risk/interfaces/risk-detector.ts @@ -0,0 +1,8 @@ +import type { RiskCategory } from "../types.js"; +import type { RiskAnalysisInput, RiskFinding } from "../types.js"; + +/** Pluggable risk detector (rule-based MVP; extensible later). */ +export interface RiskDetector { + readonly id: RiskCategory; + detect(input: RiskAnalysisInput): RiskFinding | null; +} diff --git a/packages/diff-parser/src/risk/types.ts b/packages/diff-parser/src/risk/types.ts new file mode 100644 index 0000000..5a28d27 --- /dev/null +++ b/packages/diff-parser/src/risk/types.ts @@ -0,0 +1,34 @@ +import type { ParsedFileDiff } from "../types.js"; +import type { SemanticAnalysis } from "../semantic/types.js"; + +export type RiskCategory = + | "authLogicChanged" + | "databaseOperationModified" + | "cacheLayerTouched" + | "asyncIntroduced" + | "errorHandlingRemoved" + | "concurrencyRisk"; + +export interface RiskFinding { + id: RiskCategory; + message: string; + confidence: number; + evidence: string[]; +} + +export interface RiskAnalysisInput { + filename: string; + language: string; + semantic: SemanticAnalysis; + parsed: ParsedFileDiff; +} + +export interface RiskAnalysisResult { + findings: RiskFinding[]; + riskHints: string[]; +} + +export const EMPTY_RISK_ANALYSIS: RiskAnalysisResult = { + findings: [], + riskHints: [], +}; diff --git a/packages/diff-parser/src/risk/utils/confidence.ts b/packages/diff-parser/src/risk/utils/confidence.ts new file mode 100644 index 0000000..c7a3c7c --- /dev/null +++ b/packages/diff-parser/src/risk/utils/confidence.ts @@ -0,0 +1,21 @@ +export function clampConfidence(value: number): number { + return Math.min(1, Math.max(0, value)); +} + +export function maxConfidence(values: number[]): number { + if (values.length === 0) { + return 0; + } + + return clampConfidence(Math.max(...values)); +} + +export function truncateEvidence(text: string, maxLength = 80): string { + const trimmed = text.trim(); + + if (trimmed.length <= maxLength) { + return trimmed; + } + + return `${trimmed.slice(0, maxLength - 3)}...`; +} diff --git a/packages/diff-parser/src/risk/utils/keywords.ts b/packages/diff-parser/src/risk/utils/keywords.ts new file mode 100644 index 0000000..75d1971 --- /dev/null +++ b/packages/diff-parser/src/risk/utils/keywords.ts @@ -0,0 +1,74 @@ +export const AUTH_PATH_KEYWORDS = [ + "auth", + "login", + "session", + "jwt", + "oauth", + "password", + "credential", +] as const; + +export const AUTH_SYMBOL_KEYWORDS = [ + "login", + "logout", + "authenticate", + "authorize", + "verifytoken", + "signin", + "signup", + "password", +] as const; + +export const AUTH_IMPORT_KEYWORDS = [ + "bcrypt", + "jsonwebtoken", + "passport", + "jose", + "oauth", +] as const; + +export const DATABASE_KEYWORDS = [ + "prisma", + "knex", + "typeorm", + "sequelize", + "sql", + "query", + "migration", + "drizzle", + "mongoose", +] as const; + +export const DATABASE_PATH_KEYWORDS = [ + "db", + "database", + "migrations", + "repository", + "repositories", +] as const; + +export const CACHE_KEYWORDS = [ + "redis", + "cache", + "memcached", + "ioredis", + "redlock", +] as const; + +export const CONCURRENCY_KEYWORDS = [ + "mutex", + "lock", + "race", + "semaphore", + "atomic", + "concurrent", + "parallel", + "thread", +] as const; + +export const ERROR_HANDLING_KEYWORDS = [ + "try", + "catch", + "throw", + "finally", +] as const; diff --git a/packages/diff-parser/src/risk/utils/line-scan.ts b/packages/diff-parser/src/risk/utils/line-scan.ts new file mode 100644 index 0000000..fd0ecf1 --- /dev/null +++ b/packages/diff-parser/src/risk/utils/line-scan.ts @@ -0,0 +1,71 @@ +import type { ParsedFileDiff } from "../../types.js"; + +export interface ChangedLineText { + content: string; + side: "add" | "delete"; +} + +export function collectChangedLines(parsed: ParsedFileDiff): ChangedLineText[] { + const lines: ChangedLineText[] = []; + + for (const hunk of parsed.hunks) { + for (const line of hunk.lines) { + if (line.type === "add") { + lines.push({ content: line.content, side: "add" }); + } else if (line.type === "delete") { + lines.push({ content: line.content, side: "delete" }); + } + } + } + + return lines; +} + +export function joinChangedText(parsed: ParsedFileDiff): string { + return collectChangedLines(parsed) + .map((line) => line.content) + .join("\n"); +} + +export function pathContainsAny(path: string, keywords: readonly string[]): string | null { + const lower = path.toLowerCase(); + + for (const keyword of keywords) { + if (lower.includes(keyword)) { + return keyword; + } + } + + return null; +} + +export function textContainsAny( + text: string, + keywords: readonly string[], +): string | null { + const lower = text.toLowerCase(); + + for (const keyword of keywords) { + if (lower.includes(keyword)) { + return keyword; + } + } + + return null; +} + +export function importsText(semantic: { + imports: { added: string[]; removed: string[] }; +}): string { + return [...semantic.imports.added, ...semantic.imports.removed].join(" "); +} + +export function symbolNames(semantic: { + functions: { name: string }[]; + classes: { name: string }[]; +}): string[] { + return [ + ...semantic.functions.map((fn) => fn.name), + ...semantic.classes.map((cls) => cls.name), + ]; +}