From 642e651d053231a53697986145f5a9212edb11c4 Mon Sep 17 00:00:00 2001 From: Bruno Garcia Date: Wed, 3 Jun 2026 10:54:12 -0300 Subject: [PATCH 1/5] make bcore-mutation suitable for secp256k1 --- README.md | 9 +- src/analyze.rs | 73 ++------ src/ast_analysis.rs | 22 ++- src/commands.rs | 155 +++++++++++++++++ src/db.rs | 49 ++++-- src/git_changes.rs | 93 +++++++--- src/lib.rs | 4 + src/main.rs | 33 +++- src/mutation.rs | 78 ++++++--- src/operators.rs | 308 ---------------------------------- src/operators/bitcoin_core.rs | 231 +++++++++++++++++++++++++ src/operators/common.rs | 86 ++++++++++ src/operators/mod.rs | 159 ++++++++++++++++++ src/operators/secp256k1.rs | 79 +++++++++ src/project.rs | 36 ++++ 15 files changed, 972 insertions(+), 443 deletions(-) create mode 100644 src/commands.rs delete mode 100644 src/operators.rs create mode 100644 src/operators/bitcoin_core.rs create mode 100644 src/operators/common.rs create mode 100644 src/operators/mod.rs create mode 100644 src/operators/secp256k1.rs create mode 100644 src/project.rs diff --git a/README.md b/README.md index 2da396c..7d1a591 100644 --- a/README.md +++ b/README.md @@ -50,9 +50,10 @@ Generates mutants for the target code and optionally persists them to a SQLite d | Flag | Short | Default | Description | |------|-------|---------|-------------| +| `--project NAME` | | `bitcoin-core` | Project to mutate. Accepts `bitcoin-core` or `secp256k1`. When `--pr` is used, the PR is fetched from this project's repository. | | `--sqlite [PATH]` | | `mutation.db` | Persist mutants to a SQLite database. Accepts an optional custom path. | | `--file PATH` | `-f` | | File to mutate. Mutually exclusive with `--pr`. | -| `--pr NUMBER` | `-p` | `0` (current branch) | Bitcoin Core PR number to mutate. Mutually exclusive with `--file`. | +| `--pr NUMBER` | `-p` | `0` (current branch) | PR number to mutate (fetched from the `--project` repository). Mutually exclusive with `--file`. 
| | `--range START END` | `-r` | | Restrict mutation to a line range within the target file. Cannot be combined with `--cov`. | | `--cov PATH` | `-c` | | Path to a coverage file (`*.info` generated with `cmake -P build/Coverage.cmake`). Only lines covered by tests will be mutated. Cannot be combined with `--range`. | | `--skip-lines PATH` | | | Path to a JSON file listing lines to skip per file (see format below). | @@ -74,6 +75,11 @@ bcore-mutation mutate --sqlite -f src/wallet/wallet.cpp bcore-mutation mutate --sqlite -p 12345 ``` +**Mutate a secp256k1 PR (fetched from `bitcoin-core/secp256k1`):** +```bash +bcore-mutation mutate --sqlite --project secp256k1 -p 1234 +``` + **Restrict to a line range:** ```bash bcore-mutation mutate --sqlite -f src/wallet/wallet.cpp --range 10 50 @@ -127,6 +133,7 @@ When `--sqlite` is used, the `mutate` command prints a `run_id` that you pass to | Flag | Short | Default | Description | |------|-------|---------|-------------| +| `--project NAME` | | `bitcoin-core` | Project being analyzed. Accepts `bitcoin-core` or `secp256k1`. | | `--sqlite [PATH]` | | `mutation.db` | SQLite database to read mutants from. Requires `--run-id`. Accepts an optional custom path. | | `--run-id ID` | | | Run ID returned by the `mutate` command. Requires `--sqlite`. | | `--command CMD` | `-c` | | Shell command used to test each mutant (e.g. a build + test invocation). Required when using `--run-id`. 
| diff --git a/src/analyze.rs b/src/analyze.rs index a6cd68c..5721c35 100644 --- a/src/analyze.rs +++ b/src/analyze.rs @@ -1,5 +1,7 @@ +use crate::commands::{self, ProjectCommands}; use crate::db::Database; use crate::error::{MutationError, Result}; +use crate::project::Project; use crate::report::generate_report; use std::fs; use std::path::{Path, PathBuf}; @@ -10,6 +12,7 @@ use tokio::time::timeout; use walkdir::WalkDir; pub async fn run_analysis( + project: Project, folder: Option, command: Option, jobs: u32, @@ -20,6 +23,8 @@ pub async fn run_analysis( file_path: Option, survivors_only: bool, ) -> Result<()> { + println!("Analyzing mutants for project: {}", project.db_name()); + // DB-based analysis mode: read mutants from DB and test them. if let (Some(ref path), Some(rid)) = (sqlite_path.as_ref(), run_id) { let command = command.ok_or_else(|| { @@ -49,6 +54,8 @@ pub async fn run_analysis( find_mutation_folders()? }; + let project_commands = commands::for_project(project); + for folder_path in folders { analyze_folder( &folder_path, @@ -56,6 +63,7 @@ pub async fn run_analysis( jobs, timeout_secs, survival_threshold, + project_commands.as_ref(), ) .await?; } @@ -208,6 +216,7 @@ pub async fn analyze_folder( jobs: u32, timeout_secs: u64, survival_threshold: f64, + project_commands: &dyn ProjectCommands, ) -> Result<()> { let mut num_killed: u64 = 0; let mut not_killed = Vec::new(); @@ -221,8 +230,8 @@ pub async fn analyze_folder( let test_command = if let Some(cmd) = command { cmd } else { - run_build_command().await?; - get_command_to_kill(&target_file_path, jobs)? + run_build_command(project_commands).await?; + project_commands.test_command(&target_file_path, jobs)? 
}; // Get list of mutant files @@ -349,11 +358,10 @@ async fn run_command(command: &str, timeout_secs: u64) -> Result { } } -async fn run_build_command() -> Result<()> { - let build_command = - "rm -rf build && cmake -B build -DENABLE_IPC=OFF && cmake --build build -j $(nproc)"; +async fn run_build_command(project_commands: &dyn ProjectCommands) -> Result<()> { + let build_command = project_commands.build_command(); - let success = run_command(build_command, 3600).await?; // 1 hour timeout for build + let success = run_command(&build_command, project_commands.build_timeout_secs()).await?; if !success { return Err(MutationError::Command("Build command failed".to_string())); } @@ -361,39 +369,6 @@ async fn run_build_command() -> Result<()> { Ok(()) } -fn get_command_to_kill(target_file_path: &str, jobs: u32) -> Result { - let mut build_command = "cmake --build build".to_string(); - if jobs > 0 { - build_command.push_str(&format!(" -j{}", jobs)); - } - - let command = if target_file_path.contains("functional") { - format!("./build/{}", target_file_path) - } else if target_file_path.contains("test") { - let filename_with_extension = Path::new(target_file_path) - .file_name() - .and_then(|n| n.to_str()) - .ok_or_else(|| MutationError::InvalidInput("Invalid file path".to_string()))?; - - let test_to_run = filename_with_extension - .rsplit('.') - .nth(1) - .ok_or_else(|| MutationError::InvalidInput("Cannot extract test name".to_string()))?; - - format!( - "{} && ./build/bin/test_bitcoin --run_test={}", - build_command, test_to_run - ) - } else { - format!( - "{} && ctest --output-on-failure --stop-on-failure -C Release && CI_FAILFAST_TEST_LEAVE_DANGLING=1 ./build/test/functional/test_runner.py -F", - build_command - ) - }; - - Ok(command) -} - async fn restore_file(target_file_path: &str) -> Result<()> { let restore_command = format!("git restore {}", target_file_path); let success = run_command(&restore_command, 30).await?; @@ -412,26 +387,6 @@ mod tests { use std::fs; 
use tempfile::tempdir; - #[test] - fn test_get_command_to_kill() { - // Test functional test - let cmd = get_command_to_kill("test/functional/test_example.py", 4).unwrap(); - assert_eq!(cmd, "./build/test/functional/test_example.py"); - - // Test unit test - let cmd = get_command_to_kill("src/test/test_example.cpp", 0).unwrap(); - assert_eq!( - cmd, - "cmake --build build && ./build/bin/test_bitcoin --run_test=test_example" - ); - - // Test general case - let cmd = get_command_to_kill("src/wallet/wallet.cpp", 2).unwrap(); - assert!(cmd.contains("cmake --build build -j2")); - assert!(cmd.contains("ctest")); - assert!(cmd.contains("test_runner.py")); - } - #[tokio::test] async fn test_run_command() { // Test successful command diff --git a/src/ast_analysis.rs b/src/ast_analysis.rs index 49496ad..79e3b46 100644 --- a/src/ast_analysis.rs +++ b/src/ast_analysis.rs @@ -279,11 +279,7 @@ impl AridNodeDetector { /// Context-aware version that checks if a line should be mutated /// Takes all lines and the current line index to understand control structures - pub fn should_mutate_line_with_context( - &mut self, - lines: &[String], - line_index: usize, - ) -> bool { + pub fn should_mutate_line_with_context(&mut self, lines: &[String], line_index: usize) -> bool { let line = &lines[line_index]; let trimmed = line.trim(); @@ -863,7 +859,10 @@ mod tests { 0, 30, ); - assert!(expert.is_arid_simple_node(&log_debug_node), "LogDebug should be recognized as arid"); + assert!( + expert.is_arid_simple_node(&log_debug_node), + "LogDebug should be recognized as arid" + ); } #[test] @@ -931,8 +930,15 @@ mod tests { // First, let's test that LogDebug itself is recognized as arid let log_line = lines[1].trim(); let log_node = detector.parse_line_to_simple_ast(log_line, 2); - assert_eq!(log_node.node_type, AstNodeType::FunctionCall, "LogDebug line should be classified as FunctionCall"); - assert!(detector.is_arid(&log_node), "LogDebug should be recognized as arid"); + assert_eq!( + 
log_node.node_type, + AstNodeType::FunctionCall, + "LogDebug line should be classified as FunctionCall" + ); + assert!( + detector.is_arid(&log_node), + "LogDebug should be recognized as arid" + ); let mutatable_lines = filter_mutatable_lines(&lines, &mut detector); diff --git a/src/commands.rs b/src/commands.rs new file mode 100644 index 0000000..f921ee4 --- /dev/null +++ b/src/commands.rs @@ -0,0 +1,155 @@ +//! Project-specific build and test commands used by folder-based analysis. +//! +//! When the user does not pass `--command`, `analyze` has to build the project +//! and derive a test command for the mutated file itself. Those commands differ +//! per project (Bitcoin Core uses CMake + `test_bitcoin` + the functional +//! `test_runner.py`; secp256k1 uses CMake + CTest), so each project provides a +//! [`ProjectCommands`] implementation, selected by [`for_project`]. +//! +//! This mirrors the [`crate::operators`] module: a trait per concern, one +//! implementation per project, and a single `for_project` selector. + +use crate::error::{MutationError, Result}; +use crate::project::Project; +use std::path::Path; + +/// Build and test commands for a single project. +pub trait ProjectCommands { + /// Full clean build, run once before analyzing a folder when the user did + /// not supply an explicit `--command`. + fn build_command(&self) -> String; + + /// Timeout, in seconds, allowed for [`ProjectCommands::build_command`]. + fn build_timeout_secs(&self) -> u64 { + 3600 // 1 hour + } + + /// Incremental-build-and-test command for the mutated `target_file_path`. + /// `jobs` is the parallelism passed to the compiler (0 = system default). + fn test_command(&self, target_file_path: &str, jobs: u32) -> Result; +} + +/// Return the [`ProjectCommands`] for the given project. 
+pub fn for_project(project: Project) -> Box { + match project { + Project::BitcoinCore => Box::new(BitcoinCore), + Project::Secp256k1 => Box::new(Secp256k1), + } +} + +/// Append ` -j{jobs}` to `build` when `jobs > 0`. +fn with_jobs(mut build: String, jobs: u32) -> String { + if jobs > 0 { + build.push_str(&format!(" -j{}", jobs)); + } + build +} + +pub struct BitcoinCore; + +impl ProjectCommands for BitcoinCore { + fn build_command(&self) -> String { + "rm -rf build && cmake -B build -DENABLE_IPC=OFF && cmake --build build -j $(nproc)" + .to_string() + } + + fn test_command(&self, target_file_path: &str, jobs: u32) -> Result { + let build_command = with_jobs("cmake --build build".to_string(), jobs); + + let command = if target_file_path.contains("functional") { + format!("./build/{}", target_file_path) + } else if target_file_path.contains("test") { + let filename_with_extension = Path::new(target_file_path) + .file_name() + .and_then(|n| n.to_str()) + .ok_or_else(|| MutationError::InvalidInput("Invalid file path".to_string()))?; + + let test_to_run = filename_with_extension + .rsplit('.') + .nth(1) + .ok_or_else(|| { + MutationError::InvalidInput("Cannot extract test name".to_string()) + })?; + + format!( + "{} && ./build/bin/test_bitcoin --run_test={}", + build_command, test_to_run + ) + } else { + format!( + "{} && ctest --output-on-failure --stop-on-failure -C Release && CI_FAILFAST_TEST_LEAVE_DANGLING=1 ./build/test/functional/test_runner.py -F", + build_command + ) + }; + + Ok(command) + } +} + +pub struct Secp256k1; + +impl ProjectCommands for Secp256k1 { + fn build_command(&self) -> String { + // secp256k1 builds with CMake (no IPC option). Tests are enabled by + // default; this is a starting point and may need extra feature flags + // (e.g. -DSECP256K1_BUILD_EXHAUSTIVE_TESTS) as the tool is exercised. 
+ "rm -rf build && cmake -B build && cmake --build build -j $(nproc)".to_string() + } + + fn test_command(&self, _target_file_path: &str, jobs: u32) -> Result { + // secp256k1 does not map source files to per-file test binaries the way + // Bitcoin Core does, so we rebuild incrementally and run the whole CTest + // suite regardless of which file was mutated. + let build_command = with_jobs("cmake --build build".to_string(), jobs); + Ok(format!( + "{} && ctest --test-dir build --output-on-failure", + build_command + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_bitcoin_core_test_command() { + let cmds = BitcoinCore; + + // Functional test + let cmd = cmds + .test_command("test/functional/test_example.py", 4) + .unwrap(); + assert_eq!(cmd, "./build/test/functional/test_example.py"); + + // Unit test + let cmd = cmds.test_command("src/test/test_example.cpp", 0).unwrap(); + assert_eq!( + cmd, + "cmake --build build && ./build/bin/test_bitcoin --run_test=test_example" + ); + + // General case + let cmd = cmds.test_command("src/wallet/wallet.cpp", 2).unwrap(); + assert!(cmd.contains("cmake --build build -j2")); + assert!(cmd.contains("ctest")); + assert!(cmd.contains("test_runner.py")); + } + + #[test] + fn test_secp256k1_test_command_runs_ctest() { + let cmds = Secp256k1; + let cmd = cmds.test_command("src/field_impl.h", 3).unwrap(); + assert!(cmd.contains("cmake --build build -j3")); + assert!(cmd.contains("ctest --test-dir build")); + // No Bitcoin Core specifics leak in. 
+ assert!(!cmd.contains("test_bitcoin")); + assert!(!cmd.contains("test_runner.py")); + } + + #[test] + fn test_build_commands_differ() { + assert!(BitcoinCore.build_command().contains("-DENABLE_IPC=OFF")); + assert!(!Secp256k1.build_command().contains("-DENABLE_IPC=OFF")); + } +} diff --git a/src/db.rs b/src/db.rs index 5da2301..f1015a3 100644 --- a/src/db.rs +++ b/src/db.rs @@ -94,20 +94,25 @@ impl Database { Ok(()) } - /// Insert the Bitcoin Core project row if not already present. + /// Insert the known project rows if not already present. pub fn seed_projects(&self) -> Result<()> { - self.conn.execute( - "INSERT OR IGNORE INTO projects (name, repository_url) VALUES (?1, ?2)", - params!["Bitcoin Core", "https://github.com/bitcoin/bitcoin"], - )?; + for (name, url) in [ + ("Bitcoin Core", "https://github.com/bitcoin/bitcoin"), + ("secp256k1", "https://github.com/bitcoin-core/secp256k1"), + ] { + self.conn.execute( + "INSERT OR IGNORE INTO projects (name, repository_url) VALUES (?1, ?2)", + params![name, url], + )?; + } Ok(()) } - /// Return the id of the Bitcoin Core project row. - pub fn get_bitcoin_core_project_id(&self) -> Result { + /// Return the id of the project row with the given name. 
+ pub fn get_project_id(&self, name: &str) -> Result { let id = self.conn.query_row( - "SELECT id FROM projects WHERE name = 'Bitcoin Core'", - [], + "SELECT id FROM projects WHERE name = ?1", + params![name], |row| row.get(0), )?; Ok(id) @@ -125,7 +130,13 @@ impl Database { self.conn.execute( "INSERT INTO runs (project_id, commit_hash, tool_version, pr_number, config_json) VALUES (?1, ?2, ?3, ?4, ?5)", - params![project_id, commit_hash, tool_version, pr_number, config_json], + params![ + project_id, + commit_hash, + tool_version, + pr_number, + config_json + ], )?; Ok(self.conn.last_insert_rowid()) } @@ -175,7 +186,8 @@ impl Database { let mut stmt = self.conn.prepare( "SELECT id, diff, file_path FROM mutants WHERE run_id = ?1 AND file_path = ?2", )?; - let rows = stmt.query_map(params![run_id, fp], map_row)? + let rows = stmt + .query_map(params![run_id, fp], map_row)? .collect::>()?; rows } @@ -184,15 +196,17 @@ impl Database { "SELECT id, diff, file_path FROM mutants \ WHERE run_id = ?1 AND file_path = ?2 AND status = 'survived'", )?; - let rows = stmt.query_map(params![run_id, fp], map_row)? + let rows = stmt + .query_map(params![run_id, fp], map_row)? .collect::>()?; rows } (None, false) => { - let mut stmt = self.conn.prepare( - "SELECT id, diff, file_path FROM mutants WHERE run_id = ?1", - )?; - let rows = stmt.query_map(params![run_id], map_row)? + let mut stmt = self + .conn + .prepare("SELECT id, diff, file_path FROM mutants WHERE run_id = ?1")?; + let rows = stmt + .query_map(params![run_id], map_row)? .collect::>()?; rows } @@ -201,7 +215,8 @@ impl Database { "SELECT id, diff, file_path FROM mutants \ WHERE run_id = ?1 AND status = 'survived'", )?; - let rows = stmt.query_map(params![run_id], map_row)? + let rows = stmt + .query_map(params![run_id], map_row)? 
.collect::>()?; rows } diff --git a/src/git_changes.rs b/src/git_changes.rs index 3d1802f..a795a7c 100644 --- a/src/git_changes.rs +++ b/src/git_changes.rs @@ -1,8 +1,12 @@ use crate::error::{MutationError, Result}; +use crate::project::Project; use regex::Regex; use std::process::Command; use std::str; +/// Local branch name where the secp256k1 `master` is fetched, used as the diff base. +const SECP256K1_BASE_REF: &str = "secp256k1-master"; + pub async fn run_git_command(args: &[&str]) -> Result> { let output = Command::new("git") .args(args) @@ -28,7 +32,43 @@ pub async fn get_commit_hash() -> Result { Ok(lines.into_iter().next().unwrap_or_default()) } -pub async fn get_changed_files(pr_number: Option) -> Result> { +pub async fn get_changed_files(pr_number: Option, project: Project) -> Result> { + match project { + Project::BitcoinCore => get_changed_files_bitcoin_core(pr_number).await, + Project::Secp256k1 => get_changed_files_secp256k1(pr_number).await, + } +} + +/// Fetch a secp256k1 PR directly from its GitHub URL and return the changed files. +/// +/// Unlike Bitcoin Core (which relies on a configured `upstream`/`origin` remote), +/// secp256k1 PRs are fetched straight from the repository URL. `master` is also +/// fetched into a local ref so we have a base to diff against. +async fn get_changed_files_secp256k1(pr_number: Option) -> Result> { + let url = Project::Secp256k1.repository_url(); + + // Fetch master into a local ref to diff against (force-update to stay current). 
+ let fetch_master_args = &["fetch", url, &format!("+master:{}", SECP256K1_BASE_REF)]; + run_git_command(fetch_master_args).await?; + + if let Some(pr) = pr_number { + println!("Fetching secp256k1 PR #{} from {}", pr, url); + let fetch_pr_args = &["fetch", url, &format!("pull/{}/head:pr/{}", pr, pr)]; + run_git_command(fetch_pr_args).await?; + println!("Checking out pr/{}...", pr); + run_git_command(&["checkout", &format!("pr/{}", pr)]).await?; + } + + let diff_args = &[ + "diff", + "--name-only", + "--diff-filter=d", + &format!("{}...HEAD", SECP256K1_BASE_REF), + ]; + run_git_command(diff_args).await +} + +async fn get_changed_files_bitcoin_core(pr_number: Option) -> Result> { let mut used_remote = "upstream"; // Track which remote we successfully used if let Some(pr) = pr_number { @@ -88,29 +128,44 @@ pub async fn get_changed_files(pr_number: Option) -> Result> { } } -pub async fn get_lines_touched(file_path: &str) -> Result> { - // Try upstream first - let diff_args_upstream = &[ - "diff", - "--unified=0", - "upstream/master...HEAD", - "--", - file_path, - ]; - - let diff_output = match run_git_command(diff_args_upstream).await { - Ok(output) => output, - Err(_) => { - // Fall back to origin if upstream fails - println!("Diff with upstream/master failed, trying origin/master..."); - let diff_args_origin = &[ +pub async fn get_lines_touched(file_path: &str, project: Project) -> Result> { + let diff_output = match project { + Project::Secp256k1 => { + // master was fetched into a local ref by get_changed_files_secp256k1. + let diff_args = &[ + "diff", + "--unified=0", + &format!("{}...HEAD", SECP256K1_BASE_REF), + "--", + file_path, + ]; + run_git_command(diff_args).await? + } + Project::BitcoinCore => { + // Try upstream first + let diff_args_upstream = &[ "diff", "--unified=0", - "origin/master...HEAD", + "upstream/master...HEAD", "--", file_path, ]; - run_git_command(diff_args_origin).await? 
+ + match run_git_command(diff_args_upstream).await { + Ok(output) => output, + Err(_) => { + // Fall back to origin if upstream fails + println!("Diff with upstream/master failed, trying origin/master..."); + let diff_args_origin = &[ + "diff", + "--unified=0", + "origin/master...HEAD", + "--", + file_path, + ]; + run_git_command(diff_args_origin).await? + } + } } }; diff --git a/src/lib.rs b/src/lib.rs index 59d7f51..f6555e9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,12 +12,14 @@ //! //! ```rust,no_run //! use bcore_mutation::mutation; +//! use bcore_mutation::project::Project; //! use std::collections::HashMap; //! //! #[tokio::main] //! async fn main() -> Result<(), Box> { //! // Generate mutants for a specific file with AST filtering //! mutation::run_mutation( +//! Project::BitcoinCore, // project //! None, // PR number //! Some("src/test.cpp".into()), // file path //! false, // one_mutant @@ -37,12 +39,14 @@ pub mod analyze; pub mod ast_analysis; +pub mod commands; pub mod coverage; pub mod db; pub mod error; pub mod git_changes; pub mod mutation; pub mod operators; +pub mod project; pub mod report; pub use error::{MutationError, Result}; diff --git a/src/main.rs b/src/main.rs index 9a5fcc2..31deb05 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,15 +4,18 @@ use std::path::PathBuf; mod analyze; mod ast_analysis; +mod commands; mod coverage; mod db; mod error; mod git_changes; mod mutation; mod operators; +mod project; mod report; use error::{MutationError, Result}; +use project::Project; #[derive(Parser)] #[command(name = "bcore-mutation")] @@ -24,9 +27,13 @@ struct Cli { #[derive(Subcommand)] enum Commands { - /// Create mutants for a specific Bitcoin Core PR or file + /// Create mutants for a specific PR or file Mutate { - /// Bitcoin Core's PR number (0 = current branch) + /// Project to mutate (bitcoin-core or secp256k1) + #[arg(long, value_enum, default_value_t = Project::default())] + project: Project, + + /// PR number (0 = current branch) 
#[arg(short, long, default_value = "0")] pr: u32, @@ -72,6 +79,10 @@ enum Commands { }, /// Analyze mutants Analyze { + /// Project being analyzed (bitcoin-core or secp256k1) + #[arg(long, value_enum, default_value_t = Project::default())] + project: Project, + /// Folder with the mutants #[arg(short, long)] folder: Option, @@ -116,6 +127,7 @@ async fn main() -> Result<()> { match cli.command { Commands::Mutate { + project, pr, test_only, cov, @@ -167,6 +179,7 @@ async fn main() -> Result<()> { } mutation::run_mutation( + project, if pr == 0 { None } else { Some(pr) }, file, one_mutant, @@ -182,6 +195,7 @@ async fn main() -> Result<()> { .await?; } Commands::Analyze { + project, folder, timeout, jobs, @@ -204,8 +218,19 @@ async fn main() -> Result<()> { )); } - analyze::run_analysis(folder, command, jobs, timeout, survival_threshold, sqlite, run_id, file_path, survivors_only) - .await?; + analyze::run_analysis( + project, + folder, + command, + jobs, + timeout, + survival_threshold, + sqlite, + run_id, + file_path, + survivors_only, + ) + .await?; } } diff --git a/src/mutation.rs b/src/mutation.rs index 7d852d6..144a73f 100644 --- a/src/mutation.rs +++ b/src/mutation.rs @@ -2,11 +2,8 @@ use crate::ast_analysis::{filter_mutatable_lines, AridNodeDetector}; use crate::db::{compute_patch_hash, generate_diff, Database, MutantData}; use crate::error::{MutationError, Result}; use crate::git_changes::{get_changed_files, get_commit_hash, get_lines_touched}; -use crate::operators::{ - get_do_not_mutate_patterns, get_do_not_mutate_py_patterns, get_do_not_mutate_unit_patterns, - get_regex_operators, get_security_operators, get_skip_if_contain_patterns, get_test_operators, - should_mutate_test_line, -}; +use crate::operators::{self, OperatorSet}; +use crate::project::Project; use regex::Regex; use std::collections::HashMap; use std::fs; @@ -29,6 +26,7 @@ fn build_config_json(range_lines: Option<(usize, usize)>) -> Option { } pub async fn run_mutation( + project: Project, 
pr_number: Option, file: Option, one_mutant: bool, @@ -47,8 +45,10 @@ pub async fn run_mutation( let db = Database::open(path)?; db.ensure_schema()?; db.seed_projects()?; - let project_id = db.get_bitcoin_core_project_id()?; - let commit_hash = get_commit_hash().await.unwrap_or_else(|_| "unknown".to_string()); + let project_id = db.get_project_id(project.db_name())?; + let commit_hash = get_commit_hash() + .await + .unwrap_or_else(|_| "unknown".to_string()); let tool_version = env!("CARGO_PKG_VERSION"); let config_json = build_config_json(range_lines); let run_id = db.create_run( @@ -62,6 +62,8 @@ pub async fn run_mutation( db_and_run = Some((db, run_id)); } + let operator_set = operators::for_project(project); + let mut all_mutants: Vec = Vec::new(); if let Some(file_path) = file { @@ -80,11 +82,12 @@ pub async fn run_mutation( &skip_lines, enable_ast_filtering, custom_expert_rule, + operator_set.as_ref(), ) .await?; all_mutants.extend(mutants); } else { - let files_changed = get_changed_files(pr_number).await?; + let files_changed = get_changed_files(pr_number, project).await?; let mut files_to_mutate = Vec::new(); for file_changed in files_changed { @@ -101,7 +104,7 @@ pub async fn run_mutation( continue; } - let lines_touched = get_lines_touched(&file_changed).await?; + let lines_touched = get_lines_touched(&file_changed, project).await?; let is_unit_test = file_changed.contains("test") && !file_changed.contains(".py") && !file_changed.contains("util"); @@ -130,6 +133,7 @@ pub async fn run_mutation( &skip_lines, enable_ast_filtering, custom_expert_rule.clone(), + operator_set.as_ref(), ) .await?; all_mutants.extend(mutants); @@ -165,6 +169,7 @@ pub async fn mutate_file( skip_lines: &HashMap>, enable_ast_filtering: bool, custom_expert_rule: Option, + operator_set: &dyn OperatorSet, ) -> Result> { println!("\n\nGenerating mutants for {}...", file_to_mutate); @@ -218,13 +223,13 @@ pub async fn mutate_file( // Select operators based on file type and options let 
operators = if only_security_mutations { println!("Using security operators"); - get_security_operators()? + operator_set.security_operators()? } else if file_to_mutate.contains(".py") || is_unit_test { println!("Using test operators (Python or unit test file)"); - get_test_operators()? + operator_set.test_operators()? } else { println!("Using regex operators"); - get_regex_operators()? + operator_set.regex_operators()? }; println!("Loaded {} operators", operators.len()); @@ -295,7 +300,12 @@ pub async fn mutate_file( let line_before_mutation = lines[line_idx]; // Check if line should be skipped (traditional approach) - if should_skip_line(line_before_mutation, file_to_mutate, is_unit_test)? { + if should_skip_line( + line_before_mutation, + file_to_mutate, + is_unit_test, + operator_set, + )? { continue; } @@ -304,7 +314,7 @@ pub async fn mutate_file( for operator in &operators { // Special handling for test operators if file_to_mutate.contains(".py") || is_unit_test { - if !should_mutate_test_line(line_before_mutation) { + if !operator_set.should_mutate_test_line(line_before_mutation) { continue; } } @@ -381,18 +391,23 @@ pub async fn mutate_file( Ok(collected) } -fn should_skip_line(line: &str, file_path: &str, is_unit_test: bool) -> Result { +fn should_skip_line( + line: &str, + file_path: &str, + is_unit_test: bool, + operator_set: &dyn OperatorSet, +) -> Result { let trimmed = line.trim_start(); // Check basic patterns to skip - for pattern in get_do_not_mutate_patterns() { + for pattern in operator_set.do_not_mutate_patterns() { if trimmed.starts_with(pattern) { return Ok(true); } } // Check skip if contain patterns - for pattern in get_skip_if_contain_patterns() { + for pattern in operator_set.skip_if_contain_patterns() { if line.contains(pattern) { return Ok(true); } @@ -401,9 +416,9 @@ fn should_skip_line(line: &str, file_path: &str, is_unit_test: bool) -> Result String { let parent_str = parent.to_str().unwrap_or(""); // Remove "src/" prefix if it exists 
- let without_src = parent_str.strip_prefix("src/") + let without_src = parent_str + .strip_prefix("src/") .or_else(|| parent_str.strip_prefix("src")) .unwrap_or(parent_str); @@ -486,7 +502,12 @@ fn write_mutation( let folder = if let Some(pr) = pr_number { format!("muts-pr-{}-{}-{}", pr, file_name.replace('/', "-"), ext) } else if let Some(range) = range_lines { - format!("muts-pr-{}-{}-{}", file_name.replace('/', "-"), range.0, range.1) + format!( + "muts-pr-{}-{}-{}", + file_name.replace('/', "-"), + range.0, + range.1 + ) } else { format!("muts-{}-{}", file_name.replace('/', "-"), ext) }; @@ -522,15 +543,18 @@ mod tests { #[test] fn test_should_skip_line() { + let ops = operators::for_project(Project::BitcoinCore); + let ops = ops.as_ref(); + // Test basic skip patterns - assert!(should_skip_line("// This is a comment", "test.cpp", false).unwrap()); - assert!(should_skip_line("assert(condition);", "test.cpp", false).unwrap()); - assert!(should_skip_line("LogPrintf(\"test\");", "test.cpp", false).unwrap()); - assert!(should_skip_line("LogDebug(\"test\");", "test.cpp", false).unwrap()); + assert!(should_skip_line("// This is a comment", "test.cpp", false, ops).unwrap()); + assert!(should_skip_line("assert(condition);", "test.cpp", false, ops).unwrap()); + assert!(should_skip_line("LogPrintf(\"test\");", "test.cpp", false, ops).unwrap()); + assert!(should_skip_line("LogDebug(\"test\");", "test.cpp", false, ops).unwrap()); // Test normal lines that shouldn't be skipped - assert!(!should_skip_line("int x = 5;", "test.cpp", false).unwrap()); - assert!(!should_skip_line("return value;", "test.cpp", false).unwrap()); + assert!(!should_skip_line("int x = 5;", "test.cpp", false, ops).unwrap()); + assert!(!should_skip_line("return value;", "test.cpp", false, ops).unwrap()); } #[test] diff --git a/src/operators.rs b/src/operators.rs deleted file mode 100644 index 6a62f18..0000000 --- a/src/operators.rs +++ /dev/null @@ -1,308 +0,0 @@ -use regex::Regex; -#[derive(Debug, 
Clone)] -pub struct MutationOperator { - pub pattern: Regex, - pub replacement: String, -} - -impl MutationOperator { - pub fn new(pattern: &str, replacement: &str) -> Result { - Ok(MutationOperator { - pattern: Regex::new(pattern)?, - replacement: replacement.to_string(), - }) - } -} - -pub fn get_regex_operators() -> Result, regex::Error> { - let operators = vec![ - (r"--(\b\w+\b)", r"++$1"), - (r"(\b\w+\b)--", r"$1++"), - //(r"CAmount\s+(\w+)\s*=\s*([0-9]+)", r"CAmount $1 = $2 + 1"), - //(r"CAmount\s+(\w+)\s*=\s*([0-9]+)", r"CAmount $1 = $2 - 1"), - ("Misbehaving", "//Misbehaving"), - ("continue", "break"), - ("break", "continue"), - ("std::all_of", "std::any_of"), - ("std::any_of", "std::all_of"), - ("std::min", "std::max"), - ("std::max", "std::min"), - ("std::begin", "std::end"), - ("std::end", "std::begin"), - ("true", "false"), - ("false", "true"), - (r" / ", " * "), - // Boundary (off-by-one) mutations first — hardest to kill - (r" >= ", " > "), - (r" <= ", " < "), - (r" > ", " >= "), - (r" < ", " <= "), - // Direction flips — easier to detect - (r" >= ", " <= "), - (r" <= ", " >= "), - (r" > ", " < "), - (r" < ", " > "), - // Cross-boundary - (r" > ", " <= "), - (r" < ", " >= "), - (r"&&", "||"), - (r"\|\|", "&&"), - (r" == ", " != "), - (r" != ", " == "), - (" - ", " + "), - (r" \+ ", " - "), - (r" \+ ", " * "), - (r" \+ ", " / "), - (r"\((-?\d+)\)", r"($1 - 1)"), - (r"\((-?\d+)\)", r"($1 + 1)"), - (r"\b(if|else\s+if|while)\s*\(([^()]*)\)", r"$1 (1==1)"), - (r"\b(if|else\s+if|while)\s*\(([^()]*)\)", r"$1 (1==0)"), - (r".*\berase\(.+", ""), - (r"^\s*[a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*(?:(?:->|\.)[a-zA-Z_]\w*)*\s*\([^;]*\)\s*;$", ""), - (r"^.*if\s*\(.*\)\s*continue;.*$", ""), - (r"^.*if\s*\(.*\)\s*return;.*$", ""), - (r"^.*if\s*\(.*\)\s*return.*;.*$", ""), - (r"^(.*for\s*\(.*;.*;.*\)\s*\{.*)$", r"$1break;"), - (r"^(.*while\s*\(.*\)\s*\{.*)$", r"$1break;"), - /* Seems they're unproductive - ( - r"\b(int64_t|uint64_t|int32_t|uint32_t)\s+(\w+)\s*=\s*(.*?);$", - 
r"$1 $2 = ($3) + 1;", - ), - ( - r"\b(int64_t|uint64_t|int32_t|uint32_t)\s+(\w+)\s*=\s*(.*?);$", - r"$1 $2 = ($3) - 1;", - ), - ( - r"static\s+const\s+size_t\s+(\w+)\s*=\s*([^;]+);", - r"static const size_t $1 = $2 - 1;", - ), - ( - r"static\s+const\s+size_t\s+(\w+)\s*=\s*([^;]+);", - r"static const size_t $1 = $2 + 1;", - ), - //(r"NodeClock::now\(\)", r"NodeClock::now() - 1"), - //(r"NodeClock::now\(\)", r"NodeClock::now() + 1"),*/ - ]; - - operators - .into_iter() - .map(|(pattern, replacement)| MutationOperator::new(pattern, replacement)) - .collect() -} - -pub fn get_security_operators() -> Result, regex::Error> { - let operators = vec![ - ("==", "="), - (r" - ", " + "), - (r"\s\+\s", "-"), - ( - r"std::array<\s*([\w:]+)\s*,\s*(\d+)\s*>", - r"std::array<$1, $2 - 2>", - ), - ( - r"\b((?:int16_t|uint16_t|int32_t|uint32_t|int64_t|uint64_t|int)\s*[\(\{])([^\)\}]*)[\)\}]", - "$2", - ), - (r"ignore\((\s*(\d+)\s*)\)", r"ignore($2 + 100)"), - (r"(\w+)\[(\w+)\]", r"$1[$2 + 5]"), - ( - r"^\s*(?:\(void\)\s*)?[a-zA-Z_][\w:]*\s*\([\w\s,]*\)\s*;\s*$", - "", - ), - (r"if\s*\(\s*(.*?)\s*\|\|\s*(.*?)\s*\)", r"if($2||$1)"), - ( - r"GetSelectionAmount\(\)", - r"GetSelectionAmount() + std::numeric_limits::max() - 1", - ), - (r"resetBlock\(\);", ""), - ( - r"\w+(\.|->)GetMedianTimePast\(\)", - "std::numeric_limits::max()", - ), - ("break", ""), - ]; - - operators - .into_iter() - .map(|(pattern, replacement)| MutationOperator::new(pattern, replacement)) - .collect() -} - -pub fn get_test_operators() -> Result, regex::Error> { - // Instead of using negative lookahead, we'll use a simpler approach - // This will match function calls but we'll filter out assert functions in the application logic - let operators = vec![ - (r"^\s*(?:\w+(?:\.|->|::))*(\w+)\s*\([^)]*\)\s*;?\s*$", ""), // Function calls (will be filtered by skip logic) - ]; - - operators - .into_iter() - .map(|(pattern, replacement)| MutationOperator::new(pattern, replacement)) - .collect() -} - -pub fn 
get_do_not_mutate_patterns() -> Vec<&'static str> { - vec![ - "/", - "//", - "#", - "*", - "assert", - "self.log", - "Assume", - "CHECK_NONFATAL", - "/*", - "LogPrintf", - "LogPrint", - "LogDebug", - "strprintf", - "G_FUZZING", - // no-op for FindAndDelete - "if (nFound > 0)", - ] -} - -pub fn get_do_not_mutate_py_patterns() -> Vec<&'static str> { - vec![ - "wait_for", - "wait_until", - "check_", - "for", - "expected_error", - "def", - "send_and_ping", - "test_", - "rehash", - "start_", - "solve()", - "restart_", - "stop_", - "connect_", - "sync_", - "class", - "return", - "generate(", - "continue", - "sleep", - "break", - "getcontext().prec", - "if", - "else", - "assert", - ] -} - -pub fn get_do_not_mutate_unit_patterns() -> Vec<&'static str> { - vec![ - "while", - "for", - "if", - "test_", - "_test", - "reset", - "class", - "return", - "continue", - "break", - "else", - "reserve", - "resize", - "static", - "void", - "BOOST_", - "LOCK(", - "LOCK2(", - "Test", - "Assert", - "EXCLUSIVE_LOCKS_REQUIRED", - "catch", - ] -} - -pub fn get_skip_if_contain_patterns() -> Vec<&'static str> { - vec!["EnableFuzzDeterminism", "nLostUnk", "RPCArg::Type::"] -} - -// Helper function to check if a line should be mutated by test operators -// This replaces the negative lookahead functionality -pub fn should_mutate_test_line(line: &str) -> bool { - let line_trimmed = line.trim(); - - // Don't mutate lines that start with assert or other test-specific patterns - let skip_patterns = vec![ - "assert", - "BOOST_", - "EXPECT_", - "ASSERT_", - "CHECK_", - "REQUIRE_", - "wait_for", - "wait_until", - "send_and_ping", - ]; - - for pattern in skip_patterns { - if line_trimmed.starts_with(pattern) { - return false; - } - } - - // Only mutate if it looks like a function call - let function_call_pattern = - Regex::new(r"^\s*(?:\w+(?:\.|->|::))*(\w+)\s*\([^)]*\)\s*;?\s*$").unwrap(); - function_call_pattern.is_match(line) -} - -#[cfg(test)] -mod tests { - use super::*; - - fn 
generic_call_deletion_op() -> MutationOperator { - MutationOperator::new( - r"^\s*[a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*(?:(?:->|\.)[a-zA-Z_]\w*)*\s*\([^;]*\)\s*;$", - "", - ) - .unwrap() - } - - #[test] - fn test_generic_call_deletion_matches_free_function() { - let op = generic_call_deletion_op(); - assert!(op.pattern.is_match(" Foo(arg1, arg2);")); - assert!(op.pattern.is_match("DoSomething();")); - } - - #[test] - fn test_generic_call_deletion_matches_dot_member_call() { - let op = generic_call_deletion_op(); - assert!(op.pattern.is_match(" obj.Method(arg);")); - assert!(op.pattern.is_match("obj.Method();")); - } - - #[test] - fn test_generic_call_deletion_matches_arrow_member_call() { - let op = generic_call_deletion_op(); - assert!(op.pattern.is_match(" ptr->Method(arg);")); - assert!(op.pattern.is_match("ptr->Method();")); - } - - #[test] - fn test_generic_call_deletion_matches_namespaced_call() { - let op = generic_call_deletion_op(); - assert!(op.pattern.is_match(" Namespace::Function(arg);")); - assert!(op.pattern.is_match("ns::Foo();")); - } - - #[test] - fn test_generic_call_deletion_ignores_control_flow_and_keywords() { - let op = generic_call_deletion_op(); - assert!(!op.pattern.is_match(" if (condition) {")); - assert!(!op.pattern.is_match(" while (x > 0) {")); - assert!(!op.pattern.is_match(" for (int i = 0; i < n; i++) {")); - assert!(!op.pattern.is_match(" switch (value) {")); - assert!(!op.pattern.is_match(" return Foo();")); - assert!(!op.pattern.is_match(" delete ptr;")); - assert!(!op.pattern.is_match(" throw std::runtime_error(\"err\");")); - } -} diff --git a/src/operators/bitcoin_core.rs b/src/operators/bitcoin_core.rs new file mode 100644 index 0000000..586643d --- /dev/null +++ b/src/operators/bitcoin_core.rs @@ -0,0 +1,231 @@ +//! Bitcoin Core operator set. +//! +//! These lists are kept verbatim from the original `operators.rs` so the +//! refactor introduces no behavioural change for Bitcoin Core. The operator +//! 
order is intentionally tuned (boundary mutations first, etc.). Over time the +//! generic entries here could be migrated onto [`super::common`], but only with +//! care to preserve ordering. + +use super::{build, MutationOperator, OperatorSet}; + +pub struct BitcoinCore; + +impl OperatorSet for BitcoinCore { + fn regex_operators(&self) -> Result, regex::Error> { + let operators = vec![ + (r"--(\b\w+\b)", r"++$1"), + (r"(\b\w+\b)--", r"$1++"), + //(r"CAmount\s+(\w+)\s*=\s*([0-9]+)", r"CAmount $1 = $2 + 1"), + //(r"CAmount\s+(\w+)\s*=\s*([0-9]+)", r"CAmount $1 = $2 - 1"), + ("Misbehaving", "//Misbehaving"), + ("continue", "break"), + ("break", "continue"), + ("std::all_of", "std::any_of"), + ("std::any_of", "std::all_of"), + ("std::min", "std::max"), + ("std::max", "std::min"), + ("std::begin", "std::end"), + ("std::end", "std::begin"), + ("true", "false"), + ("false", "true"), + (r" / ", " * "), + // Boundary (off-by-one) mutations first — hardest to kill + (r" >= ", " > "), + (r" <= ", " < "), + (r" > ", " >= "), + (r" < ", " <= "), + // Direction flips — easier to detect + (r" >= ", " <= "), + (r" <= ", " >= "), + (r" > ", " < "), + (r" < ", " > "), + // Cross-boundary + (r" > ", " <= "), + (r" < ", " >= "), + (r"&&", "||"), + (r"\|\|", "&&"), + (r" == ", " != "), + (r" != ", " == "), + (" - ", " + "), + (r" \+ ", " - "), + (r" \+ ", " * "), + (r" \+ ", " / "), + (r"\((-?\d+)\)", r"($1 - 1)"), + (r"\((-?\d+)\)", r"($1 + 1)"), + (r"\b(if|else\s+if|while)\s*\(([^()]*)\)", r"$1 (1==1)"), + (r"\b(if|else\s+if|while)\s*\(([^()]*)\)", r"$1 (1==0)"), + (r".*\berase\(.+", ""), + ( + r"^\s*[a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*(?:(?:->|\.)[a-zA-Z_]\w*)*\s*\([^;]*\)\s*;$", + "", + ), + (r"^.*if\s*\(.*\)\s*continue;.*$", ""), + (r"^.*if\s*\(.*\)\s*return;.*$", ""), + (r"^.*if\s*\(.*\)\s*return.*;.*$", ""), + (r"^(.*for\s*\(.*;.*;.*\)\s*\{.*)$", r"$1break;"), + (r"^(.*while\s*\(.*\)\s*\{.*)$", r"$1break;"), + /* Seems they're unproductive + ( + 
r"\b(int64_t|uint64_t|int32_t|uint32_t)\s+(\w+)\s*=\s*(.*?);$", + r"$1 $2 = ($3) + 1;", + ), + ( + r"\b(int64_t|uint64_t|int32_t|uint32_t)\s+(\w+)\s*=\s*(.*?);$", + r"$1 $2 = ($3) - 1;", + ), + ( + r"static\s+const\s+size_t\s+(\w+)\s*=\s*([^;]+);", + r"static const size_t $1 = $2 - 1;", + ), + ( + r"static\s+const\s+size_t\s+(\w+)\s*=\s*([^;]+);", + r"static const size_t $1 = $2 + 1;", + ), + //(r"NodeClock::now\(\)", r"NodeClock::now() - 1"), + //(r"NodeClock::now\(\)", r"NodeClock::now() + 1"),*/ + ]; + + build(operators) + } + + fn security_operators(&self) -> Result, regex::Error> { + let operators = vec![ + ("==", "="), + (r" - ", " + "), + (r"\s\+\s", "-"), + ( + r"std::array<\s*([\w:]+)\s*,\s*(\d+)\s*>", + r"std::array<$1, $2 - 2>", + ), + ( + r"\b((?:int16_t|uint16_t|int32_t|uint32_t|int64_t|uint64_t|int)\s*[\(\{])([^\)\}]*)[\)\}]", + "$2", + ), + (r"ignore\((\s*(\d+)\s*)\)", r"ignore($2 + 100)"), + (r"(\w+)\[(\w+)\]", r"$1[$2 + 5]"), + ( + r"^\s*(?:\(void\)\s*)?[a-zA-Z_][\w:]*\s*\([\w\s,]*\)\s*;\s*$", + "", + ), + (r"if\s*\(\s*(.*?)\s*\|\|\s*(.*?)\s*\)", r"if($2||$1)"), + ( + r"GetSelectionAmount\(\)", + r"GetSelectionAmount() + std::numeric_limits::max() - 1", + ), + (r"resetBlock\(\);", ""), + ( + r"\w+(\.|->)GetMedianTimePast\(\)", + "std::numeric_limits::max()", + ), + ("break", ""), + ]; + + build(operators) + } + + fn test_operators(&self) -> Result, regex::Error> { + // Instead of using negative lookahead, we'll use a simpler approach + // This will match function calls but we'll filter out assert functions in the application logic + let operators = vec![ + (r"^\s*(?:\w+(?:\.|->|::))*(\w+)\s*\([^)]*\)\s*;?\s*$", ""), // Function calls (will be filtered by skip logic) + ]; + + build(operators) + } + + fn do_not_mutate_patterns(&self) -> Vec<&'static str> { + vec![ + "/", + "//", + "#", + "*", + "assert", + "self.log", + "Assume", + "CHECK_NONFATAL", + "/*", + "LogPrintf", + "LogPrint", + "LogDebug", + "strprintf", + "G_FUZZING", + // no-op for 
FindAndDelete + "if (nFound > 0)", + ] + } + + fn do_not_mutate_py_patterns(&self) -> Vec<&'static str> { + vec![ + "wait_for", + "wait_until", + "check_", + "for", + "expected_error", + "def", + "send_and_ping", + "test_", + "rehash", + "start_", + "solve()", + "restart_", + "stop_", + "connect_", + "sync_", + "class", + "return", + "generate(", + "continue", + "sleep", + "break", + "getcontext().prec", + "if", + "else", + "assert", + ] + } + + fn do_not_mutate_unit_patterns(&self) -> Vec<&'static str> { + vec![ + "while", + "for", + "if", + "test_", + "_test", + "reset", + "class", + "return", + "continue", + "break", + "else", + "reserve", + "resize", + "static", + "void", + "BOOST_", + "LOCK(", + "LOCK2(", + "Test", + "Assert", + "EXCLUSIVE_LOCKS_REQUIRED", + "catch", + ] + } + + fn skip_if_contain_patterns(&self) -> Vec<&'static str> { + vec!["EnableFuzzDeterminism", "nLostUnk", "RPCArg::Type::"] + } + + fn test_line_skip_prefixes(&self) -> Vec<&'static str> { + vec![ + "assert", + "BOOST_", + "EXPECT_", + "ASSERT_", + "CHECK_", + "REQUIRE_", + "wait_for", + "wait_until", + "send_and_ping", + ] + } +} diff --git a/src/operators/common.rs b/src/operators/common.rs new file mode 100644 index 0000000..51b5424 --- /dev/null +++ b/src/operators/common.rs @@ -0,0 +1,86 @@ +//! Language-level mutation operators and skip rules shared across C-family +//! projects. These contain no project-specific identifiers (no `LogPrintf`, +//! `BOOST_`, RPC types, etc.), so they are safe to reuse for any C or C++ +//! codebase. Project modules compose these with their own additions. +//! +//! Operators are returned as `(pattern, replacement)` pairs and compiled by +//! [`super::build`]. Order is significant: boundary (off-by-one) mutations are +//! listed before direction flips because they are harder to kill. + +/// Generic arithmetic, relational, boolean and control-flow operators. 
+pub(crate) fn regex_operators() -> Vec<(&'static str, &'static str)> { + vec![ + (r"--(\b\w+\b)", r"++$1"), + (r"(\b\w+\b)--", r"$1++"), + ("continue", "break"), + ("break", "continue"), + ("true", "false"), + ("false", "true"), + (r" / ", " * "), + // Boundary (off-by-one) mutations first — hardest to kill + (r" >= ", " > "), + (r" <= ", " < "), + (r" > ", " >= "), + (r" < ", " <= "), + // Direction flips — easier to detect + (r" >= ", " <= "), + (r" <= ", " >= "), + (r" > ", " < "), + (r" < ", " > "), + // Cross-boundary + (r" > ", " <= "), + (r" < ", " >= "), + (r"&&", "||"), + (r"\|\|", "&&"), + (r" == ", " != "), + (r" != ", " == "), + (" - ", " + "), + (r" \+ ", " - "), + (r" \+ ", " * "), + (r" \+ ", " / "), + (r"\((-?\d+)\)", r"($1 - 1)"), + (r"\((-?\d+)\)", r"($1 + 1)"), + (r"\b(if|else\s+if|while)\s*\(([^()]*)\)", r"$1 (1==1)"), + (r"\b(if|else\s+if|while)\s*\(([^()]*)\)", r"$1 (1==0)"), + ( + r"^\s*[a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*(?:(?:->|\.)[a-zA-Z_]\w*)*\s*\([^;]*\)\s*;$", + "", + ), + (r"^.*if\s*\(.*\)\s*continue;.*$", ""), + (r"^.*if\s*\(.*\)\s*return;.*$", ""), + (r"^.*if\s*\(.*\)\s*return.*;.*$", ""), + (r"^(.*for\s*\(.*;.*;.*\)\s*\{.*)$", r"$1break;"), + (r"^(.*while\s*\(.*\)\s*\{.*)$", r"$1break;"), + ] +} + +/// Generic security/fuzzing-oriented operators (no project-specific symbols). +pub(crate) fn security_operators() -> Vec<(&'static str, &'static str)> { + vec![ + ("==", "="), + (r" - ", " + "), + (r"\s\+\s", "-"), + ( + r"\b((?:int16_t|uint16_t|int32_t|uint32_t|int64_t|uint64_t|int)\s*[\(\{])([^\)\}]*)[\)\}]", + "$2", + ), + (r"ignore\((\s*(\d+)\s*)\)", r"ignore($2 + 100)"), + (r"(\w+)\[(\w+)\]", r"$1[$2 + 5]"), + ( + r"^\s*(?:\(void\)\s*)?[a-zA-Z_][\w:]*\s*\([\w\s,]*\)\s*;\s*$", + "", + ), + (r"if\s*\(\s*(.*?)\s*\|\|\s*(.*?)\s*\)", r"if($2||$1)"), + ] +} + +/// Generic test operator: delete a standalone function call. 
+pub(crate) fn test_operators() -> Vec<(&'static str, &'static str)> { + vec![(r"^\s*(?:\w+(?:\.|->|::))*(\w+)\s*\([^)]*\)\s*;?\s*$", "")] +} + +/// Comment and assertion prefixes that should never be mutated, regardless of +/// project. Project modules extend this with their own guard macros. +pub(crate) fn do_not_mutate_patterns() -> Vec<&'static str> { + vec!["/", "//", "#", "*", "/*", "assert"] +} diff --git a/src/operators/mod.rs b/src/operators/mod.rs new file mode 100644 index 0000000..1725f7c --- /dev/null +++ b/src/operators/mod.rs @@ -0,0 +1,159 @@ +use regex::Regex; + +use crate::project::Project; + +mod bitcoin_core; +mod common; +mod secp256k1; + +pub use bitcoin_core::BitcoinCore; +pub use secp256k1::Secp256k1; + +#[derive(Debug, Clone)] +pub struct MutationOperator { + pub pattern: Regex, + pub replacement: String, +} + +impl MutationOperator { + pub fn new(pattern: &str, replacement: &str) -> Result { + Ok(MutationOperator { + pattern: Regex::new(pattern)?, + replacement: replacement.to_string(), + }) + } +} + +/// Compile a list of `(pattern, replacement)` pairs into mutation operators, +/// preserving order. Used by the per-project [`OperatorSet`] implementations. +pub(crate) fn build(pairs: Vec<(&str, &str)>) -> Result, regex::Error> { + pairs + .into_iter() + .map(|(pattern, replacement)| MutationOperator::new(pattern, replacement)) + .collect() +} + +/// The mutation operators and skip rules for a single project. +/// +/// Each project (Bitcoin Core, secp256k1, …) provides its own operators and +/// "do not mutate" lists. Implementations typically compose the language-level +/// operators in [`common`] with project-specific additions. +pub trait OperatorSet { + /// Operators applied to general (non-test) source files. + fn regex_operators(&self) -> Result, regex::Error>; + + /// Operators applied when `--only-security-mutations` is set. 
+ fn security_operators(&self) -> Result, regex::Error>; + + /// Operators applied to test files (unit and, where applicable, functional). + fn test_operators(&self) -> Result, regex::Error>; + + /// Line prefixes that disable mutation of a line entirely. + fn do_not_mutate_patterns(&self) -> Vec<&'static str>; + + /// Substrings that disable mutation of a Python test line. + fn do_not_mutate_py_patterns(&self) -> Vec<&'static str>; + + /// Substrings that disable mutation of a (C/C++) unit-test line. + fn do_not_mutate_unit_patterns(&self) -> Vec<&'static str>; + + /// Substrings that disable mutation of any line when contained. + fn skip_if_contain_patterns(&self) -> Vec<&'static str>; + + /// Prefixes that mark a test line as not worth mutating (asserts, helpers). + /// Consumed by the default [`OperatorSet::should_mutate_test_line`]. + fn test_line_skip_prefixes(&self) -> Vec<&'static str>; + + /// Whether a test line should be mutated by [`OperatorSet::test_operators`]. + /// + /// Default behaviour: skip lines starting with any + /// [`OperatorSet::test_line_skip_prefixes`], then only mutate lines that + /// look like a standalone function call. + fn should_mutate_test_line(&self, line: &str) -> bool { + let trimmed = line.trim(); + + for pattern in self.test_line_skip_prefixes() { + if trimmed.starts_with(pattern) { + return false; + } + } + + // Only mutate if it looks like a function call. + let function_call_pattern = + Regex::new(r"^\s*(?:\w+(?:\.|->|::))*(\w+)\s*\([^)]*\)\s*;?\s*$").unwrap(); + function_call_pattern.is_match(line) + } +} + +/// Return the [`OperatorSet`] for the given project. 
+pub fn for_project(project: Project) -> Box { + match project { + Project::BitcoinCore => Box::new(BitcoinCore), + Project::Secp256k1 => Box::new(Secp256k1), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn generic_call_deletion_op() -> MutationOperator { + MutationOperator::new( + r"^\s*[a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*(?:(?:->|\.)[a-zA-Z_]\w*)*\s*\([^;]*\)\s*;$", + "", + ) + .unwrap() + } + + #[test] + fn test_generic_call_deletion_matches_free_function() { + let op = generic_call_deletion_op(); + assert!(op.pattern.is_match(" Foo(arg1, arg2);")); + assert!(op.pattern.is_match("DoSomething();")); + } + + #[test] + fn test_generic_call_deletion_matches_dot_member_call() { + let op = generic_call_deletion_op(); + assert!(op.pattern.is_match(" obj.Method(arg);")); + assert!(op.pattern.is_match("obj.Method();")); + } + + #[test] + fn test_generic_call_deletion_matches_arrow_member_call() { + let op = generic_call_deletion_op(); + assert!(op.pattern.is_match(" ptr->Method(arg);")); + assert!(op.pattern.is_match("ptr->Method();")); + } + + #[test] + fn test_generic_call_deletion_matches_namespaced_call() { + let op = generic_call_deletion_op(); + assert!(op.pattern.is_match(" Namespace::Function(arg);")); + assert!(op.pattern.is_match("ns::Foo();")); + } + + #[test] + fn test_generic_call_deletion_ignores_control_flow_and_keywords() { + let op = generic_call_deletion_op(); + assert!(!op.pattern.is_match(" if (condition) {")); + assert!(!op.pattern.is_match(" while (x > 0) {")); + assert!(!op.pattern.is_match(" for (int i = 0; i < n; i++) {")); + assert!(!op.pattern.is_match(" switch (value) {")); + assert!(!op.pattern.is_match(" return Foo();")); + assert!(!op.pattern.is_match(" delete ptr;")); + assert!(!op + .pattern + .is_match(" throw std::runtime_error(\"err\");")); + } + + #[test] + fn test_for_project_returns_distinct_sets() { + // secp256k1 has no Python functional tests, so its Python skip list is empty; + // Bitcoin Core's is not. 
This is a cheap proxy for "the sets differ". + let btc = for_project(Project::BitcoinCore); + let secp = for_project(Project::Secp256k1); + assert!(!btc.do_not_mutate_py_patterns().is_empty()); + assert!(secp.do_not_mutate_py_patterns().is_empty()); + } +} diff --git a/src/operators/secp256k1.rs b/src/operators/secp256k1.rs new file mode 100644 index 0000000..44c0c4b --- /dev/null +++ b/src/operators/secp256k1.rs @@ -0,0 +1,79 @@ +//! secp256k1 operator set. +//! +//! secp256k1 is a pure C library with no Python functional tests and no Boost +//! unit tests — its tests are C programs (`tests.c`, `tests_exhaustive.c`, +//! `*_impl.h` test sections) driven by CTest. The operators here therefore +//! reuse the language-level [`super::common`] set and add C-specific guard +//! macros (`VERIFY_CHECK`, `ARG_CHECK`, `CHECK`, …) to the skip lists. +//! +//! These project-specific entries are a starting point and are expected to be +//! refined as the tool is exercised against secp256k1. + +use super::{build, common, MutationOperator, OperatorSet}; + +pub struct Secp256k1; + +impl OperatorSet for Secp256k1 { + fn regex_operators(&self) -> Result, regex::Error> { + build(common::regex_operators()) + } + + fn security_operators(&self) -> Result, regex::Error> { + build(common::security_operators()) + } + + fn test_operators(&self) -> Result, regex::Error> { + build(common::test_operators()) + } + + fn do_not_mutate_patterns(&self) -> Vec<&'static str> { + let mut patterns = common::do_not_mutate_patterns(); + // secp256k1 invariant/argument guards: mutating these produces + // unproductive or always-aborting mutants. + patterns.extend([ + "VERIFY_CHECK", + "VERIFY_SETUP", + "ARG_CHECK", + "ARG_CHECK_VOID", + "CHECK", + "secp256k1_fe_verify", + "secp256k1_ge_verify", + "secp256k1_gej_verify", + "secp256k1_scalar_verify", + ]); + patterns + } + + fn do_not_mutate_py_patterns(&self) -> Vec<&'static str> { + // secp256k1 has no Python functional test suite. 
+ Vec::new() + } + + fn do_not_mutate_unit_patterns(&self) -> Vec<&'static str> { + vec![ + "while", + "for", + "if", + "else", + "return", + "continue", + "break", + "static", + "void", + // secp256k1 test harness helpers + "CHECK", + "VERIFY_CHECK", + "run_", + "test_", + "secp256k1_", + ] + } + + fn skip_if_contain_patterns(&self) -> Vec<&'static str> { + vec!["VERIFY_CHECK", "ARG_CHECK"] + } + + fn test_line_skip_prefixes(&self) -> Vec<&'static str> { + vec!["assert", "CHECK", "VERIFY_CHECK", "run_", "test_"] + } +} diff --git a/src/project.rs b/src/project.rs new file mode 100644 index 0000000..0660c6f --- /dev/null +++ b/src/project.rs @@ -0,0 +1,36 @@ +use clap::ValueEnum; + +/// A project that this tool can generate and analyze mutants for. +#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)] +pub enum Project { + /// Bitcoin Core (https://github.com/bitcoin/bitcoin) + #[value(name = "bitcoin-core")] + BitcoinCore, + /// libsecp256k1 (https://github.com/bitcoin-core/secp256k1) + #[value(name = "secp256k1")] + Secp256k1, +} + +impl Default for Project { + fn default() -> Self { + Project::BitcoinCore + } +} + +impl Project { + /// The upstream repository URL for this project. + pub fn repository_url(&self) -> &'static str { + match self { + Project::BitcoinCore => "https://github.com/bitcoin/bitcoin", + Project::Secp256k1 => "https://github.com/bitcoin-core/secp256k1", + } + } + + /// The name used for this project in the SQLite `projects` table. 
+ pub fn db_name(&self) -> &'static str { + match self { + Project::BitcoinCore => "Bitcoin Core", + Project::Secp256k1 => "secp256k1", + } + } +} From f8907dd47175591da5eb7c1665489943f242f799 Mon Sep 17 00:00:00 2001 From: Bruno Garcia Date: Wed, 3 Jun 2026 11:07:29 -0300 Subject: [PATCH 2/5] analyze: add --min-score CI gate and build once in folder mode Add an optional --min-score flag to the analyze command: when the final mutation score (killed / total, aggregated across all analyzed folders) falls below it, analyze returns an error and exits non-zero, failing CI. When unset, the score is not enforced. Backed by a dedicated ScoreBelowThreshold error variant; the value is validated to be in [0,1]. Also hoist the one-time clean build out of analyze_folder into run_analysis so it runs once per invocation instead of once per mutant folder. Per-mutant incremental rebuilds still happen inside each test command. Guarded so it is skipped when --command is given or there are no folders to analyze. Co-Authored-By: Claude Opus 4.8 --- README.md | 11 +++++ src/analyze.rs | 122 +++++++++++++++++++++++++++++++++++++++++++++---- src/error.rs | 3 ++ src/main.rs | 15 ++++++ 4 files changed, 141 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 7d1a591..d3735f1 100644 --- a/README.md +++ b/README.md @@ -142,6 +142,7 @@ When `--sqlite` is used, the `mutate` command prints a `run_id` that you pass to | `--timeout SECONDS` | `-t` | `300` | Timeout in seconds for each mutant's test run. | | `--jobs N` | `-j` | `0` | Number of parallel jobs passed to the compiler (e.g. `make -j N`). `0` uses the system default. | | `--survival-threshold RATE` | | `0.75` | Maximum acceptable mutant survival rate (e.g. `0.3` = 30%). The run exits with an error if the threshold is exceeded. | +| `--min-score RATE` | | | CI gate: fail with a non-zero exit code if the final mutation score (killed / total) is below this value (e.g. `0.8` = 80%). Aggregated across all analyzed folders. 
When unset, the score is not enforced. | | `--surviving` | | | Only analyze mutants that survived a previous run. Requires `--run-id`. | ### Examples @@ -166,6 +167,16 @@ bcore-mutation analyze --sqlite --run-id=1 --surviving \ -c "cmake --build build && ./build/test/functional/wallet_test.py" ``` +**Fail CI when the mutation score drops below 80% (folder mode, no database):** +```bash +# 1. Generate mutants for the PR — writes muts-* folders to disk +bcore-mutation mutate --project secp256k1 --pr 1234 + +# 2. Analyze them and fail the job if the score is under 80%. +# With no --command, the built-in secp256k1 build/test commands are used. +bcore-mutation analyze --project secp256k1 --min-score 0.8 +``` + **Set a custom timeout and job count:** ```bash bcore-mutation analyze --sqlite --run-id=1 -t 120 -j 8 \ diff --git a/src/analyze.rs b/src/analyze.rs index 5721c35..d599777 100644 --- a/src/analyze.rs +++ b/src/analyze.rs @@ -11,6 +11,61 @@ use tokio::process::Command as TokioCommand; use tokio::time::timeout; use walkdir::WalkDir; +/// Killed/total counts produced by an analysis pass. Used to compute the +/// mutation score and apply the optional `--min-score` CI gate. +#[derive(Default, Clone, Copy)] +pub struct ScoreSummary { + pub killed: u64, + pub total: u64, +} + +impl ScoreSummary { + /// Mutation score as a fraction in `[0.0, 1.0]` (killed / total). + /// Returns `0.0` when no mutants were analyzed. + pub fn score(&self) -> f64 { + if self.total == 0 { + 0.0 + } else { + self.killed as f64 / self.total as f64 + } + } + + /// Accumulate another pass's counts (used to aggregate across folders). + fn add(&mut self, other: ScoreSummary) { + self.killed += other.killed; + self.total += other.total; + } +} + +/// Fail (return an error) when `min_score` is set and the achieved mutation +/// score is below it. This is the CI gate: an `Err` here propagates out of +/// `main` and exits the process with a non-zero status. 
+fn enforce_min_score(summary: ScoreSummary, min_score: Option) -> Result<()> { + let Some(min) = min_score else { + return Ok(()); + }; + + let score = summary.score(); + println!( + "\nOverall mutation score: {:.2}% ({}/{} killed); required minimum: {:.2}%", + score * 100.0, + summary.killed, + summary.total, + min * 100.0 + ); + + if score < min { + return Err(MutationError::ScoreBelowThreshold(format!( + "mutation score {:.2}% is below the required minimum of {:.2}%", + score * 100.0, + min * 100.0 + ))); + } + + println!("Mutation score meets the required minimum ✅"); + Ok(()) +} + pub async fn run_analysis( project: Project, folder: Option, @@ -18,6 +73,7 @@ pub async fn run_analysis( jobs: u32, timeout_secs: u64, survival_threshold: f64, + min_score: Option, sqlite_path: Option, run_id: Option, file_path: Option, @@ -35,7 +91,7 @@ pub async fn run_analysis( let db = Database::open(path)?; db.ensure_schema()?; db.seed_projects()?; - return run_db_analysis( + let summary = run_db_analysis( &db, rid, &command, @@ -43,7 +99,8 @@ pub async fn run_analysis( file_path.as_deref(), survivors_only, ) - .await; + .await?; + return enforce_min_score(summary, min_score); } // Folder-based analysis mode (existing behaviour). @@ -56,8 +113,16 @@ pub async fn run_analysis( let project_commands = commands::for_project(project); + // When we derive the test command ourselves (no --command), do the one-time + // clean build up front rather than once per folder. Each mutant still + // triggers an incremental rebuild inside its test command. 
+ if command.is_none() && !folders.is_empty() { + run_build_command(project_commands.as_ref()).await?; + } + + let mut overall = ScoreSummary::default(); for folder_path in folders { - analyze_folder( + let summary = analyze_folder( &folder_path, command.clone(), jobs, @@ -66,9 +131,10 @@ pub async fn run_analysis( project_commands.as_ref(), ) .await?; + overall.add(summary); } - Ok(()) + enforce_min_score(overall, min_score) } /// Test all pending mutants in `run_id` from the database, optionally filtered by `file_path`. @@ -80,7 +146,7 @@ async fn run_db_analysis( timeout_secs: u64, file_path: Option<&str>, survivors_only: bool, -) -> Result<()> { +) -> Result { let mutants = db.get_mutants_for_run(run_id, file_path, survivors_only)?; let total = mutants.len(); @@ -163,7 +229,10 @@ async fn run_db_analysis( ); println!("Survived: {}", num_survived); - Ok(()) + Ok(ScoreSummary { + killed: num_killed, + total: total as u64, + }) } /// Apply a unified diff patch using `git apply`. @@ -217,7 +286,7 @@ pub async fn analyze_folder( timeout_secs: u64, survival_threshold: f64, project_commands: &dyn ProjectCommands, -) -> Result<()> { +) -> Result { let mut num_killed: u64 = 0; let mut not_killed = Vec::new(); @@ -226,11 +295,12 @@ pub async fn analyze_folder( let target_file_path = fs::read_to_string(original_file_path)?; let target_file_path = target_file_path.trim(); - // Setup command if not provided + // Derive the test command when one isn't provided. The clean build is done + // once by the caller (run_analysis); here we only build the per-file test + // command, whose incremental `cmake --build` picks up each mutant. let test_command = if let Some(cmd) = command { cmd } else { - run_build_command(project_commands).await?; project_commands.test_command(&target_file_path, jobs)? 
}; @@ -306,7 +376,10 @@ pub async fn analyze_folder( // Restore the original file restore_file(&target_file_path).await?; - Ok(()) + Ok(ScoreSummary { + killed: num_killed, + total: total_mutants as u64, + }) } async fn run_command(command: &str, timeout_secs: u64) -> Result { @@ -387,6 +460,35 @@ mod tests { use std::fs; use tempfile::tempdir; + #[test] + fn test_enforce_min_score() { + // No gate configured: always Ok regardless of score. + assert!(enforce_min_score(ScoreSummary { killed: 0, total: 10 }, None).is_ok()); + + // Above threshold passes. + assert!(enforce_min_score(ScoreSummary { killed: 9, total: 10 }, Some(0.8)).is_ok()); + + // Exactly at threshold passes (gate uses `<`, so >= passes). + assert!(enforce_min_score(ScoreSummary { killed: 8, total: 10 }, Some(0.8)).is_ok()); + + // Below threshold fails with the dedicated error variant. + let result = enforce_min_score(ScoreSummary { killed: 5, total: 10 }, Some(0.8)); + assert!(matches!( + result, + Err(MutationError::ScoreBelowThreshold(_)) + )); + } + + #[test] + fn test_score_summary_aggregates() { + let mut overall = ScoreSummary::default(); + overall.add(ScoreSummary { killed: 3, total: 4 }); + overall.add(ScoreSummary { killed: 1, total: 6 }); + assert_eq!(overall.killed, 4); + assert_eq!(overall.total, 10); + assert!((overall.score() - 0.4).abs() < f64::EPSILON); + } + #[tokio::test] async fn test_run_command() { // Test successful command diff --git a/src/error.rs b/src/error.rs index 69a9f6a..42d1df1 100644 --- a/src/error.rs +++ b/src/error.rs @@ -17,6 +17,9 @@ pub enum MutationError { #[error("Invalid input: {0}")] InvalidInput(String), + #[error("Mutation score below threshold: {0}")] + ScoreBelowThreshold(String), + #[error("Coverage parsing error: {0}")] Coverage(String), diff --git a/src/main.rs b/src/main.rs index 31deb05..7341658 100644 --- a/src/main.rs +++ b/src/main.rs @@ -103,6 +103,11 @@ enum Commands { #[arg(long, default_value = "0.75")] survival_threshold: f64, + /// Fail 
(non-zero exit) if the final mutation score is below this value + /// (0.8 = 80%). Intended as a CI gate. When unset, the score is not enforced. + #[arg(long, value_name = "RATE")] + min_score: Option, + /// SQLite database path to read mutants from (requires --run_id) #[arg(long, value_name = "PATH", num_args = 0..=1, default_missing_value = "mutation.db")] sqlite: Option, @@ -201,6 +206,7 @@ async fn main() -> Result<()> { jobs, command, survival_threshold, + min_score, sqlite, run_id, file_path, @@ -218,6 +224,14 @@ async fn main() -> Result<()> { )); } + if let Some(min) = min_score { + if !(0.0..=1.0).contains(&min) { + return Err(MutationError::InvalidInput( + "--min-score must be between 0.0 and 1.0".to_string(), + )); + } + } + analyze::run_analysis( project, folder, @@ -225,6 +239,7 @@ async fn main() -> Result<()> { jobs, timeout, survival_threshold, + min_score, sqlite, run_id, file_path, From 24111b63814033a22b01e480a13d196cf7bd45b7 Mon Sep 17 00:00:00 2001 From: Bruno Garcia Date: Wed, 3 Jun 2026 19:25:12 -0300 Subject: [PATCH 3/5] add specific skips for secp256k1 --- src/operators/secp256k1.rs | 143 +++++++++++++++++++++++++++++++------ 1 file changed, 123 insertions(+), 20 deletions(-) diff --git a/src/operators/secp256k1.rs b/src/operators/secp256k1.rs index 44c0c4b..b6b52e3 100644 --- a/src/operators/secp256k1.rs +++ b/src/operators/secp256k1.rs @@ -1,18 +1,59 @@ //! secp256k1 operator set. //! //! secp256k1 is a pure C library with no Python functional tests and no Boost -//! unit tests — its tests are C programs (`tests.c`, `tests_exhaustive.c`, +//! unit tests - its tests are C programs (`tests.c`, `tests_exhaustive.c`, //! `*_impl.h` test sections) driven by CTest. The operators here therefore //! reuse the language-level [`super::common`] set and add C-specific guard -//! macros (`VERIFY_CHECK`, `ARG_CHECK`, `CHECK`, …) to the skip lists. -//! -//! These project-specific entries are a starting point and are expected to be -//! 
refined as the tool is exercised against secp256k1. +//! macros, annotations, cleanup calls, and diagnostics to the skip lists. use super::{build, common, MutationOperator, OperatorSet}; pub struct Secp256k1; +const SECP256K1_SKIP_PREFIXES: &[&str] = &[ + // Invariant and public API guards. + "VERIFY_CHECK", + "VERIFY_BITS", + "ARG_CHECK", + "ARG_CHECK_VOID", + "CHECK", + "secp256k1_fe_verify", + "secp256k1_ge_verify", + "secp256k1_gej_verify", + "secp256k1_scalar_verify", + // Defensive output zeroing and testrand diagnostics. + "memset", + "printf", + "fprintf", +]; + +const SECP256K1_SKIP_SUBSTRINGS: &[&str] = &[ + // VERIFY-style macro variants such as SECP256K1_FE_VERIFY(...). + "VERIFY_CHECK", + "VERIFY_BITS", + "_VERIFY", + "ARG_CHECK", + // Callback/error reporting and verification annotations. + "secp256k1_callback_call", + "secp256k1_declassify", + "SECP256K1_CHECKMEM_", + // Secret/object cleanup functions. + "secp256k1_memclear_explicit", + "secp256k1_scalar_clear", + "secp256k1_ge_clear", + "secp256k1_gej_clear", + "secp256k1_fe_clear", + "secp256k1_sha256_clear", + "secp256k1_hmac_sha256_clear", + "secp256k1_rfc6979_hmac_sha256_clear", + // Conditional zeroing on failure. + "secp256k1_memczero", + // testrand /dev/urandom file I/O. + "fopen(", + "fread(", + "fclose(", +]; + impl OperatorSet for Secp256k1 { fn regex_operators(&self) -> Result, regex::Error> { build(common::regex_operators()) @@ -28,19 +69,7 @@ impl OperatorSet for Secp256k1 { fn do_not_mutate_patterns(&self) -> Vec<&'static str> { let mut patterns = common::do_not_mutate_patterns(); - // secp256k1 invariant/argument guards: mutating these produces - // unproductive or always-aborting mutants. 
- patterns.extend([ - "VERIFY_CHECK", - "VERIFY_SETUP", - "ARG_CHECK", - "ARG_CHECK_VOID", - "CHECK", - "secp256k1_fe_verify", - "secp256k1_ge_verify", - "secp256k1_gej_verify", - "secp256k1_scalar_verify", - ]); + patterns.extend(SECP256K1_SKIP_PREFIXES); patterns } @@ -70,10 +99,84 @@ impl OperatorSet for Secp256k1 { } fn skip_if_contain_patterns(&self) -> Vec<&'static str> { - vec!["VERIFY_CHECK", "ARG_CHECK"] + SECP256K1_SKIP_SUBSTRINGS.to_vec() } fn test_line_skip_prefixes(&self) -> Vec<&'static str> { - vec!["assert", "CHECK", "VERIFY_CHECK", "run_", "test_"] + let mut prefixes = vec!["assert", "CHECK", "run_", "test_"]; + prefixes.extend(SECP256K1_SKIP_PREFIXES); + prefixes + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn skipped_by_global_secp256k1_lists(line: &str) -> bool { + let ops = Secp256k1; + let trimmed = line.trim_start(); + + ops.do_not_mutate_patterns() + .iter() + .any(|pattern| trimmed.starts_with(pattern)) + || ops + .skip_if_contain_patterns() + .iter() + .any(|pattern| line.contains(pattern)) + } + + #[test] + fn skips_verify_and_api_guard_lines() { + assert!(skipped_by_global_secp256k1_lists( + " VERIFY_CHECK(r != NULL);" + )); + assert!(skipped_by_global_secp256k1_lists( + " VERIFY_BITS_128(x, 64);" + )); + assert!(skipped_by_global_secp256k1_lists( + " SECP256K1_SCALAR_VERIFY (&s);" + )); + assert!(skipped_by_global_secp256k1_lists( + " ARG_CHECK(ctx != NULL);" + )); + } + + #[test] + fn skips_annotations_cleanup_and_zeroing() { + assert!(skipped_by_global_secp256k1_lists( + " secp256k1_callback_call(&ctx->error_callback, \"bad\");" + )); + assert!(skipped_by_global_secp256k1_lists( + " secp256k1_declassify(ctx, &ret, sizeof(ret));" + )); + assert!(skipped_by_global_secp256k1_lists( + " SECP256K1_CHECKMEM_CHECK(p, len);" + )); + assert!(skipped_by_global_secp256k1_lists( + " secp256k1_scalar_clear(&s);" + )); + assert!(skipped_by_global_secp256k1_lists( + " secp256k1_memczero(sig64, 64, !ret);" + )); + 
assert!(skipped_by_global_secp256k1_lists( + " memset(sig64, 0, 64);" + )); + } + + #[test] + fn skips_preprocessor_comments_and_testrand_diagnostics() { + assert!(skipped_by_global_secp256k1_lists("# ifdef VERIFY")); + assert!(skipped_by_global_secp256k1_lists( + " fprintf(stderr, \"random seed failure\\n\");" + )); + assert!(skipped_by_global_secp256k1_lists( + " fp = fopen(\"/dev/urandom\", \"rb\");" + )); + assert!(skipped_by_global_secp256k1_lists( + " fread(seed, 1, sizeof(seed), fp);" + )); + assert!(skipped_by_global_secp256k1_lists(" // comment")); + assert!(!skipped_by_global_secp256k1_lists(" ret = a + b;")); } } From 2557d8a362d6ff99748f9c22339e7d6c9cebc9a7 Mon Sep 17 00:00:00 2001 From: Bruno Garcia Date: Thu, 4 Jun 2026 10:21:37 -0300 Subject: [PATCH 4/5] mutation: make skipped file types project-specific Move the hardcoded non-source file skip list out of run_mutation and into Project::should_skip_file, backed by per-project substring and suffix tables. Bitcoin Core behavior is unchanged; secp256k1 now skips its own docs, tooling, examples, benchmarks and test/bench impl files. Co-Authored-By: Claude Opus 4.8 --- src/mutation.rs | 13 +++---------- src/project.rs | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/src/mutation.rs b/src/mutation.rs index 144a73f..7650657 100644 --- a/src/mutation.rs +++ b/src/mutation.rs @@ -91,16 +91,9 @@ pub async fn run_mutation( let mut files_to_mutate = Vec::new(); for file_changed in files_changed { - // Skip certain file types - if file_changed.contains("doc") - || file_changed.contains("contrib") - || file_changed.contains("fuzz") - || file_changed.contains("bench") - || file_changed.contains("util") - || file_changed.contains("sanitizer_supressions") - || file_changed.contains("test_framework.py") - || file_changed.ends_with(".txt") - { + // Skip non-source files (docs, tooling, benchmarks, ...). 
+ // The exact set is project-specific; see `Project::should_skip_file`. + if project.should_skip_file(&file_changed) { continue; } diff --git a/src/project.rs b/src/project.rs index 0660c6f..68b81be 100644 --- a/src/project.rs +++ b/src/project.rs @@ -33,4 +33,48 @@ impl Project { Project::Secp256k1 => "secp256k1", } } + + /// Returns true if the given changed-file path should be excluded from + /// mutation. Documentation, tooling, benchmarks and other non-source + /// files are not worth mutating, and the exact set differs per project + /// because each repository has its own layout and auxiliary files. + pub fn should_skip_file(&self, path: &str) -> bool { + self.skip_substrings().iter().any(|s| path.contains(s)) + || self.skip_suffixes().iter().any(|s| path.ends_with(s)) + } + + /// Path substrings that mark a file as non-source for this project. + fn skip_substrings(&self) -> &'static [&'static str] { + match self { + Project::BitcoinCore => &[ + "doc", + "contrib", + "fuzz", + "bench", + "util", + "sanitizer_supressions", + "test_framework.py", + ], + Project::Secp256k1 => &[ + // e.g. tests_impl and bench_impl + "tests_", + "bench_", + "doc", + "contrib", + "examples", + "ci", + "tools", + "bench", + ], + } + } + + /// File suffixes that mark a file as non-source for this project. + fn skip_suffixes(&self) -> &'static [&'static str] { + match self { + Project::BitcoinCore => &[".txt"], + // CMakeLists.txt, *.md, configure.ac, Makefile.am, etc. + Project::Secp256k1 => &[".txt", ".md", ".ac", ".am"], + } + } } From 626062b5f67280531158e27929ad7c29557b1ef1 Mon Sep 17 00:00:00 2001 From: Bruno Garcia Date: Fri, 5 Jun 2026 16:32:04 -0300 Subject: [PATCH 5/5] mutation: skip deleting secp256k1 field normalization calls Field normalization functions (secp256k1_fe_normalize and its _weak, _var, _to_zero, _to_zero_var variants) only canonicalize the internal limb representation without changing the represented value. 
Deleting a call to them almost always yields an equivalent, non-useful mutant, so add the secp256k1_fe_normalize substring to the skip list. Co-Authored-By: Claude Opus 4.8 --- src/operators/secp256k1.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/operators/secp256k1.rs b/src/operators/secp256k1.rs index b6b52e3..8f14db0 100644 --- a/src/operators/secp256k1.rs +++ b/src/operators/secp256k1.rs @@ -48,6 +48,11 @@ const SECP256K1_SKIP_SUBSTRINGS: &[&str] = &[ "secp256k1_rfc6979_hmac_sha256_clear", // Conditional zeroing on failure. "secp256k1_memczero", + // Field normalization. secp256k1_fe_normalize{,_weak,_var} only canonicalize + // the internal limb representation without changing the represented value, + // so deleting a call almost always yields an equivalent (non-useful) mutant. + // NOTE(review): also matches the secp256k1_fe_normalizes_to_zero{,_var} predicates. + "secp256k1_fe_normalize", // testrand /dev/urandom file I/O. "fopen(", "fread(", @@ -164,6 +169,22 @@ mod tests { )); } + #[test] + fn skips_field_normalization_calls() { + assert!(skipped_by_global_secp256k1_lists( + " secp256k1_fe_normalize(&r->x);" + )); + assert!(skipped_by_global_secp256k1_lists( + " secp256k1_fe_normalize_weak(&r->x);" + )); + assert!(skipped_by_global_secp256k1_lists( + " secp256k1_fe_normalize_var(&r->x);" + )); + assert!(skipped_by_global_secp256k1_lists( + " secp256k1_fe_normalize_to_zero_var(&t);" + )); + } + #[test] fn skips_preprocessor_comments_and_testrand_diagnostics() { assert!(skipped_by_global_secp256k1_lists("# ifdef VERIFY"));