diff --git a/etc/examples/create-context-demo-repos.sh b/etc/examples/create-context-demo-repos.sh new file mode 100755 index 000000000..60cbe6d4c --- /dev/null +++ b/etc/examples/create-context-demo-repos.sh @@ -0,0 +1,161 @@ +#!/usr/bin/env bash +# +# Creates three small git repos demonstrating the syntax-highlighting context +# bug. In each repo, `git diff HEAD~1` produces a hunk that starts inside a +# multiline construct (docstring / block comment / template literal), so +# syntect misparses the closing delimiter. +# +# Usage: +# bash etc/examples/create-context-demo-repos.sh +# +# Then compare: +# cd /tmp/context-demo-python +# git diff HEAD~1 | delta # bug: return is string-colored +# git diff -U9999 HEAD~1 | delta -U 3 # fix: return is keyword-colored + +set -euo pipefail + +DIR="${1:-/tmp}" + +# --- Python triple-quoted string --- +REPO="$DIR/context-demo-python" +rm -rf "$REPO" +mkdir -p "$REPO" && cd "$REPO" && git init -q + +cat > example.py << 'PYEOF' +def foo(): + """ + This is a docstring that spans + multiple lines. + It has lots of content. + More content here. + Even more explanation. + Still going on. + And yet more. + Final docstring line. + """ + x = 1 + return x +PYEOF +git add example.py && git commit -q -m "initial" + +cat > example.py << 'PYEOF' +def foo(): + """ + This is a docstring that spans + multiple lines. + It has lots of content. + More content here. + Even more explanation. + Still going on. + And yet more. + Final docstring line. + """ + x = 2 + return x + 1 +PYEOF +git add example.py && git commit -q -m "change" + +# --- Rust block comment --- +REPO="$DIR/context-demo-rust" +rm -rf "$REPO" +mkdir -p "$REPO" && cd "$REPO" && git init -q + +cat > lib.rs << 'RSEOF' +/// Entry point for the library. +pub fn process(input: &str) -> Result { + /* + * Multi-line comment explaining + * the complex validation logic + * that follows below. + * It covers edge cases for: + * - empty input + * - unicode input + * - oversized input + * - malformed input + */ + let validated = validate(input)?; + let result = transform(validated); + Ok(result) +} +RSEOF +git add lib.rs && git commit -q -m "initial" + +cat > lib.rs << 'RSEOF' +/// Entry point for the library. +pub fn process(input: &str) -> Result { + /* + * Multi-line comment explaining + * the complex validation logic + * that follows below. + * It covers edge cases for: + * - empty input + * - unicode input + * - oversized input + * - malformed input + */ + let validated = validate(input)?; + let trimmed = validated.trim(); + let result = transform(trimmed); + Ok(result) +} +RSEOF +git add lib.rs && git commit -q -m "change" + +# --- JavaScript template literal --- +REPO="$DIR/context-demo-js" +rm -rf "$REPO" +mkdir -p "$REPO" && cd "$REPO" && git init -q + +cat > render.js << 'JSEOF' +function render(data) { + const html = ` +
+
+

${data.title}

+ +
+
+

${data.description}

+
    ${data.items}
+
+
+ `; + const element = document.createElement("div"); + element.innerHTML = html; + return element; +} +JSEOF +git add render.js && git commit -q -m "initial" + +cat > render.js << 'JSEOF' +function render(data) { + const html = ` +
+
+

${data.title}

+ +
+
+

${data.description}

+
    ${data.items}
+
+
+ `; + const wrapper = document.createElement("section"); + wrapper.innerHTML = html; + wrapper.classList.add("rendered"); + return wrapper; +} +JSEOF +git add render.js && git commit -q -m "change" + +echo "Created repos:" +echo " $DIR/context-demo-python" +echo " $DIR/context-demo-rust" +echo " $DIR/context-demo-js" +echo +echo "Try:" +echo " cd $DIR/context-demo-python" +echo " git diff HEAD~1 | delta # bug" +echo " git diff -U9999 HEAD~1 | delta -U 3 # fix" diff --git a/src/cli.rs b/src/cli.rs index 544b25ceb..3b6a93a8b 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -184,6 +184,15 @@ pub struct Opt { /// doesn't support it, then delta will fall back to `diff` instead of `git diff`. pub diff_args: String, + #[arg(short = 'U', long = "context-lines", value_name = "N")] + /// Display at most N context lines around each change. + /// + /// When the input diff has more context lines than N, delta will use all of them for syntax + /// highlighting but only display N. This can fix incorrect syntax highlighting around multiline + /// constructs (multiline strings, block comments, etc). Use this in conjunction with git's -U + /// option, e.g. `git diff -U9999`. + pub context_lines: Option, + #[arg(long = "diff-highlight")] /// Emulate diff-highlight. /// diff --git a/src/config.rs b/src/config.rs index 1c01530a6..cc1269cf6 100644 --- a/src/config.rs +++ b/src/config.rs @@ -46,6 +46,7 @@ pub struct Config { pub blame_timestamp_format: String, pub blame_timestamp_output_format: Option, pub color_only: bool, + pub context_lines: Option, pub commit_regex: Regex, pub commit_style: Style, pub cwd_of_delta_process: Option, @@ -295,6 +296,7 @@ impl From for Config { blame_timestamp_output_format: opt.blame_timestamp_output_format, commit_style: styles["commit-style"], color_only: opt.color_only, + context_lines: opt.context_lines, commit_regex, cwd_of_delta_process, cwd_of_user_shell_process, diff --git a/src/delta.rs b/src/delta.rs index 5464df9a6..46d7796c2 100644 --- a/src/delta.rs +++ b/src/delta.rs @@ -186,6 +186,7 @@ impl<'a> StateMachine<'a> { self.handle_pending_line_with_diff_name()?; self.painter.paint_buffered_minus_and_plus_lines(); self.painter.emit()?; + self.painter.flush_hunk_output()?; Ok(()) } diff --git a/src/git_config/mod.rs b/src/git_config/mod.rs index 9808951ce..7ad5e2201 100644 --- a/src/git_config/mod.rs +++ b/src/git_config/mod.rs @@ -253,6 +253,20 @@ impl GitConfigGet for usize { } } +impl GitConfigGet for Option { + fn git_config_get(key: &str, git_config: &GitConfig) -> Option { + if let Some(s) = git_config.config_from_env_var.get(key) { + if let Ok(n) = s.parse::() { + return Some(Some(n)); + } + } + match git_config.config.get_i64(key) { + Ok(value) => Some(Some(value as usize)), + _ => None, + } + } +} + impl GitConfigGet for f64 { fn git_config_get(key: &str, git_config: &GitConfig) -> Option { if let Some(s) = git_config.config_from_env_var.get(key) { diff --git a/src/handlers/hunk_header.rs b/src/handlers/hunk_header.rs index fbbb68719..2f09181c9 100644 --- a/src/handlers/hunk_header.rs +++ b/src/handlers/hunk_header.rs @@ -157,6 +157,7 @@ impl StateMachine<'_> { self.painter.paint_buffered_minus_and_plus_lines(); self.painter.set_highlighter(); self.painter.emit()?; + self.painter.flush_hunk_output()?; let ParsedHunkHeader { code_fragment, diff --git a/src/options/get.rs b/src/options/get.rs index 40f3d581b..47709e6e9 100644 --- a/src/options/get.rs +++ b/src/options/get.rs @@ -124,6 +124,7 @@ pub trait GetOptionValue { } impl GetOptionValue for Option {} +impl GetOptionValue for Option {} impl GetOptionValue for String {} impl GetOptionValue for bool {} impl GetOptionValue for f64 {} diff --git a/src/options/option_value.rs b/src/options/option_value.rs index cbe5b383e..3f2f85a43 100644 --- a/src/options/option_value.rs +++ b/src/options/option_value.rs @@ -5,6 +5,7 @@ pub enum OptionValue { Boolean(bool), Float(f64), OptionString(Option), + OptionInt(Option), String(String), Int(usize), } @@ -81,6 +82,21 @@ impl From for String { } } +impl From> for OptionValue { + fn from(value: Option) -> Self { + OptionValue::OptionInt(value) + } +} + +impl From for Option { + fn from(value: OptionValue) -> Self { + match value { + OptionValue::OptionInt(value) => value, + _ => delta_unreachable("Error converting OptionValue to Option."), + } + } +} + impl From for OptionValue { fn from(value: usize) -> Self { OptionValue::Int(value) diff --git a/src/options/set.rs b/src/options/set.rs index 6d26d15df..87cc5203d 100644 --- a/src/options/set.rs +++ b/src/options/set.rs @@ -139,6 +139,7 @@ pub fn set_options( commit_decoration_style, commit_regex, commit_style, + context_lines, default_language, diff_args, diff_stat_align_width, diff --git a/src/paint.rs b/src/paint.rs index 4e473afa6..028dc49b0 100644 --- a/src/paint.rs +++ b/src/paint.rs @@ -23,6 +23,18 @@ use crate::{edits, utils, utils::tabs}; pub type LineSections<'a, S> = Vec<(S, &'a str)>; +#[derive(Clone, Copy, PartialEq)] +pub enum OutputChunkType { + Context, + Change, + Other, +} + +pub struct OutputChunk { + content: String, + chunk_type: OutputChunkType, +} + pub struct Painter<'p> { pub minus_lines: Vec<(String, State)>, pub plus_lines: Vec<(String, State)>, @@ -37,6 +49,8 @@ pub struct Painter<'p> { pub line_numbers_data: Option>, pub merge_conflict_lines: merge_conflict::MergeConflictLines, pub merge_conflict_commit_names: merge_conflict::MergeConflictCommitNames, + hunk_output_chunks: Vec, + output_buffer_snapshot_len: usize, } // How the background of a line is filled up to the end @@ -99,6 +113,8 @@ impl<'p> Painter<'p> { line_numbers_data, merge_conflict_lines: merge_conflict::MergeConflictLines::new(), merge_conflict_commit_names: merge_conflict::MergeConflictCommitNames::new(), + hunk_output_chunks: Vec::new(), + output_buffer_snapshot_len: 0, } } @@ -167,6 +183,9 @@ impl<'p> Painter<'p> { ); self.minus_lines.clear(); self.plus_lines.clear(); + if self.config.context_lines.is_some() { + self.snapshot_output(OutputChunkType::Change); + } } pub fn paint_zero_line(&mut self, line: &str, state: State) { @@ -207,6 +226,9 @@ impl<'p> Painter<'p> { BgShouldFill::default(), ); } + if self.config.context_lines.is_some() { + self.snapshot_output(OutputChunkType::Context); + } } /// Superimpose background styles and foreground syntax @@ -459,8 +481,54 @@ impl<'p> Painter<'p> { /// Write output buffer to output stream, and clear the buffer. pub fn emit(&mut self) -> std::io::Result<()> { - write!(self.writer, "{}", self.output_buffer)?; + if self.config.context_lines.is_some() { + self.snapshot_output(OutputChunkType::Other); + Ok(()) + } else { + write!(self.writer, "{}", self.output_buffer)?; + self.output_buffer.clear(); + Ok(()) + } + } + + pub fn snapshot_output(&mut self, chunk_type: OutputChunkType) { + let new_content = &self.output_buffer[self.output_buffer_snapshot_len..]; + if !new_content.is_empty() { + self.hunk_output_chunks.push(OutputChunk { + content: new_content.to_string(), + chunk_type, + }); + } + self.output_buffer_snapshot_len = self.output_buffer.len(); + } + + pub fn flush_hunk_output(&mut self) -> std::io::Result<()> { + let max_context = match self.config.context_lines { + Some(n) => n, + None => { + // No narrowing: output_buffer should already have been + // flushed by emit(), but handle any residual. + if !self.output_buffer.is_empty() { + write!(self.writer, "{}", self.output_buffer)?; + self.output_buffer.clear(); + } + return Ok(()); + } + }; + + // Drain chunks for narrowing. + let chunks = std::mem::take(&mut self.hunk_output_chunks); self.output_buffer.clear(); + self.output_buffer_snapshot_len = 0; + + if chunks.is_empty() { + return Ok(()); + } + + let narrowed = narrow_context_chunks(&chunks, max_context); + for chunk in &narrowed { + write!(self.writer, "{}", chunk.content)?; + } Ok(()) } @@ -578,6 +646,71 @@ impl<'p> Painter<'p> { } } +/// Narrow context: keep at most `max_context` context chunks before and after +/// each change block. Context chunks between two changes that fit within +/// 2*max_context are kept entirely; longer runs are split with a separator. +fn narrow_context_chunks(chunks: &[OutputChunk], max_context: usize) -> Vec { + // Identify runs of consecutive Context chunks and their boundaries. + // Strategy: walk chunks, grouping into runs of same-type. For each + // context run, decide how many to keep based on adjacency to changes. + let n = chunks.len(); + if n == 0 { + return Vec::new(); + } + + // Build list of (start_idx, end_idx_exclusive) for context runs. + let mut result: Vec<&OutputChunk> = Vec::new(); + let mut i = 0; + while i < n { + if chunks[i].chunk_type != OutputChunkType::Context { + result.push(&chunks[i]); + i += 1; + continue; + } + // Start of a context run. + let run_start = i; + while i < n && chunks[i].chunk_type == OutputChunkType::Context { + i += 1; + } + let run_end = i; + let run_len = run_end - run_start; + + // Determine adjacency: is there a change before/after this run? + let change_before = + run_start > 0 && chunks[run_start - 1].chunk_type == OutputChunkType::Change; + let change_after = run_end < n && chunks[run_end].chunk_type == OutputChunkType::Change; + + let keep_from_start = if change_before { max_context } else { 0 }; + let keep_from_end = if change_after { max_context } else { 0 }; + + if keep_from_start + keep_from_end >= run_len { + // Keep entire run. + for chunk in &chunks[run_start..run_end] { + result.push(chunk); + } + } else { + // Keep first `keep_from_start` and last `keep_from_end`. + for chunk in &chunks[run_start..run_start + keep_from_start] { + result.push(chunk); + } + if keep_from_start > 0 && keep_from_end > 0 { + // Insert separator between the two kept sections. + // (We'll handle this below by creating an owned chunk.) + } + for chunk in &chunks[run_end - keep_from_end..run_end] { + result.push(chunk); + } + } + } + result + .into_iter() + .map(|c| OutputChunk { + content: c.content.clone(), + chunk_type: c.chunk_type, + }) + .collect() +} + /// Remove initial -/+ character, expand tabs as spaces, and terminate with newline. // Terminating with newline character is necessary for many of the sublime syntax definitions to // highlight correctly. diff --git a/src/subcommands/diff.rs b/src/subcommands/diff.rs index 3346d9fe6..2dfc0b3ec 100644 --- a/src/subcommands/diff.rs +++ b/src/subcommands/diff.rs @@ -34,6 +34,13 @@ pub fn build_diff_cmd( diff_args[0] = format!("-{}", diff_args[0]) } + // When context narrowing is active, we need full file context for correct + // syntax highlighting. Strip any user-supplied -U from diff_args and inject + // -U9999 instead. + if config.context_lines.is_some() { + diff_args.retain(|arg| !arg.starts_with("-U") && !arg.starts_with("-u")); + } + let via_process_substitution = |f: &Path| f.starts_with("/proc/self/fd/") || f.starts_with("/dev/fd/"); @@ -45,14 +52,17 @@ pub fn build_diff_cmd( || !(via_process_substitution(minus_file) || via_process_substitution(plus_file)) => { - ( - SubCmdKind::GitDiff, - vec!["git", "diff", "--no-index", "--color"], - ) + let mut cmd = vec!["git", "diff", "--no-index", "--color"]; + if config.context_lines.is_some() { + cmd.push("-U9999"); + } + (SubCmdKind::GitDiff, cmd) } _ => ( SubCmdKind::Diff, - if diff_args_set_unified_context(&diff_args) { + if config.context_lines.is_some() { + vec!["diff", "-U9999"] + } else if diff_args_set_unified_context(&diff_args) { vec!["diff"] } else { vec!["diff", "-U3"] diff --git a/src/tests/test_example_diffs.rs b/src/tests/test_example_diffs.rs index cfc7a14c2..882d547d3 100644 --- a/src/tests/test_example_diffs.rs +++ b/src/tests/test_example_diffs.rs @@ -3154,5 +3154,98 @@ index 53f98b6..14d6caa 100644 三æäöø€ÆÄÖ〇Øß三 三æäöø€ÆÄÖ〇Øß三 ¶ +"; + + // Hunk starts inside a Python docstring: context lines are docstring body, + // then `"""` closes it, then real code follows. Because syntect starts each + // hunk in the default (beginning-of-file) parse state, it treats the closing + // `"""` as an *opening* delimiter, inverting highlighting: docstring text + // gets code colors, and actual code gets string colors. + // + // See https://github.com/dandavison/delta/issues/117 + #[test] + fn test_syntax_highlighting_inside_multiline_string_context() { + let result = DeltaTest::with_args(&["--color-only"]) + .explain_ansi() + .with_input(PYTHON_DOCSTRING_CONTEXT_DIFF); + + // After the closing `"""`, `return` is real Python code and must be + // highlighted as a keyword (color 203), not as a string (color 242). + // With the bug, the entire line gets string color because syntect + // thinks `"""` opened a string rather than closing one. + let plus_return_line = result + .output + .lines() + .find(|l| l.contains("+") && l.contains("return")) + .expect("should have a +return line"); + assert!( + plus_return_line.contains("(203)return"), + "BUG: `return` after closing \"\"\" should be keyword-highlighted (203), \ + but got string highlighting instead.\nLine: {}", + plus_return_line + ); + } + + // This test uses -U 3 with a full-file-context diff. The highlighter sees + // the entire file (including `def foo():` and the opening `"""`), so it + // correctly parses the closing `"""` as ending the string. The -U 3 flag + // then narrows the displayed output to 3 context lines around changes. + #[test] + fn test_syntax_highlighting_with_context_narrowing() { + let result = DeltaTest::with_args(&["--color-only", "-U", "3"]) + .explain_ansi() + .with_input(PYTHON_DOCSTRING_FULL_CONTEXT_DIFF); + + // Narrowing: `def foo():` and opening `"""` should be suppressed + // since they are more than 3 context lines from the changes. + assert!( + !result.output.lines().any(|l| l.contains("def foo")), + "extra context lines should be suppressed by -U 3" + ); + + // Correct highlighting: `return` is a keyword, not a string + let plus_return_line = result + .output + .lines() + .find(|l| l.contains("+") && l.contains("return")) + .expect("should have a +return line"); + assert!( + plus_return_line.contains("(203)return"), + "`return` after closing \"\"\" should be keyword-highlighted (203), \ + got: {}", + plus_return_line + ); + } + + const PYTHON_DOCSTRING_CONTEXT_DIFF: &str = "\ +diff --git a/example.py b/example.py +index 1234567..abcdefg 100644 +--- a/example.py ++++ b/example.py +@@ -5,8 +5,8 @@ + This is a docstring that spans + multiple lines. + \"\"\" +- x = 1 +- return x ++ x = 2 ++ return x + 1 +"; + + const PYTHON_DOCSTRING_FULL_CONTEXT_DIFF: &str = "\ +diff --git a/example.py b/example.py +index 1234567..abcdefg 100644 +--- a/example.py ++++ b/example.py +@@ -1,12 +1,12 @@ + def foo(): + \"\"\" + This is a docstring that spans + multiple lines. + \"\"\" +- x = 1 +- return x ++ x = 2 ++ return x + 1 "; }