diff --git a/Cargo.lock b/Cargo.lock index a8ba3a104..59a982725 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -748,7 +748,7 @@ dependencies = [ "gix-trace", "gix-traverse", "gix-worktree", - "imara-diff", + "imara-diff 0.1.8", "thiserror 2.0.12", ] @@ -1409,7 +1409,7 @@ dependencies = [ "helix-loader", "helix-parsec", "helix-stdx", - "imara-diff", + "imara-diff 0.2.0", "indoc", "log", "nucleo", @@ -1604,7 +1604,7 @@ dependencies = [ "gix", "helix-core", "helix-event", - "imara-diff", + "imara-diff 0.2.0", "log", "parking_lot", "tempfile", @@ -1848,6 +1848,16 @@ dependencies = [ "hashbrown 0.15.3", ] +[[package]] +name = "imara-diff" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f01d462f766df78ab820dd06f5eb700233c51f0f4c2e846520eaf4ba6aa5c5c" +dependencies = [ + "hashbrown 0.15.3", + "memchr", +] + [[package]] name = "indexmap" version = "2.9.0" diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml index b05207615..4e825364b 100644 --- a/helix-core/Cargo.toml +++ b/helix-core/Cargo.toml @@ -46,8 +46,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" toml = "0.8" -imara-diff = "0.1.8" - +imara-diff = "0.2.0" encoding_rs = "0.8" chrono = { version = "0.4", default-features = false, features = ["alloc", "std"] } diff --git a/helix-core/src/diff.rs b/helix-core/src/diff.rs index a5d6d7229..5937f91cb 100644 --- a/helix-core/src/diff.rs +++ b/helix-core/src/diff.rs @@ -1,51 +1,22 @@ use std::ops::Range; use std::time::Instant; -use imara_diff::intern::InternedInput; -use imara_diff::Algorithm; +use imara_diff::{Algorithm, Diff, Hunk, IndentHeuristic, IndentLevel, InternedInput}; use ropey::RopeSlice; use crate::{ChangeSet, Rope, Tendril, Transaction}; -/// A `imara_diff::Sink` that builds a `ChangeSet` for a character diff of a hunk -struct CharChangeSetBuilder<'a> { - res: &'a mut ChangeSet, - hunk: &'a InternedInput, - pos: u32, -} - -impl imara_diff::Sink for CharChangeSetBuilder<'_> { - type Out = (); - fn process_change(&mut self, before: Range, after: Range) { - self.res.retain((before.start - self.pos) as usize); - self.res.delete(before.len()); - self.pos = before.end; - - let res = self.hunk.after[after.start as usize..after.end as usize] - .iter() - .map(|&token| self.hunk.interner[token]) - .collect(); - - self.res.insert(res); - } - - fn finish(self) -> Self::Out { - self.res.retain(self.hunk.before.len() - self.pos as usize); - } -} - -struct LineChangeSetBuilder<'a> { +struct ChangeSetBuilder<'a> { res: ChangeSet, after: RopeSlice<'a>, file: &'a InternedInput>, current_hunk: InternedInput, + char_diff: Diff, pos: u32, } -impl imara_diff::Sink for LineChangeSetBuilder<'_> { - type Out = ChangeSet; - - fn process_change(&mut self, before: Range, after: Range) { +impl ChangeSetBuilder<'_> { + fn process_hunk(&mut self, before: Range, after: Range) { let len = self.file.before[self.pos as usize..before.start as usize] .iter() .map(|&it| self.file.interner[it].len_chars()) @@ -109,25 +80,36 @@ impl imara_diff::Sink for LineChangeSetBuilder<'_> { .flat_map(|&it| self.file.interner[it].chars()); self.current_hunk.update_before(hunk_before); self.current_hunk.update_after(hunk_after); - // the histogram heuristic does not work as well // for characters because the same characters often reoccur // use myer diff instead - imara_diff::diff( + self.char_diff.compute_with( Algorithm::Myers, - &self.current_hunk, - CharChangeSetBuilder { - res: &mut self.res, - hunk: &self.current_hunk, - pos: 0, - }, + &self.current_hunk.before, + &self.current_hunk.after, + self.current_hunk.interner.num_tokens(), ); + let mut pos = 0; + for Hunk { before, after } in self.char_diff.hunks() { + self.res.retain((before.start - pos) as usize); + self.res.delete(before.len()); + pos = before.end; + let res = self.current_hunk.after[after.start as usize..after.end as usize] + .iter() + .map(|&token| self.current_hunk.interner[token]) + .collect(); + + self.res.insert(res); + } + self.res + .retain(self.current_hunk.before.len() - pos as usize); + // reuse allocations self.current_hunk.clear(); } } - fn finish(mut self) -> Self::Out { + fn finish(mut self) -> ChangeSet { let len = self.file.before[self.pos as usize..] .iter() .map(|&it| self.file.interner[it].len_chars()) @@ -140,7 +122,7 @@ impl imara_diff::Sink for LineChangeSetBuilder<'_> { struct RopeLines<'a>(RopeSlice<'a>); -impl<'a> imara_diff::intern::TokenSource for RopeLines<'a> { +impl<'a> imara_diff::TokenSource for RopeLines<'a> { type Token = RopeSlice<'a>; type Tokenizer = ropey::iter::Lines<'a>; @@ -161,15 +143,23 @@ pub fn compare_ropes(before: &Rope, after: &Rope) -> Transaction { let res = ChangeSet::with_capacity(32); let after = after.slice(..); let file = InternedInput::new(RopeLines(before.slice(..)), RopeLines(after)); - let builder = LineChangeSetBuilder { + let mut builder = ChangeSetBuilder { res, file: &file, after, pos: 0, current_hunk: InternedInput::default(), + char_diff: Diff::default(), }; - - let res = imara_diff::diff(Algorithm::Histogram, &file, builder).into(); + let mut diff = Diff::compute(Algorithm::Histogram, &file); + diff.postprocess_with_heuristic( + &file, + IndentHeuristic::new(|token| IndentLevel::for_ascii_line(file.interner[token].bytes(), 4)), + ); + for hunk in diff.hunks() { + builder.process_hunk(hunk.before, hunk.after) + } + let res = builder.finish().into(); log::debug!( "rope diff took {}s", diff --git a/helix-vcs/Cargo.toml b/helix-vcs/Cargo.toml index b5c150d31..d89366868 100644 --- a/helix-vcs/Cargo.toml +++ b/helix-vcs/Cargo.toml @@ -20,7 +20,7 @@ parking_lot.workspace = true arc-swap = { version = "1.7.1" } gix = { version = "0.72.1", features = ["attributes", "status"], default-features = false, optional = true } -imara-diff = "0.1.8" +imara-diff = "0.2.0" anyhow = "1" log = "0.4" diff --git a/helix-vcs/src/diff.rs b/helix-vcs/src/diff.rs index e49e171dd..6ec29813b 100644 --- a/helix-vcs/src/diff.rs +++ b/helix-vcs/src/diff.rs @@ -1,5 +1,4 @@ use std::iter::Peekable; -use std::ops::Range; use std::sync::Arc; use helix_core::Rope; @@ -12,6 +11,8 @@ use tokio::time::Instant; use crate::diff::worker::DiffWorker; +pub use imara_diff::Hunk; + mod line_cache; mod worker; @@ -52,8 +53,8 @@ impl DiffHandle { let worker = DiffWorker { channel: receiver, diff: diff.clone(), - new_hunks: Vec::default(), diff_finished_notify: Arc::default(), + diff_alloc: imara_diff::Diff::default(), }; let handle = tokio::spawn(worker.run(diff_base, doc)); let differ = DiffHandle { @@ -118,48 +119,6 @@ const MAX_DIFF_LINES: usize = 64 * u16::MAX as usize; // cap average line length to 128 for files with MAX_DIFF_LINES const MAX_DIFF_BYTES: usize = MAX_DIFF_LINES * 128; -/// A single change in a file potentially spanning multiple lines -/// Hunks produced by the differs are always ordered by their position -/// in the file and non-overlapping. -/// Specifically for any two hunks `x` and `y` the following properties hold: -/// -/// ``` no_compile -/// assert!(x.before.end <= y.before.start); -/// assert!(x.after.end <= y.after.start); -/// ``` -#[derive(PartialEq, Eq, Clone, Debug)] -pub struct Hunk { - pub before: Range, - pub after: Range, -} - -impl Hunk { - /// Can be used instead of `Option::None` for better performance - /// because lines larger then `i32::MAX` are not supported by `imara-diff` anyways. - /// Has some nice properties where it usually is not necessary to check for `None` separately: - /// Empty ranges fail contains checks and also fails smaller then checks. - pub const NONE: Hunk = Hunk { - before: u32::MAX..u32::MAX, - after: u32::MAX..u32::MAX, - }; - - /// Inverts a change so that `before` - pub fn invert(&self) -> Hunk { - Hunk { - before: self.after.clone(), - after: self.before.clone(), - } - } - - pub fn is_pure_insertion(&self) -> bool { - self.before.is_empty() - } - - pub fn is_pure_removal(&self) -> bool { - self.after.is_empty() - } -} - /// A list of changes in a file sorted in ascending /// non-overlapping order #[derive(Debug)] diff --git a/helix-vcs/src/diff/line_cache.rs b/helix-vcs/src/diff/line_cache.rs index 460a2065e..4a03a8f5a 100644 --- a/helix-vcs/src/diff/line_cache.rs +++ b/helix-vcs/src/diff/line_cache.rs @@ -13,7 +13,7 @@ use std::mem::transmute; use helix_core::{Rope, RopeSlice}; -use imara_diff::intern::{InternedInput, Interner}; +use imara_diff::{InternedInput, Interner}; use super::{MAX_DIFF_BYTES, MAX_DIFF_LINES}; diff --git a/helix-vcs/src/diff/worker.rs b/helix-vcs/src/diff/worker.rs index 2b6466f63..3471b4cb3 100644 --- a/helix-vcs/src/diff/worker.rs +++ b/helix-vcs/src/diff/worker.rs @@ -1,9 +1,7 @@ -use std::mem::swap; -use std::ops::Range; use std::sync::Arc; use helix_core::{Rope, RopeSlice}; -use imara_diff::intern::InternedInput; +use imara_diff::{IndentHeuristic, IndentLevel, InternedInput}; use parking_lot::RwLock; use tokio::sync::mpsc::UnboundedReceiver; use tokio::sync::Notify; @@ -14,7 +12,6 @@ use crate::diff::{ }; use super::line_cache::InternedRopeLines; -use super::Hunk; #[cfg(test)] mod test; @@ -22,8 +19,8 @@ mod test; pub(super) struct DiffWorker { pub channel: UnboundedReceiver, pub diff: Arc>, - pub new_hunks: Vec, pub diff_finished_notify: Arc, + pub diff_alloc: imara_diff::Diff, } impl DiffWorker { @@ -76,15 +73,26 @@ impl DiffWorker { let mut diff = self.diff.write(); diff.diff_base = diff_base; diff.doc = doc; - swap(&mut diff.hunks, &mut self.new_hunks); + diff.hunks.clear(); + diff.hunks.extend(self.diff_alloc.hunks()); + drop(diff); self.diff_finished_notify.notify_waiters(); - self.new_hunks.clear(); } fn perform_diff(&mut self, input: &InternedInput) { - imara_diff::diff(ALGORITHM, input, |before: Range, after: Range| { - self.new_hunks.push(Hunk { before, after }) - }) + self.diff_alloc.compute_with( + ALGORITHM, + &input.before, + &input.after, + input.interner.num_tokens(), + ); + self.diff_alloc.postprocess_with( + &input.before, + &input.after, + IndentHeuristic::new(|token| { + IndentLevel::for_ascii_line(input.interner[token].bytes(), 4) + }), + ); } } @@ -94,7 +102,7 @@ struct EventAccumulator { render_lock: Option, } -impl EventAccumulator { +impl<'a> EventAccumulator { fn new() -> EventAccumulator { EventAccumulator { diff_base: None,