update imara-diff

pull/13722/head
Pascal Kuthe 2025-06-09 17:17:27 +02:00
parent 2b26d27416
commit 554df74a96
No known key found for this signature in database
GPG Key ID: 4E01BF060BDE0D8B
7 changed files with 75 additions and 109 deletions

16
Cargo.lock generated
View File

@ -748,7 +748,7 @@ dependencies = [
"gix-trace",
"gix-traverse",
"gix-worktree",
"imara-diff",
"imara-diff 0.1.8",
"thiserror 2.0.12",
]
@ -1409,7 +1409,7 @@ dependencies = [
"helix-loader",
"helix-parsec",
"helix-stdx",
"imara-diff",
"imara-diff 0.2.0",
"indoc",
"log",
"nucleo",
@ -1604,7 +1604,7 @@ dependencies = [
"gix",
"helix-core",
"helix-event",
"imara-diff",
"imara-diff 0.2.0",
"log",
"parking_lot",
"tempfile",
@ -1848,6 +1848,16 @@ dependencies = [
"hashbrown 0.15.3",
]
[[package]]
name = "imara-diff"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f01d462f766df78ab820dd06f5eb700233c51f0f4c2e846520eaf4ba6aa5c5c"
dependencies = [
"hashbrown 0.15.3",
"memchr",
]
[[package]]
name = "indexmap"
version = "2.9.0"

View File

@ -46,8 +46,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
toml = "0.8"
imara-diff = "0.1.8"
imara-diff = "0.2.0"
encoding_rs = "0.8"
chrono = { version = "0.4", default-features = false, features = ["alloc", "std"] }

View File

@ -1,51 +1,22 @@
use std::ops::Range;
use std::time::Instant;
use imara_diff::intern::InternedInput;
use imara_diff::Algorithm;
use imara_diff::{Algorithm, Diff, Hunk, IndentHeuristic, IndentLevel, InternedInput};
use ropey::RopeSlice;
use crate::{ChangeSet, Rope, Tendril, Transaction};
/// An `imara_diff::Sink` that builds a `ChangeSet` for a character diff of a hunk
struct CharChangeSetBuilder<'a> {
// Change set that the retain/delete/insert operations are appended to.
res: &'a mut ChangeSet,
// Interned characters of the hunk that is being diffed.
hunk: &'a InternedInput<char>,
// End of the last processed `before` range (in tokens of `hunk.before`).
pos: u32,
}
impl imara_diff::Sink for CharChangeSetBuilder<'_> {
type Out = ();
/// Records a single character-level change in the `ChangeSet`: the unchanged
/// tokens since the previous change are retained, the `before` range is
/// deleted and the characters referenced by the `after` range are inserted.
fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
// retain everything between the end of the previous change and this one
self.res.retain((before.start - self.pos) as usize);
self.res.delete(before.len());
self.pos = before.end;
// resolve the `after` tokens back to their characters via the interner
let res = self.hunk.after[after.start as usize..after.end as usize]
.iter()
.map(|&token| self.hunk.interner[token])
.collect();
self.res.insert(res);
}
/// Retains the `before` tokens that follow the last processed change.
fn finish(self) -> Self::Out {
self.res.retain(self.hunk.before.len() - self.pos as usize);
}
}
struct LineChangeSetBuilder<'a> {
struct ChangeSetBuilder<'a> {
res: ChangeSet,
after: RopeSlice<'a>,
file: &'a InternedInput<RopeSlice<'a>>,
current_hunk: InternedInput<char>,
char_diff: Diff,
pos: u32,
}
impl imara_diff::Sink for LineChangeSetBuilder<'_> {
type Out = ChangeSet;
fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
impl ChangeSetBuilder<'_> {
fn process_hunk(&mut self, before: Range<u32>, after: Range<u32>) {
let len = self.file.before[self.pos as usize..before.start as usize]
.iter()
.map(|&it| self.file.interner[it].len_chars())
@ -109,25 +80,36 @@ impl imara_diff::Sink for LineChangeSetBuilder<'_> {
.flat_map(|&it| self.file.interner[it].chars());
self.current_hunk.update_before(hunk_before);
self.current_hunk.update_after(hunk_after);
// The histogram heuristic does not work as well for characters
// because the same characters often reoccur,
// so use the Myers diff algorithm instead.
imara_diff::diff(
self.char_diff.compute_with(
Algorithm::Myers,
&self.current_hunk,
CharChangeSetBuilder {
res: &mut self.res,
hunk: &self.current_hunk,
pos: 0,
},
&self.current_hunk.before,
&self.current_hunk.after,
self.current_hunk.interner.num_tokens(),
);
let mut pos = 0;
for Hunk { before, after } in self.char_diff.hunks() {
self.res.retain((before.start - pos) as usize);
self.res.delete(before.len());
pos = before.end;
let res = self.current_hunk.after[after.start as usize..after.end as usize]
.iter()
.map(|&token| self.current_hunk.interner[token])
.collect();
self.res.insert(res);
}
self.res
.retain(self.current_hunk.before.len() - pos as usize);
// reuse allocations
self.current_hunk.clear();
}
}
fn finish(mut self) -> Self::Out {
fn finish(mut self) -> ChangeSet {
let len = self.file.before[self.pos as usize..]
.iter()
.map(|&it| self.file.interner[it].len_chars())
@ -140,7 +122,7 @@ impl imara_diff::Sink for LineChangeSetBuilder<'_> {
struct RopeLines<'a>(RopeSlice<'a>);
impl<'a> imara_diff::intern::TokenSource for RopeLines<'a> {
impl<'a> imara_diff::TokenSource for RopeLines<'a> {
type Token = RopeSlice<'a>;
type Tokenizer = ropey::iter::Lines<'a>;
@ -161,15 +143,23 @@ pub fn compare_ropes(before: &Rope, after: &Rope) -> Transaction {
let res = ChangeSet::with_capacity(32);
let after = after.slice(..);
let file = InternedInput::new(RopeLines(before.slice(..)), RopeLines(after));
let builder = LineChangeSetBuilder {
let mut builder = ChangeSetBuilder {
res,
file: &file,
after,
pos: 0,
current_hunk: InternedInput::default(),
char_diff: Diff::default(),
};
let res = imara_diff::diff(Algorithm::Histogram, &file, builder).into();
let mut diff = Diff::compute(Algorithm::Histogram, &file);
diff.postprocess_with_heuristic(
&file,
IndentHeuristic::new(|token| IndentLevel::for_ascii_line(file.interner[token].bytes(), 4)),
);
for hunk in diff.hunks() {
builder.process_hunk(hunk.before, hunk.after)
}
let res = builder.finish().into();
log::debug!(
"rope diff took {}s",

View File

@ -20,7 +20,7 @@ parking_lot.workspace = true
arc-swap = { version = "1.7.1" }
gix = { version = "0.72.1", features = ["attributes", "status"], default-features = false, optional = true }
imara-diff = "0.1.8"
imara-diff = "0.2.0"
anyhow = "1"
log = "0.4"

View File

@ -1,5 +1,4 @@
use std::iter::Peekable;
use std::ops::Range;
use std::sync::Arc;
use helix_core::Rope;
@ -12,6 +11,8 @@ use tokio::time::Instant;
use crate::diff::worker::DiffWorker;
pub use imara_diff::Hunk;
mod line_cache;
mod worker;
@ -52,8 +53,8 @@ impl DiffHandle {
let worker = DiffWorker {
channel: receiver,
diff: diff.clone(),
new_hunks: Vec::default(),
diff_finished_notify: Arc::default(),
diff_alloc: imara_diff::Diff::default(),
};
let handle = tokio::spawn(worker.run(diff_base, doc));
let differ = DiffHandle {
@ -118,48 +119,6 @@ const MAX_DIFF_LINES: usize = 64 * u16::MAX as usize;
// cap average line length to 128 for files with MAX_DIFF_LINES
const MAX_DIFF_BYTES: usize = MAX_DIFF_LINES * 128;
/// A single change in a file, potentially spanning multiple lines.
///
/// Hunks produced by the differs are always ordered by their position
/// in the file and are non-overlapping.
/// Specifically, for any two consecutive hunks `x` and `y` the following properties hold:
///
/// ``` no_compile
/// assert!(x.before.end <= y.before.start);
/// assert!(x.after.end <= y.after.start);
/// ```
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct Hunk {
pub before: Range<u32>,
pub after: Range<u32>,
}
impl Hunk {
/// Can be used instead of `Option::None` for better performance
/// because lines larger than `i32::MAX` are not supported by `imara-diff` anyway.
/// Has some nice properties where it usually is not necessary to check for `None` separately:
/// Empty ranges fail contains checks and also fail smaller-than checks.
pub const NONE: Hunk = Hunk {
before: u32::MAX..u32::MAX,
after: u32::MAX..u32::MAX,
};
/// Inverts a change so that `before` and `after` are swapped.
pub fn invert(&self) -> Hunk {
Hunk {
before: self.after.clone(),
after: self.before.clone(),
}
}
/// Returns `true` if the `before` range is empty.
pub fn is_pure_insertion(&self) -> bool {
self.before.is_empty()
}
/// Returns `true` if the `after` range is empty.
pub fn is_pure_removal(&self) -> bool {
self.after.is_empty()
}
}
/// A list of changes in a file sorted in ascending
/// non-overlapping order
#[derive(Debug)]

View File

@ -13,7 +13,7 @@
use std::mem::transmute;
use helix_core::{Rope, RopeSlice};
use imara_diff::intern::{InternedInput, Interner};
use imara_diff::{InternedInput, Interner};
use super::{MAX_DIFF_BYTES, MAX_DIFF_LINES};

View File

@ -1,9 +1,7 @@
use std::mem::swap;
use std::ops::Range;
use std::sync::Arc;
use helix_core::{Rope, RopeSlice};
use imara_diff::intern::InternedInput;
use imara_diff::{IndentHeuristic, IndentLevel, InternedInput};
use parking_lot::RwLock;
use tokio::sync::mpsc::UnboundedReceiver;
use tokio::sync::Notify;
@ -14,7 +12,6 @@ use crate::diff::{
};
use super::line_cache::InternedRopeLines;
use super::Hunk;
#[cfg(test)]
mod test;
@ -22,8 +19,8 @@ mod test;
pub(super) struct DiffWorker {
pub channel: UnboundedReceiver<Event>,
pub diff: Arc<RwLock<DiffInner>>,
pub new_hunks: Vec<Hunk>,
pub diff_finished_notify: Arc<Notify>,
pub diff_alloc: imara_diff::Diff,
}
impl DiffWorker {
@ -76,15 +73,26 @@ impl DiffWorker {
let mut diff = self.diff.write();
diff.diff_base = diff_base;
diff.doc = doc;
swap(&mut diff.hunks, &mut self.new_hunks);
diff.hunks.clear();
diff.hunks.extend(self.diff_alloc.hunks());
drop(diff);
self.diff_finished_notify.notify_waiters();
self.new_hunks.clear();
}
fn perform_diff(&mut self, input: &InternedInput<RopeSlice>) {
imara_diff::diff(ALGORITHM, input, |before: Range<u32>, after: Range<u32>| {
self.new_hunks.push(Hunk { before, after })
})
self.diff_alloc.compute_with(
ALGORITHM,
&input.before,
&input.after,
input.interner.num_tokens(),
);
self.diff_alloc.postprocess_with(
&input.before,
&input.after,
IndentHeuristic::new(|token| {
IndentLevel::for_ascii_line(input.interner[token].bytes(), 4)
}),
);
}
}
@ -94,7 +102,7 @@ struct EventAccumulator {
render_lock: Option<RenderLock>,
}
impl EventAccumulator {
impl<'a> EventAccumulator {
fn new() -> EventAccumulator {
EventAccumulator {
diff_base: None,