mirror of https://github.com/helix-editor/helix
update imara-diff
parent
2b26d27416
commit
554df74a96
|
@ -748,7 +748,7 @@ dependencies = [
|
|||
"gix-trace",
|
||||
"gix-traverse",
|
||||
"gix-worktree",
|
||||
"imara-diff",
|
||||
"imara-diff 0.1.8",
|
||||
"thiserror 2.0.12",
|
||||
]
|
||||
|
||||
|
@ -1409,7 +1409,7 @@ dependencies = [
|
|||
"helix-loader",
|
||||
"helix-parsec",
|
||||
"helix-stdx",
|
||||
"imara-diff",
|
||||
"imara-diff 0.2.0",
|
||||
"indoc",
|
||||
"log",
|
||||
"nucleo",
|
||||
|
@ -1604,7 +1604,7 @@ dependencies = [
|
|||
"gix",
|
||||
"helix-core",
|
||||
"helix-event",
|
||||
"imara-diff",
|
||||
"imara-diff 0.2.0",
|
||||
"log",
|
||||
"parking_lot",
|
||||
"tempfile",
|
||||
|
@ -1848,6 +1848,16 @@ dependencies = [
|
|||
"hashbrown 0.15.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "imara-diff"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2f01d462f766df78ab820dd06f5eb700233c51f0f4c2e846520eaf4ba6aa5c5c"
|
||||
dependencies = [
|
||||
"hashbrown 0.15.3",
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.9.0"
|
||||
|
|
|
@ -46,8 +46,7 @@ serde = { version = "1.0", features = ["derive"] }
|
|||
serde_json = "1.0"
|
||||
toml = "0.8"
|
||||
|
||||
imara-diff = "0.1.8"
|
||||
|
||||
imara-diff = "0.2.0"
|
||||
encoding_rs = "0.8"
|
||||
|
||||
chrono = { version = "0.4", default-features = false, features = ["alloc", "std"] }
|
||||
|
|
|
@ -1,51 +1,22 @@
|
|||
use std::ops::Range;
|
||||
use std::time::Instant;
|
||||
|
||||
use imara_diff::intern::InternedInput;
|
||||
use imara_diff::Algorithm;
|
||||
use imara_diff::{Algorithm, Diff, Hunk, IndentHeuristic, IndentLevel, InternedInput};
|
||||
use ropey::RopeSlice;
|
||||
|
||||
use crate::{ChangeSet, Rope, Tendril, Transaction};
|
||||
|
||||
/// An `imara_diff::Sink` that builds a `ChangeSet` for a character diff of a hunk
|
||||
struct CharChangeSetBuilder<'a> {
|
||||
res: &'a mut ChangeSet,
|
||||
hunk: &'a InternedInput<char>,
|
||||
pos: u32,
|
||||
}
|
||||
|
||||
impl imara_diff::Sink for CharChangeSetBuilder<'_> {
|
||||
type Out = ();
|
||||
fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
|
||||
self.res.retain((before.start - self.pos) as usize);
|
||||
self.res.delete(before.len());
|
||||
self.pos = before.end;
|
||||
|
||||
let res = self.hunk.after[after.start as usize..after.end as usize]
|
||||
.iter()
|
||||
.map(|&token| self.hunk.interner[token])
|
||||
.collect();
|
||||
|
||||
self.res.insert(res);
|
||||
}
|
||||
|
||||
fn finish(self) -> Self::Out {
|
||||
self.res.retain(self.hunk.before.len() - self.pos as usize);
|
||||
}
|
||||
}
|
||||
|
||||
struct LineChangeSetBuilder<'a> {
|
||||
struct ChangeSetBuilder<'a> {
|
||||
res: ChangeSet,
|
||||
after: RopeSlice<'a>,
|
||||
file: &'a InternedInput<RopeSlice<'a>>,
|
||||
current_hunk: InternedInput<char>,
|
||||
char_diff: Diff,
|
||||
pos: u32,
|
||||
}
|
||||
|
||||
impl imara_diff::Sink for LineChangeSetBuilder<'_> {
|
||||
type Out = ChangeSet;
|
||||
|
||||
fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
|
||||
impl ChangeSetBuilder<'_> {
|
||||
fn process_hunk(&mut self, before: Range<u32>, after: Range<u32>) {
|
||||
let len = self.file.before[self.pos as usize..before.start as usize]
|
||||
.iter()
|
||||
.map(|&it| self.file.interner[it].len_chars())
|
||||
|
@ -109,25 +80,36 @@ impl imara_diff::Sink for LineChangeSetBuilder<'_> {
|
|||
.flat_map(|&it| self.file.interner[it].chars());
|
||||
self.current_hunk.update_before(hunk_before);
|
||||
self.current_hunk.update_after(hunk_after);
|
||||
|
||||
// the histogram heuristic does not work as well
|
||||
// for characters because the same characters often reoccur
|
||||
// use the Myers diff algorithm instead
|
||||
imara_diff::diff(
|
||||
self.char_diff.compute_with(
|
||||
Algorithm::Myers,
|
||||
&self.current_hunk,
|
||||
CharChangeSetBuilder {
|
||||
res: &mut self.res,
|
||||
hunk: &self.current_hunk,
|
||||
pos: 0,
|
||||
},
|
||||
&self.current_hunk.before,
|
||||
&self.current_hunk.after,
|
||||
self.current_hunk.interner.num_tokens(),
|
||||
);
|
||||
let mut pos = 0;
|
||||
for Hunk { before, after } in self.char_diff.hunks() {
|
||||
self.res.retain((before.start - pos) as usize);
|
||||
self.res.delete(before.len());
|
||||
pos = before.end;
|
||||
|
||||
let res = self.current_hunk.after[after.start as usize..after.end as usize]
|
||||
.iter()
|
||||
.map(|&token| self.current_hunk.interner[token])
|
||||
.collect();
|
||||
|
||||
self.res.insert(res);
|
||||
}
|
||||
self.res
|
||||
.retain(self.current_hunk.before.len() - pos as usize);
|
||||
// reuse allocations
|
||||
self.current_hunk.clear();
|
||||
}
|
||||
}
|
||||
|
||||
fn finish(mut self) -> Self::Out {
|
||||
fn finish(mut self) -> ChangeSet {
|
||||
let len = self.file.before[self.pos as usize..]
|
||||
.iter()
|
||||
.map(|&it| self.file.interner[it].len_chars())
|
||||
|
@ -140,7 +122,7 @@ impl imara_diff::Sink for LineChangeSetBuilder<'_> {
|
|||
|
||||
struct RopeLines<'a>(RopeSlice<'a>);
|
||||
|
||||
impl<'a> imara_diff::intern::TokenSource for RopeLines<'a> {
|
||||
impl<'a> imara_diff::TokenSource for RopeLines<'a> {
|
||||
type Token = RopeSlice<'a>;
|
||||
type Tokenizer = ropey::iter::Lines<'a>;
|
||||
|
||||
|
@ -161,15 +143,23 @@ pub fn compare_ropes(before: &Rope, after: &Rope) -> Transaction {
|
|||
let res = ChangeSet::with_capacity(32);
|
||||
let after = after.slice(..);
|
||||
let file = InternedInput::new(RopeLines(before.slice(..)), RopeLines(after));
|
||||
let builder = LineChangeSetBuilder {
|
||||
let mut builder = ChangeSetBuilder {
|
||||
res,
|
||||
file: &file,
|
||||
after,
|
||||
pos: 0,
|
||||
current_hunk: InternedInput::default(),
|
||||
char_diff: Diff::default(),
|
||||
};
|
||||
|
||||
let res = imara_diff::diff(Algorithm::Histogram, &file, builder).into();
|
||||
let mut diff = Diff::compute(Algorithm::Histogram, &file);
|
||||
diff.postprocess_with_heuristic(
|
||||
&file,
|
||||
IndentHeuristic::new(|token| IndentLevel::for_ascii_line(file.interner[token].bytes(), 4)),
|
||||
);
|
||||
for hunk in diff.hunks() {
|
||||
builder.process_hunk(hunk.before, hunk.after)
|
||||
}
|
||||
let res = builder.finish().into();
|
||||
|
||||
log::debug!(
|
||||
"rope diff took {}s",
|
||||
|
|
|
@ -20,7 +20,7 @@ parking_lot.workspace = true
|
|||
arc-swap = { version = "1.7.1" }
|
||||
|
||||
gix = { version = "0.72.1", features = ["attributes", "status"], default-features = false, optional = true }
|
||||
imara-diff = "0.1.8"
|
||||
imara-diff = "0.2.0"
|
||||
anyhow = "1"
|
||||
|
||||
log = "0.4"
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
use std::iter::Peekable;
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use helix_core::Rope;
|
||||
|
@ -12,6 +11,8 @@ use tokio::time::Instant;
|
|||
|
||||
use crate::diff::worker::DiffWorker;
|
||||
|
||||
pub use imara_diff::Hunk;
|
||||
|
||||
mod line_cache;
|
||||
mod worker;
|
||||
|
||||
|
@ -52,8 +53,8 @@ impl DiffHandle {
|
|||
let worker = DiffWorker {
|
||||
channel: receiver,
|
||||
diff: diff.clone(),
|
||||
new_hunks: Vec::default(),
|
||||
diff_finished_notify: Arc::default(),
|
||||
diff_alloc: imara_diff::Diff::default(),
|
||||
};
|
||||
let handle = tokio::spawn(worker.run(diff_base, doc));
|
||||
let differ = DiffHandle {
|
||||
|
@ -118,48 +119,6 @@ const MAX_DIFF_LINES: usize = 64 * u16::MAX as usize;
|
|||
// cap average line length to 128 for files with MAX_DIFF_LINES
|
||||
const MAX_DIFF_BYTES: usize = MAX_DIFF_LINES * 128;
|
||||
|
||||
/// A single change in a file, potentially spanning multiple lines.
|
||||
/// Hunks produced by the differs are always ordered by their position
|
||||
/// in the file and non-overlapping.
|
||||
/// Specifically for any two hunks `x` and `y` the following properties hold:
|
||||
///
|
||||
/// ``` no_compile
|
||||
/// assert!(x.before.end <= y.before.start);
|
||||
/// assert!(x.after.end <= y.after.start);
|
||||
/// ```
|
||||
#[derive(PartialEq, Eq, Clone, Debug)]
|
||||
pub struct Hunk {
|
||||
pub before: Range<u32>,
|
||||
pub after: Range<u32>,
|
||||
}
|
||||
|
||||
impl Hunk {
|
||||
/// Can be used instead of `Option::None` for better performance
|
||||
/// because lines larger than `i32::MAX` are not supported by `imara-diff` anyway.
|
||||
/// Has some nice properties where it usually is not necessary to check for `None` separately:
|
||||
/// Empty ranges fail `contains` checks and also fail smaller-than checks.
|
||||
pub const NONE: Hunk = Hunk {
|
||||
before: u32::MAX..u32::MAX,
|
||||
after: u32::MAX..u32::MAX,
|
||||
};
|
||||
|
||||
/// Inverts a change so that `before` and `after` are swapped.
|
||||
pub fn invert(&self) -> Hunk {
|
||||
Hunk {
|
||||
before: self.after.clone(),
|
||||
after: self.before.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_pure_insertion(&self) -> bool {
|
||||
self.before.is_empty()
|
||||
}
|
||||
|
||||
pub fn is_pure_removal(&self) -> bool {
|
||||
self.after.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
/// A list of changes in a file sorted in ascending
|
||||
/// non-overlapping order
|
||||
#[derive(Debug)]
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
use std::mem::transmute;
|
||||
|
||||
use helix_core::{Rope, RopeSlice};
|
||||
use imara_diff::intern::{InternedInput, Interner};
|
||||
use imara_diff::{InternedInput, Interner};
|
||||
|
||||
use super::{MAX_DIFF_BYTES, MAX_DIFF_LINES};
|
||||
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
use std::mem::swap;
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use helix_core::{Rope, RopeSlice};
|
||||
use imara_diff::intern::InternedInput;
|
||||
use imara_diff::{IndentHeuristic, IndentLevel, InternedInput};
|
||||
use parking_lot::RwLock;
|
||||
use tokio::sync::mpsc::UnboundedReceiver;
|
||||
use tokio::sync::Notify;
|
||||
|
@ -14,7 +12,6 @@ use crate::diff::{
|
|||
};
|
||||
|
||||
use super::line_cache::InternedRopeLines;
|
||||
use super::Hunk;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
@ -22,8 +19,8 @@ mod test;
|
|||
pub(super) struct DiffWorker {
|
||||
pub channel: UnboundedReceiver<Event>,
|
||||
pub diff: Arc<RwLock<DiffInner>>,
|
||||
pub new_hunks: Vec<Hunk>,
|
||||
pub diff_finished_notify: Arc<Notify>,
|
||||
pub diff_alloc: imara_diff::Diff,
|
||||
}
|
||||
|
||||
impl DiffWorker {
|
||||
|
@ -76,15 +73,26 @@ impl DiffWorker {
|
|||
let mut diff = self.diff.write();
|
||||
diff.diff_base = diff_base;
|
||||
diff.doc = doc;
|
||||
swap(&mut diff.hunks, &mut self.new_hunks);
|
||||
diff.hunks.clear();
|
||||
diff.hunks.extend(self.diff_alloc.hunks());
|
||||
drop(diff);
|
||||
self.diff_finished_notify.notify_waiters();
|
||||
self.new_hunks.clear();
|
||||
}
|
||||
|
||||
fn perform_diff(&mut self, input: &InternedInput<RopeSlice>) {
|
||||
imara_diff::diff(ALGORITHM, input, |before: Range<u32>, after: Range<u32>| {
|
||||
self.new_hunks.push(Hunk { before, after })
|
||||
})
|
||||
self.diff_alloc.compute_with(
|
||||
ALGORITHM,
|
||||
&input.before,
|
||||
&input.after,
|
||||
input.interner.num_tokens(),
|
||||
);
|
||||
self.diff_alloc.postprocess_with(
|
||||
&input.before,
|
||||
&input.after,
|
||||
IndentHeuristic::new(|token| {
|
||||
IndentLevel::for_ascii_line(input.interner[token].bytes(), 4)
|
||||
}),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -94,7 +102,7 @@ struct EventAccumulator {
|
|||
render_lock: Option<RenderLock>,
|
||||
}
|
||||
|
||||
impl EventAccumulator {
|
||||
impl<'a> EventAccumulator {
|
||||
fn new() -> EventAccumulator {
|
||||
EventAccumulator {
|
||||
diff_base: None,
|
||||
|
|
Loading…
Reference in New Issue