mirror of https://github.com/helix-editor/helix
update imara-diff
parent
2b26d27416
commit
554df74a96
|
@ -748,7 +748,7 @@ dependencies = [
|
||||||
"gix-trace",
|
"gix-trace",
|
||||||
"gix-traverse",
|
"gix-traverse",
|
||||||
"gix-worktree",
|
"gix-worktree",
|
||||||
"imara-diff",
|
"imara-diff 0.1.8",
|
||||||
"thiserror 2.0.12",
|
"thiserror 2.0.12",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -1409,7 +1409,7 @@ dependencies = [
|
||||||
"helix-loader",
|
"helix-loader",
|
||||||
"helix-parsec",
|
"helix-parsec",
|
||||||
"helix-stdx",
|
"helix-stdx",
|
||||||
"imara-diff",
|
"imara-diff 0.2.0",
|
||||||
"indoc",
|
"indoc",
|
||||||
"log",
|
"log",
|
||||||
"nucleo",
|
"nucleo",
|
||||||
|
@ -1604,7 +1604,7 @@ dependencies = [
|
||||||
"gix",
|
"gix",
|
||||||
"helix-core",
|
"helix-core",
|
||||||
"helix-event",
|
"helix-event",
|
||||||
"imara-diff",
|
"imara-diff 0.2.0",
|
||||||
"log",
|
"log",
|
||||||
"parking_lot",
|
"parking_lot",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
|
@ -1848,6 +1848,16 @@ dependencies = [
|
||||||
"hashbrown 0.15.3",
|
"hashbrown 0.15.3",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "imara-diff"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2f01d462f766df78ab820dd06f5eb700233c51f0f4c2e846520eaf4ba6aa5c5c"
|
||||||
|
dependencies = [
|
||||||
|
"hashbrown 0.15.3",
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "indexmap"
|
name = "indexmap"
|
||||||
version = "2.9.0"
|
version = "2.9.0"
|
||||||
|
|
|
@ -46,8 +46,7 @@ serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
toml = "0.8"
|
toml = "0.8"
|
||||||
|
|
||||||
imara-diff = "0.1.8"
|
imara-diff = "0.2.0"
|
||||||
|
|
||||||
encoding_rs = "0.8"
|
encoding_rs = "0.8"
|
||||||
|
|
||||||
chrono = { version = "0.4", default-features = false, features = ["alloc", "std"] }
|
chrono = { version = "0.4", default-features = false, features = ["alloc", "std"] }
|
||||||
|
|
|
@ -1,51 +1,22 @@
|
||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use imara_diff::intern::InternedInput;
|
use imara_diff::{Algorithm, Diff, Hunk, IndentHeuristic, IndentLevel, InternedInput};
|
||||||
use imara_diff::Algorithm;
|
|
||||||
use ropey::RopeSlice;
|
use ropey::RopeSlice;
|
||||||
|
|
||||||
use crate::{ChangeSet, Rope, Tendril, Transaction};
|
use crate::{ChangeSet, Rope, Tendril, Transaction};
|
||||||
|
|
||||||
/// An `imara_diff::Sink` that builds a `ChangeSet` for a character diff of a hunk
|
struct ChangeSetBuilder<'a> {
|
||||||
struct CharChangeSetBuilder<'a> {
|
|
||||||
res: &'a mut ChangeSet,
|
|
||||||
hunk: &'a InternedInput<char>,
|
|
||||||
pos: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl imara_diff::Sink for CharChangeSetBuilder<'_> {
|
|
||||||
type Out = ();
|
|
||||||
fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
|
|
||||||
self.res.retain((before.start - self.pos) as usize);
|
|
||||||
self.res.delete(before.len());
|
|
||||||
self.pos = before.end;
|
|
||||||
|
|
||||||
let res = self.hunk.after[after.start as usize..after.end as usize]
|
|
||||||
.iter()
|
|
||||||
.map(|&token| self.hunk.interner[token])
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
self.res.insert(res);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn finish(self) -> Self::Out {
|
|
||||||
self.res.retain(self.hunk.before.len() - self.pos as usize);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct LineChangeSetBuilder<'a> {
|
|
||||||
res: ChangeSet,
|
res: ChangeSet,
|
||||||
after: RopeSlice<'a>,
|
after: RopeSlice<'a>,
|
||||||
file: &'a InternedInput<RopeSlice<'a>>,
|
file: &'a InternedInput<RopeSlice<'a>>,
|
||||||
current_hunk: InternedInput<char>,
|
current_hunk: InternedInput<char>,
|
||||||
|
char_diff: Diff,
|
||||||
pos: u32,
|
pos: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl imara_diff::Sink for LineChangeSetBuilder<'_> {
|
impl ChangeSetBuilder<'_> {
|
||||||
type Out = ChangeSet;
|
fn process_hunk(&mut self, before: Range<u32>, after: Range<u32>) {
|
||||||
|
|
||||||
fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
|
|
||||||
let len = self.file.before[self.pos as usize..before.start as usize]
|
let len = self.file.before[self.pos as usize..before.start as usize]
|
||||||
.iter()
|
.iter()
|
||||||
.map(|&it| self.file.interner[it].len_chars())
|
.map(|&it| self.file.interner[it].len_chars())
|
||||||
|
@ -109,25 +80,36 @@ impl imara_diff::Sink for LineChangeSetBuilder<'_> {
|
||||||
.flat_map(|&it| self.file.interner[it].chars());
|
.flat_map(|&it| self.file.interner[it].chars());
|
||||||
self.current_hunk.update_before(hunk_before);
|
self.current_hunk.update_before(hunk_before);
|
||||||
self.current_hunk.update_after(hunk_after);
|
self.current_hunk.update_after(hunk_after);
|
||||||
|
|
||||||
// the histogram heuristic does not work as well
|
// the histogram heuristic does not work as well
|
||||||
// for characters because the same characters often reoccur
|
// for characters because the same characters often reoccur
|
||||||
// use Myers diff instead
|
// use Myers diff instead
|
||||||
imara_diff::diff(
|
self.char_diff.compute_with(
|
||||||
Algorithm::Myers,
|
Algorithm::Myers,
|
||||||
&self.current_hunk,
|
&self.current_hunk.before,
|
||||||
CharChangeSetBuilder {
|
&self.current_hunk.after,
|
||||||
res: &mut self.res,
|
self.current_hunk.interner.num_tokens(),
|
||||||
hunk: &self.current_hunk,
|
|
||||||
pos: 0,
|
|
||||||
},
|
|
||||||
);
|
);
|
||||||
|
let mut pos = 0;
|
||||||
|
for Hunk { before, after } in self.char_diff.hunks() {
|
||||||
|
self.res.retain((before.start - pos) as usize);
|
||||||
|
self.res.delete(before.len());
|
||||||
|
pos = before.end;
|
||||||
|
|
||||||
|
let res = self.current_hunk.after[after.start as usize..after.end as usize]
|
||||||
|
.iter()
|
||||||
|
.map(|&token| self.current_hunk.interner[token])
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
self.res.insert(res);
|
||||||
|
}
|
||||||
|
self.res
|
||||||
|
.retain(self.current_hunk.before.len() - pos as usize);
|
||||||
|
// reuse allocations
|
||||||
self.current_hunk.clear();
|
self.current_hunk.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn finish(mut self) -> Self::Out {
|
fn finish(mut self) -> ChangeSet {
|
||||||
let len = self.file.before[self.pos as usize..]
|
let len = self.file.before[self.pos as usize..]
|
||||||
.iter()
|
.iter()
|
||||||
.map(|&it| self.file.interner[it].len_chars())
|
.map(|&it| self.file.interner[it].len_chars())
|
||||||
|
@ -140,7 +122,7 @@ impl imara_diff::Sink for LineChangeSetBuilder<'_> {
|
||||||
|
|
||||||
struct RopeLines<'a>(RopeSlice<'a>);
|
struct RopeLines<'a>(RopeSlice<'a>);
|
||||||
|
|
||||||
impl<'a> imara_diff::intern::TokenSource for RopeLines<'a> {
|
impl<'a> imara_diff::TokenSource for RopeLines<'a> {
|
||||||
type Token = RopeSlice<'a>;
|
type Token = RopeSlice<'a>;
|
||||||
type Tokenizer = ropey::iter::Lines<'a>;
|
type Tokenizer = ropey::iter::Lines<'a>;
|
||||||
|
|
||||||
|
@ -161,15 +143,23 @@ pub fn compare_ropes(before: &Rope, after: &Rope) -> Transaction {
|
||||||
let res = ChangeSet::with_capacity(32);
|
let res = ChangeSet::with_capacity(32);
|
||||||
let after = after.slice(..);
|
let after = after.slice(..);
|
||||||
let file = InternedInput::new(RopeLines(before.slice(..)), RopeLines(after));
|
let file = InternedInput::new(RopeLines(before.slice(..)), RopeLines(after));
|
||||||
let builder = LineChangeSetBuilder {
|
let mut builder = ChangeSetBuilder {
|
||||||
res,
|
res,
|
||||||
file: &file,
|
file: &file,
|
||||||
after,
|
after,
|
||||||
pos: 0,
|
pos: 0,
|
||||||
current_hunk: InternedInput::default(),
|
current_hunk: InternedInput::default(),
|
||||||
|
char_diff: Diff::default(),
|
||||||
};
|
};
|
||||||
|
let mut diff = Diff::compute(Algorithm::Histogram, &file);
|
||||||
let res = imara_diff::diff(Algorithm::Histogram, &file, builder).into();
|
diff.postprocess_with_heuristic(
|
||||||
|
&file,
|
||||||
|
IndentHeuristic::new(|token| IndentLevel::for_ascii_line(file.interner[token].bytes(), 4)),
|
||||||
|
);
|
||||||
|
for hunk in diff.hunks() {
|
||||||
|
builder.process_hunk(hunk.before, hunk.after)
|
||||||
|
}
|
||||||
|
let res = builder.finish().into();
|
||||||
|
|
||||||
log::debug!(
|
log::debug!(
|
||||||
"rope diff took {}s",
|
"rope diff took {}s",
|
||||||
|
|
|
@ -20,7 +20,7 @@ parking_lot.workspace = true
|
||||||
arc-swap = { version = "1.7.1" }
|
arc-swap = { version = "1.7.1" }
|
||||||
|
|
||||||
gix = { version = "0.72.1", features = ["attributes", "status"], default-features = false, optional = true }
|
gix = { version = "0.72.1", features = ["attributes", "status"], default-features = false, optional = true }
|
||||||
imara-diff = "0.1.8"
|
imara-diff = "0.2.0"
|
||||||
anyhow = "1"
|
anyhow = "1"
|
||||||
|
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
use std::ops::Range;
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use helix_core::Rope;
|
use helix_core::Rope;
|
||||||
|
@ -12,6 +11,8 @@ use tokio::time::Instant;
|
||||||
|
|
||||||
use crate::diff::worker::DiffWorker;
|
use crate::diff::worker::DiffWorker;
|
||||||
|
|
||||||
|
pub use imara_diff::Hunk;
|
||||||
|
|
||||||
mod line_cache;
|
mod line_cache;
|
||||||
mod worker;
|
mod worker;
|
||||||
|
|
||||||
|
@ -52,8 +53,8 @@ impl DiffHandle {
|
||||||
let worker = DiffWorker {
|
let worker = DiffWorker {
|
||||||
channel: receiver,
|
channel: receiver,
|
||||||
diff: diff.clone(),
|
diff: diff.clone(),
|
||||||
new_hunks: Vec::default(),
|
|
||||||
diff_finished_notify: Arc::default(),
|
diff_finished_notify: Arc::default(),
|
||||||
|
diff_alloc: imara_diff::Diff::default(),
|
||||||
};
|
};
|
||||||
let handle = tokio::spawn(worker.run(diff_base, doc));
|
let handle = tokio::spawn(worker.run(diff_base, doc));
|
||||||
let differ = DiffHandle {
|
let differ = DiffHandle {
|
||||||
|
@ -118,48 +119,6 @@ const MAX_DIFF_LINES: usize = 64 * u16::MAX as usize;
|
||||||
// cap average line length to 128 for files with MAX_DIFF_LINES
|
// cap average line length to 128 for files with MAX_DIFF_LINES
|
||||||
const MAX_DIFF_BYTES: usize = MAX_DIFF_LINES * 128;
|
const MAX_DIFF_BYTES: usize = MAX_DIFF_LINES * 128;
|
||||||
|
|
||||||
/// A single change in a file potentially spanning multiple lines
|
|
||||||
/// Hunks produced by the differs are always ordered by their position
|
|
||||||
/// in the file and non-overlapping.
|
|
||||||
/// Specifically for any two hunks `x` and `y` the following properties hold:
|
|
||||||
///
|
|
||||||
/// ``` no_compile
|
|
||||||
/// assert!(x.before.end <= y.before.start);
|
|
||||||
/// assert!(x.after.end <= y.after.start);
|
|
||||||
/// ```
|
|
||||||
#[derive(PartialEq, Eq, Clone, Debug)]
|
|
||||||
pub struct Hunk {
|
|
||||||
pub before: Range<u32>,
|
|
||||||
pub after: Range<u32>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Hunk {
|
|
||||||
/// Can be used instead of `Option::None` for better performance
|
|
||||||
/// because lines larger than `i32::MAX` are not supported by `imara-diff` anyway.
|
|
||||||
/// Has some nice properties where it usually is not necessary to check for `None` separately:
|
|
||||||
/// Empty ranges fail contains checks and also fail smaller-than checks.
|
|
||||||
pub const NONE: Hunk = Hunk {
|
|
||||||
before: u32::MAX..u32::MAX,
|
|
||||||
after: u32::MAX..u32::MAX,
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Inverts a change so that `before` and `after` are swapped.
|
|
||||||
pub fn invert(&self) -> Hunk {
|
|
||||||
Hunk {
|
|
||||||
before: self.after.clone(),
|
|
||||||
after: self.before.clone(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_pure_insertion(&self) -> bool {
|
|
||||||
self.before.is_empty()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_pure_removal(&self) -> bool {
|
|
||||||
self.after.is_empty()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A list of changes in a file sorted in ascending
|
/// A list of changes in a file sorted in ascending
|
||||||
/// non-overlapping order
|
/// non-overlapping order
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
|
|
@ -13,7 +13,7 @@
|
||||||
use std::mem::transmute;
|
use std::mem::transmute;
|
||||||
|
|
||||||
use helix_core::{Rope, RopeSlice};
|
use helix_core::{Rope, RopeSlice};
|
||||||
use imara_diff::intern::{InternedInput, Interner};
|
use imara_diff::{InternedInput, Interner};
|
||||||
|
|
||||||
use super::{MAX_DIFF_BYTES, MAX_DIFF_LINES};
|
use super::{MAX_DIFF_BYTES, MAX_DIFF_LINES};
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,7 @@
|
||||||
use std::mem::swap;
|
|
||||||
use std::ops::Range;
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use helix_core::{Rope, RopeSlice};
|
use helix_core::{Rope, RopeSlice};
|
||||||
use imara_diff::intern::InternedInput;
|
use imara_diff::{IndentHeuristic, IndentLevel, InternedInput};
|
||||||
use parking_lot::RwLock;
|
use parking_lot::RwLock;
|
||||||
use tokio::sync::mpsc::UnboundedReceiver;
|
use tokio::sync::mpsc::UnboundedReceiver;
|
||||||
use tokio::sync::Notify;
|
use tokio::sync::Notify;
|
||||||
|
@ -14,7 +12,6 @@ use crate::diff::{
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::line_cache::InternedRopeLines;
|
use super::line_cache::InternedRopeLines;
|
||||||
use super::Hunk;
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test;
|
mod test;
|
||||||
|
@ -22,8 +19,8 @@ mod test;
|
||||||
pub(super) struct DiffWorker {
|
pub(super) struct DiffWorker {
|
||||||
pub channel: UnboundedReceiver<Event>,
|
pub channel: UnboundedReceiver<Event>,
|
||||||
pub diff: Arc<RwLock<DiffInner>>,
|
pub diff: Arc<RwLock<DiffInner>>,
|
||||||
pub new_hunks: Vec<Hunk>,
|
|
||||||
pub diff_finished_notify: Arc<Notify>,
|
pub diff_finished_notify: Arc<Notify>,
|
||||||
|
pub diff_alloc: imara_diff::Diff,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DiffWorker {
|
impl DiffWorker {
|
||||||
|
@ -76,15 +73,26 @@ impl DiffWorker {
|
||||||
let mut diff = self.diff.write();
|
let mut diff = self.diff.write();
|
||||||
diff.diff_base = diff_base;
|
diff.diff_base = diff_base;
|
||||||
diff.doc = doc;
|
diff.doc = doc;
|
||||||
swap(&mut diff.hunks, &mut self.new_hunks);
|
diff.hunks.clear();
|
||||||
|
diff.hunks.extend(self.diff_alloc.hunks());
|
||||||
|
drop(diff);
|
||||||
self.diff_finished_notify.notify_waiters();
|
self.diff_finished_notify.notify_waiters();
|
||||||
self.new_hunks.clear();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn perform_diff(&mut self, input: &InternedInput<RopeSlice>) {
|
fn perform_diff(&mut self, input: &InternedInput<RopeSlice>) {
|
||||||
imara_diff::diff(ALGORITHM, input, |before: Range<u32>, after: Range<u32>| {
|
self.diff_alloc.compute_with(
|
||||||
self.new_hunks.push(Hunk { before, after })
|
ALGORITHM,
|
||||||
})
|
&input.before,
|
||||||
|
&input.after,
|
||||||
|
input.interner.num_tokens(),
|
||||||
|
);
|
||||||
|
self.diff_alloc.postprocess_with(
|
||||||
|
&input.before,
|
||||||
|
&input.after,
|
||||||
|
IndentHeuristic::new(|token| {
|
||||||
|
IndentLevel::for_ascii_line(input.interner[token].bytes(), 4)
|
||||||
|
}),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -94,7 +102,7 @@ struct EventAccumulator {
|
||||||
render_lock: Option<RenderLock>,
|
render_lock: Option<RenderLock>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EventAccumulator {
|
impl<'a> EventAccumulator {
|
||||||
fn new() -> EventAccumulator {
|
fn new() -> EventAccumulator {
|
||||||
EventAccumulator {
|
EventAccumulator {
|
||||||
diff_base: None,
|
diff_base: None,
|
||||||
|
|
Loading…
Reference in New Issue