From f4863ec69cabbe0eda815c17f7d787365c8738d2 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Tue, 25 Mar 2025 12:43:53 -0400 Subject: [PATCH] Complete words from open buffers --- book/src/editor.md | 19 + book/src/languages.md | 1 + helix-core/src/completion.rs | 1 + helix-core/src/syntax/config.rs | 10 + helix-core/src/transaction.rs | 11 + helix-term/src/handlers.rs | 4 +- helix-term/src/handlers/completion.rs | 4 +- helix-term/src/handlers/completion/request.rs | 9 +- helix-term/src/handlers/completion/word.rs | 134 +++++ helix-view/src/document.rs | 6 + helix-view/src/editor.rs | 22 +- helix-view/src/handlers.rs | 7 + helix-view/src/handlers/word_index.rs | 477 ++++++++++++++++++ 13 files changed, 701 insertions(+), 4 deletions(-) create mode 100644 helix-term/src/handlers/completion/word.rs create mode 100644 helix-view/src/handlers/word_index.rs diff --git a/book/src/editor.md b/book/src/editor.md index b79792058..ce04d0a01 100644 --- a/book/src/editor.md +++ b/book/src/editor.md @@ -19,6 +19,7 @@ - [`[editor.soft-wrap]` Section](#editorsoft-wrap-section) - [`[editor.smart-tab]` Section](#editorsmart-tab-section) - [`[editor.inline-diagnostics]` Section](#editorinline-diagnostics-section) +- [`[editor.word-completion]` Section](#editorword-completion-section) ### `[editor]` Section @@ -474,3 +475,21 @@ end-of-line-diagnostics = "hint" [editor.inline-diagnostics] cursor-line = "warning" # show warnings and errors on the cursorline inline ``` + +### `[editor.word-completion]` Section + +Options for controlling completion of words from open buffers. 
+ +| Key | Description | Default | +| --- | --- | --- | +| `enable` | Whether word completion is enabled | `true` | +| `trigger-length` | Number of word characters to type before triggering completion | `7` | + +Example: + +```toml +[editor.word-completion] +enable = true +# Set the trigger length lower so that words are completed more often +trigger-length = 4 +``` diff --git a/book/src/languages.md b/book/src/languages.md index ea18e9c39..cf2f1c23e 100644 --- a/book/src/languages.md +++ b/book/src/languages.md @@ -71,6 +71,7 @@ These configuration keys are available: | `text-width` | Maximum line length. Used for the `:reflow` command and soft-wrapping if `soft-wrap.wrap-at-text-width` is set, defaults to `editor.text-width` | | `rulers` | Overrides the `editor.rulers` config key for the language. | | `path-completion` | Overrides the `editor.path-completion` config key for the language. | +| `word-completion` | Overrides the [`editor.word-completion`](./editor.md#editorword-completion-section) configuration for the language. | | `workspace-lsp-roots` | Directories relative to the workspace root that are treated as LSP roots. Should only be set in `.helix/config.toml`. Overwrites the setting of the same name in `config.toml` if set. | | `persistent-diagnostic-sources` | An array of LSP diagnostic sources assumed unchanged when the language server resends the same set of diagnostics. Helix can track the position for these diagnostics internally instead. Useful for diagnostics that are recomputed on save. 
diff --git a/helix-core/src/completion.rs b/helix-core/src/completion.rs index 29c86b738..11b49cfc9 100644 --- a/helix-core/src/completion.rs +++ b/helix-core/src/completion.rs @@ -16,6 +16,7 @@ pub struct CompletionItem { pub enum CompletionProvider { Lsp(LanguageServerId), Path, + Word, } impl From for CompletionProvider { diff --git a/helix-core/src/syntax/config.rs b/helix-core/src/syntax/config.rs index 432611bb0..ddff26f17 100644 --- a/helix-core/src/syntax/config.rs +++ b/helix-core/src/syntax/config.rs @@ -7,6 +7,7 @@ use serde::{ser::SerializeSeq as _, Deserialize, Serialize}; use std::{ collections::{HashMap, HashSet}, fmt::{self, Display}, + num::NonZeroU8, path::PathBuf, str::FromStr, }; @@ -60,6 +61,8 @@ pub struct LanguageConfiguration { /// If set, overrides `editor.path-completion`. pub path_completion: Option, + /// If set, overrides `editor.word-completion`. + pub word_completion: Option, #[serde(default)] pub diagnostic_severity: Severity, @@ -572,6 +575,13 @@ pub struct SoftWrap { pub wrap_at_text_width: Option, } +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(default, rename_all = "kebab-case", deny_unknown_fields)] +pub struct WordCompletion { + pub enable: Option, + pub trigger_length: Option, +} + fn deserialize_regex<'de, D>(deserializer: D) -> Result, D::Error> where D: serde::Deserializer<'de>, diff --git a/helix-core/src/transaction.rs b/helix-core/src/transaction.rs index 450b47365..83945f765 100644 --- a/helix-core/src/transaction.rs +++ b/helix-core/src/transaction.rs @@ -19,6 +19,17 @@ pub enum Operation { Insert(Tendril), } +impl Operation { + /// The number of characters affected by the operation. 
+ #[allow(clippy::len_without_is_empty)] + pub fn len(&self) -> usize { + match self { + Self::Retain(n) | Self::Delete(n) => *n, + Self::Insert(s) => s.chars().count(), + } + } +} + #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum Assoc { Before, diff --git a/helix-term/src/handlers.rs b/helix-term/src/handlers.rs index c7d71526c..9c46a6503 100644 --- a/helix-term/src/handlers.rs +++ b/helix-term/src/handlers.rs @@ -8,7 +8,7 @@ use crate::events; use crate::handlers::auto_save::AutoSaveHandler; use crate::handlers::signature_help::SignatureHelpHandler; -pub use helix_view::handlers::Handlers; +pub use helix_view::handlers::{word_index, Handlers}; use self::document_colors::DocumentColorsHandler; @@ -26,12 +26,14 @@ pub fn setup(config: Arc>) -> Handlers { let signature_hints = SignatureHelpHandler::new().spawn(); let auto_save = AutoSaveHandler::new().spawn(); let document_colors = DocumentColorsHandler::default().spawn(); + let word_index = word_index::Handler::spawn(); let handlers = Handlers { completions: helix_view::handlers::completion::CompletionHandler::new(event_tx), signature_hints, auto_save, document_colors, + word_index, }; helix_view::handlers::register_hooks(&handlers); diff --git a/helix-term/src/handlers/completion.rs b/helix-term/src/handlers/completion.rs index 5017399bd..22bb0ce08 100644 --- a/helix-term/src/handlers/completion.rs +++ b/helix-term/src/handlers/completion.rs @@ -30,6 +30,7 @@ mod item; mod path; mod request; mod resolve; +mod word; async fn handle_response( requests: &mut JoinSet, @@ -82,7 +83,7 @@ async fn replace_completions( fn show_completion( editor: &mut Editor, compositor: &mut Compositor, - items: Vec, + mut items: Vec, context: HashMap, trigger: Trigger, ) { @@ -101,6 +102,7 @@ fn show_completion( if ui.completion.is_some() { return; } + word::retain_valid_completions(trigger, doc, view.id, &mut items); editor.handlers.completions.active_completions = context; let completion_area = ui.set_completion(editor, items, 
trigger.pos, size); diff --git a/helix-term/src/handlers/completion/request.rs b/helix-term/src/handlers/completion/request.rs index 51a3129a8..29cd8e42c 100644 --- a/helix-term/src/handlers/completion/request.rs +++ b/helix-term/src/handlers/completion/request.rs @@ -28,6 +28,8 @@ use crate::job::{dispatch, dispatch_blocking}; use crate::ui; use crate::ui::editor::InsertEvent; +use super::word; + #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub(super) enum TriggerKind { Auto, @@ -242,10 +244,15 @@ fn request_completions( doc.selection(view.id).clone(), doc, handle.clone(), - savepoint, + savepoint.clone(), ) { requests.spawn_blocking(path_completion_request); } + if let Some(word_completion_request) = + word::completion(editor, trigger, handle.clone(), savepoint) + { + requests.spawn_blocking(word_completion_request); + } let ui = compositor.find::().unwrap(); ui.last_insert.1.push(InsertEvent::RequestCompletion); diff --git a/helix-term/src/handlers/completion/word.rs b/helix-term/src/handlers/completion/word.rs new file mode 100644 index 000000000..aa204bf8f --- /dev/null +++ b/helix-term/src/handlers/completion/word.rs @@ -0,0 +1,134 @@ +use std::{borrow::Cow, sync::Arc}; + +use helix_core::{ + self as core, chars::char_is_word, completion::CompletionProvider, movement, Transaction, +}; +use helix_event::TaskHandle; +use helix_stdx::rope::RopeSliceExt as _; +use helix_view::{ + document::SavePoint, handlers::completion::ResponseContext, Document, Editor, ViewId, +}; + +use super::{request::TriggerKind, CompletionItem, CompletionItems, CompletionResponse, Trigger}; + +const COMPLETION_KIND: &str = "word"; + +pub(super) fn completion( + editor: &Editor, + trigger: Trigger, + handle: TaskHandle, + savepoint: Arc, +) -> Option CompletionResponse> { + if !doc!(editor).word_completion_enabled() { + return None; + } + let config = editor.config().word_completion; + let doc_config = doc!(editor) + .language_config() + .and_then(|config| config.word_completion); + let 
trigger_length = doc_config + .and_then(|c| c.trigger_length) + .unwrap_or(config.trigger_length) + .get() as usize; + + let (view, doc) = current_ref!(editor); + let rope = doc.text().clone(); + let word_index = editor.handlers.word_index().clone(); + let text = doc.text().slice(..); + let selection = doc.selection(view.id).clone(); + let pos = selection.primary().cursor(text); + + let cursor = movement::move_prev_word_start(text, core::Range::point(pos), 1); + if cursor.head == pos { + return None; + } + if trigger.kind != TriggerKind::Manual + && text + .slice(cursor.head..) + .graphemes() + .take(trigger_length) + .take_while(|g| g.chars().all(char_is_word)) + .count() + != trigger_length + { + return None; + } + + let typed_word_range = cursor.head..pos; + let typed_word = text.slice(typed_word_range.clone()); + let edit_diff = if typed_word + .char(typed_word.len_chars().saturating_sub(1)) + .is_whitespace() + { + 0 + } else { + typed_word.len_chars() + }; + + if handle.is_canceled() { + return None; + } + + let future = move || { + let text = rope.slice(..); + let typed_word: Cow<_> = text.slice(typed_word_range).into(); + let items = word_index + .matches(&typed_word) + .into_iter() + .filter(|word| word.as_str() != typed_word.as_ref()) + .map(|word| { + let transaction = Transaction::change_by_selection(&rope, &selection, |range| { + let cursor = range.cursor(text); + (cursor - edit_diff, cursor, Some((&word).into())) + }); + CompletionItem::Other(core::CompletionItem { + transaction, + label: word.into(), + kind: Cow::Borrowed(COMPLETION_KIND), + documentation: None, + provider: CompletionProvider::Word, + }) + }) + .collect(); + + CompletionResponse { + items: CompletionItems::Other(items), + provider: CompletionProvider::Word, + context: ResponseContext { + is_incomplete: false, + priority: 0, + savepoint, + }, + } + }; + + Some(future) +} + +pub(super) fn retain_valid_completions( + trigger: Trigger, + doc: &Document, + view_id: ViewId, + items: &mut 
Vec, +) { + if trigger.kind == TriggerKind::Manual { + return; + } + + let text = doc.text().slice(..); + let cursor = doc.selection(view_id).primary().cursor(text); + if text + .get_char(cursor.saturating_sub(1)) + .is_some_and(|ch| ch.is_whitespace()) + { + items.retain(|item| { + !matches!( + item, + CompletionItem::Other(core::CompletionItem { + provider: CompletionProvider::Word, + .. + }) + ) + }); + } +} diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index fb89e2e0c..18c68948d 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -1809,6 +1809,12 @@ impl Document { self.version } + pub fn word_completion_enabled(&self) -> bool { + self.language_config() + .and_then(|lang_config| lang_config.word_completion.and_then(|c| c.enable)) + .unwrap_or_else(|| self.config.load().word_completion.enable) + } + pub fn path_completion_enabled(&self) -> bool { self.language_config() .and_then(|lang_config| lang_config.path_completion) diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index 9aa073fcf..6441f236f 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -29,7 +29,7 @@ use std::{ collections::{BTreeMap, HashMap, HashSet}, fs, io::{self, stdin}, - num::NonZeroUsize, + num::{NonZeroU8, NonZeroUsize}, path::{Path, PathBuf}, pin::Pin, sync::Arc, @@ -279,6 +279,9 @@ pub struct Config { /// either absolute or relative to the current opened document or current working directory (if the buffer is not yet saved). /// Defaults to true. pub path_completion: bool, + /// Configures completion of words from open buffers. + /// Defaults to enabled with a trigger length of 7. + pub word_completion: WordCompletion, /// Automatic formatting on save. Defaults to true. pub auto_format: bool, /// Default register used for yank/paste. 
Defaults to '"' @@ -964,6 +967,22 @@ pub enum PopupBorderConfig { Menu, } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(default, rename_all = "kebab-case", deny_unknown_fields)] +pub struct WordCompletion { + pub enable: bool, + pub trigger_length: NonZeroU8, +} + +impl Default for WordCompletion { + fn default() -> Self { + Self { + enable: true, + trigger_length: NonZeroU8::new(7).unwrap(), + } + } +} + impl Default for Config { fn default() -> Self { Self { @@ -983,6 +1002,7 @@ impl Default for Config { auto_pairs: AutoPairConfig::default(), auto_completion: true, path_completion: true, + word_completion: WordCompletion::default(), auto_format: true, default_yank_register: '"', auto_save: AutoSave::default(), diff --git a/helix-view/src/handlers.rs b/helix-view/src/handlers.rs index 258ed89e5..6aba17d6d 100644 --- a/helix-view/src/handlers.rs +++ b/helix-view/src/handlers.rs @@ -9,6 +9,7 @@ pub mod completion; pub mod dap; pub mod diagnostics; pub mod lsp; +pub mod word_index; #[derive(Debug)] pub enum AutoSaveEvent { @@ -22,6 +23,7 @@ pub struct Handlers { pub signature_hints: Sender, pub auto_save: Sender, pub document_colors: Sender, + pub word_index: word_index::Handler, } impl Handlers { @@ -46,8 +48,13 @@ impl Handlers { }; send_blocking(&self.signature_hints, event) } + + pub fn word_index(&self) -> &word_index::WordIndex { + &self.word_index.index + } } pub fn register_hooks(handlers: &Handlers) { lsp::register_hooks(handlers); + word_index::register_hooks(handlers); } diff --git a/helix-view/src/handlers/word_index.rs b/helix-view/src/handlers/word_index.rs new file mode 100644 index 000000000..f84e77e28 --- /dev/null +++ b/helix-view/src/handlers/word_index.rs @@ -0,0 +1,477 @@ +//! Indexing of words from open buffers. +//! +//! This provides an eventually consistent set of words used in any open buffers. This set is +//! later used for lexical completion. 
+ +use std::{borrow::Cow, collections::HashMap, iter, mem, sync::Arc, time::Duration}; + +use helix_core::{ + chars::char_is_word, fuzzy::fuzzy_match, movement, ChangeSet, Range, Rope, RopeSlice, +}; +use helix_event::{register_hook, AsyncHook}; +use helix_stdx::rope::RopeSliceExt as _; +use parking_lot::RwLock; +use tokio::{sync::mpsc, time::Instant}; + +use crate::{ + events::{DocumentDidChange, DocumentDidClose, DocumentDidOpen}, + DocumentId, +}; + +use super::Handlers; + +#[derive(Debug)] +struct Change { + old_text: Rope, + text: Rope, + changes: ChangeSet, +} + +#[derive(Debug)] +enum Event { + Insert(Rope), + Update(DocumentId, Change), + Delete(DocumentId, Rope), +} + +#[derive(Debug)] +pub struct Handler { + pub(super) index: WordIndex, + /// A sender into an async hook which debounces updates to the index. + hook: mpsc::Sender, + /// A sender to a tokio task which coordinates the indexing of documents. + /// + /// See [WordIndex::run]. A supervisor-like task is in charge of spawning tasks to update the + /// index. This ensures that consecutive edits to a document trigger the correct order of + /// insertions and deletions into the word set. 
+    coordinator: mpsc::UnboundedSender<Event>,
+}
+
+impl Handler {
+    pub fn spawn() -> Self {
+        let index = WordIndex::default();
+        let (tx, rx) = mpsc::unbounded_channel();
+        tokio::spawn(index.clone().run(rx));
+        Self {
+            hook: Hook {
+                changes: HashMap::default(),
+                coordinator: tx.clone(),
+            }
+            .spawn(),
+            index,
+            coordinator: tx,
+        }
+    }
+}
+
+#[derive(Debug)]
+struct Hook {
+    changes: HashMap<DocumentId, Change>,
+    coordinator: mpsc::UnboundedSender<Event>,
+}
+
+const DEBOUNCE: Duration = Duration::from_secs(1);
+
+impl AsyncHook for Hook {
+    type Event = Event;
+
+    fn handle_event(&mut self, event: Self::Event, timeout: Option<Instant>) -> Option<Instant> {
+        match event {
+            Event::Insert(_) => unreachable!("inserts are sent to the worker directly"),
+            Event::Update(doc, change) => {
+                if let Some(pending_change) = self.changes.get_mut(&doc) {
+                    // If there is already a change waiting for this document, merge the two
+                    // changes together by composing the changesets and saving the new `text`.
+                    pending_change.changes =
+                        mem::take(&mut pending_change.changes).compose(change.changes);
+                    pending_change.text = change.text;
+                    Some(Instant::now() + DEBOUNCE)
+                } else if is_changeset_significant(&change.changes) {
+                    // If the changeset is fairly large, debounce before updating the index.
+                    self.changes.insert(doc, change);
+                    Some(Instant::now() + DEBOUNCE)
+                } else {
+                    // Otherwise if the change is small, queue the update to the index immediately.
+                    self.coordinator.send(Event::Update(doc, change)).unwrap();
+                    timeout
+                }
+            }
+            Event::Delete(doc, text) => {
+                // If there are pending changes that haven't been indexed since the last debounce,
+                // forget them and delete the old text. 
+ if let Some(change) = self.changes.remove(&doc) { + self.coordinator + .send(Event::Delete(doc, change.old_text)) + .unwrap(); + } else { + self.coordinator.send(Event::Delete(doc, text)).unwrap(); + } + timeout + } + } + } + + fn finish_debounce(&mut self) { + for (doc, change) in self.changes.drain() { + self.coordinator.send(Event::Update(doc, change)).unwrap(); + } + } +} + +/// Minimum number of grapheme clusters required to include a word in the index +const MIN_WORD_GRAPHEMES: usize = 3; +/// Maximum word length allowed (in chars) +const MAX_WORD_LEN: usize = 50; + +// TODO: choose or create a suitable small string type. +type Word = String; + +#[derive(Debug, Default)] +struct WordIndexInner { + /// Reference counted storage for words. + /// + /// Words are very likely to be reused many times. Instead of storing duplicates we keep a + /// reference count of times a word is used. When the reference count drops to zero the word + /// is removed from the index. + words: HashMap, +} + +impl WordIndexInner { + fn words(&self) -> impl Iterator { + self.words.keys() + } + + fn insert(&mut self, word: RopeSlice) { + let word: Cow = word.into(); + if let Some(rc) = self.words.get_mut(word.as_ref()) { + *rc = rc.saturating_add(1); + } else { + self.words.insert(word.into_owned(), 1); + } + } + + fn remove(&mut self, word: RopeSlice) { + let word: Cow = word.into(); + match self.words.get_mut(word.as_ref()) { + Some(1) => { + self.words.remove(word.as_ref()); + } + Some(n) => *n -= 1, + None => (), + } + } +} + +#[derive(Debug, Default, Clone)] +pub struct WordIndex { + inner: Arc>, +} + +impl WordIndex { + pub fn matches(&self, pattern: &str) -> Vec { + let inner = self.inner.read(); + let mut matches = fuzzy_match(pattern, inner.words(), false); + matches.sort_unstable_by_key(|(_, score)| *score); + matches.into_iter().map(|(word, _)| word.clone()).collect() + } + + fn add_document(&self, text: &Rope) { + let words: Vec<_> = words(text.slice(..)).collect(); + let 
mut inner = self.inner.write(); + for word in words { + inner.insert(word); + } + } + + fn update_document(&self, old_text: &Rope, text: &Rope, changes: &ChangeSet) { + let mut inserted = Vec::new(); + let mut removed = Vec::new(); + for (old_window, new_window) in changed_windows(old_text.slice(..), text.slice(..), changes) + { + inserted.extend(words(new_window)); + removed.extend(words(old_window)); + } + + let mut inner = self.inner.write(); + for word in inserted { + inner.insert(word); + } + for word in removed { + inner.remove(word); + } + } + + fn remove_document(&self, text: &Rope) { + let words: Vec<_> = words(text.slice(..)).collect(); + let mut inner = self.inner.write(); + for word in words { + inner.remove(word); + } + } + + /// Coordinate the indexing of documents. + /// + /// This task wraps a MPSC queue and spawns blocking tasks which update the index. Updates + /// are applied one-by-one to ensure that changes to the index are **serialized**: + /// updates to each document must be applied in-order. + async fn run(self, mut events: mpsc::UnboundedReceiver) { + while let Some(event) = events.recv().await { + let this = self.clone(); + tokio::task::spawn_blocking(move || match event { + Event::Insert(text) => { + this.add_document(&text); + } + Event::Update( + _doc, + Change { + old_text, + text, + changes, + .. 
+ }, + ) => { + this.update_document(&old_text, &text, &changes); + } + Event::Delete(_doc, text) => { + this.remove_document(&text); + } + }) + .await + .unwrap(); + } + } +} + +fn words(text: RopeSlice) -> impl Iterator { + let mut cursor = Range::point(0); + if text + .get_char(cursor.anchor) + .is_some_and(|ch| !ch.is_whitespace()) + { + let cursor_word_end = movement::move_next_word_end(text, cursor, 1); + if cursor_word_end.anchor == 0 { + cursor = cursor_word_end; + } + } + + iter::from_fn(move || { + while cursor.head <= text.len_chars() { + let mut word = None; + if text + .slice(..cursor.head) + .graphemes_rev() + .take(MIN_WORD_GRAPHEMES) + .take_while(|g| g.chars().all(char_is_word)) + .count() + == MIN_WORD_GRAPHEMES + { + cursor.anchor += text + .chars_at(cursor.anchor) + .take_while(|&c| !char_is_word(c)) + .count(); + let slice = cursor.slice(text); + if slice.len_chars() <= MAX_WORD_LEN { + word = Some(slice); + } + } + let head = cursor.head; + cursor = movement::move_next_word_end(text, cursor, 1); + if cursor.head == head { + cursor.head = usize::MAX; + } + if word.is_some() { + return word; + } + } + None + }) +} + +/// Finds areas of the old and new texts around each operation in `changes`. +/// +/// The window is larger than the changed area and can encompass multiple insert/delete operations +/// if they are grouped closely together. +/// +/// The ranges of the old and new text should usually be of different sizes. For example a +/// deletion of "foo" surrounded by large retain sections would give a longer window into the +/// `old_text` and shorter window of `new_text`. Vice-versa for an insertion. A full replacement +/// of a word though would give two slices of the same size. 
+fn changed_windows<'a>( + old_text: RopeSlice<'a>, + new_text: RopeSlice<'a>, + changes: &'a ChangeSet, +) -> impl Iterator, RopeSlice<'a>)> { + use helix_core::Operation::*; + + let mut operations = changes.changes().iter().peekable(); + let mut old_pos = 0; + let mut new_pos = 0; + iter::from_fn(move || loop { + let operation = operations.next()?; + let old_start = old_pos; + let new_start = new_pos; + let len = operation.len(); + match operation { + Retain(_) => { + old_pos += len; + new_pos += len; + continue; + } + Insert(_) => new_pos += len, + Delete(_) => old_pos += len, + } + + // Scan ahead until a `Retain` is found which would end a window. + while let Some(o) = operations.next_if(|op| !matches!(op, Retain(n) if *n > MAX_WORD_LEN)) { + let len = o.len(); + match o { + Retain(_) => { + old_pos += len; + new_pos += len; + } + Delete(_) => old_pos += len, + Insert(_) => new_pos += len, + } + } + + let old_window = old_start.saturating_sub(MAX_WORD_LEN) + ..(old_pos + MAX_WORD_LEN).min(old_text.len_chars()); + let new_window = new_start.saturating_sub(MAX_WORD_LEN) + ..(new_pos + MAX_WORD_LEN).min(new_text.len_chars()); + + return Some((old_text.slice(old_window), new_text.slice(new_window))); + }) +} + +/// Estimates whether a changeset is significant or small. 
+fn is_changeset_significant(changes: &ChangeSet) -> bool { + use helix_core::Operation::*; + + let mut diff = 0; + for operation in changes.changes() { + match operation { + Retain(_) => continue, + Delete(_) | Insert(_) => diff += operation.len(), + } + } + + // This is arbitrary and could be tuned further: + diff > 1_000 +} + +pub(crate) fn register_hooks(handlers: &Handlers) { + let coordinator = handlers.word_index.coordinator.clone(); + register_hook!(move |event: &mut DocumentDidOpen<'_>| { + let doc = doc!(event.editor, &event.doc); + if doc.word_completion_enabled() { + coordinator.send(Event::Insert(doc.text().clone())).unwrap(); + } + Ok(()) + }); + + let tx = handlers.word_index.hook.clone(); + register_hook!(move |event: &mut DocumentDidChange<'_>| { + if !event.ghost_transaction && event.doc.word_completion_enabled() { + helix_event::send_blocking( + &tx, + Event::Update( + event.doc.id(), + Change { + old_text: event.old_text.clone(), + text: event.doc.text().clone(), + changes: event.changes.clone(), + }, + ), + ); + } + Ok(()) + }); + + let tx = handlers.word_index.hook.clone(); + register_hook!(move |event: &mut DocumentDidClose<'_>| { + if event.doc.word_completion_enabled() { + helix_event::send_blocking( + &tx, + Event::Delete(event.doc.id(), event.doc.text().clone()), + ); + } + Ok(()) + }); +} + +#[cfg(test)] +mod tests { + use std::collections::HashSet; + + use super::*; + use helix_core::diff::compare_ropes; + + impl WordIndex { + fn words(&self) -> HashSet { + let inner = self.inner.read(); + inner.words().cloned().collect() + } + } + + #[track_caller] + fn assert_words>(text: &str, expected: T) { + let text = Rope::from_str(text); + let index = WordIndex::default(); + index.add_document(&text); + let actual = index.words(); + let expected: HashSet<_> = expected.into_iter().map(|i| i.to_string()).collect(); + assert_eq!(expected, actual); + } + + #[test] + fn parse() { + assert_words("one two three", ["one", "two", "three"]); + 
assert_words("a foo c", ["foo"]); + } + + #[track_caller] + fn assert_diff(before: &str, after: &str, expect_removed: R, expect_inserted: I) + where + S: ToString, + R: IntoIterator, + I: IntoIterator, + { + let before = Rope::from_str(before); + let after = Rope::from_str(after); + let diff = compare_ropes(&before, &after); + let expect_removed: HashSet<_> = + expect_removed.into_iter().map(|i| i.to_string()).collect(); + let expect_inserted: HashSet<_> = + expect_inserted.into_iter().map(|i| i.to_string()).collect(); + + let index = WordIndex::default(); + index.add_document(&before); + let words_before = index.words(); + index.update_document(&before, &after, diff.changes()); + let words_after = index.words(); + + let actual_removed = words_before.difference(&words_after).cloned().collect(); + let actual_inserted = words_after.difference(&words_before).cloned().collect(); + + eprintln!("\"{before}\" {words_before:?} => \"{after}\" {words_after:?}"); + assert_eq!( + expect_removed, actual_removed, + "expected {expect_removed:?} to be removed, instead {actual_removed:?} was" + ); + assert_eq!( + expect_inserted, actual_inserted, + "expected {expect_inserted:?} to be inserted, instead {actual_inserted:?} was" + ); + } + + #[test] + fn diff() { + assert_diff("one two three", "one five three", ["two"], ["five"]); + assert_diff("one two three", "one to three", ["two"], []); + assert_diff("one two three", "one three", ["two"], []); + assert_diff("one two three", "one t{o three", ["two"], []); + assert_diff("one foo three", "one fooo three", ["foo"], ["fooo"]); + + // TODO: further testing. Consider setting the max word size smaller in tests. + } +}