mirror of https://github.com/helix-editor/helix
Complete words from open buffers
parent 6be38642f4
commit f4863ec69c
@@ -19,6 +19,7 @@
 - [`[editor.soft-wrap]` Section](#editorsoft-wrap-section)
 - [`[editor.smart-tab]` Section](#editorsmart-tab-section)
 - [`[editor.inline-diagnostics]` Section](#editorinline-diagnostics-section)
+- [`[editor.word-completion]` Section](#editorword-completion-section)
 
 ### `[editor]` Section
 
@@ -474,3 +475,21 @@ end-of-line-diagnostics = "hint"
 [editor.inline-diagnostics]
 cursor-line = "warning" # show warnings and errors on the cursorline inline
 ```
+
+### `[editor.word-completion]` Section
+
+Options for controlling completion of words from open buffers.
+
+| Key | Description | Default |
+| --- | --- | --- |
+| `enable` | Whether word completion is enabled | `true` |
+| `trigger-length` | Number of word characters to type before triggering completion | `7` |
+
+Example:
+
+```toml
+[editor.word-completion]
+enable = true
+# Set the trigger length lower so that words are completed more often
+trigger-length = 4
+```

@@ -71,6 +71,7 @@ These configuration keys are available:
 | `text-width` | Maximum line length. Used for the `:reflow` command and soft-wrapping if `soft-wrap.wrap-at-text-width` is set, defaults to `editor.text-width` |
 | `rulers` | Overrides the `editor.rulers` config key for the language. |
 | `path-completion` | Overrides the `editor.path-completion` config key for the language. |
+| `word-completion` | Overrides the [`editor.word-completion`](./editor.md#editorword-completion-section) configuration for the language. |
 | `workspace-lsp-roots` | Directories relative to the workspace root that are treated as LSP roots. Should only be set in `.helix/config.toml`. Overwrites the setting of the same name in `config.toml` if set. |
 | `persistent-diagnostic-sources` | An array of LSP diagnostic sources assumed unchanged when the language server resends the same set of diagnostics. Helix can track the position for these diagnostics internally instead. Useful for diagnostics that are recomputed on save. |

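A `word-completion` override in `languages.toml` takes the same keys as the editor-level section. A minimal sketch of a per-language override (the language choice and values here are illustrative, not part of this change):

```toml
[[language]]
name = "markdown"
# Prose tends to repeat long words, so trigger completion earlier
# than the editor-wide default of 7 characters.
word-completion = { enable = true, trigger-length = 4 }
```
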
@@ -16,6 +16,7 @@ pub struct CompletionItem {
 pub enum CompletionProvider {
     Lsp(LanguageServerId),
     Path,
+    Word,
 }
 
 impl From<LanguageServerId> for CompletionProvider {

@@ -7,6 +7,7 @@ use serde::{ser::SerializeSeq as _, Deserialize, Serialize};
 use std::{
     collections::{HashMap, HashSet},
     fmt::{self, Display},
+    num::NonZeroU8,
     path::PathBuf,
     str::FromStr,
 };
@@ -60,6 +61,8 @@ pub struct LanguageConfiguration {
 
     /// If set, overrides `editor.path-completion`.
     pub path_completion: Option<bool>,
+    /// If set, overrides `editor.word-completion`.
+    pub word_completion: Option<WordCompletion>,
 
     #[serde(default)]
     pub diagnostic_severity: Severity,
@@ -572,6 +575,13 @@ pub struct SoftWrap {
     pub wrap_at_text_width: Option<bool>,
 }
 
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(default, rename_all = "kebab-case", deny_unknown_fields)]
+pub struct WordCompletion {
+    pub enable: Option<bool>,
+    pub trigger_length: Option<NonZeroU8>,
+}
+
 fn deserialize_regex<'de, D>(deserializer: D) -> Result<Option<rope::Regex>, D::Error>
 where
     D: serde::Deserializer<'de>,

@@ -19,6 +19,17 @@ pub enum Operation {
     Insert(Tendril),
 }
 
+impl Operation {
+    /// The number of characters affected by the operation.
+    #[allow(clippy::len_without_is_empty)]
+    pub fn len(&self) -> usize {
+        match self {
+            Self::Retain(n) | Self::Delete(n) => *n,
+            Self::Insert(s) => s.chars().count(),
+        }
+    }
+}
+
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub enum Assoc {
     Before,

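Since `len` is defined in characters rather than bytes, multi-byte UTF-8 input behaves predictably. A standalone sketch of the semantics, using a simplified copy of the enum so it runs without helix-core:

```rust
enum Operation {
    Retain(usize),
    Delete(usize),
    Insert(String),
}

impl Operation {
    /// Number of characters affected, mirroring the method added above.
    fn len(&self) -> usize {
        match self {
            Self::Retain(n) | Self::Delete(n) => *n,
            Self::Insert(s) => s.chars().count(),
        }
    }
}

fn main() {
    assert_eq!(Operation::Retain(5).len(), 5);
    // "héllo" is 6 bytes in UTF-8 but 5 characters.
    assert_eq!(Operation::Insert("héllo".to_string()).len(), 5);
}
```
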
@@ -8,7 +8,7 @@ use crate::events;
 use crate::handlers::auto_save::AutoSaveHandler;
 use crate::handlers::signature_help::SignatureHelpHandler;
 
-pub use helix_view::handlers::Handlers;
+pub use helix_view::handlers::{word_index, Handlers};
 
 use self::document_colors::DocumentColorsHandler;
 
@@ -26,12 +26,14 @@ pub fn setup(config: Arc<ArcSwap<Config>>) -> Handlers {
     let signature_hints = SignatureHelpHandler::new().spawn();
     let auto_save = AutoSaveHandler::new().spawn();
     let document_colors = DocumentColorsHandler::default().spawn();
+    let word_index = word_index::Handler::spawn();
 
     let handlers = Handlers {
         completions: helix_view::handlers::completion::CompletionHandler::new(event_tx),
         signature_hints,
         auto_save,
         document_colors,
+        word_index,
     };
 
     helix_view::handlers::register_hooks(&handlers);

@@ -30,6 +30,7 @@ mod item;
 mod path;
 mod request;
 mod resolve;
+mod word;
 
 async fn handle_response(
     requests: &mut JoinSet<CompletionResponse>,
@@ -82,7 +83,7 @@ async fn replace_completions(
 fn show_completion(
     editor: &mut Editor,
     compositor: &mut Compositor,
-    items: Vec<CompletionItem>,
+    mut items: Vec<CompletionItem>,
     context: HashMap<CompletionProvider, ResponseContext>,
     trigger: Trigger,
 ) {
@@ -101,6 +102,7 @@ fn show_completion(
     if ui.completion.is_some() {
         return;
     }
+    word::retain_valid_completions(trigger, doc, view.id, &mut items);
     editor.handlers.completions.active_completions = context;
 
     let completion_area = ui.set_completion(editor, items, trigger.pos, size);

@@ -28,6 +28,8 @@ use crate::job::{dispatch, dispatch_blocking};
 use crate::ui;
 use crate::ui::editor::InsertEvent;
 
+use super::word;
+
 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
 pub(super) enum TriggerKind {
     Auto,
@@ -242,10 +244,15 @@ fn request_completions(
         doc.selection(view.id).clone(),
         doc,
         handle.clone(),
-        savepoint,
+        savepoint.clone(),
     ) {
         requests.spawn_blocking(path_completion_request);
     }
+    if let Some(word_completion_request) =
+        word::completion(editor, trigger, handle.clone(), savepoint)
+    {
+        requests.spawn_blocking(word_completion_request);
+    }
 
     let ui = compositor.find::<ui::EditorView>().unwrap();
     ui.last_insert.1.push(InsertEvent::RequestCompletion);

@@ -0,0 +1,134 @@
+use std::{borrow::Cow, sync::Arc};
+
+use helix_core::{
+    self as core, chars::char_is_word, completion::CompletionProvider, movement, Transaction,
+};
+use helix_event::TaskHandle;
+use helix_stdx::rope::RopeSliceExt as _;
+use helix_view::{
+    document::SavePoint, handlers::completion::ResponseContext, Document, Editor, ViewId,
+};
+
+use super::{request::TriggerKind, CompletionItem, CompletionItems, CompletionResponse, Trigger};
+
+const COMPLETION_KIND: &str = "word";
+
+pub(super) fn completion(
+    editor: &Editor,
+    trigger: Trigger,
+    handle: TaskHandle,
+    savepoint: Arc<SavePoint>,
+) -> Option<impl FnOnce() -> CompletionResponse> {
+    if !doc!(editor).word_completion_enabled() {
+        return None;
+    }
+    let config = editor.config().word_completion;
+    let doc_config = doc!(editor)
+        .language_config()
+        .and_then(|config| config.word_completion);
+    let trigger_length = doc_config
+        .and_then(|c| c.trigger_length)
+        .unwrap_or(config.trigger_length)
+        .get() as usize;
+
+    let (view, doc) = current_ref!(editor);
+    let rope = doc.text().clone();
+    let word_index = editor.handlers.word_index().clone();
+    let text = doc.text().slice(..);
+    let selection = doc.selection(view.id).clone();
+    let pos = selection.primary().cursor(text);
+
+    let cursor = movement::move_prev_word_start(text, core::Range::point(pos), 1);
+    if cursor.head == pos {
+        return None;
+    }
+    if trigger.kind != TriggerKind::Manual
+        && text
+            .slice(cursor.head..)
+            .graphemes()
+            .take(trigger_length)
+            .take_while(|g| g.chars().all(char_is_word))
+            .count()
+            != trigger_length
+    {
+        return None;
+    }
+
+    let typed_word_range = cursor.head..pos;
+    let typed_word = text.slice(typed_word_range.clone());
+    let edit_diff = if typed_word
+        .char(typed_word.len_chars().saturating_sub(1))
+        .is_whitespace()
+    {
+        0
+    } else {
+        typed_word.len_chars()
+    };
+
+    if handle.is_canceled() {
+        return None;
+    }
+
+    let future = move || {
+        let text = rope.slice(..);
+        let typed_word: Cow<_> = text.slice(typed_word_range).into();
+        let items = word_index
+            .matches(&typed_word)
+            .into_iter()
+            .filter(|word| word.as_str() != typed_word.as_ref())
+            .map(|word| {
+                let transaction = Transaction::change_by_selection(&rope, &selection, |range| {
+                    let cursor = range.cursor(text);
+                    (cursor - edit_diff, cursor, Some((&word).into()))
+                });
+                CompletionItem::Other(core::CompletionItem {
+                    transaction,
+                    label: word.into(),
+                    kind: Cow::Borrowed(COMPLETION_KIND),
+                    documentation: None,
+                    provider: CompletionProvider::Word,
+                })
+            })
+            .collect();
+
+        CompletionResponse {
+            items: CompletionItems::Other(items),
+            provider: CompletionProvider::Word,
+            context: ResponseContext {
+                is_incomplete: false,
+                priority: 0,
+                savepoint,
+            },
+        }
+    };
+
+    Some(future)
+}
+
+pub(super) fn retain_valid_completions(
+    trigger: Trigger,
+    doc: &Document,
+    view_id: ViewId,
+    items: &mut Vec<CompletionItem>,
+) {
+    if trigger.kind == TriggerKind::Manual {
+        return;
+    }
+
+    let text = doc.text().slice(..);
+    let cursor = doc.selection(view_id).primary().cursor(text);
+    if text
+        .get_char(cursor.saturating_sub(1))
+        .is_some_and(|ch| ch.is_whitespace())
+    {
+        items.retain(|item| {
+            !matches!(
+                item,
+                CompletionItem::Other(core::CompletionItem {
+                    provider: CompletionProvider::Word,
+                    ..
+                })
+            )
+        });
+    }
+}

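Each completion item carries a transaction that deletes the `edit_diff` characters behind the cursor and inserts the chosen word. A standalone sketch of that replacement against `helix_core` (the buffer content and word are made up for illustration; assumes `Transaction::change` and `Transaction::apply` as used elsewhere in the codebase):

```rust
use helix_core::{Rope, Transaction};

fn main() {
    // Cursor sits at the end of the typed prefix "comp".
    let text = Rope::from("comp");
    let cursor = 4;
    let edit_diff = 4; // length of the typed prefix

    // Mirrors the (cursor - edit_diff, cursor, Some(word)) change tuple above.
    let transaction = Transaction::change(
        &text,
        std::iter::once((cursor - edit_diff, cursor, Some("completion".into()))),
    );

    let mut new_text = text.clone();
    assert!(transaction.apply(&mut new_text));
    assert_eq!(new_text.to_string(), "completion");
}
```
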
@@ -1809,6 +1809,12 @@ impl Document {
         self.version
     }
 
+    pub fn word_completion_enabled(&self) -> bool {
+        self.language_config()
+            .and_then(|lang_config| lang_config.word_completion.and_then(|c| c.enable))
+            .unwrap_or_else(|| self.config.load().word_completion.enable)
+    }
+
     pub fn path_completion_enabled(&self) -> bool {
         self.language_config()
             .and_then(|lang_config| lang_config.path_completion)

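The precedence is: a per-language `enable` wins when set, otherwise the editor-wide setting applies. A minimal sketch of that rule with plain types (hypothetical helper, not part of this commit):

```rust
fn word_completion_enabled(language_enable: Option<bool>, editor_enable: bool) -> bool {
    language_enable.unwrap_or(editor_enable)
}

fn main() {
    assert!(word_completion_enabled(None, true)); // no override: editor config applies
    assert!(!word_completion_enabled(Some(false), true)); // language opts out
}
```
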
@@ -29,7 +29,7 @@ use std::{
     collections::{BTreeMap, HashMap, HashSet},
     fs,
     io::{self, stdin},
-    num::NonZeroUsize,
+    num::{NonZeroU8, NonZeroUsize},
     path::{Path, PathBuf},
     pin::Pin,
     sync::Arc,
@@ -279,6 +279,9 @@ pub struct Config {
     /// either absolute or relative to the current opened document or current working directory (if the buffer is not yet saved).
     /// Defaults to true.
     pub path_completion: bool,
+    /// Configures completion of words from open buffers.
+    /// Defaults to enabled with a trigger length of 7.
+    pub word_completion: WordCompletion,
     /// Automatic formatting on save. Defaults to true.
     pub auto_format: bool,
     /// Default register used for yank/paste. Defaults to '"'
@@ -964,6 +967,22 @@ pub enum PopupBorderConfig {
     Menu,
 }
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(default, rename_all = "kebab-case", deny_unknown_fields)]
+pub struct WordCompletion {
+    pub enable: bool,
+    pub trigger_length: NonZeroU8,
+}
+
+impl Default for WordCompletion {
+    fn default() -> Self {
+        Self {
+            enable: true,
+            trigger_length: NonZeroU8::new(7).unwrap(),
+        }
+    }
+}
+
 impl Default for Config {
     fn default() -> Self {
         Self {
@@ -983,6 +1002,7 @@ impl Default for Config {
             auto_pairs: AutoPairConfig::default(),
             auto_completion: true,
             path_completion: true,
+            word_completion: WordCompletion::default(),
             auto_format: true,
             default_yank_register: '"',
             auto_save: AutoSave::default(),

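Storing `trigger_length` as `NonZeroU8` makes a zero trigger length unrepresentable, so `trigger-length = 0` is rejected at deserialization instead of producing a setting that would fire on every keystroke. A minimal illustration:

```rust
use std::num::NonZeroU8;

fn main() {
    // Zero cannot be constructed, matching the config's invariant.
    assert!(NonZeroU8::new(0).is_none());
    assert_eq!(NonZeroU8::new(7).map(NonZeroU8::get), Some(7));
}
```
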
@@ -9,6 +9,7 @@ pub mod completion;
 pub mod dap;
 pub mod diagnostics;
 pub mod lsp;
+pub mod word_index;
 
 #[derive(Debug)]
 pub enum AutoSaveEvent {
@@ -22,6 +23,7 @@ pub struct Handlers {
     pub signature_hints: Sender<lsp::SignatureHelpEvent>,
     pub auto_save: Sender<AutoSaveEvent>,
     pub document_colors: Sender<lsp::DocumentColorsEvent>,
+    pub word_index: word_index::Handler,
 }
 
 impl Handlers {
@@ -46,8 +48,13 @@ impl Handlers {
         };
         send_blocking(&self.signature_hints, event)
     }
+
+    pub fn word_index(&self) -> &word_index::WordIndex {
+        &self.word_index.index
+    }
 }
 
 pub fn register_hooks(handlers: &Handlers) {
     lsp::register_hooks(handlers);
+    word_index::register_hooks(handlers);
 }

@@ -0,0 +1,477 @@
+//! Indexing of words from open buffers.
+//!
+//! This provides an eventually consistent set of words used in any open buffers. This set is
+//! later used for lexical completion.
+
+use std::{borrow::Cow, collections::HashMap, iter, mem, sync::Arc, time::Duration};
+
+use helix_core::{
+    chars::char_is_word, fuzzy::fuzzy_match, movement, ChangeSet, Range, Rope, RopeSlice,
+};
+use helix_event::{register_hook, AsyncHook};
+use helix_stdx::rope::RopeSliceExt as _;
+use parking_lot::RwLock;
+use tokio::{sync::mpsc, time::Instant};
+
+use crate::{
+    events::{DocumentDidChange, DocumentDidClose, DocumentDidOpen},
+    DocumentId,
+};
+
+use super::Handlers;
+
+#[derive(Debug)]
+struct Change {
+    old_text: Rope,
+    text: Rope,
+    changes: ChangeSet,
+}
+
+#[derive(Debug)]
+enum Event {
+    Insert(Rope),
+    Update(DocumentId, Change),
+    Delete(DocumentId, Rope),
+}
+
+#[derive(Debug)]
+pub struct Handler {
+    pub(super) index: WordIndex,
+    /// A sender into an async hook which debounces updates to the index.
+    hook: mpsc::Sender<Event>,
+    /// A sender to a tokio task which coordinates the indexing of documents.
+    ///
+    /// See [WordIndex::run]. A supervisor-like task is in charge of spawning tasks to update the
+    /// index. This ensures that consecutive edits to a document trigger the correct order of
+    /// insertions and deletions into the word set.
+    coordinator: mpsc::UnboundedSender<Event>,
+}
+
+impl Handler {
+    pub fn spawn() -> Self {
+        let index = WordIndex::default();
+        let (tx, rx) = mpsc::unbounded_channel();
+        tokio::spawn(index.clone().run(rx));
+        Self {
+            hook: Hook {
+                changes: HashMap::default(),
+                coordinator: tx.clone(),
+            }
+            .spawn(),
+            index,
+            coordinator: tx,
+        }
+    }
+}
+
+#[derive(Debug)]
+struct Hook {
+    changes: HashMap<DocumentId, Change>,
+    coordinator: mpsc::UnboundedSender<Event>,
+}
+
+const DEBOUNCE: Duration = Duration::from_secs(1);
+
+impl AsyncHook for Hook {
+    type Event = Event;
+
+    fn handle_event(&mut self, event: Self::Event, timeout: Option<Instant>) -> Option<Instant> {
+        match event {
+            Event::Insert(_) => unreachable!("inserts are sent to the worker directly"),
+            Event::Update(doc, change) => {
+                if let Some(pending_change) = self.changes.get_mut(&doc) {
+                    // If there is already a change waiting for this document, merge the two
+                    // changes together by composing the changesets and saving the new `text`.
+                    pending_change.changes =
+                        mem::take(&mut pending_change.changes).compose(change.changes);
+                    pending_change.text = change.text;
+                    Some(Instant::now() + DEBOUNCE)
+                } else if is_changeset_significant(&change.changes) {
+                    // If the changeset is fairly large, debounce before updating the index.
+                    self.changes.insert(doc, change);
+                    Some(Instant::now() + DEBOUNCE)
+                } else {
+                    // Otherwise if the change is small, queue the update to the index immediately.
+                    self.coordinator.send(Event::Update(doc, change)).unwrap();
+                    timeout
+                }
+            }
+            Event::Delete(doc, text) => {
+                // If there are pending changes that haven't been indexed since the last debounce,
+                // forget them and delete the old text.
+                if let Some(change) = self.changes.remove(&doc) {
+                    self.coordinator
+                        .send(Event::Delete(doc, change.old_text))
+                        .unwrap();
+                } else {
+                    self.coordinator.send(Event::Delete(doc, text)).unwrap();
+                }
+                timeout
+            }
+        }
+    }
+
+    fn finish_debounce(&mut self) {
+        for (doc, change) in self.changes.drain() {
+            self.coordinator.send(Event::Update(doc, change)).unwrap();
+        }
+    }
+}
+
+/// Minimum number of grapheme clusters required to include a word in the index
+const MIN_WORD_GRAPHEMES: usize = 3;
+/// Maximum word length allowed (in chars)
+const MAX_WORD_LEN: usize = 50;
+
+// TODO: choose or create a suitable small string type.
+type Word = String;
+
+#[derive(Debug, Default)]
+struct WordIndexInner {
+    /// Reference counted storage for words.
+    ///
+    /// Words are very likely to be reused many times. Instead of storing duplicates we keep a
+    /// reference count of times a word is used. When the reference count drops to zero the word
+    /// is removed from the index.
+    words: HashMap<Word, u32>,
+}
+
+impl WordIndexInner {
+    fn words(&self) -> impl Iterator<Item = &Word> {
+        self.words.keys()
+    }
+
+    fn insert(&mut self, word: RopeSlice) {
+        let word: Cow<str> = word.into();
+        if let Some(rc) = self.words.get_mut(word.as_ref()) {
+            *rc = rc.saturating_add(1);
+        } else {
+            self.words.insert(word.into_owned(), 1);
+        }
+    }
+
+    fn remove(&mut self, word: RopeSlice) {
+        let word: Cow<str> = word.into();
+        match self.words.get_mut(word.as_ref()) {
+            Some(1) => {
+                self.words.remove(word.as_ref());
+            }
+            Some(n) => *n -= 1,
+            None => (),
+        }
+    }
+}
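The reference-counting scheme can be sketched standalone: each word is stored once with a count of its occurrences across open buffers, and it disappears from the index when the last occurrence is removed (hypothetical helpers, not part of this commit):

```rust
use std::collections::HashMap;

fn insert(words: &mut HashMap<String, u32>, word: &str) {
    *words.entry(word.to_string()).or_insert(0) += 1;
}

fn remove(words: &mut HashMap<String, u32>, word: &str) {
    match words.get_mut(word) {
        Some(1) => {
            words.remove(word);
        }
        Some(n) => *n -= 1,
        None => (),
    }
}

fn main() {
    let mut words = HashMap::new();
    insert(&mut words, "rope");
    insert(&mut words, "rope"); // "rope" now occurs in two places
    remove(&mut words, "rope");
    assert!(words.contains_key("rope")); // still referenced once
    remove(&mut words, "rope");
    assert!(!words.contains_key("rope")); // last occurrence gone
}
```
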
+
+#[derive(Debug, Default, Clone)]
+pub struct WordIndex {
+    inner: Arc<RwLock<WordIndexInner>>,
+}
+
+impl WordIndex {
+    pub fn matches(&self, pattern: &str) -> Vec<String> {
+        let inner = self.inner.read();
+        let mut matches = fuzzy_match(pattern, inner.words(), false);
+        matches.sort_unstable_by_key(|(_, score)| std::cmp::Reverse(*score));
+        matches.into_iter().map(|(word, _)| word.clone()).collect()
+    }
+
+    fn add_document(&self, text: &Rope) {
+        let words: Vec<_> = words(text.slice(..)).collect();
+        let mut inner = self.inner.write();
+        for word in words {
+            inner.insert(word);
+        }
+    }
+
+    fn update_document(&self, old_text: &Rope, text: &Rope, changes: &ChangeSet) {
+        let mut inserted = Vec::new();
+        let mut removed = Vec::new();
+        for (old_window, new_window) in changed_windows(old_text.slice(..), text.slice(..), changes)
+        {
+            inserted.extend(words(new_window));
+            removed.extend(words(old_window));
+        }
+
+        let mut inner = self.inner.write();
+        for word in inserted {
+            inner.insert(word);
+        }
+        for word in removed {
+            inner.remove(word);
+        }
+    }
+
+    fn remove_document(&self, text: &Rope) {
+        let words: Vec<_> = words(text.slice(..)).collect();
+        let mut inner = self.inner.write();
+        for word in words {
+            inner.remove(word);
+        }
+    }
+
+    /// Coordinate the indexing of documents.
+    ///
+    /// This task wraps an MPSC queue and spawns blocking tasks which update the index. Updates
+    /// are applied one-by-one to ensure that changes to the index are **serialized**:
+    /// updates to each document must be applied in-order.
+    async fn run(self, mut events: mpsc::UnboundedReceiver<Event>) {
+        while let Some(event) = events.recv().await {
+            let this = self.clone();
+            tokio::task::spawn_blocking(move || match event {
+                Event::Insert(text) => {
+                    this.add_document(&text);
+                }
+                Event::Update(
+                    _doc,
+                    Change {
+                        old_text,
+                        text,
+                        changes,
+                        ..
+                    },
+                ) => {
+                    this.update_document(&old_text, &text, &changes);
+                }
+                Event::Delete(_doc, text) => {
+                    this.remove_document(&text);
+                }
+            })
+            .await
+            .unwrap();
+        }
+    }
+}
+
+fn words(text: RopeSlice) -> impl Iterator<Item = RopeSlice> {
+    let mut cursor = Range::point(0);
+    if text
+        .get_char(cursor.anchor)
+        .is_some_and(|ch| !ch.is_whitespace())
+    {
+        let cursor_word_end = movement::move_next_word_end(text, cursor, 1);
+        if cursor_word_end.anchor == 0 {
+            cursor = cursor_word_end;
+        }
+    }
+
+    iter::from_fn(move || {
+        while cursor.head <= text.len_chars() {
+            let mut word = None;
+            if text
+                .slice(..cursor.head)
+                .graphemes_rev()
+                .take(MIN_WORD_GRAPHEMES)
+                .take_while(|g| g.chars().all(char_is_word))
+                .count()
+                == MIN_WORD_GRAPHEMES
+            {
+                cursor.anchor += text
+                    .chars_at(cursor.anchor)
+                    .take_while(|&c| !char_is_word(c))
+                    .count();
+                let slice = cursor.slice(text);
+                if slice.len_chars() <= MAX_WORD_LEN {
+                    word = Some(slice);
+                }
+            }
+            let head = cursor.head;
+            cursor = movement::move_next_word_end(text, cursor, 1);
+            if cursor.head == head {
+                cursor.head = usize::MAX;
+            }
+            if word.is_some() {
+                return word;
+            }
+        }
+        None
+    })
+}
+
+/// Finds areas of the old and new texts around each operation in `changes`.
+///
+/// The window is larger than the changed area and can encompass multiple insert/delete operations
+/// if they are grouped closely together.
+///
+/// The ranges of the old and new text should usually be of different sizes. For example a
+/// deletion of "foo" surrounded by large retain sections would give a longer window into the
+/// `old_text` and a shorter window of `new_text`. Vice-versa for an insertion. A full replacement
+/// of a word though would give two slices of the same size.
+fn changed_windows<'a>(
+    old_text: RopeSlice<'a>,
+    new_text: RopeSlice<'a>,
+    changes: &'a ChangeSet,
+) -> impl Iterator<Item = (RopeSlice<'a>, RopeSlice<'a>)> {
+    use helix_core::Operation::*;
+
+    let mut operations = changes.changes().iter().peekable();
+    let mut old_pos = 0;
+    let mut new_pos = 0;
+    iter::from_fn(move || loop {
+        let operation = operations.next()?;
+        let old_start = old_pos;
+        let new_start = new_pos;
+        let len = operation.len();
+        match operation {
+            Retain(_) => {
+                old_pos += len;
+                new_pos += len;
+                continue;
+            }
+            Insert(_) => new_pos += len,
+            Delete(_) => old_pos += len,
+        }
+
+        // Scan ahead until a `Retain` is found which would end a window.
+        while let Some(operation) =
+            operations.next_if(|op| !matches!(op, Retain(n) if *n > MAX_WORD_LEN))
+        {
+            let len = operation.len();
+            match operation {
+                Retain(_) => {
+                    old_pos += len;
+                    new_pos += len;
+                }
+                Delete(_) => old_pos += len,
+                Insert(_) => new_pos += len,
+            }
+        }
+
+        let old_window = old_start.saturating_sub(MAX_WORD_LEN)
+            ..(old_pos + MAX_WORD_LEN).min(old_text.len_chars());
+        let new_window = new_start.saturating_sub(MAX_WORD_LEN)
+            ..(new_pos + MAX_WORD_LEN).min(new_text.len_chars());
+
+        return Some((old_text.slice(old_window), new_text.slice(new_window)));
+    })
+}
+
+/// Estimates whether a changeset is significant or small.
+fn is_changeset_significant(changes: &ChangeSet) -> bool {
+    use helix_core::Operation::*;
+
+    let mut diff = 0;
+    for operation in changes.changes() {
+        match operation {
+            Retain(_) => continue,
+            Delete(_) | Insert(_) => diff += operation.len(),
+        }
+    }
+
+    // This is arbitrary and could be tuned further:
+    diff > 1_000
+}
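The significance test just sums inserted and deleted characters against a fixed threshold; small edits reindex immediately while bulk edits are debounced. The decision in isolation (hypothetical helper mirroring the logic above):

```rust
fn is_significant(inserted_chars: usize, deleted_chars: usize) -> bool {
    inserted_chars + deleted_chars > 1_000
}

fn main() {
    assert!(!is_significant(3, 0)); // typing a few characters: update immediately
    assert!(is_significant(2_000, 0)); // pasting a large block: debounce first
}
```
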
+
+pub(crate) fn register_hooks(handlers: &Handlers) {
+    let coordinator = handlers.word_index.coordinator.clone();
+    register_hook!(move |event: &mut DocumentDidOpen<'_>| {
+        let doc = doc!(event.editor, &event.doc);
+        if doc.word_completion_enabled() {
+            coordinator.send(Event::Insert(doc.text().clone())).unwrap();
+        }
+        Ok(())
+    });
+
+    let tx = handlers.word_index.hook.clone();
+    register_hook!(move |event: &mut DocumentDidChange<'_>| {
+        if !event.ghost_transaction && event.doc.word_completion_enabled() {
+            helix_event::send_blocking(
+                &tx,
+                Event::Update(
+                    event.doc.id(),
+                    Change {
+                        old_text: event.old_text.clone(),
+                        text: event.doc.text().clone(),
+                        changes: event.changes.clone(),
+                    },
+                ),
+            );
+        }
+        Ok(())
+    });
+
+    let tx = handlers.word_index.hook.clone();
+    register_hook!(move |event: &mut DocumentDidClose<'_>| {
+        if event.doc.word_completion_enabled() {
+            helix_event::send_blocking(
+                &tx,
+                Event::Delete(event.doc.id(), event.doc.text().clone()),
+            );
+        }
+        Ok(())
+    });
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashSet;
+
+    use super::*;
+    use helix_core::diff::compare_ropes;
+
+    impl WordIndex {
+        fn words(&self) -> HashSet<String> {
+            let inner = self.inner.read();
+            inner.words().cloned().collect()
+        }
+    }
+
+    #[track_caller]
+    fn assert_words<I: ToString, T: IntoIterator<Item = I>>(text: &str, expected: T) {
+        let text = Rope::from_str(text);
+        let index = WordIndex::default();
+        index.add_document(&text);
+        let actual = index.words();
+        let expected: HashSet<_> = expected.into_iter().map(|i| i.to_string()).collect();
+        assert_eq!(expected, actual);
+    }
+
+    #[test]
+    fn parse() {
+        assert_words("one two three", ["one", "two", "three"]);
+        assert_words("a foo c", ["foo"]);
+    }
+
+    #[track_caller]
+    fn assert_diff<S, R, I>(before: &str, after: &str, expect_removed: R, expect_inserted: I)
+    where
+        S: ToString,
+        R: IntoIterator<Item = S>,
+        I: IntoIterator<Item = S>,
+    {
+        let before = Rope::from_str(before);
+        let after = Rope::from_str(after);
+        let diff = compare_ropes(&before, &after);
+        let expect_removed: HashSet<_> =
+            expect_removed.into_iter().map(|i| i.to_string()).collect();
+        let expect_inserted: HashSet<_> =
+            expect_inserted.into_iter().map(|i| i.to_string()).collect();
+
+        let index = WordIndex::default();
+        index.add_document(&before);
+        let words_before = index.words();
+        index.update_document(&before, &after, diff.changes());
+        let words_after = index.words();
+
+        let actual_removed = words_before.difference(&words_after).cloned().collect();
+        let actual_inserted = words_after.difference(&words_before).cloned().collect();
+
+        eprintln!("\"{before}\" {words_before:?} => \"{after}\" {words_after:?}");
+        assert_eq!(
+            expect_removed, actual_removed,
+            "expected {expect_removed:?} to be removed, instead {actual_removed:?} was"
+        );
+        assert_eq!(
+            expect_inserted, actual_inserted,
+            "expected {expect_inserted:?} to be inserted, instead {actual_inserted:?} was"
+        );
+    }
+
+    #[test]
+    fn diff() {
+        assert_diff("one two three", "one five three", ["two"], ["five"]);
+        assert_diff("one two three", "one to three", ["two"], []);
+        assert_diff("one two three", "one three", ["two"], []);
+        assert_diff("one two three", "one t{o three", ["two"], []);
+        assert_diff("one foo three", "one fooo three", ["foo"], ["fooo"]);
+
+        // TODO: further testing. Consider setting the max word size smaller in tests.
+    }
+}