// helix/helix-core/src/syntax.rs

pub mod config;
use std::{
borrow::Cow,
collections::HashMap,
fmt, iter,
ops::{self, RangeBounds},
path::Path,
sync::Arc,
time::Duration,
};
use anyhow::{Context, Result};
use arc_swap::{ArcSwap, Guard};
use config::{Configuration, FileType, LanguageConfiguration, LanguageServerConfiguration};
use helix_loader::grammar::get_language;
use helix_stdx::rope::RopeSliceExt as _;
use once_cell::sync::OnceCell;
use ropey::RopeSlice;
use tree_house::{
highlighter,
query_iter::QueryIter,
tree_sitter::{Grammar, InactiveQueryCursor, InputEdit, Node, Query, RopeInput, Tree},
Error, InjectionLanguageMarker, LanguageConfig as SyntaxConfig, Layer,
};
use crate::{indent::IndentQuery, tree_sitter, ChangeSet, Language};
pub use tree_house::{
highlighter::{Highlight, HighlightEvent},
Error as HighlighterError, LanguageLoader, TreeCursor, TREE_SITTER_MATCH_LIMIT,
};
#[derive(Debug)]
2025-02-21 09:38:14 +08:00
pub struct LanguageData {
config: Arc<LanguageConfiguration>,
syntax: OnceCell<Option<SyntaxConfig>>,
indent_query: OnceCell<Option<IndentQuery>>,
textobject_query: OnceCell<Option<TextObjectQuery>>,
}
impl LanguageData {
fn new(config: LanguageConfiguration) -> Self {
Self {
config: Arc::new(config),
syntax: OnceCell::new(),
indent_query: OnceCell::new(),
textobject_query: OnceCell::new(),
}
}
2025-02-21 09:38:14 +08:00
pub fn config(&self) -> &Arc<LanguageConfiguration> {
&self.config
}
2025-02-21 09:38:14 +08:00
/// Loads the grammar and compiles the highlights, injections and locals for the language.
/// This function should only be used by this module or the xtask crate.
pub fn compile_syntax_config(
config: &LanguageConfiguration,
loader: &Loader,
) -> Result<Option<SyntaxConfig>> {
let name = &config.language_id;
let parser_name = config.grammar.as_deref().unwrap_or(name);
let Some(grammar) = get_language(parser_name)? else {
log::info!("Skipping syntax config for '{name}' because the parser's shared library does not exist");
return Ok(None);
};
let highlight_query_text = read_query(name, "highlights.scm");
let injection_query_text = read_query(name, "injections.scm");
let local_query_text = read_query(name, "locals.scm");
let config = SyntaxConfig::new(
grammar,
&highlight_query_text,
&injection_query_text,
&local_query_text,
)
.with_context(|| format!("Failed to compile highlights for '{name}'"))?;
2025-02-21 09:38:14 +08:00
reconfigure_highlights(&config, &loader.scopes());
2025-02-21 09:38:14 +08:00
Ok(Some(config))
}
2025-02-21 09:38:14 +08:00
fn syntax_config(&self, loader: &Loader) -> Option<&SyntaxConfig> {
self.syntax
.get_or_init(|| {
Self::compile_syntax_config(&self.config, loader)
.map_err(|err| {
log::error!("{err:#}");
})
.ok()
.flatten()
})
.as_ref()
2021-06-19 19:26:52 +08:00
}
2025-02-21 09:38:14 +08:00
/// Compiles the indents.scm query for a language.
/// This function should only be used by this module or the xtask crate.
pub fn compile_indent_query(
grammar: Grammar,
config: &LanguageConfiguration,
) -> Result<Option<IndentQuery>> {
let name = &config.language_id;
let text = read_query(name, "indents.scm");
if text.is_empty() {
return Ok(None);
2021-06-19 19:26:52 +08:00
}
2025-02-21 09:38:14 +08:00
let indent_query = IndentQuery::new(grammar, &text)
.with_context(|| format!("Failed to compile indents.scm query for '{name}'"))?;
Ok(Some(indent_query))
2021-06-19 19:26:52 +08:00
}
2025-02-21 09:38:14 +08:00
fn indent_query(&self, loader: &Loader) -> Option<&IndentQuery> {
self.indent_query
.get_or_init(|| {
2025-02-21 09:38:14 +08:00
let grammar = self.syntax_config(loader)?.grammar;
Self::compile_indent_query(grammar, &self.config)
.map_err(|err| {
log::error!("{err}");
})
.ok()
.flatten()
})
.as_ref()
}
2025-02-21 09:38:14 +08:00
/// Compiles the textobjects.scm query for a language.
/// This function should only be used by this module or the xtask crate.
pub fn compile_textobject_query(
grammar: Grammar,
config: &LanguageConfiguration,
) -> Result<Option<TextObjectQuery>> {
let name = &config.language_id;
let text = read_query(name, "textobjects.scm");
if text.is_empty() {
return Ok(None);
}
2025-02-21 09:38:14 +08:00
let query = Query::new(grammar, &text, |_, _| Ok(()))
.with_context(|| format!("Failed to compile textobjects.scm queries for '{name}'"))?;
Ok(Some(TextObjectQuery::new(query)))
}
2025-02-21 09:38:14 +08:00
fn textobject_query(&self, loader: &Loader) -> Option<&TextObjectQuery> {
self.textobject_query
.get_or_init(|| {
let grammar = self.syntax_config(loader)?.grammar;
Self::compile_textobject_query(grammar, &self.config)
.map_err(|err| {
log::error!("{err}");
})
.ok()
.flatten()
})
.as_ref()
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
}
2025-02-21 09:38:14 +08:00
fn reconfigure(&self, scopes: &[String]) {
if let Some(Some(config)) = self.syntax.get() {
reconfigure_highlights(config, scopes);
}
}
}
fn reconfigure_highlights(config: &SyntaxConfig, recognized_names: &[String]) {
config.configure(move |capture_name| {
let capture_parts: Vec<_> = capture_name.split('.').collect();
let mut best_index = None;
let mut best_match_len = 0;
for (i, recognized_name) in recognized_names.iter().enumerate() {
let mut len = 0;
let mut matches = true;
for (i, part) in recognized_name.split('.').enumerate() {
match capture_parts.get(i) {
Some(capture_part) if *capture_part == part => len += 1,
_ => {
matches = false;
break;
}
}
}
if matches && len > best_match_len {
best_index = Some(i);
best_match_len = len;
}
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
}
2025-02-21 09:38:14 +08:00
best_index.map(|idx| Highlight::new(idx as u32))
});
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
}
/// Reads the given query file (e.g. `"highlights.scm"`) for a language,
/// delegating to tree-house with a loader that reads query text from the
/// runtime directory. Missing files are treated as empty query text.
pub fn read_query(lang: &str, query_filename: &str) -> String {
    let load_text = |language: &str| {
        helix_loader::grammar::load_runtime_file(language, query_filename).unwrap_or_default()
    };
    tree_house::read_query(lang, load_text)
}
#[derive(Debug, Default)]
pub struct Loader {
2025-02-21 09:38:14 +08:00
languages: Vec<LanguageData>,
languages_by_extension: HashMap<String, Language>,
languages_by_shebang: HashMap<String, Language>,
languages_glob_matcher: FileTypeGlobMatcher,
Adds support for multiple language servers per language. Language Servers are now configured in a separate table in `languages.toml`: ```toml [langauge-server.mylang-lsp] command = "mylang-lsp" args = ["--stdio"] config = { provideFormatter = true } [language-server.efm-lsp-prettier] command = "efm-langserver" [language-server.efm-lsp-prettier.config] documentFormatting = true languages = { typescript = [ { formatCommand ="prettier --stdin-filepath ${INPUT}", formatStdin = true } ] } ``` The language server for a language is configured like this (`typescript-language-server` is configured by default): ```toml [[language]] name = "typescript" language-servers = [ { name = "efm-lsp-prettier", only-features = [ "format" ] }, "typescript-language-server" ] ``` or equivalent: ```toml [[language]] name = "typescript" language-servers = [ { name = "typescript-language-server", except-features = [ "format" ] }, "efm-lsp-prettier" ] ``` Each requested LSP feature is priorized in the order of the `language-servers` array. For example the first `goto-definition` supported language server (in this case `typescript-language-server`) will be taken for the relevant LSP request (command `goto_definition`). If no `except-features` or `only-features` is given all features for the language server are enabled, as long as the language server supports these. If it doesn't the next language server which supports the feature is tried. The list of supported features are: - `format` - `goto-definition` - `goto-declaration` - `goto-type-definition` - `goto-reference` - `goto-implementation` - `signature-help` - `hover` - `document-highlight` - `completion` - `code-action` - `workspace-command` - `document-symbols` - `workspace-symbols` - `diagnostics` - `rename-symbol` - `inlay-hints` Another side-effect/difference that comes with this PR, is that only one language server instance is started if different languages use the same language server.
2022-05-24 00:10:48 +08:00
language_server_configs: HashMap<String, LanguageServerConfiguration>,
scopes: ArcSwap<Vec<String>>,
}
/// Error produced while building the file-type glob matcher for a [`Loader`].
pub type LoaderError = globset::Error;
impl Loader {
pub fn new(config: Configuration) -> Result<Self, LoaderError> {
2025-02-21 09:38:14 +08:00
let mut languages = Vec::with_capacity(config.language.len());
let mut languages_by_extension = HashMap::new();
let mut languages_by_shebang = HashMap::new();
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
let mut file_type_globs = Vec::new();
2025-02-21 09:38:14 +08:00
for mut config in config.language {
let language = Language(languages.len() as u32);
config.language = Some(language);
for file_type in &config.file_types {
match file_type {
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
FileType::Extension(extension) => {
2025-02-21 09:38:14 +08:00
languages_by_extension.insert(extension.clone(), language);
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
}
FileType::Glob(glob) => {
2025-02-21 09:38:14 +08:00
file_type_globs.push(FileTypeGlob::new(glob.to_owned(), language));
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
}
};
}
for shebang in &config.shebangs {
2025-02-21 09:38:14 +08:00
languages_by_shebang.insert(shebang.clone(), language);
}
2025-02-21 09:38:14 +08:00
languages.push(LanguageData::new(config));
}
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
Ok(Self {
2025-02-21 09:38:14 +08:00
languages,
languages_by_extension,
languages_by_shebang,
languages_glob_matcher: FileTypeGlobMatcher::new(file_type_globs)?,
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
language_server_configs: config.language_server,
scopes: ArcSwap::from_pointee(Vec::new()),
})
}
pub fn languages(&self) -> impl ExactSizeIterator<Item = (Language, &LanguageData)> {
self.languages
.iter()
.enumerate()
.map(|(idx, data)| (Language(idx as u32), data))
}
pub fn language_configs(&self) -> impl ExactSizeIterator<Item = &LanguageConfiguration> {
self.languages.iter().map(|language| &*language.config)
}
pub fn language(&self, lang: Language) -> &LanguageData {
&self.languages[lang.idx()]
}
pub fn language_for_name(&self, name: impl PartialEq<String>) -> Option<Language> {
self.languages.iter().enumerate().find_map(|(idx, config)| {
(name == config.config.language_id).then_some(Language(idx as u32))
})
}
pub fn language_for_scope(&self, scope: &str) -> Option<Language> {
self.languages.iter().enumerate().find_map(|(idx, config)| {
(scope == config.config.scope).then_some(Language(idx as u32))
})
}
pub fn language_for_match(&self, text: RopeSlice) -> Option<Language> {
// PERF: If the name matches up with the id, then this saves the need to do expensive regex.
2025-02-21 09:38:14 +08:00
let shortcircuit = self.language_for_name(text);
if shortcircuit.is_some() {
return shortcircuit;
}
// If the name did not match up with a known id, then match on injection regex.
let mut best_match_length = 0;
let mut best_match_position = None;
2025-02-21 09:38:14 +08:00
for (idx, data) in self.languages.iter().enumerate() {
if let Some(injection_regex) = &data.config.injection_regex {
if let Some(mat) = injection_regex.find(text.regex_input()) {
let length = mat.end() - mat.start();
if length > best_match_length {
2025-02-21 09:38:14 +08:00
best_match_position = Some(idx);
best_match_length = length;
}
}
}
}
2025-02-21 09:38:14 +08:00
best_match_position.map(|i| Language(i as u32))
}
2025-02-21 09:38:14 +08:00
pub fn language_for_filename(&self, path: &Path) -> Option<Language> {
// Find all the language configurations that match this file name
// or a suffix of the file name.
// TODO: content_regex handling conflict resolution
self.languages_glob_matcher
.language_for_path(path)
.or_else(|| {
path.extension()
.and_then(|extension| extension.to_str())
.and_then(|extension| self.languages_by_extension.get(extension).copied())
})
}
2025-02-21 09:38:14 +08:00
pub fn language_for_shebang(&self, text: RopeSlice) -> Option<Language> {
// NOTE: this is slightly different than the one for injection markers in tree-house. It
// is anchored at the beginning.
use helix_stdx::rope::Regex;
use once_cell::sync::Lazy;
const SHEBANG: &str = r"^#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(SHEBANG).unwrap());
let marker = SHEBANG_REGEX
.captures_iter(regex_cursor::Input::new(text))
.map(|cap| text.byte_slice(cap.get_group(1).unwrap().range()))
.next()?;
self.language_for_shebang_marker(marker)
}
    /// Looks up a language by the interpreter name captured from a shebang line.
    fn language_for_shebang_marker(&self, marker: RopeSlice) -> Option<Language> {
        let shebang: Cow<str> = marker.into();
        self.languages_by_shebang.get(shebang.as_ref()).copied()
    }
2025-02-21 09:38:14 +08:00
    /// Returns the (lazily compiled) indent query for the language, if it has one.
    pub fn indent_query(&self, lang: Language) -> Option<&IndentQuery> {
        self.language(lang).indent_query(self)
    }
2025-02-21 09:38:14 +08:00
    /// Returns the (lazily compiled) textobject query for the language, if it has one.
    pub fn textobject_query(&self, lang: Language) -> Option<&TextObjectQuery> {
        self.language(lang).textobject_query(self)
    }
2025-02-21 09:38:14 +08:00
    /// The configured language servers, keyed by their name in `languages.toml`.
    pub fn language_server_configs(&self) -> &HashMap<String, LanguageServerConfiguration> {
        &self.language_server_configs
    }
    /// A snapshot of the current highlight scope names (theme keys).
    ///
    /// The `ArcSwap` guard gives lock-free read access.
    pub fn scopes(&self) -> Guard<Arc<Vec<String>>> {
        self.scopes.load()
    }
2025-02-21 09:38:14 +08:00
    /// Replaces the highlight scopes and reconfigures every language to use them.
    pub fn set_scopes(&self, scopes: Vec<String>) {
        self.scopes.store(Arc::new(scopes));

        // Reconfigure existing grammars
        for data in &self.languages {
            data.reconfigure(&self.scopes());
        }
    }
2025-02-21 09:38:14 +08:00
}
2020-09-17 13:57:49 +08:00
2025-02-21 09:38:14 +08:00
impl LanguageLoader for Loader {
    /// Resolves an injection marker from a highlight query (a language name, match
    /// text, filename or shebang) to a known language.
    fn language_for_marker(&self, marker: InjectionLanguageMarker) -> Option<Language> {
        match marker {
            InjectionLanguageMarker::Name(name) => self.language_for_name(name),
            InjectionLanguageMarker::Match(text) => self.language_for_match(text),
            InjectionLanguageMarker::Filename(text) => {
                let path: Cow<str> = text.into();
                self.language_for_filename(Path::new(path.as_ref()))
            }
            InjectionLanguageMarker::Shebang(text) => self.language_for_shebang_marker(text),
        }
    }

    /// The lazily compiled tree-sitter configuration (grammar + queries) for the language.
    fn get_config(&self, lang: Language) -> Option<&SyntaxConfig> {
        self.languages[lang.idx()].syntax_config(self)
    }
}
/// Associates a single file-type glob with the language it selects.
#[derive(Debug)]
struct FileTypeGlob {
    // The glob pattern matched against file paths.
    glob: globset::Glob,
    // The language chosen when this glob matches.
    language: Language,
}
2025-02-21 09:38:14 +08:00
impl FileTypeGlob {
    /// Pairs a glob pattern with the language it should select.
    pub fn new(glob: globset::Glob, language: Language) -> Self {
        Self { glob, language }
    }
}
2025-02-21 09:38:14 +08:00
/// Matches file paths against all configured file-type globs in one pass.
#[derive(Debug)]
struct FileTypeGlobMatcher {
    // Compiled set of all globs in `file_types`, in the same order, so match
    // indices returned by the set index directly into `file_types`.
    matcher: globset::GlobSet,
    file_types: Vec<FileTypeGlob>,
}
2025-02-21 09:38:14 +08:00
impl Default for FileTypeGlobMatcher {
    /// An empty matcher: matches no path, selects no language.
    fn default() -> Self {
        Self {
            matcher: globset::GlobSet::empty(),
            file_types: Vec::new(),
        }
    }
}
2020-09-17 13:57:49 +08:00
2025-02-21 09:38:14 +08:00
impl FileTypeGlobMatcher {
fn new(file_types: Vec<FileTypeGlob>) -> Result<Self, globset::Error> {
let mut builder = globset::GlobSetBuilder::new();
for file_type in &file_types {
builder.add(file_type.glob.clone());
2020-09-17 13:57:49 +08:00
}
2025-02-21 09:38:14 +08:00
Ok(Self {
matcher: builder.build()?,
file_types,
})
2020-09-17 13:57:49 +08:00
}
2025-02-21 09:38:14 +08:00
fn language_for_path(&self, path: &Path) -> Option<Language> {
self.matcher
.matches(path)
.iter()
.filter_map(|idx| self.file_types.get(*idx))
.max_by_key(|file_type| file_type.glob.glob().len())
.map(|file_type| file_type.language)
}
}
/// The parsed syntax tree of a document, including injected sub-language layers.
///
/// A thin wrapper around [`tree_house::Syntax`].
#[derive(Debug)]
pub struct Syntax {
    inner: tree_house::Syntax,
}
2025-02-21 09:38:14 +08:00
/// Maximum time a (re)parse may take before it is aborted.
const PARSE_TIMEOUT: Duration = Duration::from_millis(500); // half a second is pretty generous
2025-02-21 09:38:14 +08:00
impl Syntax {
    /// Parses `source` as `language`, building the root layer and any injected layers.
    ///
    /// Fails if parsing errors out or exceeds [`PARSE_TIMEOUT`].
    pub fn new(source: RopeSlice, language: Language, loader: &Loader) -> Result<Self, Error> {
        let inner = tree_house::Syntax::new(source, language, PARSE_TIMEOUT, loader)?;
        Ok(Self { inner })
    }

    /// Incrementally re-parses after `changeset` turned `old_source` into `source`.
    ///
    /// `old_source` is the text *before* the change; it is used to translate the
    /// changeset's char offsets into byte offsets. No-op when the changeset
    /// produces no edits.
    pub fn update(
        &mut self,
        old_source: RopeSlice,
        source: RopeSlice,
        changeset: &ChangeSet,
        loader: &Loader,
    ) -> Result<(), Error> {
        let edits = generate_edits(old_source, changeset);
        if edits.is_empty() {
            Ok(())
        } else {
            self.inner.update(source, PARSE_TIMEOUT, &edits, loader)
        }
    }

    /// The data (tree, language, etc.) of the given layer.
    pub fn layer(&self, layer: Layer) -> &tree_house::LayerData {
        self.inner.layer(layer)
    }

    /// The root layer, i.e. the layer for the document's own language.
    pub fn root_layer(&self) -> Layer {
        self.inner.root()
    }

    /// The layer responsible for the given byte range.
    pub fn layer_for_byte_range(&self, start: u32, end: u32) -> Layer {
        self.inner.layer_for_byte_range(start, end)
    }

    /// The language of the root layer (the document's language).
    pub fn root_language(&self) -> Language {
        self.layer(self.root_layer()).language
    }

    /// The syntax tree of the root layer.
    pub fn tree(&self) -> &Tree {
        self.inner.tree()
    }

    /// The tree of the layer responsible for the given byte range.
    pub fn tree_for_byte_range(&self, start: u32, end: u32) -> &Tree {
        self.inner.tree_for_byte_range(start, end)
    }

    /// The smallest *named* node spanning the given byte range, if any.
    pub fn named_descendant_for_byte_range(&self, start: u32, end: u32) -> Option<Node> {
        self.inner.named_descendant_for_byte_range(start, end)
    }

    /// The smallest node (named or anonymous) spanning the given byte range, if any.
    pub fn descendant_for_byte_range(&self, start: u32, end: u32) -> Option<Node> {
        self.inner.descendant_for_byte_range(start, end)
    }

    /// A cursor for walking the syntax tree, including across injection layers.
    pub fn walk(&self) -> TreeCursor {
        self.inner.walk()
    }

    /// Creates a highlighter producing highlight events for `range` of `source`.
    pub fn highlighter<'a>(
        &'a self,
        source: RopeSlice<'a>,
        loader: &'a Loader,
        range: impl RangeBounds<u32>,
    ) -> Highlighter<'a> {
        Highlighter::new(&self.inner, source, loader, range)
    }

    /// Iterates captures of per-language queries (selected by `loader`) within `range`.
    pub fn query_iter<'a, QueryLoader, LayerState, Range>(
        &'a self,
        source: RopeSlice<'a>,
        loader: QueryLoader,
        range: Range,
    ) -> QueryIter<'a, 'a, QueryLoader, LayerState>
    where
        QueryLoader: FnMut(Language) -> Option<&'a Query> + 'a,
        LayerState: Default,
        Range: RangeBounds<u32>,
    {
        QueryIter::new(&self.inner, source, loader, range)
    }
}
2025-02-21 09:38:14 +08:00
/// The syntax highlighter from `tree-house`, specialized to this crate's [`Loader`].
pub type Highlighter<'a> = highlighter::Highlighter<'a, 'a, Loader>;
2025-02-21 09:38:14 +08:00
/// Converts a [`ChangeSet`] into the [`InputEdit`]s tree-sitter needs for an
/// incremental re-parse.
///
/// `old_text` must be the document *before* the changeset was applied; it is used
/// to translate the changeset's char offsets into byte offsets. An adjacent
/// `Insert` followed by a `Delete` is collapsed into a single replacement edit.
///
/// The `Point` (row/column) fields are intentionally left zeroed — only byte
/// offsets are filled in here.
fn generate_edits(old_text: RopeSlice, changeset: &ChangeSet) -> Vec<InputEdit> {
    use crate::Operation::*;
    use tree_sitter::Point;
    let mut old_pos = 0;

    let mut edits = Vec::new();

    if changeset.changes.is_empty() {
        return edits;
    }

    let mut iter = changeset.changes.iter().peekable();

    // TODO; this is a lot easier with Change instead of Operation.
    while let Some(change) = iter.next() {
        // Number of chars of the *old* text this operation spans (inserts span none).
        let len = match change {
            Delete(i) | Retain(i) => *i,
            Insert(_) => 0,
        };
        let mut old_end = old_pos + len;

        match change {
            Retain(_) => {}
            Delete(_) => {
                let start_byte = old_text.char_to_byte(old_pos) as u32;
                let old_end_byte = old_text.char_to_byte(old_end) as u32;

                // deletion
                edits.push(InputEdit {
                    start_byte,               // old_pos to byte
                    old_end_byte,             // old_end to byte
                    new_end_byte: start_byte, // old_pos to byte
                    start_point: Point::ZERO,
                    old_end_point: Point::ZERO,
                    new_end_point: Point::ZERO,
                });
            }
            Insert(s) => {
                let start_byte = old_text.char_to_byte(old_pos) as u32;

                // a subsequent delete means a replace, consume it
                if let Some(Delete(len)) = iter.peek() {
                    old_end = old_pos + len;
                    let old_end_byte = old_text.char_to_byte(old_end) as u32;

                    iter.next();

                    // replacement
                    edits.push(InputEdit {
                        start_byte,                                // old_pos to byte
                        old_end_byte,                              // old_end to byte
                        new_end_byte: start_byte + s.len() as u32, // old_pos to byte + s.len()
                        start_point: Point::ZERO,
                        old_end_point: Point::ZERO,
                        new_end_point: Point::ZERO,
                    });
                } else {
                    // insert
                    edits.push(InputEdit {
                        start_byte,                                // old_pos to byte
                        old_end_byte: start_byte,                  // same
                        new_end_byte: start_byte + s.len() as u32, // old_pos + s.len()
                        start_point: Point::ZERO,
                        old_end_point: Point::ZERO,
                        new_end_point: Point::ZERO,
                    });
                }
            }
        }
        // Advance the position in the old text (inserts contribute nothing unless
        // they consumed a following delete above).
        old_pos = old_end;
    }
    edits
}
2025-02-21 09:38:14 +08:00
/// A set of "overlay" highlights and ranges they apply to.
///
/// As overlays, the styles for the given `Highlight`s are merged on top of the syntax highlights.
#[derive(Debug)]
pub enum OverlayHighlights {
    /// All highlights use a single `Highlight`.
    ///
    /// Note that, currently, all ranges are assumed to be non-overlapping. This could change in
    /// the future though.
    Homogeneous {
        highlight: Highlight,
        ranges: Vec<ops::Range<usize>>,
    },
    /// A collection of different highlights for given ranges.
    ///
    /// Note that the ranges **must be non-overlapping**.
    // NOTE(review): variant name is spelled "Heterogenous" (sic); kept as-is because it is public API.
    Heterogenous {
        highlights: Vec<(Highlight, ops::Range<usize>)>,
    },
}
impl OverlayHighlights {
    /// Creates an overlay applying one `highlight` to a single `range`.
    pub fn single(highlight: Highlight, range: ops::Range<usize>) -> Self {
        Self::Homogeneous {
            highlight,
            ranges: vec![range],
        }
    }

    /// Whether the overlay has no ranges to highlight at all.
    fn is_empty(&self) -> bool {
        match self {
            Self::Homogeneous { ranges, .. } => ranges.is_empty(),
            Self::Heterogenous { highlights } => highlights.is_empty(),
        }
    }
}
2025-02-21 09:38:14 +08:00
/// One [`OverlayHighlights`] together with the highlighter's iteration state over it.
#[derive(Debug)]
struct Overlay {
    highlights: OverlayHighlights,
    /// The position of the highlighter into the Vec of ranges of the overlays.
    ///
    /// Used by the `OverlayHighlighter`.
    idx: usize,
    /// The currently active highlight (and the ending character index) for this overlay.
    ///
    /// Used by the `OverlayHighlighter`.
    active_highlight: Option<(Highlight, usize)>,
}
2025-02-21 09:38:14 +08:00
impl Overlay {
fn new(highlights: OverlayHighlights) -> Option<Self> {
(!highlights.is_empty()).then_some(Self {
highlights,
idx: 0,
active_highlight: None,
})
}
2025-02-21 09:38:14 +08:00
fn current(&self) -> Option<(Highlight, ops::Range<usize>)> {
match &self.highlights {
OverlayHighlights::Homogeneous { highlight, ranges } => ranges
.get(self.idx)
.map(|range| (*highlight, range.clone())),
OverlayHighlights::Heterogenous { highlights } => highlights.get(self.idx).cloned(),
}
}
2025-02-21 09:38:14 +08:00
fn start(&self) -> Option<usize> {
match &self.highlights {
OverlayHighlights::Homogeneous { ranges, .. } => {
ranges.get(self.idx).map(|range| range.start)
}
2025-02-21 09:38:14 +08:00
OverlayHighlights::Heterogenous { highlights } => highlights
.get(self.idx)
.map(|(_highlight, range)| range.start),
}
}
}
2025-02-21 09:38:14 +08:00
/// A collection of highlights to apply when rendering which merge on top of syntax highlights.
#[derive(Debug)]
pub struct OverlayHighlighter {
    overlays: Vec<Overlay>,
    // Start of the next highlight to begin; `usize::MAX` when exhausted.
    next_highlight_start: usize,
    // End of the soonest-ending currently-active highlight; `usize::MAX` when none.
    next_highlight_end: usize,
}
2025-02-21 09:38:14 +08:00
impl OverlayHighlighter {
    /// Builds a highlighter out of any number of overlays; empty overlays are discarded.
    pub fn new(overlays: impl IntoIterator<Item = OverlayHighlights>) -> Self {
        let overlays: Vec<_> = overlays.into_iter().filter_map(Overlay::new).collect();
        // The first event is the earliest start among all overlays.
        let next_highlight_start = overlays
            .iter()
            .filter_map(|overlay| overlay.start())
            .min()
            .unwrap_or(usize::MAX);

        Self {
            overlays,
            next_highlight_start,
            next_highlight_end: usize::MAX,
        }
    }

    /// The current position in the overlay highlights.
    ///
    /// This method is meant to be used when treating this type as a cursor over the overlay
    /// highlights.
    ///
    /// `usize::MAX` is returned when there are no more overlay highlights.
    pub fn next_event_offset(&self) -> usize {
        self.next_highlight_start.min(self.next_highlight_end)
    }

    /// Processes the event at [`Self::next_event_offset`], returning the event kind and
    /// the stack of highlights that become (or remain) active there.
    pub fn advance(&mut self) -> (HighlightEvent, impl Iterator<Item = Highlight> + '_) {
        let mut refresh = false;
        let prev_stack_size = self
            .overlays
            .iter()
            .filter(|overlay| overlay.active_highlight.is_some())
            .count();
        let pos = self.next_event_offset();

        // First, deactivate any highlights that end exactly at this position.
        if self.next_highlight_end == pos {
            for overlay in self.overlays.iter_mut() {
                if overlay
                    .active_highlight
                    .is_some_and(|(_highlight, end)| end == pos)
                {
                    overlay.active_highlight.take();
                }
            }

            refresh = true;
        }

        // Then activate every highlight that starts at this position. Looping because
        // activating can reveal further ranges starting at the same position.
        while self.next_highlight_start == pos {
            let mut activated_idx = usize::MAX;
            for (idx, overlay) in self.overlays.iter_mut().enumerate() {
                let Some((highlight, range)) = overlay.current() else {
                    continue;
                };
                if range.start != self.next_highlight_start {
                    continue;
                }

                // If this overlay has a highlight at this start index, set its active highlight
                // and increment the cursor position within the overlay.
                overlay.active_highlight = Some((highlight, range.end));
                overlay.idx += 1;

                activated_idx = activated_idx.min(idx);
            }

            // If `self.next_highlight_start == pos` that means that some overlay was ready to
            // emit a highlight, so `activated_idx` must have been set to an existing index.
            assert!(
                (0..self.overlays.len()).contains(&activated_idx),
                "expected an overlay to highlight (at pos {pos}, there are {} overlays)",
                self.overlays.len()
            );
            // If any overlays are active after the (lowest) one which was just activated, the
            // highlights need to be refreshed.
            refresh |= self.overlays[activated_idx..]
                .iter()
                .any(|overlay| overlay.active_highlight.is_some());

            self.next_highlight_start = self
                .overlays
                .iter()
                .filter_map(|overlay| overlay.start())
                .min()
                .unwrap_or(usize::MAX);
        }

        // Recompute the next end position from whatever is active now.
        self.next_highlight_end = self
            .overlays
            .iter()
            .filter_map(|overlay| Some(overlay.active_highlight?.1))
            .min()
            .unwrap_or(usize::MAX);

        // `Refresh` replaces the whole stack; `Push` only appends to the previous one.
        let (event, start) = if refresh {
            (HighlightEvent::Refresh, 0)
        } else {
            (HighlightEvent::Push, prev_stack_size)
        };

        (
            event,
            self.overlays
                .iter()
                .flat_map(|overlay| overlay.active_highlight)
                .map(|(highlight, _end)| highlight)
                .skip(start),
        )
    }
}
2025-02-21 09:38:14 +08:00
/// A capture produced by a textobject query: either a single node or a group of
/// nodes captured by a quantifier (`+`, `*`, …).
#[derive(Debug)]
pub enum CapturedNode<'a> {
    Single(Node<'a>),
    /// Guaranteed to be not empty
    Grouped(Vec<Node<'a>>),
}
2025-02-21 09:38:14 +08:00
impl CapturedNode<'_> {
pub fn start_byte(&self) -> usize {
match self {
Self::Single(n) => n.start_byte() as usize,
Self::Grouped(ns) => ns[0].start_byte() as usize,
}
2025-02-21 09:38:14 +08:00
}
2025-02-21 09:38:14 +08:00
pub fn end_byte(&self) -> usize {
match self {
Self::Single(n) => n.end_byte() as usize,
Self::Grouped(ns) => ns.last().unwrap().end_byte() as usize,
}
2025-02-21 09:38:14 +08:00
}
2025-02-21 09:38:14 +08:00
pub fn byte_range(&self) -> ops::Range<usize> {
self.start_byte()..self.end_byte()
}
}
2025-02-21 09:38:14 +08:00
/// A compiled textobjects query (e.g. from `textobjects.scm`) for one language.
#[derive(Debug)]
pub struct TextObjectQuery {
    query: Query,
}
2025-02-21 09:38:14 +08:00
impl TextObjectQuery {
pub fn new(query: Query) -> Self {
Self { query }
}
2025-02-21 09:38:14 +08:00
/// Run the query on the given node and return sub nodes which match given
/// capture ("function.inside", "class.around", etc).
///
/// Captures may contain multiple nodes by using quantifiers (+, *, etc),
/// and support for this is partial and could use improvement.
///
/// ```query
/// (comment)+ @capture
///
/// ; OR
/// (
/// (comment)*
/// .
/// (function)
/// ) @capture
/// ```
pub fn capture_nodes<'a>(
&'a self,
capture_name: &str,
node: &Node<'a>,
slice: RopeSlice<'a>,
) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
self.capture_nodes_any(&[capture_name], node, slice)
}
2025-02-21 09:38:14 +08:00
/// Find the first capture that exists out of all given `capture_names`
/// and return sub nodes that match this capture.
pub fn capture_nodes_any<'a>(
&'a self,
capture_names: &[&str],
node: &Node<'a>,
slice: RopeSlice<'a>,
) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
let capture = capture_names
.iter()
.find_map(|cap| self.query.get_capture(cap))?;
let mut cursor = InactiveQueryCursor::new(0..u32::MAX, TREE_SITTER_MATCH_LIMIT)
.execute_query(&self.query, node, RopeInput::new(slice));
2025-02-21 09:38:14 +08:00
let capture_node = iter::from_fn(move || {
let (mat, _) = cursor.next_matched_node()?;
Some(mat.nodes_for_capture(capture).cloned().collect())
})
.filter_map(move |nodes: Vec<_>| {
if nodes.len() > 1 {
Some(CapturedNode::Grouped(nodes))
} else {
nodes.into_iter().map(CapturedNode::Single).next()
}
});
Some(capture_node)
}
}
/// Pretty-prints the tree-sitter subtree rooted at `node` in S-expression form.
pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result {
    if node.child_count() > 0 {
        return pretty_print_tree_impl(fmt, &mut node.walk(), 0);
    }
    // A lone leaf prints as itself: visible nodes as `(kind)`, anonymous
    // (literal) nodes as a quoted string.
    if node_is_visible(&node) {
        write!(fmt, "({})", node.kind())
    } else {
        write!(fmt, "\"{}\"", format_anonymous_node_kind(node.kind()))
    }
}
2025-02-21 09:38:14 +08:00
/// Whether a node should appear in the pretty-printed tree: missing nodes always
/// do, otherwise only named nodes whose kind the grammar marks as visible.
fn node_is_visible(node: &Node) -> bool {
    if node.is_missing() {
        return true;
    }
    node.is_named() && node.grammar().node_kind_is_visible(node.kind_id())
}
/// Escapes double quotes in an anonymous node kind so it can be shown inside a
/// quoted string; borrows when no escaping is needed.
fn format_anonymous_node_kind(kind: &str) -> Cow<str> {
    if !kind.contains('"') {
        return Cow::Borrowed(kind);
    }
    Cow::Owned(kind.replace('"', "\\\""))
}
/// Recursive worker for [`pretty_print_tree`].
///
/// The cursor must be positioned on the node to print; it is restored to that
/// same node before returning.
fn pretty_print_tree_impl<W: fmt::Write>(
    fmt: &mut W,
    cursor: &mut tree_sitter::TreeCursor,
    depth: usize,
) -> fmt::Result {
    let node = cursor.node();
    let visible = node_is_visible(&node);

    if visible {
        // Two columns of indentation per tree depth level.
        let indentation_columns = depth * 2;
        write!(fmt, "{:indentation_columns$}", "")?;

        // The field name comes from the cursor, not `Node::field_name_for_child`:
        // fields declared under unnamed helper rules are only correct this way
        // (see the ruby case in `test_pretty_print`).
        if let Some(field_name) = cursor.field_name() {
            write!(fmt, "{}: ", field_name)?;
        }

        write!(fmt, "({}", node.kind())?;
    } else {
        // Anonymous (literal) nodes are printed inline on the parent's line.
        write!(fmt, " \"{}\"", format_anonymous_node_kind(node.kind()))?;
    }

    // Handle children.
    if cursor.goto_first_child() {
        loop {
            if node_is_visible(&cursor.node()) {
                fmt.write_char('\n')?;
            }

            pretty_print_tree_impl(fmt, cursor, depth + 1)?;

            if !cursor.goto_next_sibling() {
                break;
            }
        }

        let moved = cursor.goto_parent();
        // The parent of the first child must exist, and must be `node`.
        debug_assert!(moved);
        debug_assert!(cursor.node() == node);
    }

    if visible {
        fmt.write_char(')')?;
    }

    Ok(())
}
#[cfg(test)]
mod test {
    use once_cell::sync::Lazy;

    use super::*;
    use crate::{Rope, Transaction};

    // Shared loader built from the default bundled language configuration.
    static LOADER: Lazy<Loader> = Lazy::new(crate::config::default_lang_loader);

    #[test]
    fn test_textobject_queries() {
        let query_str = r#"
        (line_comment)+ @quantified_nodes
        ((line_comment)+) @quantified_nodes_grouped
        ((line_comment) (line_comment)) @multiple_nodes_grouped
        "#;
        let source = Rope::from_str(
            r#"
/// a comment on
/// multiple lines
"#,
        );

        let language = LOADER.language_for_name("rust").unwrap();
        let grammar = LOADER.get_config(language).unwrap().grammar;
        let query = Query::new(grammar, query_str, |_, _| Ok(())).unwrap();
        let textobject = TextObjectQuery::new(query);
        let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap();

        let root = syntax.tree().root_node();

        // Asserts that the first match for `capture` covers exactly `range` (bytes).
        let test = |capture, range| {
            let matches: Vec<_> = textobject
                .capture_nodes(capture, &root, source.slice(..))
                .unwrap()
                .collect();

            assert_eq!(
                matches[0].byte_range(),
                range,
                "@{} expected {:?}",
                capture,
                range
            )
        };

        test("quantified_nodes", 1..37);
        // NOTE: Enable after implementing proper node group capturing
        // test("quantified_nodes_grouped", 1..37);
        // test("multiple_nodes_grouped", 1..37);
    }

    #[test]
    fn test_input_edits() {
        use tree_sitter::{InputEdit, Point};
        let doc = Rope::from("hello world!\ntest 123");
        let transaction = Transaction::change(
            &doc,
            vec![(6, 11, Some("test".into())), (12, 17, None)].into_iter(),
        );
        let edits = generate_edits(doc.slice(..), transaction.changes());
        // transaction.apply(&mut state);

        assert_eq!(
            edits,
            &[
                InputEdit {
                    start_byte: 6,
                    old_end_byte: 11,
                    new_end_byte: 10,
                    start_point: Point::ZERO,
                    old_end_point: Point::ZERO,
                    new_end_point: Point::ZERO
                },
                InputEdit {
                    start_byte: 12,
                    old_end_byte: 17,
                    new_end_byte: 12,
                    start_point: Point::ZERO,
                    old_end_point: Point::ZERO,
                    new_end_point: Point::ZERO
                }
            ]
        );

        // Testing with the official example from tree-sitter
        let mut doc = Rope::from("fn test() {}");
        let transaction =
            Transaction::change(&doc, vec![(8, 8, Some("a: u32".into()))].into_iter());
        let edits = generate_edits(doc.slice(..), transaction.changes());
        transaction.apply(&mut doc);

        assert_eq!(doc, "fn test(a: u32) {}");
        assert_eq!(
            edits,
            &[InputEdit {
                start_byte: 8,
                old_end_byte: 8,
                new_end_byte: 14,
                start_point: Point::ZERO,
                old_end_point: Point::ZERO,
                new_end_point: Point::ZERO
            }]
        );
    }

    // Parses `source` as `language_name`, pretty-prints the smallest node covering
    // `start..end` bytes, and compares against `expected`.
    #[track_caller]
    fn assert_pretty_print(
        language_name: &str,
        source: &str,
        expected: &str,
        start: usize,
        end: usize,
    ) {
        let source = Rope::from_str(source);

        let language = LOADER.language_for_name(language_name).unwrap();

        let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap();
        let root = syntax
            .tree()
            .root_node()
            .descendant_for_byte_range(start as u32, end as u32)
            .unwrap();

        let mut output = String::new();
        pretty_print_tree(&mut output, root).unwrap();

        assert_eq!(expected, output);
    }

    #[test]
    fn test_pretty_print() {
        let source = r#"// Hello"#;
        assert_pretty_print("rust", source, "(line_comment \"//\")", 0, source.len());

        // A large tree should be indented with fields:
        let source = r#"fn main() {
            println!("Hello, World!");
        }"#;
        assert_pretty_print(
            "rust",
            source,
            concat!(
                "(function_item \"fn\"\n",
                "  name: (identifier)\n",
                "  parameters: (parameters \"(\" \")\")\n",
                "  body: (block \"{\"\n",
                "    (expression_statement\n",
                "      (macro_invocation\n",
                "        macro: (identifier) \"!\"\n",
                "        (token_tree \"(\"\n",
                "          (string_literal \"\\\"\"\n",
                "            (string_content) \"\\\"\") \")\")) \";\") \"}\"))",
            ),
            0,
            source.len(),
        );

        // Selecting a token should print just that token:
        let source = r#"fn main() {}"#;
        assert_pretty_print("rust", source, r#""fn""#, 0, 1);

        // Error nodes are printed as errors:
        let source = r#"}{"#;
        assert_pretty_print("rust", source, "(ERROR \"}\" \"{\")", 0, source.len());

        // Fields broken under unnamed nodes are determined correctly.
        // In the following source, `object` belongs to the `singleton_method`
        // rule but `name` and `body` belong to an unnamed helper `_method_rest`.
        // This can cause a bug with a pretty-printing implementation that
        // uses `Node::field_name_for_child` to determine field names but is
        // fixed when using `tree_sitter::TreeCursor::field_name`.
        let source = "def self.method_name
          true
        end";
        assert_pretty_print(
            "ruby",
            source,
            concat!(
                "(singleton_method \"def\"\n",
                "  object: (self) \".\"\n",
                "  name: (identifier)\n",
                "  body: (body_statement\n",
                "    (true)) \"end\")"
            ),
            0,
            source.len(),
        );
    }
}