helix/helix-core/src/syntax.rs

1298 lines
43 KiB
Rust
Raw Normal View History

use std::borrow::Cow;
use std::fmt::{self, Display};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use arc_swap::{ArcSwap, Guard};
use globset::GlobSet;
pub use helix_syntax::highlighter::{Highlight, HighlightEvent};
pub use helix_syntax::{
merge, pretty_print_tree, HighlightConfiguration, InjectionLanguageMarker, RopeProvider,
TextObjectQuery, TreeCursor,
2021-03-22 11:40:07 +08:00
};
pub use helix_syntax::{with_cursor, Syntax};
use once_cell::sync::{Lazy, OnceCell};
use regex::Regex;
use ropey::RopeSlice;
use serde::ser::SerializeSeq;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use tree_sitter::{Point, Query};
use crate::auto_pairs::AutoPairs;
use crate::chars::char_is_line_ending;
use crate::diagnostic::Severity;
use crate::{ChangeSet, Operation, Tendril};
use helix_loader::grammar::{get_language, load_runtime_file};
fn deserialize_regex<'de, D>(deserializer: D) -> Result<Option<Regex>, D::Error>
where
D: serde::Deserializer<'de>,
{
Option::<String>::deserialize(deserializer)?
.map(|buf| Regex::new(&buf).map_err(serde::de::Error::custom))
.transpose()
}
fn deserialize_lsp_config<'de, D>(deserializer: D) -> Result<Option<serde_json::Value>, D::Error>
where
D: serde::Deserializer<'de>,
{
Option::<toml::Value>::deserialize(deserializer)?
.map(|toml| toml.try_into().map_err(serde::de::Error::custom))
.transpose()
}
fn deserialize_tab_width<'de, D>(deserializer: D) -> Result<usize, D::Error>
where
D: serde::Deserializer<'de>,
{
usize::deserialize(deserializer).and_then(|n| {
if n > 0 && n <= 16 {
Ok(n)
} else {
Err(serde::de::Error::custom(
"tab width must be a value from 1 to 16 inclusive",
))
}
})
}
/// Deserializes an optional `AutoPairConfig` and converts it into an
/// optional [`AutoPairs`].
///
/// An absent value yields `Ok(None)`; a present value is passed through the
/// `AutoPairConfig` -> `Option<AutoPairs>` conversion, which may itself
/// produce `None`.
pub fn deserialize_auto_pairs<'de, D>(deserializer: D) -> Result<Option<AutoPairs>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let parsed = Option::<AutoPairConfig>::deserialize(deserializer)?;
    Ok(parsed.and_then(|config| config.into()))
}
/// Returns the default timeout value, `20`.
///
/// NOTE(review): the unit and the field this default applies to are not
/// visible here (presumably seconds for a language server request) — confirm
/// at the use site.
fn default_timeout() -> u64 {
    const DEFAULT_TIMEOUT: u64 = 20;
    DEFAULT_TIMEOUT
}
#[derive(Debug, Serialize, Deserialize)]
Adds support for multiple language servers per language. Language Servers are now configured in a separate table in `languages.toml`: ```toml [langauge-server.mylang-lsp] command = "mylang-lsp" args = ["--stdio"] config = { provideFormatter = true } [language-server.efm-lsp-prettier] command = "efm-langserver" [language-server.efm-lsp-prettier.config] documentFormatting = true languages = { typescript = [ { formatCommand ="prettier --stdin-filepath ${INPUT}", formatStdin = true } ] } ``` The language server for a language is configured like this (`typescript-language-server` is configured by default): ```toml [[language]] name = "typescript" language-servers = [ { name = "efm-lsp-prettier", only-features = [ "format" ] }, "typescript-language-server" ] ``` or equivalent: ```toml [[language]] name = "typescript" language-servers = [ { name = "typescript-language-server", except-features = [ "format" ] }, "efm-lsp-prettier" ] ``` Each requested LSP feature is priorized in the order of the `language-servers` array. For example the first `goto-definition` supported language server (in this case `typescript-language-server`) will be taken for the relevant LSP request (command `goto_definition`). If no `except-features` or `only-features` is given all features for the language server are enabled, as long as the language server supports these. If it doesn't the next language server which supports the feature is tried. The list of supported features are: - `format` - `goto-definition` - `goto-declaration` - `goto-type-definition` - `goto-reference` - `goto-implementation` - `signature-help` - `hover` - `document-highlight` - `completion` - `code-action` - `workspace-command` - `document-symbols` - `workspace-symbols` - `diagnostics` - `rename-symbol` - `inlay-hints` Another side-effect/difference that comes with this PR, is that only one language server instance is started if different languages use the same language server.
2022-05-24 00:10:48 +08:00
#[serde(rename_all = "kebab-case")]
pub struct Configuration {
2021-03-25 15:53:32 +08:00
pub language: Vec<LanguageConfiguration>,
Adds support for multiple language servers per language. Language Servers are now configured in a separate table in `languages.toml`: ```toml [langauge-server.mylang-lsp] command = "mylang-lsp" args = ["--stdio"] config = { provideFormatter = true } [language-server.efm-lsp-prettier] command = "efm-langserver" [language-server.efm-lsp-prettier.config] documentFormatting = true languages = { typescript = [ { formatCommand ="prettier --stdin-filepath ${INPUT}", formatStdin = true } ] } ``` The language server for a language is configured like this (`typescript-language-server` is configured by default): ```toml [[language]] name = "typescript" language-servers = [ { name = "efm-lsp-prettier", only-features = [ "format" ] }, "typescript-language-server" ] ``` or equivalent: ```toml [[language]] name = "typescript" language-servers = [ { name = "typescript-language-server", except-features = [ "format" ] }, "efm-lsp-prettier" ] ``` Each requested LSP feature is priorized in the order of the `language-servers` array. For example the first `goto-definition` supported language server (in this case `typescript-language-server`) will be taken for the relevant LSP request (command `goto_definition`). If no `except-features` or `only-features` is given all features for the language server are enabled, as long as the language server supports these. If it doesn't the next language server which supports the feature is tried. The list of supported features are: - `format` - `goto-definition` - `goto-declaration` - `goto-type-definition` - `goto-reference` - `goto-implementation` - `signature-help` - `hover` - `document-highlight` - `completion` - `code-action` - `workspace-command` - `document-symbols` - `workspace-symbols` - `diagnostics` - `rename-symbol` - `inlay-hints` Another side-effect/difference that comes with this PR, is that only one language server instance is started if different languages use the same language server.
2022-05-24 00:10:48 +08:00
#[serde(default)]
pub language_server: HashMap<String, LanguageServerConfiguration>,
}
// largely based on tree-sitter/cli/src/loader.rs
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
pub struct LanguageConfiguration {
#[serde(rename = "name")]
Adds support for multiple language servers per language. Language Servers are now configured in a separate table in `languages.toml`: ```toml [langauge-server.mylang-lsp] command = "mylang-lsp" args = ["--stdio"] config = { provideFormatter = true } [language-server.efm-lsp-prettier] command = "efm-langserver" [language-server.efm-lsp-prettier.config] documentFormatting = true languages = { typescript = [ { formatCommand ="prettier --stdin-filepath ${INPUT}", formatStdin = true } ] } ``` The language server for a language is configured like this (`typescript-language-server` is configured by default): ```toml [[language]] name = "typescript" language-servers = [ { name = "efm-lsp-prettier", only-features = [ "format" ] }, "typescript-language-server" ] ``` or equivalent: ```toml [[language]] name = "typescript" language-servers = [ { name = "typescript-language-server", except-features = [ "format" ] }, "efm-lsp-prettier" ] ``` Each requested LSP feature is priorized in the order of the `language-servers` array. For example the first `goto-definition` supported language server (in this case `typescript-language-server`) will be taken for the relevant LSP request (command `goto_definition`). If no `except-features` or `only-features` is given all features for the language server are enabled, as long as the language server supports these. If it doesn't the next language server which supports the feature is tried. The list of supported features are: - `format` - `goto-definition` - `goto-declaration` - `goto-type-definition` - `goto-reference` - `goto-implementation` - `signature-help` - `hover` - `document-highlight` - `completion` - `code-action` - `workspace-command` - `document-symbols` - `workspace-symbols` - `diagnostics` - `rename-symbol` - `inlay-hints` Another side-effect/difference that comes with this PR, is that only one language server instance is started if different languages use the same language server.
2022-05-24 00:10:48 +08:00
pub language_id: String, // c-sharp, rust, tsx
#[serde(rename = "language-id")]
// see the table under https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocumentItem
pub language_server_language_id: Option<String>, // csharp, rust, typescriptreact, for the language-server
pub scope: String, // source.rust
pub file_types: Vec<FileType>, // filename extension or ends_with? <Gemfile, rb, etc>
#[serde(default)]
pub shebangs: Vec<String>, // interpreter(s) associated with language
#[serde(default)]
pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
2024-02-27 21:36:25 +08:00
#[serde(
default,
skip_serializing,
deserialize_with = "from_comment_tokens",
alias = "comment-token"
)]
pub comment_tokens: Option<Vec<String>>,
#[serde(
default,
skip_serializing,
deserialize_with = "from_block_comment_tokens"
)]
pub block_comment_tokens: Option<Vec<BlockCommentToken>>,
Softwrapping improvements (#5893) * use max_line_width + 1 during softwrap to account for newline char Helix softwrap implementation always wraps lines so that the newline character doesn't get cut off so he line wraps one chars earlier then in other editors. This is necessary, because newline chars are always selecatble in helix and must never be hidden. However That means that `max_line_width` currently wraps one char earlier than expected. The typical definition of line width does not include the newline character and other helix commands like `:reflow` also don't count the newline character here. This commit makes softwrap use `max_line_width + 1` instead of `max_line_width` to correct the impedance missmatch. * fix typos Co-authored-by: Jonathan Lebon <jonathan@jlebon.com> * Add text-width to config.toml * text-width: update setting documentation * rename leftover config item * remove leftover max-line-length occurrences * Make `text-width` optional in editor config When it was only used for `:reflow` it made sense to have a default value set to `80`, but now that soft-wrapping uses this setting, keeping a default set to `80` would make soft-wrapping behave more aggressively. * Allow softwrapping to ignore `text-width` Softwrapping wraps by default to the viewport width or a configured `text-width` (whichever's smaller). In some cases we only want to set `text-width` to use for hard-wrapping and let longer lines flow if they have enough space. This setting allows that. * Revert "Make `text-width` optional in editor config" This reverts commit b247d526d69adf41434b6fd9c4983369c785aa22. 
* soft-wrap: allow per-language overrides * Update book/src/configuration.md Co-authored-by: Pascal Kuthe <pascal.kuthe@semimod.de> * Update book/src/languages.md Co-authored-by: Pascal Kuthe <pascal.kuthe@semimod.de> * Update book/src/configuration.md Co-authored-by: Pascal Kuthe <pascal.kuthe@semimod.de> --------- Co-authored-by: Pascal Kuthe <pascal.kuthe@semimod.de> Co-authored-by: Jonathan Lebon <jonathan@jlebon.com> Co-authored-by: Alex Boehm <alexb@ozrunways.com> Co-authored-by: Blaž Hrastnik <blaz@mxxn.io>
2023-03-08 10:02:11 +08:00
pub text_width: Option<usize>,
pub soft_wrap: Option<SoftWrap>,
#[serde(default)]
pub auto_format: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub formatter: Option<FormatterConfiguration>,
#[serde(default)]
pub diagnostic_severity: Severity,
pub grammar: Option<String>, // tree-sitter grammar name, defaults to language_id
// content_regex
#[serde(default, skip_serializing, deserialize_with = "deserialize_regex")]
2021-09-16 15:04:32 +08:00
pub injection_regex: Option<Regex>,
// first_line_regex
//
#[serde(skip)]
pub(crate) highlight_config: OnceCell<Option<Arc<HighlightConfiguration>>>,
// tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583
#[serde(
default,
skip_serializing_if = "Vec::is_empty",
serialize_with = "serialize_lang_features",
deserialize_with = "deserialize_lang_features"
)]
pub language_servers: Vec<LanguageServerFeatures>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indent: Option<IndentationConfiguration>,
#[serde(skip)]
2022-03-30 23:08:07 +08:00
pub(crate) indent_query: OnceCell<Option<Query>>,
#[serde(skip)]
pub(crate) textobject_query: OnceCell<Option<TextObjectQuery>>,
#[serde(skip_serializing_if = "Option::is_none")]
2021-08-24 16:56:18 +08:00
pub debugger: Option<DebugAdapterConfig>,
/// Automatic insertion of pairs to parentheses, brackets,
/// etc. Defaults to true. Optionally, this can be a list of 2-tuples
/// to specify a list of characters to pair. This overrides the
/// global setting.
#[serde(default, skip_serializing, deserialize_with = "deserialize_auto_pairs")]
pub auto_pairs: Option<AutoPairs>,
pub rulers: Option<Vec<u16>>, // if set, override editor's rulers
/// Hardcoded LSP root directories relative to the workspace root, like `examples` or `tools/fuzz`.
/// Falling back to the current working directory if none are configured.
pub workspace_lsp_roots: Option<Vec<PathBuf>>,
#[serde(default)]
pub persistent_diagnostic_sources: Vec<String>,
}
/// Reads the query file `filename` for `language` through
/// `helix_syntax::read_query`, resolving each requested file with
/// `load_runtime_file`.
///
/// A file that fails to load is treated as empty text rather than an error.
fn read_query(language: &str, filename: &str) -> String {
    helix_syntax::read_query(language, filename, |lang, file| {
        load_runtime_file(lang, file).unwrap_or_default()
    })
}
impl LanguageConfiguration {
fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
let highlights_query = read_query(&self.language_id, "highlights.scm");
// always highlight syntax errors
// highlights_query += "\n(ERROR) @error";
let injections_query = read_query(&self.language_id, "injections.scm");
let locals_query = read_query(&self.language_id, "locals.scm");
if highlights_query.is_empty() {
None
} else {
let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id))
.map_err(|err| {
log::error!(
"Failed to load tree-sitter parser for language {:?}: {}",
self.language_id,
err
)
})
.ok()?;
let config = HighlightConfiguration::new(
language,
&highlights_query,
&injections_query,
&locals_query,
)
.map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err))
.ok()?;
config.configure(scopes);
Some(Arc::new(config))
}
}
pub fn reconfigure(&self, scopes: &[String]) {
if let Some(Some(config)) = self.highlight_config.get() {
config.configure(scopes);
}
}
pub fn get_highlight_config(&self) -> Option<Arc<HighlightConfiguration>> {
self.highlight_config.get().cloned().flatten()
}
pub fn highlight_config(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
self.highlight_config
.get_or_init(|| self.initialize_highlight(scopes))
.clone()
}
pub fn is_highlight_initialized(&self) -> bool {
self.highlight_config.get().is_some()
}
pub fn indent_query(&self) -> Option<&Query> {
self.indent_query
.get_or_init(|| self.load_query("indents.scm"))
.as_ref()
}
pub fn textobject_query(&self) -> Option<&TextObjectQuery> {
self.textobject_query
.get_or_init(|| {
self.load_query("textobjects.scm")
.map(|query| TextObjectQuery { query })
})
.as_ref()
}
pub fn scope(&self) -> &str {
&self.scope
}
fn load_query(&self, kind: &str) -> Option<Query> {
let query_text = read_query(&self.language_id, kind);
if query_text.is_empty() {
return None;
}
let lang = &self.highlight_config.get()?.as_ref()?.language;
Query::new(lang, &query_text)
.map_err(|e| {
log::error!(
"Failed to parse {} queries for {}: {}",
kind,
self.language_id,
e
)
})
.ok()
}
}
#[derive(Debug, PartialEq, Eq, Hash)]
pub enum FileType {
/// The extension of the file, either the `Path::extension` or the full
/// filename if the file does not have an extension.
Extension(String),
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
/// A Unix-style path glob. This is compared to the file's absolute path, so
/// it can be used to detect files based on their directories. If the glob
/// is not an absolute path and does not already start with a glob pattern,
/// a glob pattern will be prepended to it.
Glob(globset::Glob),
}
impl Serialize for FileType {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
use serde::ser::SerializeMap;
match self {
FileType::Extension(extension) => serializer.serialize_str(extension),
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
FileType::Glob(glob) => {
let mut map = serializer.serialize_map(Some(1))?;
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
map.serialize_entry("glob", glob.glob())?;
map.end()
}
}
}
}
impl<'de> Deserialize<'de> for FileType {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::de::Deserializer<'de>,
{
struct FileTypeVisitor;
impl<'de> serde::de::Visitor<'de> for FileTypeVisitor {
type Value = FileType;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("string or table")
}
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(FileType::Extension(value.to_string()))
}
fn visit_map<M>(self, mut map: M) -> Result<Self::Value, M::Error>
where
M: serde::de::MapAccess<'de>,
{
match map.next_entry::<String, String>()? {
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
Some((key, mut glob)) if key == "glob" => {
// If the glob isn't an absolute path or already starts
// with a glob pattern, add a leading glob so we
// properly match relative paths.
if !glob.starts_with('/') && !glob.starts_with("*/") {
glob.insert_str(0, "*/");
}
globset::Glob::new(glob.as_str())
.map(FileType::Glob)
.map_err(|err| {
serde::de::Error::custom(format!("invalid `glob` pattern: {}", err))
})
}
Some((key, _value)) => Err(serde::de::Error::custom(format!(
"unknown key in `file-types` list: {}",
key
))),
None => Err(serde::de::Error::custom(
"expected a `suffix` key in the `file-types` entry",
)),
}
}
}
deserializer.deserialize_any(FileTypeVisitor)
}
}
2024-02-27 21:36:25 +08:00
/// Deserializes optional comment tokens given either as a single string or
/// as a list of strings, normalizing both forms to a `Vec<String>`.
fn from_comment_tokens<'de, D>(deserializer: D) -> Result<Option<Vec<String>>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    #[derive(Deserialize)]
    #[serde(untagged)]
    enum CommentTokens {
        Multiple(Vec<String>),
        Single(String),
    }

    let parsed = Option::<CommentTokens>::deserialize(deserializer)?;
    Ok(match parsed {
        Some(CommentTokens::Multiple(tokens)) => Some(tokens),
        Some(CommentTokens::Single(token)) => Some(vec![token]),
        None => None,
    })
}
/// A pair of delimiters that open and close a block comment.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BlockCommentToken {
/// Text that opens the block comment (`/*` in the `Default` impl).
pub start: String,
/// Text that closes the block comment (`*/` in the `Default` impl).
pub end: String,
}
impl Default for BlockCommentToken {
fn default() -> Self {
BlockCommentToken {
start: "/*".to_string(),
end: "*/".to_string(),
}
}
}
/// Deserializes optional block comment tokens given either as a single
/// start/end pair or as a list of pairs, normalizing both forms to a
/// `Vec<BlockCommentToken>`.
fn from_block_comment_tokens<'de, D>(
    deserializer: D,
) -> Result<Option<Vec<BlockCommentToken>>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    #[derive(Deserialize)]
    #[serde(untagged)]
    enum BlockCommentTokens {
        Multiple(Vec<BlockCommentToken>),
        Single(BlockCommentToken),
    }

    let parsed = Option::<BlockCommentTokens>::deserialize(deserializer)?;
    Ok(match parsed {
        Some(BlockCommentTokens::Multiple(pairs)) => Some(pairs),
        Some(BlockCommentTokens::Single(pair)) => Some(vec![pair]),
        None => None,
    })
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)]
Adds support for multiple language servers per language. Language Servers are now configured in a separate table in `languages.toml`: ```toml [langauge-server.mylang-lsp] command = "mylang-lsp" args = ["--stdio"] config = { provideFormatter = true } [language-server.efm-lsp-prettier] command = "efm-langserver" [language-server.efm-lsp-prettier.config] documentFormatting = true languages = { typescript = [ { formatCommand ="prettier --stdin-filepath ${INPUT}", formatStdin = true } ] } ``` The language server for a language is configured like this (`typescript-language-server` is configured by default): ```toml [[language]] name = "typescript" language-servers = [ { name = "efm-lsp-prettier", only-features = [ "format" ] }, "typescript-language-server" ] ``` or equivalent: ```toml [[language]] name = "typescript" language-servers = [ { name = "typescript-language-server", except-features = [ "format" ] }, "efm-lsp-prettier" ] ``` Each requested LSP feature is priorized in the order of the `language-servers` array. For example the first `goto-definition` supported language server (in this case `typescript-language-server`) will be taken for the relevant LSP request (command `goto_definition`). If no `except-features` or `only-features` is given all features for the language server are enabled, as long as the language server supports these. If it doesn't the next language server which supports the feature is tried. The list of supported features are: - `format` - `goto-definition` - `goto-declaration` - `goto-type-definition` - `goto-reference` - `goto-implementation` - `signature-help` - `hover` - `document-highlight` - `completion` - `code-action` - `workspace-command` - `document-symbols` - `workspace-symbols` - `diagnostics` - `rename-symbol` - `inlay-hints` Another side-effect/difference that comes with this PR, is that only one language server instance is started if different languages use the same language server.
2022-05-24 00:10:48 +08:00
#[serde(rename_all = "kebab-case")]
pub enum LanguageServerFeature {
Format,
GotoDeclaration,
GotoDefinition,
GotoTypeDefinition,
GotoReference,
GotoImplementation,
// Goto, use bitflags, combining previous Goto members?
SignatureHelp,
Hover,
DocumentHighlight,
Completion,
CodeAction,
WorkspaceCommand,
DocumentSymbols,
WorkspaceSymbols,
// Symbols, use bitflags, see above?
Diagnostics,
RenameSymbol,
InlayHints,
}
impl Display for LanguageServerFeature {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use LanguageServerFeature::*;
let feature = match self {
Format => "format",
GotoDeclaration => "goto-declaration",
GotoDefinition => "goto-definition",
GotoTypeDefinition => "goto-type-definition",
GotoReference => "goto-reference",
GotoImplementation => "goto-implementation",
SignatureHelp => "signature-help",
Hover => "hover",
DocumentHighlight => "document-highlight",
Completion => "completion",
CodeAction => "code-action",
WorkspaceCommand => "workspace-command",
DocumentSymbols => "document-symbols",
WorkspaceSymbols => "workspace-symbols",
Diagnostics => "diagnostics",
RenameSymbol => "rename-symbol",
InlayHints => "inlay-hints",
};
write!(f, "{feature}",)
Adds support for multiple language servers per language. Language Servers are now configured in a separate table in `languages.toml`: ```toml [langauge-server.mylang-lsp] command = "mylang-lsp" args = ["--stdio"] config = { provideFormatter = true } [language-server.efm-lsp-prettier] command = "efm-langserver" [language-server.efm-lsp-prettier.config] documentFormatting = true languages = { typescript = [ { formatCommand ="prettier --stdin-filepath ${INPUT}", formatStdin = true } ] } ``` The language server for a language is configured like this (`typescript-language-server` is configured by default): ```toml [[language]] name = "typescript" language-servers = [ { name = "efm-lsp-prettier", only-features = [ "format" ] }, "typescript-language-server" ] ``` or equivalent: ```toml [[language]] name = "typescript" language-servers = [ { name = "typescript-language-server", except-features = [ "format" ] }, "efm-lsp-prettier" ] ``` Each requested LSP feature is priorized in the order of the `language-servers` array. For example the first `goto-definition` supported language server (in this case `typescript-language-server`) will be taken for the relevant LSP request (command `goto_definition`). If no `except-features` or `only-features` is given all features for the language server are enabled, as long as the language server supports these. If it doesn't the next language server which supports the feature is tried. The list of supported features are: - `format` - `goto-definition` - `goto-declaration` - `goto-type-definition` - `goto-reference` - `goto-implementation` - `signature-help` - `hover` - `document-highlight` - `completion` - `code-action` - `workspace-command` - `document-symbols` - `workspace-symbols` - `diagnostics` - `rename-symbol` - `inlay-hints` Another side-effect/difference that comes with this PR, is that only one language server instance is started if different languages use the same language server.
2022-05-24 00:10:48 +08:00
}
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
enum LanguageServerFeatureConfiguration {
Adds support for multiple language servers per language. Language Servers are now configured in a separate table in `languages.toml`: ```toml [langauge-server.mylang-lsp] command = "mylang-lsp" args = ["--stdio"] config = { provideFormatter = true } [language-server.efm-lsp-prettier] command = "efm-langserver" [language-server.efm-lsp-prettier.config] documentFormatting = true languages = { typescript = [ { formatCommand ="prettier --stdin-filepath ${INPUT}", formatStdin = true } ] } ``` The language server for a language is configured like this (`typescript-language-server` is configured by default): ```toml [[language]] name = "typescript" language-servers = [ { name = "efm-lsp-prettier", only-features = [ "format" ] }, "typescript-language-server" ] ``` or equivalent: ```toml [[language]] name = "typescript" language-servers = [ { name = "typescript-language-server", except-features = [ "format" ] }, "efm-lsp-prettier" ] ``` Each requested LSP feature is priorized in the order of the `language-servers` array. For example the first `goto-definition` supported language server (in this case `typescript-language-server`) will be taken for the relevant LSP request (command `goto_definition`). If no `except-features` or `only-features` is given all features for the language server are enabled, as long as the language server supports these. If it doesn't the next language server which supports the feature is tried. The list of supported features are: - `format` - `goto-definition` - `goto-declaration` - `goto-type-definition` - `goto-reference` - `goto-implementation` - `signature-help` - `hover` - `document-highlight` - `completion` - `code-action` - `workspace-command` - `document-symbols` - `workspace-symbols` - `diagnostics` - `rename-symbol` - `inlay-hints` Another side-effect/difference that comes with this PR, is that only one language server instance is started if different languages use the same language server.
2022-05-24 00:10:48 +08:00
#[serde(rename_all = "kebab-case")]
Features {
#[serde(default, skip_serializing_if = "HashSet::is_empty")]
only_features: HashSet<LanguageServerFeature>,
#[serde(default, skip_serializing_if = "HashSet::is_empty")]
except_features: HashSet<LanguageServerFeature>,
Adds support for multiple language servers per language. Language Servers are now configured in a separate table in `languages.toml`: ```toml [langauge-server.mylang-lsp] command = "mylang-lsp" args = ["--stdio"] config = { provideFormatter = true } [language-server.efm-lsp-prettier] command = "efm-langserver" [language-server.efm-lsp-prettier.config] documentFormatting = true languages = { typescript = [ { formatCommand ="prettier --stdin-filepath ${INPUT}", formatStdin = true } ] } ``` The language server for a language is configured like this (`typescript-language-server` is configured by default): ```toml [[language]] name = "typescript" language-servers = [ { name = "efm-lsp-prettier", only-features = [ "format" ] }, "typescript-language-server" ] ``` or equivalent: ```toml [[language]] name = "typescript" language-servers = [ { name = "typescript-language-server", except-features = [ "format" ] }, "efm-lsp-prettier" ] ``` Each requested LSP feature is priorized in the order of the `language-servers` array. For example the first `goto-definition` supported language server (in this case `typescript-language-server`) will be taken for the relevant LSP request (command `goto_definition`). If no `except-features` or `only-features` is given all features for the language server are enabled, as long as the language server supports these. If it doesn't the next language server which supports the feature is tried. The list of supported features are: - `format` - `goto-definition` - `goto-declaration` - `goto-type-definition` - `goto-reference` - `goto-implementation` - `signature-help` - `hover` - `document-highlight` - `completion` - `code-action` - `workspace-command` - `document-symbols` - `workspace-symbols` - `diagnostics` - `rename-symbol` - `inlay-hints` Another side-effect/difference that comes with this PR, is that only one language server instance is started if different languages use the same language server.
2022-05-24 00:10:48 +08:00
name: String,
},
Simple(String),
}
/// Resolved feature selection for one language server of a language.
///
/// Built by `deserialize_lang_features` from the raw config entries and
/// queried through [`LanguageServerFeatures::has_feature`].
#[derive(Debug, Default)]
pub struct LanguageServerFeatures {
    /// Name of the language server.
    pub name: String,
    /// Allow-list of features; an empty set places no restriction.
    pub only: HashSet<LanguageServerFeature>,
    /// Deny-list of features; a feature listed here is never enabled.
    pub excluded: HashSet<LanguageServerFeature>,
}
impl LanguageServerFeatures {
    /// Reports whether `feature` is enabled for this server.
    ///
    /// A feature is enabled when it is not in `excluded` and either `only`
    /// is empty or `only` contains it.
    pub fn has_feature(&self, feature: LanguageServerFeature) -> bool {
        // The deny-list always wins, whatever the allow-list says.
        if self.excluded.contains(&feature) {
            return false;
        }
        self.only.is_empty() || self.only.contains(&feature)
    }
}
fn deserialize_lang_features<'de, D>(
deserializer: D,
) -> Result<Vec<LanguageServerFeatures>, D::Error>
where
D: serde::Deserializer<'de>,
{
let raw: Vec<LanguageServerFeatureConfiguration> = Deserialize::deserialize(deserializer)?;
let res = raw
.into_iter()
.map(|config| match config {
LanguageServerFeatureConfiguration::Simple(name) => LanguageServerFeatures {
name,
..Default::default()
},
LanguageServerFeatureConfiguration::Features {
only_features,
except_features,
name,
} => LanguageServerFeatures {
name,
only: only_features,
excluded: except_features,
},
})
.collect();
Ok(res)
}
/// Serializes a list of [`LanguageServerFeatures`] as a sequence.
///
/// An entry with no feature filters is written as a bare server name; any
/// other entry is written in the table form with its filter sets.
fn serialize_lang_features<S>(
    map: &Vec<LanguageServerFeatures>,
    serializer: S,
) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    let mut seq = serializer.serialize_seq(Some(map.len()))?;
    for entry in map {
        let element = match (entry.only.is_empty(), entry.excluded.is_empty()) {
            // No filters at all: use the compact string form.
            (true, true) => LanguageServerFeatureConfiguration::Simple(entry.name.clone()),
            _ => LanguageServerFeatureConfiguration::Features {
                only_features: entry.only.clone(),
                except_features: entry.excluded.clone(),
                name: entry.name.clone(),
            },
        };
        seq.serialize_element(&element)?;
    }
    seq.end()
}
fn deserialize_required_root_patterns<'de, D>(deserializer: D) -> Result<Option<GlobSet>, D::Error>
where
D: serde::Deserializer<'de>,
{
let patterns = Vec::<String>::deserialize(deserializer)?;
if patterns.is_empty() {
return Ok(None);
}
let mut builder = globset::GlobSetBuilder::new();
for pattern in patterns {
let glob = globset::Glob::new(&pattern).map_err(serde::de::Error::custom)?;
builder.add(glob);
}
builder.build().map(Some).map_err(serde::de::Error::custom)
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct LanguageServerConfiguration {
pub command: String,
#[serde(default)]
#[serde(skip_serializing_if = "Vec::is_empty")]
pub args: Vec<String>,
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub environment: HashMap<String, String>,
Adds support for multiple language servers per language. Language Servers are now configured in a separate table in `languages.toml`: ```toml [langauge-server.mylang-lsp] command = "mylang-lsp" args = ["--stdio"] config = { provideFormatter = true } [language-server.efm-lsp-prettier] command = "efm-langserver" [language-server.efm-lsp-prettier.config] documentFormatting = true languages = { typescript = [ { formatCommand ="prettier --stdin-filepath ${INPUT}", formatStdin = true } ] } ``` The language server for a language is configured like this (`typescript-language-server` is configured by default): ```toml [[language]] name = "typescript" language-servers = [ { name = "efm-lsp-prettier", only-features = [ "format" ] }, "typescript-language-server" ] ``` or equivalent: ```toml [[language]] name = "typescript" language-servers = [ { name = "typescript-language-server", except-features = [ "format" ] }, "efm-lsp-prettier" ] ``` Each requested LSP feature is priorized in the order of the `language-servers` array. For example the first `goto-definition` supported language server (in this case `typescript-language-server`) will be taken for the relevant LSP request (command `goto_definition`). If no `except-features` or `only-features` is given all features for the language server are enabled, as long as the language server supports these. If it doesn't the next language server which supports the feature is tried. The list of supported features are: - `format` - `goto-definition` - `goto-declaration` - `goto-type-definition` - `goto-reference` - `goto-implementation` - `signature-help` - `hover` - `document-highlight` - `completion` - `code-action` - `workspace-command` - `document-symbols` - `workspace-symbols` - `diagnostics` - `rename-symbol` - `inlay-hints` Another side-effect/difference that comes with this PR, is that only one language server instance is started if different languages use the same language server.
2022-05-24 00:10:48 +08:00
#[serde(default, skip_serializing, deserialize_with = "deserialize_lsp_config")]
pub config: Option<serde_json::Value>,
#[serde(default = "default_timeout")]
pub timeout: u64,
#[serde(
default,
skip_serializing,
deserialize_with = "deserialize_required_root_patterns"
)]
pub required_root_patterns: Option<GlobSet>,
}
/// Configuration of a formatter: a command plus its arguments.
///
/// Field names are spelled in kebab-case in the config file.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct FormatterConfiguration {
    /// The `command` key (required).
    pub command: String,
    /// The `args` key; defaults to empty and is omitted from serialized
    /// output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub args: Vec<String>,
}
#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
2021-08-29 19:51:47 +08:00
#[serde(rename_all = "kebab-case")]
pub struct AdvancedCompletion {
pub name: Option<String>,
pub completion: Option<String>,
pub default: Option<String>,
}
#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
2021-08-29 19:51:47 +08:00
#[serde(rename_all = "kebab-case", untagged)]
pub enum DebugConfigCompletion {
Named(String),
Advanced(AdvancedCompletion),
}
#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
2021-10-24 22:24:18 +08:00
#[serde(untagged)]
pub enum DebugArgumentValue {
String(String),
Array(Vec<String>),
Boolean(bool),
2021-10-24 22:24:18 +08:00
}
#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
2021-08-29 19:51:47 +08:00
#[serde(rename_all = "kebab-case")]
pub struct DebugTemplate {
pub name: String,
pub request: String,
#[serde(default)]
2021-08-29 19:51:47 +08:00
pub completion: Vec<DebugConfigCompletion>,
2021-10-24 22:24:18 +08:00
pub args: HashMap<String, DebugArgumentValue>,
2021-08-29 19:51:47 +08:00
}
#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
2021-08-29 19:51:47 +08:00
#[serde(rename_all = "kebab-case")]
pub struct DebugAdapterConfig {
pub name: String,
pub transport: String,
2021-09-27 02:36:06 +08:00
#[serde(default)]
2021-08-29 19:51:47 +08:00
pub command: String,
2021-09-06 18:49:31 +08:00
#[serde(default)]
2021-08-29 19:51:47 +08:00
pub args: Vec<String>,
pub port_arg: Option<String>,
pub templates: Vec<DebugTemplate>,
2021-09-27 02:36:06 +08:00
#[serde(default)]
pub quirks: DebuggerQuirks,
2021-08-29 19:51:47 +08:00
}
2021-10-17 12:58:11 +08:00
// Different workarounds for adapters' differences
#[derive(Debug, Default, PartialEq, Eq, Clone, Serialize, Deserialize)]
2021-10-17 12:58:11 +08:00
pub struct DebuggerQuirks {
#[serde(default)]
pub absolute_paths: bool,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
2021-03-22 12:47:39 +08:00
pub struct IndentationConfiguration {
#[serde(deserialize_with = "deserialize_tab_width")]
2021-03-22 12:47:39 +08:00
pub tab_width: usize,
pub unit: String,
2021-03-22 12:47:39 +08:00
}
2023-09-19 21:31:38 +08:00
/// How the indentation for a newly inserted line should be determined.
/// If the selected heuristic is not available (e.g. because the current
/// language has no tree-sitter indent queries), a simpler one will be used.
///
/// Variants are spelled in kebab-case when (de)serialized: `simple`,
/// `tree-sitter` and `hybrid`. The default is `hybrid`.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum IndentationHeuristic {
    /// Just copy the indentation of the line that the cursor is currently on.
    Simple,
    /// Use tree-sitter indent queries to compute the expected absolute indentation level of the new line.
    TreeSitter,
    /// Use tree-sitter indent queries to compute the expected difference in indentation between the new line
    /// and the line before. Add this to the actual indentation level of the line before.
    #[default]
    Hybrid,
}
/// Configuration for auto pairs.
///
/// The enum is `untagged`: a boolean deserializes to `Enable`, a table of
/// single-character keys and values deserializes to `Pairs`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", deny_unknown_fields, untagged)]
pub enum AutoPairConfig {
    /// Enables or disables auto pairing. False means disabled. True means to use the default pairs.
    Enable(bool),
    /// The mappings of pairs.
    Pairs(HashMap<char, char>),
}
impl Default for AutoPairConfig {
fn default() -> Self {
AutoPairConfig::Enable(true)
}
}
impl From<&AutoPairConfig> for Option<AutoPairs> {
    /// Converts the configuration into the pairs to use, if any.
    ///
    /// `Enable(false)` yields `None`, `Enable(true)` yields the default
    /// pairs, and `Pairs` yields pairs built from the configured mapping.
    fn from(auto_pair_config: &AutoPairConfig) -> Self {
        match auto_pair_config {
            AutoPairConfig::Pairs(mapping) => Some(AutoPairs::new(mapping.iter())),
            AutoPairConfig::Enable(enabled) => (*enabled).then(AutoPairs::default),
        }
    }
}
impl From<AutoPairConfig> for Option<AutoPairs> {
    /// Owned variant of the conversion; delegates to the `&AutoPairConfig` impl.
    fn from(auto_pairs_config: AutoPairConfig) -> Self {
        Option::<AutoPairs>::from(&auto_pairs_config)
    }
}
impl FromStr for AutoPairConfig {
    type Err = std::str::ParseBoolError;

    /// Parses `s` as a `bool` and wraps it in [`AutoPairConfig::Enable`].
    ///
    /// Only boolean parsing is supported here; this is meant for the
    /// runtime setting.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        s.parse::<bool>().map(AutoPairConfig::Enable)
    }
}
Softwrapping improvements (#5893) * use max_line_width + 1 during softwrap to account for newline char Helix softwrap implementation always wraps lines so that the newline character doesn't get cut off so he line wraps one chars earlier then in other editors. This is necessary, because newline chars are always selecatble in helix and must never be hidden. However That means that `max_line_width` currently wraps one char earlier than expected. The typical definition of line width does not include the newline character and other helix commands like `:reflow` also don't count the newline character here. This commit makes softwrap use `max_line_width + 1` instead of `max_line_width` to correct the impedance missmatch. * fix typos Co-authored-by: Jonathan Lebon <jonathan@jlebon.com> * Add text-width to config.toml * text-width: update setting documentation * rename leftover config item * remove leftover max-line-length occurrences * Make `text-width` optional in editor config When it was only used for `:reflow` it made sense to have a default value set to `80`, but now that soft-wrapping uses this setting, keeping a default set to `80` would make soft-wrapping behave more aggressively. * Allow softwrapping to ignore `text-width` Softwrapping wraps by default to the viewport width or a configured `text-width` (whichever's smaller). In some cases we only want to set `text-width` to use for hard-wrapping and let longer lines flow if they have enough space. This setting allows that. * Revert "Make `text-width` optional in editor config" This reverts commit b247d526d69adf41434b6fd9c4983369c785aa22. 
* soft-wrap: allow per-language overrides * Update book/src/configuration.md Co-authored-by: Pascal Kuthe <pascal.kuthe@semimod.de> * Update book/src/languages.md Co-authored-by: Pascal Kuthe <pascal.kuthe@semimod.de> * Update book/src/configuration.md Co-authored-by: Pascal Kuthe <pascal.kuthe@semimod.de> --------- Co-authored-by: Pascal Kuthe <pascal.kuthe@semimod.de> Co-authored-by: Jonathan Lebon <jonathan@jlebon.com> Co-authored-by: Alex Boehm <alexb@ozrunways.com> Co-authored-by: Blaž Hrastnik <blaz@mxxn.io>
2023-03-08 10:02:11 +08:00
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default, rename_all = "kebab-case", deny_unknown_fields)]
pub struct SoftWrap {
/// Soft wrap lines that exceed viewport width. Default to off
// NOTE: Option on purpose because the struct is shared between language config and global config.
// By default the option is None so that the language config falls back to the global config unless explicitly set.
pub enable: Option<bool>,
Softwrapping improvements (#5893) * use max_line_width + 1 during softwrap to account for newline char Helix softwrap implementation always wraps lines so that the newline character doesn't get cut off so he line wraps one chars earlier then in other editors. This is necessary, because newline chars are always selecatble in helix and must never be hidden. However That means that `max_line_width` currently wraps one char earlier than expected. The typical definition of line width does not include the newline character and other helix commands like `:reflow` also don't count the newline character here. This commit makes softwrap use `max_line_width + 1` instead of `max_line_width` to correct the impedance missmatch. * fix typos Co-authored-by: Jonathan Lebon <jonathan@jlebon.com> * Add text-width to config.toml * text-width: update setting documentation * rename leftover config item * remove leftover max-line-length occurrences * Make `text-width` optional in editor config When it was only used for `:reflow` it made sense to have a default value set to `80`, but now that soft-wrapping uses this setting, keeping a default set to `80` would make soft-wrapping behave more aggressively. * Allow softwrapping to ignore `text-width` Softwrapping wraps by default to the viewport width or a configured `text-width` (whichever's smaller). In some cases we only want to set `text-width` to use for hard-wrapping and let longer lines flow if they have enough space. This setting allows that. * Revert "Make `text-width` optional in editor config" This reverts commit b247d526d69adf41434b6fd9c4983369c785aa22. 
* soft-wrap: allow per-language overrides * Update book/src/configuration.md Co-authored-by: Pascal Kuthe <pascal.kuthe@semimod.de> * Update book/src/languages.md Co-authored-by: Pascal Kuthe <pascal.kuthe@semimod.de> * Update book/src/configuration.md Co-authored-by: Pascal Kuthe <pascal.kuthe@semimod.de> --------- Co-authored-by: Pascal Kuthe <pascal.kuthe@semimod.de> Co-authored-by: Jonathan Lebon <jonathan@jlebon.com> Co-authored-by: Alex Boehm <alexb@ozrunways.com> Co-authored-by: Blaž Hrastnik <blaz@mxxn.io>
2023-03-08 10:02:11 +08:00
/// Maximum space left free at the end of the line.
/// This space is used to wrap text at word boundaries. If that is not possible within this limit
/// the word is simply split at the end of the line.
///
/// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views.
///
/// Default to 20
pub max_wrap: Option<u16>,
/// Maximum number of indentation that can be carried over from the previous line when softwrapping.
/// If a line is indented further then this limit it is rendered at the start of the viewport instead.
///
/// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views.
///
/// Default to 40
pub max_indent_retain: Option<u16>,
/// Indicator placed at the beginning of softwrapped lines
///
/// Defaults to ↪
pub wrap_indicator: Option<String>,
/// Softwrap at `text_width` instead of viewport width if it is shorter
pub wrap_at_text_width: Option<bool>,
}
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
/// A file-type glob paired with the id of the language it belongs to.
#[derive(Debug)]
struct FileTypeGlob {
    /// The glob pattern.
    glob: globset::Glob,
    /// Id of the language associated with `glob`.
    language_id: usize,
}
impl FileTypeGlob {
fn new(glob: globset::Glob, language_id: usize) -> Self {
Self { glob, language_id }
}
}
/// Matches a path against all file-type globs at once.
#[derive(Debug)]
struct FileTypeGlobMatcher {
    /// Set compiled from the globs of `file_types`, in the same order, so a
    /// match index from the set is also an index into `file_types`.
    matcher: globset::GlobSet,
    /// The globs and their language ids.
    file_types: Vec<FileTypeGlob>,
}
impl FileTypeGlobMatcher {
    /// Compiles the globs of `file_types` into a single set.
    ///
    /// Globs are added in the order they appear in `file_types`. Returns the
    /// `globset` error if the set cannot be built.
    fn new(file_types: Vec<FileTypeGlob>) -> Result<Self, globset::Error> {
        let mut set_builder = globset::GlobSetBuilder::new();
        file_types.iter().for_each(|entry| {
            set_builder.add(entry.glob.clone());
        });
        let matcher = set_builder.build()?;
        Ok(Self {
            matcher,
            file_types,
        })
    }

    /// Returns the language id for `path`, or `None` if no glob matches.
    ///
    /// Among all matching globs, the one with the longest pattern string is
    /// chosen.
    fn language_id_for_path(&self, path: &Path) -> Option<&usize> {
        let best_match = self
            .matcher
            .matches(path)
            .into_iter()
            .filter_map(|index| self.file_types.get(index))
            .max_by_key(|candidate| candidate.glob.glob().len())?;
        Some(&best_match.language_id)
    }
}
// Expose loader as Lazy<> global since it's always static?
#[derive(Debug)]
pub struct Loader {
// highlight_names ?
language_configs: Vec<Arc<LanguageConfiguration>>,
language_config_ids_by_extension: HashMap<String, usize>, // Vec<usize>
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
language_config_ids_glob_matcher: FileTypeGlobMatcher,
language_config_ids_by_shebang: HashMap<String, usize>,
Adds support for multiple language servers per language. Language Servers are now configured in a separate table in `languages.toml`: ```toml [langauge-server.mylang-lsp] command = "mylang-lsp" args = ["--stdio"] config = { provideFormatter = true } [language-server.efm-lsp-prettier] command = "efm-langserver" [language-server.efm-lsp-prettier.config] documentFormatting = true languages = { typescript = [ { formatCommand ="prettier --stdin-filepath ${INPUT}", formatStdin = true } ] } ``` The language server for a language is configured like this (`typescript-language-server` is configured by default): ```toml [[language]] name = "typescript" language-servers = [ { name = "efm-lsp-prettier", only-features = [ "format" ] }, "typescript-language-server" ] ``` or equivalent: ```toml [[language]] name = "typescript" language-servers = [ { name = "typescript-language-server", except-features = [ "format" ] }, "efm-lsp-prettier" ] ``` Each requested LSP feature is priorized in the order of the `language-servers` array. For example the first `goto-definition` supported language server (in this case `typescript-language-server`) will be taken for the relevant LSP request (command `goto_definition`). If no `except-features` or `only-features` is given all features for the language server are enabled, as long as the language server supports these. If it doesn't the next language server which supports the feature is tried. The list of supported features are: - `format` - `goto-definition` - `goto-declaration` - `goto-type-definition` - `goto-reference` - `goto-implementation` - `signature-help` - `hover` - `document-highlight` - `completion` - `code-action` - `workspace-command` - `document-symbols` - `workspace-symbols` - `diagnostics` - `rename-symbol` - `inlay-hints` Another side-effect/difference that comes with this PR, is that only one language server instance is started if different languages use the same language server.
2022-05-24 00:10:48 +08:00
language_server_configs: HashMap<String, LanguageServerConfiguration>,
scopes: ArcSwap<Vec<String>>,
}
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
/// Error type returned by `Loader::new`; an alias for [`globset::Error`].
pub type LoaderError = globset::Error;
impl Loader {
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
pub fn new(config: Configuration) -> Result<Self, LoaderError> {
let mut language_configs = Vec::new();
let mut language_config_ids_by_extension = HashMap::new();
let mut language_config_ids_by_shebang = HashMap::new();
let mut file_type_globs = Vec::new();
for config in config.language {
// get the next id
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
let language_id = language_configs.len();
for file_type in &config.file_types {
// entry().or_insert(Vec::new).push(language_id);
match file_type {
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
FileType::Extension(extension) => {
language_config_ids_by_extension.insert(extension.clone(), language_id);
}
FileType::Glob(glob) => {
file_type_globs.push(FileTypeGlob::new(glob.to_owned(), language_id));
}
};
}
for shebang in &config.shebangs {
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
language_config_ids_by_shebang.insert(shebang.clone(), language_id);
}
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
language_configs.push(Arc::new(config));
}
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
Ok(Self {
language_configs,
language_config_ids_by_extension,
language_config_ids_glob_matcher: FileTypeGlobMatcher::new(file_type_globs)?,
language_config_ids_by_shebang,
language_server_configs: config.language_server,
scopes: ArcSwap::from_pointee(Vec::new()),
})
}
pub fn language_config_for_file_name(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> {
// Find all the language configurations that match this file name
// or a suffix of the file name.
Add glob file type support (#8006) * Replace FileType::Suffix with FileType::Glob Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality. Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory. The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last. * Convert file-types suffixes to globs * Use globs for filename matching Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax. * Match dockerfiles with suffixes It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc. * Make env filetype matching more generic Match on `.env` or any `.env.*` files. * Update docs * Use GlobSet to match all file type globs at once * Update todo.txt glob patterns * Consolidate language Configuration and Loader creation This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration. * Fix integration tests * Add additional starlark file-type glob --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-02-12 01:24:20 +08:00
let configuration_id = self
.language_config_ids_glob_matcher
.language_id_for_path(path)
.or_else(|| {
path.extension()
.and_then(|extension| extension.to_str())
.and_then(|extension| self.language_config_ids_by_extension.get(extension))
});
configuration_id.and_then(|&id| self.language_configs.get(id).cloned())
// TODO: content_regex handling conflict resolution
}
/// Determines the language configuration from a shebang on the first line
/// of `source`.
///
/// The first line is matched against a `#!` pattern that skips an optional
/// interpreter path and an optional `env` invocation (with its flags). The
/// captured name stops at whitespace, `.` or a digit, so `python3` yields
/// `python`. That name is then looked up in the shebang table.
///
/// Returns `None` when the line carries no such shebang or when the captured
/// name is not registered for any language.
pub fn language_config_for_shebang(
    &self,
    source: RopeSlice,
) -> Option<Arc<LanguageConfiguration>> {
    // TODO: reuse detection from helix-syntax
    const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
    // Compiled once on first use and anchored to the start of the line.
    static SHEBANG_REGEX: Lazy<Regex> =
        Lazy::new(|| Regex::new(&["^", SHEBANG].concat()).unwrap());

    let first_line = Cow::from(source.line(0));
    let captures = SHEBANG_REGEX.captures(&first_line)?;
    let interpreter = &captures[1];
    let id = *self.language_config_ids_by_shebang.get(interpreter)?;
    self.language_configs.get(id).cloned()
}
/// Returns the first language configuration whose `scope` equals `scope`
/// exactly, or `None` when no configuration declares that scope.
pub fn language_config_for_scope(&self, scope: &str) -> Option<Arc<LanguageConfiguration>> {
    let matching = self
        .language_configs
        .iter()
        .find(|candidate| candidate.scope == scope)?;
    Some(Arc::clone(matching))
}
2021-06-19 19:26:52 +08:00
/// Returns the first language configuration whose `language_id` equals `id`
/// exactly, or `None` when no configuration has that id.
pub fn language_config_for_language_id(&self, id: &str) -> Option<Arc<LanguageConfiguration>> {
    let matching = self
        .language_configs
        .iter()
        .find(|candidate| candidate.language_id == id)?;
    Some(Arc::clone(matching))
}
/// Finds the language whose injection regex best matches `name`.
///
/// Unlike `language_config_for_language_id`, which requires an exact id,
/// this runs every configuration's `injection_regex` (when it has one)
/// against `name` and keeps the configuration with the longest match. When
/// several configurations tie, the earliest one in the list wins. Empty
/// matches never count, so `None` is returned if nothing matches at least
/// one byte.
pub fn language_config_for_name(&self, name: &str) -> Option<Arc<LanguageConfiguration>> {
    // (match length, configuration) of the best candidate seen so far.
    let mut best: Option<(usize, &Arc<LanguageConfiguration>)> = None;

    for candidate in &self.language_configs {
        let found = candidate
            .injection_regex
            .as_ref()
            .and_then(|regex| regex.find(name));

        if let Some(found) = found {
            let length = found.end() - found.start();
            let longest_so_far = best.map_or(0, |(len, _)| len);
            // Strictly greater: keeps the first configuration on ties and
            // rejects zero-length matches.
            if length > longest_so_far {
                best = Some((length, candidate));
            }
        }
    }

    best.map(|(_, winner)| Arc::clone(winner))
}
pub fn language_configuration_for_injection_string(
&self,
capture: &InjectionLanguageMarker,
) -> Option<Arc<LanguageConfiguration>> {
match capture {
InjectionLanguageMarker::Name(string) => self.language_config_for_name(string),
InjectionLanguageMarker::Filename(file) => self.language_config_for_file_name(file),
InjectionLanguageMarker::Shebang(shebang) => {
self.language_config_for_language_id(shebang)
}
}
}
/// Returns an iterator over every language configuration held by this
/// loader, in the order they are stored.
pub fn language_configs(&self) -> impl Iterator<Item = &Arc<LanguageConfiguration>> {
self.language_configs.iter()
}
Adds support for multiple language servers per language. Language Servers are now configured in a separate table in `languages.toml`: ```toml [langauge-server.mylang-lsp] command = "mylang-lsp" args = ["--stdio"] config = { provideFormatter = true } [language-server.efm-lsp-prettier] command = "efm-langserver" [language-server.efm-lsp-prettier.config] documentFormatting = true languages = { typescript = [ { formatCommand ="prettier --stdin-filepath ${INPUT}", formatStdin = true } ] } ``` The language server for a language is configured like this (`typescript-language-server` is configured by default): ```toml [[language]] name = "typescript" language-servers = [ { name = "efm-lsp-prettier", only-features = [ "format" ] }, "typescript-language-server" ] ``` or equivalent: ```toml [[language]] name = "typescript" language-servers = [ { name = "typescript-language-server", except-features = [ "format" ] }, "efm-lsp-prettier" ] ``` Each requested LSP feature is priorized in the order of the `language-servers` array. For example the first `goto-definition` supported language server (in this case `typescript-language-server`) will be taken for the relevant LSP request (command `goto_definition`). If no `except-features` or `only-features` is given all features for the language server are enabled, as long as the language server supports these. If it doesn't the next language server which supports the feature is tried. The list of supported features are: - `format` - `goto-definition` - `goto-declaration` - `goto-type-definition` - `goto-reference` - `goto-implementation` - `signature-help` - `hover` - `document-highlight` - `completion` - `code-action` - `workspace-command` - `document-symbols` - `workspace-symbols` - `diagnostics` - `rename-symbol` - `inlay-hints` Another side-effect/difference that comes with this PR, is that only one language server instance is started if different languages use the same language server.
2022-05-24 00:10:48 +08:00
/// Returns a reference to the loader's map of language-server
/// configurations.
///
/// NOTE(review): keys are presumably the server names as written in the
/// `language-server` table of `languages.toml`; confirm against the config type.
pub fn language_server_configs(&self) -> &HashMap<String, LanguageServerConfiguration> {
&self.language_server_configs
}
/// Replaces the stored highlight scopes with `scopes` and reconfigures every
/// language whose highlight configuration has already been initialized.
///
/// Languages that have not been initialized yet are left untouched.
pub fn set_scopes(&self, scopes: Vec<String>) {
    self.scopes.store(Arc::new(scopes));

    // Reconfigure existing grammars
    for language in &self.language_configs {
        if language.is_highlight_initialized() {
            language.reconfigure(&self.scopes());
        }
    }
}
/// Loads the currently stored list of highlight scopes.
///
/// The returned guard dereferences to the `Arc<Vec<String>>` that was most
/// recently stored by `set_scopes`.
pub fn scopes(&self) -> Guard<Arc<Vec<String>>> {
    self.scopes.load()
}
}
pub fn generate_edits(old_text: RopeSlice, changeset: &ChangeSet) -> Vec<tree_sitter::InputEdit> {
use Operation::*;
let mut old_pos = 0;
let mut edits = Vec::new();
2020-09-17 13:57:49 +08:00
if changeset.changes.is_empty() {
return edits;
}
2020-09-17 13:57:49 +08:00
let mut iter = changeset.changes.iter().peekable();
2021-11-06 23:21:03 +08:00
// TODO; this is a lot easier with Change instead of Operation.
2020-09-17 13:57:49 +08:00
fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) {
let byte = text.char_to_byte(pos); // <- attempted to index past end
let line = text.char_to_line(pos);
let line_start_byte = text.line_to_byte(line);
let col = byte - line_start_byte;
2021-11-06 23:21:03 +08:00
(byte, Point::new(line, col))
2020-09-17 13:57:49 +08:00
}
fn traverse(point: Point, text: &Tendril) -> Point {
let Point {
mut row,
mut column,
} = point;
2021-11-06 23:21:03 +08:00
// TODO: there should be a better way here.
let mut chars = text.chars().peekable();
while let Some(ch) = chars.next() {
if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
row += 1;
column = 0;
} else {
column += 1;
2020-09-17 13:57:49 +08:00
}
}
2021-11-06 23:21:03 +08:00
Point { row, column }
}
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
while let Some(change) = iter.next() {
let len = match change {
Delete(i) | Retain(i) => *i,
Insert(_) => 0,
};
let mut old_end = old_pos + len;
match change {
Retain(_) => {}
Delete(_) => {
let (start_byte, start_position) = point_at_pos(old_text, old_pos);
let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end);
// deletion
edits.push(tree_sitter::InputEdit {
start_byte, // old_pos to byte
old_end_byte, // old_end to byte
new_end_byte: start_byte, // old_pos to byte
start_position, // old pos to coords
old_end_position, // old_end to coords
new_end_position: start_position, // old pos to coords
});
}
Insert(s) => {
let (start_byte, start_position) = point_at_pos(old_text, old_pos);
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
// a subsequent delete means a replace, consume it
if let Some(Delete(len)) = iter.peek() {
old_end = old_pos + len;
let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end);
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
iter.next();
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
// replacement
edits.push(tree_sitter::InputEdit {
2021-11-06 23:21:03 +08:00
start_byte, // old_pos to byte
old_end_byte, // old_end to byte
new_end_byte: start_byte + s.len(), // old_pos to byte + s.len()
start_position, // old pos to coords
old_end_position, // old_end to coords
new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over)
});
} else {
// insert
edits.push(tree_sitter::InputEdit {
start_byte, // old_pos to byte
old_end_byte: start_byte, // same
new_end_byte: start_byte + s.len(), // old_pos + s.len()
start_position, // old pos to coords
old_end_position: start_position, // same
new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over)
});
2020-09-17 13:57:49 +08:00
}
}
}
2021-11-06 23:21:03 +08:00
old_pos = old_end;
2020-09-17 13:57:49 +08:00
}
2021-11-06 23:21:03 +08:00
edits
2020-09-17 13:57:49 +08:00
}
#[cfg(test)]
mod test {
use tree_sitter::QueryCursor;
use super::*;
use crate::{Rope, Transaction};
/// Checks that a quantified text-object capture spans all consecutive
/// comment nodes rather than only the first one.
#[test]
fn test_textobject_queries() {
    let query_str = r#"
(line_comment)+ @quantified_nodes
((line_comment)+) @quantified_nodes_grouped
((line_comment) (line_comment)) @multiple_nodes_grouped
"#;
    // The fixture must contain exactly the two comment lines: the expected
    // byte range below depends on it.
    let source = Rope::from_str(
        r#"
/// a comment on
/// multiple lines
"#,
    );

    let language = get_language("rust").unwrap();

    let query = Query::new(&language, query_str).unwrap();
    let textobject = TextObjectQuery { query };
    let mut cursor = QueryCursor::new();

    let config = HighlightConfiguration::new(language, "", "", "").unwrap();
    let syntax = Syntax::new(source.slice(..), Arc::new(config), |_| None).unwrap();

    let root = syntax.tree().root_node();
    // Asserts that the first match for `capture` covers `range`.
    let mut test = |capture, range| {
        let matches: Vec<_> = textobject
            .capture_nodes(capture, root, source.slice(..), &mut cursor)
            .unwrap()
            .collect();

        assert_eq!(
            matches[0].byte_range(),
            range,
            "@{} expected {:?}",
            capture,
            range
        )
    };

    test("quantified_nodes", 1..37);
    // NOTE: Enable after implementing proper node group capturing
    // test("quantified_nodes_grouped", 1..37);
    // test("multiple_nodes_grouped", 1..37);
}
/// Parses a tiny Rust snippet with the real highlight and injection queries
/// and checks the shape of the resulting syntax tree.
#[test]
fn test_parser() {
    let highlight_names: Vec<String> = [
        "attribute",
        "constant",
        "function.builtin",
        "function",
        "keyword",
        "operator",
        "property",
        "punctuation",
        "punctuation.bracket",
        "punctuation.delimiter",
        "string",
        "string.special",
        "tag",
        "type",
        "type.builtin",
        "variable",
        "variable.builtin",
        "variable.parameter",
    ]
    .iter()
    .cloned()
    .map(String::from)
    .collect();

    let language = get_language("rust").unwrap();
    let config = HighlightConfiguration::new(
        language,
        &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm")
            .unwrap(),
        &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm")
            .unwrap(),
        "", // locals.scm
    )
    .unwrap();
    config.configure(&highlight_names);

    let source = Rope::from_str(
        "
struct Stuff {}
fn main() {}
",
    );

    let syntax = Syntax::new(source.slice(..), Arc::new(config), |_| None).unwrap();
    let tree = syntax.tree();
    let root = tree.root_node();
    assert_eq!(root.kind(), "source_file");

    assert_eq!(
        root.to_sexp(),
        concat!(
            "(source_file ",
            "(struct_item name: (type_identifier) body: (field_declaration_list)) ",
            "(function_item name: (identifier) parameters: (parameters) body: (block)))"
        )
    );

    let struct_node = root.child(0).unwrap();
    assert_eq!(struct_node.kind(), "struct_item");
}
2020-09-17 13:57:49 +08:00
/// Checks the `InputEdit`s produced by `generate_edits` for a replacement
/// followed by a cross-line deletion, and for a plain insertion.
#[test]
fn test_input_edits() {
    use tree_sitter::InputEdit;

    // Case 1: replace "world" with "test", then delete "\ntest".
    let original = Rope::from("hello world!\ntest 123");
    let replace_then_delete = Transaction::change(
        &original,
        vec![(6, 11, Some("test".into())), (12, 17, None)].into_iter(),
    );
    let produced = generate_edits(original.slice(..), replace_then_delete.changes());
    // transaction.apply(&mut state);

    let expected = [
        InputEdit {
            start_byte: 6,
            old_end_byte: 11,
            new_end_byte: 10,
            start_position: Point { row: 0, column: 6 },
            old_end_position: Point { row: 0, column: 11 },
            new_end_position: Point { row: 0, column: 10 },
        },
        InputEdit {
            start_byte: 12,
            old_end_byte: 17,
            new_end_byte: 12,
            start_position: Point { row: 0, column: 12 },
            old_end_position: Point { row: 1, column: 4 },
            new_end_position: Point { row: 0, column: 12 },
        },
    ];
    assert_eq!(produced, expected);

    // Case 2: the official example from tree-sitter, a pure insertion.
    let mut function_doc = Rope::from("fn test() {}");
    let insert_params =
        Transaction::change(&function_doc, vec![(8, 8, Some("a: u32".into()))].into_iter());
    let produced = generate_edits(function_doc.slice(..), insert_params.changes());
    insert_params.apply(&mut function_doc);

    assert_eq!(function_doc, "fn test(a: u32) {}");

    let expected = [InputEdit {
        start_byte: 8,
        old_end_byte: 8,
        new_end_byte: 14,
        start_position: Point { row: 0, column: 8 },
        old_end_position: Point { row: 0, column: 8 },
        new_end_position: Point { row: 0, column: 14 },
    }];
    assert_eq!(produced, expected);
}
/// Parses `source` with the grammar named `language_name`, pretty-prints the
/// smallest node covering the byte range `start..end`, and asserts that the
/// output equals `expected`.
#[track_caller]
fn assert_pretty_print(
    language_name: &str,
    source: &str,
    expected: &str,
    start: usize,
    end: usize,
) {
    let rope = Rope::from_str(source);

    // No queries are needed: only the parse tree is inspected.
    let grammar = get_language(language_name).unwrap();
    let highlight_config = Arc::new(HighlightConfiguration::new(grammar, "", "", "").unwrap());
    let syntax = Syntax::new(rope.slice(..), highlight_config, |_| None).unwrap();

    let node = syntax
        .tree()
        .root_node()
        .descendant_for_byte_range(start, end)
        .unwrap();

    let mut rendered = String::new();
    pretty_print_tree(&mut rendered, node).unwrap();

    assert_eq!(expected, rendered);
}
/// Exercises `pretty_print_tree` on a single node, a nested tree, an
/// anonymous token, an error node, and a grammar with fields under unnamed
/// helper rules.
#[test]
fn test_pretty_print() {
    // A lone named node is printed on a single line.
    let comment = r#"// Hello"#;
    assert_pretty_print("rust", comment, "(line_comment)", 0, comment.len());

    // A large tree should be indented with fields:
    let function = r#"fn main() {
println!("Hello, World!");
}"#;
    let function_tree = concat!(
        "(function_item\n",
        "  name: (identifier)\n",
        "  parameters: (parameters)\n",
        "  body: (block\n",
        "    (expression_statement\n",
        "      (macro_invocation\n",
        "        macro: (identifier)\n",
        "        (token_tree\n",
        "          (string_literal\n",
        "            (string_content)))))))",
    );
    assert_pretty_print("rust", function, function_tree, 0, function.len());

    // Selecting a token should print just that token:
    let empty_function = r#"fn main() {}"#;
    assert_pretty_print("rust", empty_function, r#""fn""#, 0, 1);

    // Error nodes are printed as errors:
    let unbalanced = r#"}{"#;
    assert_pretty_print("rust", unbalanced, "(ERROR)", 0, unbalanced.len());

    // Fields broken under unnamed nodes are determined correctly.
    // In the following source, `object` belongs to the `singleton_method`
    // rule but `name` and `body` belong to an unnamed helper `_method_rest`.
    // This can cause a bug with a pretty-printing implementation that
    // uses `Node::field_name_for_child` to determine field names but is
    // fixed when using `tree_sitter::TreeCursor::field_name`.
    let ruby_method = "def self.method_name
true
end";
    let ruby_tree = concat!(
        "(singleton_method\n",
        "  object: (self)\n",
        "  name: (identifier)\n",
        "  body: (body_statement\n",
        "    (true)))"
    );
    assert_pretty_print("ruby", ruby_method, ruby_tree, 0, ruby_method.len());
}
/// Checks that runtime query files can be loaded and that a missing file is
/// reported as an error.
#[test]
fn test_load_runtime_file() {
    // Test to make sure we can load some data from the runtime directory.
    let contents = load_runtime_file("rust", "indents.scm").unwrap();
    assert!(!contents.is_empty());

    let results = load_runtime_file("rust", "does-not-exist");
    assert!(results.is_err());
}
}