2024-03-15 10:47:07 +08:00
use std ::borrow ::Cow ;
use std ::fmt ::{ self , Display } ;
use std ::path ::{ Path , PathBuf } ;
use std ::str ::FromStr ;
use std ::sync ::Arc ;
2021-07-02 03:24:22 +08:00
2022-01-03 11:52:01 +08:00
use arc_swap ::{ ArcSwap , Guard } ;
2024-02-12 09:35:25 +08:00
use globset ::GlobSet ;
2024-03-15 10:47:07 +08:00
pub use helix_syntax ::highlighter ::{ Highlight , HighlightEvent } ;
pub use helix_syntax ::{
merge , pretty_print_tree , HighlightConfiguration , InjectionLanguageMarker , RopeProvider ,
TextObjectQuery , TreeCursor ,
2021-03-22 11:40:07 +08:00
} ;
2024-03-15 10:47:07 +08:00
pub use helix_syntax ::{ with_cursor , Syntax } ;
2021-03-12 13:46:23 +08:00
use once_cell ::sync ::{ Lazy , OnceCell } ;
2024-03-15 10:47:07 +08:00
use regex ::Regex ;
use ropey ::RopeSlice ;
use serde ::ser ::SerializeSeq ;
use serde ::{ Deserialize , Serialize } ;
use std ::collections ::{ HashMap , HashSet } ;
use tree_sitter ::{ Point , Query } ;
use crate ::auto_pairs ::AutoPairs ;
use crate ::chars ::char_is_line_ending ;
use crate ::diagnostic ::Severity ;
use crate ::{ ChangeSet , Operation , Tendril } ;
2021-03-25 14:26:25 +08:00
2022-02-16 21:57:20 +08:00
use helix_loader ::grammar ::{ get_language , load_runtime_file } ;
2022-02-14 00:42:18 +08:00
2021-09-16 14:47:51 +08:00
fn deserialize_regex < ' de , D > ( deserializer : D ) -> Result < Option < Regex > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
Option ::< String > ::deserialize ( deserializer ) ?
. map ( | buf | Regex ::new ( & buf ) . map_err ( serde ::de ::Error ::custom ) )
. transpose ( )
}
2021-10-08 10:14:12 +08:00
fn deserialize_lsp_config < ' de , D > ( deserializer : D ) -> Result < Option < serde_json ::Value > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
Option ::< toml ::Value > ::deserialize ( deserializer ) ?
. map ( | toml | toml . try_into ( ) . map_err ( serde ::de ::Error ::custom ) )
. transpose ( )
}
2023-06-07 16:51:29 +08:00
fn deserialize_tab_width < ' de , D > ( deserializer : D ) -> Result < usize , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
usize ::deserialize ( deserializer ) . and_then ( | n | {
if n > 0 & & n < = 16 {
Ok ( n )
} else {
Err ( serde ::de ::Error ::custom (
" tab width must be a value from 1 to 16 inclusive " ,
) )
}
} )
}
2022-02-25 16:36:54 +08:00
pub fn deserialize_auto_pairs < ' de , D > ( deserializer : D ) -> Result < Option < AutoPairs > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
Ok ( Option ::< AutoPairConfig > ::deserialize ( deserializer ) ? . and_then ( AutoPairConfig ::into ) )
}
2022-05-11 08:54:35 +08:00
/// Default for `LanguageServerConfiguration::timeout` when the key is absent.
fn default_timeout() -> u64 {
    const DEFAULT_TIMEOUT: u64 = 20;
    DEFAULT_TIMEOUT
}
2021-06-07 22:34:19 +08:00
#[ derive(Debug, Serialize, Deserialize) ]
2022-05-24 00:10:48 +08:00
#[ serde(rename_all = " kebab-case " ) ]
2021-03-25 14:26:25 +08:00
pub struct Configuration {
2021-03-25 15:53:32 +08:00
pub language : Vec < LanguageConfiguration > ,
2022-05-24 00:10:48 +08:00
#[ serde(default) ]
pub language_server : HashMap < String , LanguageServerConfiguration > ,
2022-02-15 13:23:01 +08:00
}
2020-09-22 17:23:48 +08:00
// largely based on tree-sitter/cli/src/loader.rs
2021-06-07 22:34:19 +08:00
#[ derive(Debug, Serialize, Deserialize) ]
2021-11-09 10:07:54 +08:00
#[ serde(rename_all = " kebab-case " , deny_unknown_fields) ]
2020-09-22 17:23:48 +08:00
pub struct LanguageConfiguration {
2021-03-25 14:26:25 +08:00
#[ serde(rename = " name " ) ]
2022-05-24 00:10:48 +08:00
pub language_id : String , // c-sharp, rust, tsx
#[ serde(rename = " language-id " ) ]
// see the table under https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocumentItem
pub language_server_language_id : Option < String > , // csharp, rust, typescriptreact, for the language-server
2022-10-22 08:34:15 +08:00
pub scope : String , // source.rust
pub file_types : Vec < FileType > , // filename extension or ends_with? <Gemfile, rb, etc>
2021-11-09 09:57:08 +08:00
#[ serde(default) ]
pub shebangs : Vec < String > , // interpreter(s) associated with language
2023-11-13 22:24:24 +08:00
#[ serde(default) ]
pub roots : Vec < String > , // these indicate project roots <.git, Cargo.toml>
2024-02-27 21:36:25 +08:00
#[ serde(
default ,
skip_serializing ,
deserialize_with = " from_comment_tokens " ,
alias = " comment-token "
) ]
pub comment_tokens : Option < Vec < String > > ,
#[ serde(
default ,
skip_serializing ,
deserialize_with = " from_block_comment_tokens "
) ]
pub block_comment_tokens : Option < Vec < BlockCommentToken > > ,
2023-03-08 10:02:11 +08:00
pub text_width : Option < usize > ,
pub soft_wrap : Option < SoftWrap > ,
2021-10-08 10:14:12 +08:00
2021-06-12 09:20:37 +08:00
#[ serde(default) ]
pub auto_format : bool ,
2022-06-05 18:50:57 +08:00
2022-08-04 12:01:48 +08:00
#[ serde(skip_serializing_if = " Option::is_none " ) ]
pub formatter : Option < FormatterConfiguration > ,
2021-12-25 13:32:43 +08:00
#[ serde(default) ]
pub diagnostic_severity : Severity ,
2020-09-22 17:23:48 +08:00
2022-02-15 01:24:28 +08:00
pub grammar : Option < String > , // tree-sitter grammar name, defaults to language_id
2020-09-22 17:23:48 +08:00
// content_regex
2021-09-16 14:47:51 +08:00
#[ serde(default, skip_serializing, deserialize_with = " deserialize_regex " ) ]
2021-09-16 15:04:32 +08:00
pub injection_regex : Option < Regex > ,
2020-09-22 17:23:48 +08:00
// first_line_regex
//
2021-03-25 14:26:25 +08:00
#[ serde(skip) ]
2020-09-22 17:23:48 +08:00
pub ( crate ) highlight_config : OnceCell < Option < Arc < HighlightConfiguration > > > ,
// tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583
2023-03-19 03:12:20 +08:00
#[ serde(
default ,
2023-03-20 06:37:41 +08:00
skip_serializing_if = " Vec::is_empty " ,
2023-03-19 03:12:20 +08:00
serialize_with = " serialize_lang_features " ,
deserialize_with = " deserialize_lang_features "
) ]
2023-03-20 06:37:41 +08:00
pub language_servers : Vec < LanguageServerFeatures > ,
2021-03-25 14:26:25 +08:00
#[ serde(skip_serializing_if = " Option::is_none " ) ]
pub indent : Option < IndentationConfiguration > ,
2021-05-14 18:21:46 +08:00
#[ serde(skip) ]
2022-03-30 23:08:07 +08:00
pub ( crate ) indent_query : OnceCell < Option < Query > > ,
2021-10-23 10:41:19 +08:00
#[ serde(skip) ]
pub ( crate ) textobject_query : OnceCell < Option < TextObjectQuery > > ,
2021-08-23 21:48:06 +08:00
#[ serde(skip_serializing_if = " Option::is_none " ) ]
2021-08-24 16:56:18 +08:00
pub debugger : Option < DebugAdapterConfig > ,
2022-02-25 16:36:54 +08:00
/// Automatic insertion of pairs to parentheses, brackets,
/// etc. Defaults to true. Optionally, this can be a list of 2-tuples
/// to specify a list of characters to pair. This overrides the
/// global setting.
#[ serde(default, skip_serializing, deserialize_with = " deserialize_auto_pairs " ) ]
pub auto_pairs : Option < AutoPairs > ,
2022-04-20 09:44:32 +08:00
pub rulers : Option < Vec < u16 > > , // if set, override editor's rulers
2023-01-31 07:31:21 +08:00
/// Hardcoded LSP root directories relative to the workspace root, like `examples` or `tools/fuzz`.
/// Falling back to the current working directory if none are configured.
pub workspace_lsp_roots : Option < Vec < PathBuf > > ,
2023-03-27 00:10:09 +08:00
#[ serde(default) ]
pub persistent_diagnostic_sources : Vec < String > ,
2021-03-14 16:13:55 +08:00
}
2024-03-15 10:47:07 +08:00
/// Reads the query file `filename` for `language` through
/// `helix_syntax::read_query`, supplying runtime-file loading as the callback.
///
/// A file that cannot be loaded is treated as an empty string.
fn read_query(language: &str, filename: &str) -> String {
    helix_syntax::read_query(language, filename, |query_lang, query_file| {
        load_runtime_file(query_lang, query_file).unwrap_or_default()
    })
}
impl LanguageConfiguration {
fn initialize_highlight ( & self , scopes : & [ String ] ) -> Option < Arc < HighlightConfiguration > > {
let highlights_query = read_query ( & self . language_id , " highlights.scm " ) ;
// always highlight syntax errors
// highlights_query += "\n(ERROR) @error";
let injections_query = read_query ( & self . language_id , " injections.scm " ) ;
let locals_query = read_query ( & self . language_id , " locals.scm " ) ;
if highlights_query . is_empty ( ) {
None
} else {
let language = get_language ( self . grammar . as_deref ( ) . unwrap_or ( & self . language_id ) )
. map_err ( | err | {
log ::error! (
" Failed to load tree-sitter parser for language {:?}: {} " ,
self . language_id ,
err
)
} )
. ok ( ) ? ;
let config = HighlightConfiguration ::new (
language ,
& highlights_query ,
& injections_query ,
& locals_query ,
)
. map_err ( | err | log ::error! ( " Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?} " , self . language_id , err ) )
. ok ( ) ? ;
config . configure ( scopes ) ;
Some ( Arc ::new ( config ) )
}
}
pub fn reconfigure ( & self , scopes : & [ String ] ) {
if let Some ( Some ( config ) ) = self . highlight_config . get ( ) {
config . configure ( scopes ) ;
}
}
pub fn get_highlight_config ( & self ) -> Option < Arc < HighlightConfiguration > > {
self . highlight_config . get ( ) . cloned ( ) . flatten ( )
}
pub fn highlight_config ( & self , scopes : & [ String ] ) -> Option < Arc < HighlightConfiguration > > {
self . highlight_config
. get_or_init ( | | self . initialize_highlight ( scopes ) )
. clone ( )
}
pub fn is_highlight_initialized ( & self ) -> bool {
self . highlight_config . get ( ) . is_some ( )
}
pub fn indent_query ( & self ) -> Option < & Query > {
self . indent_query
. get_or_init ( | | self . load_query ( " indents.scm " ) )
. as_ref ( )
}
pub fn textobject_query ( & self ) -> Option < & TextObjectQuery > {
self . textobject_query
. get_or_init ( | | {
self . load_query ( " textobjects.scm " )
. map ( | query | TextObjectQuery { query } )
} )
. as_ref ( )
}
pub fn scope ( & self ) -> & str {
& self . scope
}
fn load_query ( & self , kind : & str ) -> Option < Query > {
let query_text = read_query ( & self . language_id , kind ) ;
if query_text . is_empty ( ) {
return None ;
}
let lang = & self . highlight_config . get ( ) ? . as_ref ( ) ? . language ;
Query ::new ( lang , & query_text )
. map_err ( | e | {
log ::error! (
" Failed to parse {} queries for {}: {} " ,
kind ,
self . language_id ,
e
)
} )
. ok ( )
}
}
2022-10-22 08:34:15 +08:00
#[ derive(Debug, PartialEq, Eq, Hash) ]
pub enum FileType {
/// The extension of the file, either the `Path::extension` or the full
/// filename if the file does not have an extension.
Extension ( String ) ,
2024-02-12 01:24:20 +08:00
/// A Unix-style path glob. This is compared to the file's absolute path, so
/// it can be used to detect files based on their directories. If the glob
/// is not an absolute path and does not already start with a glob pattern,
/// a glob pattern will be prepended to it.
Glob ( globset ::Glob ) ,
2022-10-22 08:34:15 +08:00
}
impl Serialize for FileType {
fn serialize < S > ( & self , serializer : S ) -> Result < S ::Ok , S ::Error >
where
S : serde ::Serializer ,
{
use serde ::ser ::SerializeMap ;
match self {
FileType ::Extension ( extension ) = > serializer . serialize_str ( extension ) ,
2024-02-12 01:24:20 +08:00
FileType ::Glob ( glob ) = > {
2022-10-22 08:34:15 +08:00
let mut map = serializer . serialize_map ( Some ( 1 ) ) ? ;
2024-02-12 01:24:20 +08:00
map . serialize_entry ( " glob " , glob . glob ( ) ) ? ;
2022-10-22 08:34:15 +08:00
map . end ( )
}
}
}
}
impl < ' de > Deserialize < ' de > for FileType {
fn deserialize < D > ( deserializer : D ) -> Result < Self , D ::Error >
where
D : serde ::de ::Deserializer < ' de > ,
{
struct FileTypeVisitor ;
impl < ' de > serde ::de ::Visitor < ' de > for FileTypeVisitor {
type Value = FileType ;
fn expecting ( & self , formatter : & mut std ::fmt ::Formatter ) -> std ::fmt ::Result {
formatter . write_str ( " string or table " )
}
fn visit_str < E > ( self , value : & str ) -> Result < Self ::Value , E >
where
E : serde ::de ::Error ,
{
Ok ( FileType ::Extension ( value . to_string ( ) ) )
}
fn visit_map < M > ( self , mut map : M ) -> Result < Self ::Value , M ::Error >
where
M : serde ::de ::MapAccess < ' de > ,
{
match map . next_entry ::< String , String > ( ) ? {
2024-02-12 01:24:20 +08:00
Some ( ( key , mut glob ) ) if key = = " glob " = > {
// If the glob isn't an absolute path or already starts
// with a glob pattern, add a leading glob so we
// properly match relative paths.
if ! glob . starts_with ( '/' ) & & ! glob . starts_with ( " */ " ) {
glob . insert_str ( 0 , " */ " ) ;
}
globset ::Glob ::new ( glob . as_str ( ) )
. map ( FileType ::Glob )
. map_err ( | err | {
serde ::de ::Error ::custom ( format! ( " invalid `glob` pattern: {} " , err ) )
} )
}
2022-10-22 08:34:15 +08:00
Some ( ( key , _value ) ) = > Err ( serde ::de ::Error ::custom ( format! (
" unknown key in `file-types` list: {} " ,
key
) ) ) ,
None = > Err ( serde ::de ::Error ::custom (
" expected a `suffix` key in the `file-types` entry " ,
) ) ,
}
}
}
deserializer . deserialize_any ( FileTypeVisitor )
}
}
2024-02-27 21:36:25 +08:00
fn from_comment_tokens < ' de , D > ( deserializer : D ) -> Result < Option < Vec < String > > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
#[ derive(Deserialize) ]
#[ serde(untagged) ]
enum CommentTokens {
Multiple ( Vec < String > ) ,
Single ( String ) ,
}
Ok (
Option ::< CommentTokens > ::deserialize ( deserializer ) ? . map ( | tokens | match tokens {
CommentTokens ::Single ( val ) = > vec! [ val ] ,
CommentTokens ::Multiple ( vals ) = > vals ,
} ) ,
)
}
/// A pair of tokens delimiting a block comment.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BlockCommentToken {
    /// Token that opens the comment.
    pub start: String,
    /// Token that closes the comment.
    pub end: String,
}
impl Default for BlockCommentToken {
fn default ( ) -> Self {
BlockCommentToken {
start : " /* " . to_string ( ) ,
end : " */ " . to_string ( ) ,
}
}
}
fn from_block_comment_tokens < ' de , D > (
deserializer : D ,
) -> Result < Option < Vec < BlockCommentToken > > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
#[ derive(Deserialize) ]
#[ serde(untagged) ]
enum BlockCommentTokens {
Multiple ( Vec < BlockCommentToken > ) ,
Single ( BlockCommentToken ) ,
}
Ok (
Option ::< BlockCommentTokens > ::deserialize ( deserializer ) ? . map ( | tokens | match tokens {
BlockCommentTokens ::Single ( val ) = > vec! [ val ] ,
BlockCommentTokens ::Multiple ( vals ) = > vals ,
} ) ,
)
}
2023-03-19 03:12:20 +08:00
#[ derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash) ]
2022-05-24 00:10:48 +08:00
#[ serde(rename_all = " kebab-case " ) ]
pub enum LanguageServerFeature {
Format ,
GotoDeclaration ,
GotoDefinition ,
GotoTypeDefinition ,
GotoReference ,
GotoImplementation ,
// Goto, use bitflags, combining previous Goto members?
SignatureHelp ,
Hover ,
DocumentHighlight ,
Completion ,
CodeAction ,
WorkspaceCommand ,
DocumentSymbols ,
WorkspaceSymbols ,
// Symbols, use bitflags, see above?
Diagnostics ,
RenameSymbol ,
InlayHints ,
}
impl Display for LanguageServerFeature {
fn fmt ( & self , f : & mut fmt ::Formatter < '_ > ) -> fmt ::Result {
2023-03-19 03:32:34 +08:00
use LanguageServerFeature ::* ;
let feature = match self {
Format = > " format " ,
GotoDeclaration = > " goto-declaration " ,
GotoDefinition = > " goto-definition " ,
GotoTypeDefinition = > " goto-type-definition " ,
2024-01-22 22:06:20 +08:00
GotoReference = > " goto-reference " ,
2023-03-19 03:32:34 +08:00
GotoImplementation = > " goto-implementation " ,
SignatureHelp = > " signature-help " ,
Hover = > " hover " ,
DocumentHighlight = > " document-highlight " ,
Completion = > " completion " ,
CodeAction = > " code-action " ,
WorkspaceCommand = > " workspace-command " ,
DocumentSymbols = > " document-symbols " ,
WorkspaceSymbols = > " workspace-symbols " ,
Diagnostics = > " diagnostics " ,
RenameSymbol = > " rename-symbol " ,
InlayHints = > " inlay-hints " ,
} ;
write! ( f , " {feature} " , )
2022-05-24 00:10:48 +08:00
}
}
#[ derive(Debug, Serialize, Deserialize) ]
#[ serde(untagged, rename_all = " kebab-case " , deny_unknown_fields) ]
2023-03-19 03:12:20 +08:00
enum LanguageServerFeatureConfiguration {
2022-05-24 00:10:48 +08:00
#[ serde(rename_all = " kebab-case " ) ]
Features {
2023-03-19 03:12:20 +08:00
#[ serde(default, skip_serializing_if = " HashSet::is_empty " ) ]
only_features : HashSet < LanguageServerFeature > ,
#[ serde(default, skip_serializing_if = " HashSet::is_empty " ) ]
except_features : HashSet < LanguageServerFeature > ,
2022-05-24 00:10:48 +08:00
name : String ,
} ,
Simple ( String ) ,
}
2023-03-19 03:12:20 +08:00
#[ derive(Debug, Default) ]
pub struct LanguageServerFeatures {
2023-03-20 06:37:41 +08:00
pub name : String ,
2023-03-19 03:12:20 +08:00
pub only : HashSet < LanguageServerFeature > ,
pub excluded : HashSet < LanguageServerFeature > ,
}
impl LanguageServerFeatures {
    /// Whether `feature` is enabled for this server.
    ///
    /// A feature is enabled when it is not in `excluded` and either `only`
    /// is empty or `only` contains it.
    pub fn has_feature(&self, feature: LanguageServerFeature) -> bool {
        if self.excluded.contains(&feature) {
            return false;
        }
        self.only.is_empty() || self.only.contains(&feature)
    }
}
fn deserialize_lang_features < ' de , D > (
deserializer : D ,
2023-03-20 06:37:41 +08:00
) -> Result < Vec < LanguageServerFeatures > , D ::Error >
2023-03-19 03:12:20 +08:00
where
D : serde ::Deserializer < ' de > ,
{
let raw : Vec < LanguageServerFeatureConfiguration > = Deserialize ::deserialize ( deserializer ) ? ;
let res = raw
. into_iter ( )
. map ( | config | match config {
2023-03-20 06:37:41 +08:00
LanguageServerFeatureConfiguration ::Simple ( name ) = > LanguageServerFeatures {
name ,
.. Default ::default ( )
} ,
2023-03-19 03:12:20 +08:00
LanguageServerFeatureConfiguration ::Features {
only_features ,
except_features ,
name ,
2023-03-20 06:37:41 +08:00
} = > LanguageServerFeatures {
2023-03-19 03:12:20 +08:00
name ,
2023-03-20 06:37:41 +08:00
only : only_features ,
excluded : except_features ,
} ,
2023-03-19 03:12:20 +08:00
} )
. collect ( ) ;
Ok ( res )
}
fn serialize_lang_features < S > (
2023-03-20 06:37:41 +08:00
map : & Vec < LanguageServerFeatures > ,
2023-03-19 03:12:20 +08:00
serializer : S ,
) -> Result < S ::Ok , S ::Error >
where
S : serde ::Serializer ,
{
let mut serializer = serializer . serialize_seq ( Some ( map . len ( ) ) ) ? ;
2023-03-20 06:37:41 +08:00
for features in map {
2023-03-19 03:12:20 +08:00
let features = if features . only . is_empty ( ) & & features . excluded . is_empty ( ) {
2023-03-20 06:37:41 +08:00
LanguageServerFeatureConfiguration ::Simple ( features . name . to_owned ( ) )
2023-03-19 03:12:20 +08:00
} else {
LanguageServerFeatureConfiguration ::Features {
only_features : features . only . clone ( ) ,
except_features : features . excluded . clone ( ) ,
2023-03-20 06:37:41 +08:00
name : features . name . to_owned ( ) ,
2023-03-19 03:12:20 +08:00
}
} ;
serializer . serialize_element ( & features ) ? ;
}
serializer . end ( )
}
2024-02-12 09:35:25 +08:00
fn deserialize_required_root_patterns < ' de , D > ( deserializer : D ) -> Result < Option < GlobSet > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
let patterns = Vec ::< String > ::deserialize ( deserializer ) ? ;
if patterns . is_empty ( ) {
return Ok ( None ) ;
}
let mut builder = globset ::GlobSetBuilder ::new ( ) ;
for pattern in patterns {
let glob = globset ::Glob ::new ( & pattern ) . map_err ( serde ::de ::Error ::custom ) ? ;
builder . add ( glob ) ;
}
builder . build ( ) . map ( Some ) . map_err ( serde ::de ::Error ::custom )
}
2021-06-07 22:34:19 +08:00
#[ derive(Debug, Serialize, Deserialize) ]
2021-03-25 14:26:25 +08:00
#[ serde(rename_all = " kebab-case " ) ]
2021-03-14 16:13:55 +08:00
pub struct LanguageServerConfiguration {
pub command : String ,
2021-03-25 14:26:25 +08:00
#[ serde(default) ]
#[ serde(skip_serializing_if = " Vec::is_empty " ) ]
2021-03-14 16:13:55 +08:00
pub args : Vec < String > ,
2022-12-09 12:09:23 +08:00
#[ serde(default, skip_serializing_if = " HashMap::is_empty " ) ]
pub environment : HashMap < String , String > ,
2022-05-24 00:10:48 +08:00
#[ serde(default, skip_serializing, deserialize_with = " deserialize_lsp_config " ) ]
pub config : Option < serde_json ::Value > ,
2022-05-11 08:54:35 +08:00
#[ serde(default = " default_timeout " ) ]
pub timeout : u64 ,
2024-02-12 09:35:25 +08:00
#[ serde(
default ,
skip_serializing ,
deserialize_with = " deserialize_required_root_patterns "
) ]
pub required_root_patterns : Option < GlobSet > ,
2020-09-22 17:23:48 +08:00
}
2022-08-04 12:01:48 +08:00
/// External formatter command for a language.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct FormatterConfiguration {
    /// Executable to run.
    pub command: String,
    /// Command-line arguments; omitted from output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub args: Vec<String>,
}
2022-11-04 20:01:17 +08:00
#[ derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize) ]
2021-08-29 19:51:47 +08:00
#[ serde(rename_all = " kebab-case " ) ]
pub struct AdvancedCompletion {
pub name : Option < String > ,
pub completion : Option < String > ,
pub default : Option < String > ,
}
2022-11-04 20:01:17 +08:00
#[ derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize) ]
2021-08-29 19:51:47 +08:00
#[ serde(rename_all = " kebab-case " , untagged) ]
pub enum DebugConfigCompletion {
Named ( String ) ,
Advanced ( AdvancedCompletion ) ,
}
2022-11-04 20:01:17 +08:00
#[ derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize) ]
2021-10-24 22:24:18 +08:00
#[ serde(untagged) ]
pub enum DebugArgumentValue {
String ( String ) ,
Array ( Vec < String > ) ,
2021-12-03 10:59:44 +08:00
Boolean ( bool ) ,
2021-10-24 22:24:18 +08:00
}
2022-11-04 20:01:17 +08:00
#[ derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize) ]
2021-08-29 19:51:47 +08:00
#[ serde(rename_all = " kebab-case " ) ]
pub struct DebugTemplate {
pub name : String ,
pub request : String ,
2024-05-06 23:37:04 +08:00
#[ serde(default) ]
2021-08-29 19:51:47 +08:00
pub completion : Vec < DebugConfigCompletion > ,
2021-10-24 22:24:18 +08:00
pub args : HashMap < String , DebugArgumentValue > ,
2021-08-29 19:51:47 +08:00
}
2022-11-04 20:01:17 +08:00
#[ derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize) ]
2021-08-29 19:51:47 +08:00
#[ serde(rename_all = " kebab-case " ) ]
pub struct DebugAdapterConfig {
pub name : String ,
pub transport : String ,
2021-09-27 02:36:06 +08:00
#[ serde(default) ]
2021-08-29 19:51:47 +08:00
pub command : String ,
2021-09-06 18:49:31 +08:00
#[ serde(default) ]
2021-08-29 19:51:47 +08:00
pub args : Vec < String > ,
pub port_arg : Option < String > ,
pub templates : Vec < DebugTemplate > ,
2021-09-27 02:36:06 +08:00
#[ serde(default) ]
pub quirks : DebuggerQuirks ,
2021-08-29 19:51:47 +08:00
}
2021-10-17 12:58:11 +08:00
// Different workarounds for adapters' differences
2022-11-04 20:01:17 +08:00
#[ derive(Debug, Default, PartialEq, Eq, Clone, Serialize, Deserialize) ]
2021-10-17 12:58:11 +08:00
pub struct DebuggerQuirks {
#[ serde(default) ]
pub absolute_paths : bool ,
}
2021-06-07 22:34:19 +08:00
#[ derive(Debug, Serialize, Deserialize) ]
2021-03-25 14:26:25 +08:00
#[ serde(rename_all = " kebab-case " ) ]
2021-03-22 12:47:39 +08:00
pub struct IndentationConfiguration {
2023-06-07 16:51:29 +08:00
#[ serde(deserialize_with = " deserialize_tab_width " ) ]
2021-03-22 12:47:39 +08:00
pub tab_width : usize ,
2021-03-25 14:26:25 +08:00
pub unit : String ,
2021-03-22 12:47:39 +08:00
}
2023-09-19 21:31:38 +08:00
/// How the indentation for a newly inserted line should be determined.
/// If the selected heuristic is not available (e.g. because the current
/// language has no tree-sitter indent queries), a simpler one will be used.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum IndentationHeuristic {
    /// Just copy the indentation of the line that the cursor is currently on.
    Simple,
    /// Use tree-sitter indent queries to compute the expected absolute indentation level of the new line.
    TreeSitter,
    /// Use tree-sitter indent queries to compute the expected difference in indentation between the new line
    /// and the line before. Add this to the actual indentation level of the line before.
    ///
    /// This is the default heuristic.
    #[default]
    Hybrid,
}
2022-02-25 16:36:54 +08:00
/// Configuration for auto pairs
2022-11-04 20:01:17 +08:00
#[ derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize) ]
2022-02-25 16:36:54 +08:00
#[ serde(rename_all = " kebab-case " , deny_unknown_fields, untagged) ]
pub enum AutoPairConfig {
/// Enables or disables auto pairing. False means disabled. True means to use the default pairs.
Enable ( bool ) ,
/// The mappings of pairs.
Pairs ( HashMap < char , char > ) ,
}
impl Default for AutoPairConfig {
fn default ( ) -> Self {
AutoPairConfig ::Enable ( true )
}
}
impl From < & AutoPairConfig > for Option < AutoPairs > {
fn from ( auto_pair_config : & AutoPairConfig ) -> Self {
match auto_pair_config {
AutoPairConfig ::Enable ( false ) = > None ,
AutoPairConfig ::Enable ( true ) = > Some ( AutoPairs ::default ( ) ) ,
AutoPairConfig ::Pairs ( pairs ) = > Some ( AutoPairs ::new ( pairs . iter ( ) ) ) ,
}
}
}
impl From<AutoPairConfig> for Option<AutoPairs> {
    /// Owned variant; delegates to the conversion from `&AutoPairConfig`.
    fn from(auto_pairs_config: AutoPairConfig) -> Self {
        Option::<AutoPairs>::from(&auto_pairs_config)
    }
}
impl FromStr for AutoPairConfig {
type Err = std ::str ::ParseBoolError ;
// only do bool parsing for runtime setting
fn from_str ( s : & str ) -> Result < Self , Self ::Err > {
let enable : bool = s . parse ( ) ? ;
2022-03-15 16:04:22 +08:00
Ok ( AutoPairConfig ::Enable ( enable ) )
2022-02-25 16:36:54 +08:00
}
}
2023-03-08 10:02:11 +08:00
/// Soft-wrap settings. Every field is optional; see the note on `enable`
/// for why.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default, rename_all = "kebab-case", deny_unknown_fields)]
pub struct SoftWrap {
    /// Soft wrap lines that exceed viewport width. Default to off
    // NOTE: Option on purpose because the struct is shared between language config and global config.
    // By default the option is None so that the language config falls back to the global config unless explicitly set.
    pub enable: Option<bool>,
    /// Maximum space left free at the end of the line.
    /// This space is used to wrap text at word boundaries. If that is not possible within this limit
    /// the word is simply split at the end of the line.
    ///
    /// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views.
    ///
    /// Default to 20
    pub max_wrap: Option<u16>,
    /// Maximum number of indentation that can be carried over from the previous line when softwrapping.
    /// If a line is indented further then this limit it is rendered at the start of the viewport instead.
    ///
    /// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views.
    ///
    /// Default to 40
    pub max_indent_retain: Option<u16>,
    /// Indicator placed at the beginning of softwrapped lines
    ///
    /// Defaults to ↪
    pub wrap_indicator: Option<String>,
    /// Softwrap at `text_width` instead of viewport width if it is shorter
    pub wrap_at_text_width: Option<bool>,
}
2020-09-22 17:23:48 +08:00
2024-02-12 01:24:20 +08:00
/// A glob pattern paired with the index of the language it selects.
#[derive(Debug)]
struct FileTypeGlob {
    /// The pattern to match paths against.
    glob: globset::Glob,
    /// Index into the loader's list of language configurations.
    language_id: usize,
}
impl FileTypeGlob {
fn new ( glob : globset ::Glob , language_id : usize ) -> Self {
Self { glob , language_id }
}
}
/// Matches paths against all configured file-type globs at once.
#[derive(Debug)]
struct FileTypeGlobMatcher {
    /// Compiled set containing every glob of `file_types`, in the same order.
    matcher: globset::GlobSet,
    /// The globs and their language indices; indices reported by `matcher`
    /// refer to positions in this vector.
    file_types: Vec<FileTypeGlob>,
}
impl FileTypeGlobMatcher {
fn new ( file_types : Vec < FileTypeGlob > ) -> Result < Self , globset ::Error > {
let mut builder = globset ::GlobSetBuilder ::new ( ) ;
for file_type in & file_types {
builder . add ( file_type . glob . clone ( ) ) ;
}
Ok ( Self {
matcher : builder . build ( ) ? ,
file_types ,
} )
}
fn language_id_for_path ( & self , path : & Path ) -> Option < & usize > {
self . matcher
. matches ( path )
. iter ( )
. filter_map ( | idx | self . file_types . get ( * idx ) )
. max_by_key ( | file_type | file_type . glob . glob ( ) . len ( ) )
. map ( | file_type | & file_type . language_id )
}
}
2022-01-03 11:52:01 +08:00
// Expose loader as Lazy<> global since it's always static?
2021-06-07 22:34:19 +08:00
#[ derive(Debug) ]
2020-09-22 17:23:48 +08:00
pub struct Loader {
// highlight_names ?
language_configs : Vec < Arc < LanguageConfiguration > > ,
2022-10-22 08:34:15 +08:00
language_config_ids_by_extension : HashMap < String , usize > , // Vec<usize>
2024-02-12 01:24:20 +08:00
language_config_ids_glob_matcher : FileTypeGlobMatcher ,
2021-11-08 23:19:44 +08:00
language_config_ids_by_shebang : HashMap < String , usize > ,
2022-01-03 11:52:01 +08:00
2022-05-24 00:10:48 +08:00
language_server_configs : HashMap < String , LanguageServerConfiguration > ,
2022-01-03 11:52:01 +08:00
scopes : ArcSwap < Vec < String > > ,
2020-09-22 17:23:48 +08:00
}
2024-02-12 01:24:20 +08:00
/// Error returned by `Loader::new`; currently an alias for `globset::Error`.
pub type LoaderError = globset ::Error ;
2020-09-22 17:23:48 +08:00
impl Loader {
2024-02-12 01:24:20 +08:00
pub fn new ( config : Configuration ) -> Result < Self , LoaderError > {
let mut language_configs = Vec ::new ( ) ;
let mut language_config_ids_by_extension = HashMap ::new ( ) ;
let mut language_config_ids_by_shebang = HashMap ::new ( ) ;
let mut file_type_globs = Vec ::new ( ) ;
2020-09-22 17:23:48 +08:00
2021-03-25 14:26:25 +08:00
for config in config . language {
2020-09-22 17:23:48 +08:00
// get the next id
2024-02-12 01:24:20 +08:00
let language_id = language_configs . len ( ) ;
2020-09-22 17:23:48 +08:00
for file_type in & config . file_types {
// entry().or_insert(Vec::new).push(language_id);
2022-10-22 08:34:15 +08:00
match file_type {
2024-02-12 01:24:20 +08:00
FileType ::Extension ( extension ) = > {
language_config_ids_by_extension . insert ( extension . clone ( ) , language_id ) ;
}
FileType ::Glob ( glob ) = > {
file_type_globs . push ( FileTypeGlob ::new ( glob . to_owned ( ) , language_id ) ) ;
}
2022-10-22 08:34:15 +08:00
} ;
2020-09-22 17:23:48 +08:00
}
2021-11-08 23:19:44 +08:00
for shebang in & config . shebangs {
2024-02-12 01:24:20 +08:00
language_config_ids_by_shebang . insert ( shebang . clone ( ) , language_id ) ;
2021-11-08 23:19:44 +08:00
}
2020-09-22 17:23:48 +08:00
2024-02-12 01:24:20 +08:00
language_configs . push ( Arc ::new ( config ) ) ;
2020-09-22 17:23:48 +08:00
}
2024-02-12 01:24:20 +08:00
Ok ( Self {
language_configs ,
language_config_ids_by_extension ,
language_config_ids_glob_matcher : FileTypeGlobMatcher ::new ( file_type_globs ) ? ,
language_config_ids_by_shebang ,
language_server_configs : config . language_server ,
scopes : ArcSwap ::from_pointee ( Vec ::new ( ) ) ,
} )
2020-09-22 17:23:48 +08:00
}
/// Finds the language configuration for the file at `path`.
///
/// Glob patterns are consulted first; if none matches, the path's
/// extension (when it is valid UTF-8) is looked up in the extension table.
pub fn language_config_for_file_name(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> {
    let glob_match = self
        .language_config_ids_glob_matcher
        .language_id_for_path(path);

    let id = match glob_match {
        Some(id) => *id,
        None => {
            let extension = path.extension()?.to_str()?;
            *self.language_config_ids_by_extension.get(extension)?
        }
    };

    self.language_configs.get(id).cloned()

    // TODO: content_regex handling conflict resolution
}
2020-10-14 17:07:42 +08:00
2023-07-26 02:15:36 +08:00
pub fn language_config_for_shebang (
& self ,
source : RopeSlice ,
) -> Option < Arc < LanguageConfiguration > > {
2021-11-08 23:30:34 +08:00
let line = Cow ::from ( source . line ( 0 ) ) ;
2024-03-15 10:47:07 +08:00
// TODO: resue detection from helix-syntax
const SHEBANG : & str = r "#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)" ;
2023-04-28 17:21:34 +08:00
static SHEBANG_REGEX : Lazy < Regex > =
Lazy ::new ( | | Regex ::new ( & [ " ^ " , SHEBANG ] . concat ( ) ) . unwrap ( ) ) ;
2021-11-08 23:19:44 +08:00
let configuration_id = SHEBANG_REGEX
2021-11-08 23:30:34 +08:00
. captures ( & line )
2021-11-08 23:19:44 +08:00
. and_then ( | cap | self . language_config_ids_by_shebang . get ( & cap [ 1 ] ) ) ;
configuration_id . and_then ( | & id | self . language_configs . get ( id ) . cloned ( ) )
}
2020-10-14 17:07:42 +08:00
/// Returns the first language configuration whose `scope` equals `scope` exactly.
pub fn language_config_for_scope(&self, scope: &str) -> Option<Arc<LanguageConfiguration>> {
    let found = self
        .language_configs
        .iter()
        .find(|config| config.scope == scope)?;
    Some(Arc::clone(found))
}
2021-06-19 19:26:52 +08:00
2022-04-05 08:56:14 +08:00
/// Returns the first language configuration whose `language_id` equals `id` exactly.
pub fn language_config_for_language_id(&self, id: &str) -> Option<Arc<LanguageConfiguration>> {
    let found = self
        .language_configs
        .iter()
        .find(|config| config.language_id == id)?;
    Some(Arc::clone(found))
}
2023-04-28 17:21:34 +08:00
/// Unlike language_config_for_language_id, which only returns Some for an exact id, this
/// function will perform a regex match on the given string to find the closest language match.
///
/// "Closest" means the configuration whose `injection_regex` produces the longest match
/// in `name`. On equal lengths the earliest configuration wins, and empty matches are
/// never selected.
pub fn language_config_for_name(&self, name: &str) -> Option<Arc<LanguageConfiguration>> {
    let mut longest = 0;
    let mut winner: Option<&Arc<LanguageConfiguration>> = None;

    for candidate in &self.language_configs {
        let matched = candidate
            .injection_regex
            .as_ref()
            .and_then(|regex| regex.find(name));

        if let Some(found) = matched {
            let span = found.end() - found.start();
            // Strictly greater: keeps the first of several equally long matches.
            if span > longest {
                longest = span;
                winner = Some(candidate);
            }
        }
    }

    winner.cloned()
}
pub fn language_configuration_for_injection_string (
& self ,
capture : & InjectionLanguageMarker ,
) -> Option < Arc < LanguageConfiguration > > {
match capture {
InjectionLanguageMarker ::Name ( string ) = > self . language_config_for_name ( string ) ,
InjectionLanguageMarker ::Filename ( file ) = > self . language_config_for_file_name ( file ) ,
InjectionLanguageMarker ::Shebang ( shebang ) = > {
self . language_config_for_language_id ( shebang )
}
2021-09-16 14:47:51 +08:00
}
}
2022-01-03 11:52:01 +08:00
2022-04-05 08:56:14 +08:00
/// Iterates over all language configurations held by this loader, in stored order.
pub fn language_configs(&self) -> impl Iterator<Item = &Arc<LanguageConfiguration>> {
    self.language_configs.iter()
}
2022-05-24 00:10:48 +08:00
/// Borrows the language-server configurations, keyed by `String`.
pub fn language_server_configs(&self) -> &HashMap<String, LanguageServerConfiguration> {
    &self.language_server_configs
}
2022-01-03 11:52:01 +08:00
/// Replaces the stored scope list and re-applies it to every configuration whose
/// highlighting has already been initialized.
pub fn set_scopes(&self, scopes: Vec<String>) {
    self.scopes.store(Arc::new(scopes));

    // Reconfigure existing grammars
    self.language_configs
        .iter()
        .filter(|config| config.is_highlight_initialized())
        .for_each(|config| {
            config.reconfigure(&self.scopes());
        });
}
/// Loads the currently stored scope list.
pub fn scopes(&self) -> Guard<Arc<Vec<String>>> {
    self.scopes.load()
}
2020-09-22 17:23:48 +08:00
}
2024-03-15 10:47:07 +08:00
pub fn generate_edits ( old_text : RopeSlice , changeset : & ChangeSet ) -> Vec < tree_sitter ::InputEdit > {
use Operation ::* ;
let mut old_pos = 0 ;
2021-03-16 12:45:32 +08:00
2024-03-15 10:47:07 +08:00
let mut edits = Vec ::new ( ) ;
2020-09-17 13:57:49 +08:00
2024-03-15 10:47:07 +08:00
if changeset . changes . is_empty ( ) {
return edits ;
}
2020-09-17 13:57:49 +08:00
2024-03-15 10:47:07 +08:00
let mut iter = changeset . changes . iter ( ) . peekable ( ) ;
2021-11-06 23:21:03 +08:00
2024-03-15 10:47:07 +08:00
// TODO; this is a lot easier with Change instead of Operation.
2020-09-17 13:57:49 +08:00
2024-03-15 10:47:07 +08:00
fn point_at_pos ( text : RopeSlice , pos : usize ) -> ( usize , Point ) {
let byte = text . char_to_byte ( pos ) ; // <- attempted to index past end
let line = text . char_to_line ( pos ) ;
let line_start_byte = text . line_to_byte ( line ) ;
let col = byte - line_start_byte ;
2021-11-06 23:21:03 +08:00
2024-03-15 10:47:07 +08:00
( byte , Point ::new ( line , col ) )
2020-09-17 13:57:49 +08:00
}
2024-03-15 10:47:07 +08:00
fn traverse ( point : Point , text : & Tendril ) -> Point {
let Point {
mut row ,
mut column ,
} = point ;
2021-11-06 23:21:03 +08:00
// TODO: there should be a better way here.
let mut chars = text . chars ( ) . peekable ( ) ;
while let Some ( ch ) = chars . next ( ) {
if char_is_line_ending ( ch ) & & ! ( ch = = '\r' & & chars . peek ( ) = = Some ( & '\n' ) ) {
row + = 1 ;
column = 0 ;
} else {
column + = 1 ;
2020-09-17 13:57:49 +08:00
}
}
2021-11-06 23:21:03 +08:00
Point { row , column }
}
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
while let Some ( change ) = iter . next ( ) {
let len = match change {
Delete ( i ) | Retain ( i ) = > * i ,
Insert ( _ ) = > 0 ,
} ;
let mut old_end = old_pos + len ;
match change {
Retain ( _ ) = > { }
Delete ( _ ) = > {
let ( start_byte , start_position ) = point_at_pos ( old_text , old_pos ) ;
let ( old_end_byte , old_end_position ) = point_at_pos ( old_text , old_end ) ;
// deletion
edits . push ( tree_sitter ::InputEdit {
start_byte , // old_pos to byte
old_end_byte , // old_end to byte
new_end_byte : start_byte , // old_pos to byte
start_position , // old pos to coords
old_end_position , // old_end to coords
new_end_position : start_position , // old pos to coords
} ) ;
}
Insert ( s ) = > {
let ( start_byte , start_position ) = point_at_pos ( old_text , old_pos ) ;
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
// a subsequent delete means a replace, consume it
if let Some ( Delete ( len ) ) = iter . peek ( ) {
old_end = old_pos + len ;
2021-02-18 17:34:22 +08:00
let ( old_end_byte , old_end_position ) = point_at_pos ( old_text , old_end ) ;
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
iter . next ( ) ;
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
// replacement
2021-02-16 10:03:36 +08:00
edits . push ( tree_sitter ::InputEdit {
2021-11-06 23:21:03 +08:00
start_byte , // old_pos to byte
old_end_byte , // old_end to byte
new_end_byte : start_byte + s . len ( ) , // old_pos to byte + s.len()
start_position , // old pos to coords
old_end_position , // old_end to coords
new_end_position : traverse ( start_position , s ) , // old pos + chars, newlines matter too (iter over)
} ) ;
} else {
// insert
edits . push ( tree_sitter ::InputEdit {
start_byte , // old_pos to byte
old_end_byte : start_byte , // same
new_end_byte : start_byte + s . len ( ) , // old_pos + s.len()
start_position , // old pos to coords
old_end_position : start_position , // same
new_end_position : traverse ( start_position , s ) , // old pos + chars, newlines matter too (iter over)
2021-02-16 10:03:36 +08:00
} ) ;
2020-09-17 13:57:49 +08:00
}
}
}
2021-11-06 23:21:03 +08:00
old_pos = old_end ;
2020-09-17 13:57:49 +08:00
}
2021-11-06 23:21:03 +08:00
edits
2020-09-17 13:57:49 +08:00
}
2020-09-11 13:14:44 +08:00
2021-07-02 03:24:22 +08:00
#[ cfg(test) ]
mod test {
2024-03-15 10:47:07 +08:00
use tree_sitter ::QueryCursor ;
2021-07-02 03:24:22 +08:00
use super ::* ;
use crate ::{ Rope , Transaction } ;
2022-02-01 21:43:41 +08:00
/// Checks that a quantified capture (`(line_comment)+`) spans both comment lines.
#[test]
fn test_textobject_queries() {
    let query_str = r#"
    (line_comment)+ @quantified_nodes
    ((line_comment)+) @quantified_nodes_grouped
    ((line_comment) (line_comment)) @multiple_nodes_grouped
    "#;
    // The comment lines must start at column 0: the asserted byte range begins at byte 1,
    // right after the leading newline.
    let source = Rope::from_str(
        r#"
/// a comment on
/// multiple lines
        "#,
    );

    let language = get_language("rust").unwrap();

    let query = Query::new(&language, query_str).unwrap();
    let textobject = TextObjectQuery { query };
    let mut cursor = QueryCursor::new();

    let config = HighlightConfiguration::new(language, "", "", "").unwrap();
    let syntax = Syntax::new(source.slice(..), Arc::new(config), |_| None).unwrap();

    let root = syntax.tree().root_node();
    let mut expect_first_match = |capture, range| {
        let nodes: Vec<_> = textobject
            .capture_nodes(capture, root, source.slice(..), &mut cursor)
            .unwrap()
            .collect();

        assert_eq!(
            nodes[0].byte_range(),
            range,
            "@{} expected {:?}",
            capture,
            range
        )
    };

    expect_first_match("quantified_nodes", 1..37);
    // NOTE: Enable after implementing proper node group capturing
    // expect_first_match("quantified_nodes_grouped", 1..37);
    // expect_first_match("multiple_nodes_grouped", 1..37);
}
2021-07-02 03:24:22 +08:00
/// Parses a tiny Rust snippet with the real highlight and injection queries and checks
/// the resulting tree shape.
#[test]
fn test_parser() {
    let highlight_names: Vec<String> = [
        "attribute",
        "constant",
        "function.builtin",
        "function",
        "keyword",
        "operator",
        "property",
        "punctuation",
        "punctuation.bracket",
        "punctuation.delimiter",
        "string",
        "string.special",
        "tag",
        "type",
        "type.builtin",
        "variable",
        "variable.builtin",
        "variable.parameter",
    ]
    .iter()
    .map(|&name| String::from(name))
    .collect();

    let language = get_language("rust").unwrap();
    let highlights =
        std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm")
            .unwrap();
    let injections =
        std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm")
            .unwrap();
    let config = HighlightConfiguration::new(
        language,
        &highlights,
        &injections,
        "", // locals.scm
    )
    .unwrap();
    config.configure(&highlight_names);

    let source = Rope::from_str(
        "
        struct Stuff {}
        fn main() {}
    ",
    );
    let syntax = Syntax::new(source.slice(..), Arc::new(config), |_| None).unwrap();
    let tree = syntax.tree();
    let root = tree.root_node();

    assert_eq!(root.kind(), "source_file");
    assert_eq!(
        root.to_sexp(),
        concat!(
            "(source_file ",
            "(struct_item name: (type_identifier) body: (field_declaration_list)) ",
            "(function_item name: (identifier) parameters: (parameters) body: (block)))"
        )
    );

    let first_child = root.child(0).unwrap();
    assert_eq!(first_child.kind(), "struct_item");
}
2020-09-17 13:57:49 +08:00
2021-07-02 03:24:22 +08:00
/// Checks `generate_edits` on a replace-plus-delete transaction and on the single
/// insertion taken from the tree-sitter documentation.
#[test]
fn test_input_edits() {
    use tree_sitter::InputEdit;

    let doc = Rope::from("hello world!\ntest 123");
    let transaction = Transaction::change(
        &doc,
        vec![(6, 11, Some("test".into())), (12, 17, None)].into_iter(),
    );
    let edits = generate_edits(doc.slice(..), transaction.changes());
    // transaction.apply(&mut state);

    let replaced_word = InputEdit {
        start_byte: 6,
        old_end_byte: 11,
        new_end_byte: 10,
        start_position: Point { row: 0, column: 6 },
        old_end_position: Point { row: 0, column: 11 },
        new_end_position: Point { row: 0, column: 10 },
    };
    let deleted_across_newline = InputEdit {
        start_byte: 12,
        old_end_byte: 17,
        new_end_byte: 12,
        start_position: Point { row: 0, column: 12 },
        old_end_position: Point { row: 1, column: 4 },
        new_end_position: Point { row: 0, column: 12 },
    };
    assert_eq!(edits, &[replaced_word, deleted_across_newline]);

    // Testing with the official example from tree-sitter
    let mut doc = Rope::from("fn test() {}");
    let transaction =
        Transaction::change(&doc, vec![(8, 8, Some("a: u32".into()))].into_iter());
    let edits = generate_edits(doc.slice(..), transaction.changes());
    transaction.apply(&mut doc);

    assert_eq!(doc, "fn test(a: u32) {}");

    let inserted_parameter = InputEdit {
        start_byte: 8,
        old_end_byte: 8,
        new_end_byte: 14,
        start_position: Point { row: 0, column: 8 },
        old_end_position: Point { row: 0, column: 8 },
        new_end_position: Point { row: 0, column: 14 },
    };
    assert_eq!(edits, &[inserted_parameter]);
}
2021-06-06 01:12:59 +08:00
2022-10-20 06:17:50 +08:00
#[ track_caller ]
Use TreeCursor to pretty-print :tree-sitter-subtree (#4606)
The current `:tree-sitter-subtree` has a bug for field-names when the
field name belongs to an unnamed child node. Take this ruby example:
def self.method_name
true
end
The subtree given by tree-sitter-cli is:
(singleton_method [2, 0] - [4, 3]
object: (self [2, 4] - [2, 8])
name: (identifier [2, 9] - [2, 20])
body: (body_statement [3, 2] - [3, 6]
(true [3, 2] - [3, 6])))
But the `:tree-sitter-subtree` output was
(singleton_method
object: (self)
body: (identifier)
(body_statement (true)))
The `singleton_method` rule defines the `name` and `body` fields in an
unnamed helper rule `_method_rest` and the old implementation of
`pretty_print_tree_impl` would pass the `field_name` down from the
named `singleton_method` node.
To fix it we switch to the [TreeCursor] API which is recommended by
the tree-sitter docs for traversing the tree. `TreeCursor::field_name`
accurately determines the field name for the current cursor position
even when the node is unnamed.
[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
2022-11-17 09:03:02 +08:00
fn assert_pretty_print (
language_name : & str ,
source : & str ,
expected : & str ,
start : usize ,
end : usize ,
) {
2022-10-20 06:17:50 +08:00
let source = Rope ::from_str ( source ) ;
Use TreeCursor to pretty-print :tree-sitter-subtree (#4606)
The current `:tree-sitter-subtree` has a bug for field-names when the
field name belongs to an unnamed child node. Take this ruby example:
def self.method_name
true
end
The subtree given by tree-sitter-cli is:
(singleton_method [2, 0] - [4, 3]
object: (self [2, 4] - [2, 8])
name: (identifier [2, 9] - [2, 20])
body: (body_statement [3, 2] - [3, 6]
(true [3, 2] - [3, 6])))
But the `:tree-sitter-subtree` output was
(singleton_method
object: (self)
body: (identifier)
(body_statement (true)))
The `singleton_method` rule defines the `name` and `body` fields in an
unnamed helper rule `_method_rest` and the old implementation of
`pretty_print_tree_impl` would pass the `field_name` down from the
named `singleton_method` node.
To fix it we switch to the [TreeCursor] API which is recommended by
the tree-sitter docs for traversing the tree. `TreeCursor::field_name`
accurately determines the field name for the current cursor position
even when the node is unnamed.
[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
2022-11-17 09:03:02 +08:00
let language = get_language ( language_name ) . unwrap ( ) ;
2022-10-20 06:17:50 +08:00
let config = HighlightConfiguration ::new ( language , " " , " " , " " ) . unwrap ( ) ;
2024-03-15 10:47:07 +08:00
let syntax = Syntax ::new ( source . slice ( .. ) , Arc ::new ( config ) , | _ | None ) . unwrap ( ) ;
2022-10-20 06:17:50 +08:00
let root = syntax
. tree ( )
. root_node ( )
. descendant_for_byte_range ( start , end )
. unwrap ( ) ;
let mut output = String ::new ( ) ;
pretty_print_tree ( & mut output , root ) . unwrap ( ) ;
assert_eq! ( expected , output ) ;
}
/// Exercises the tree pretty-printer on a leaf, a nested tree with fields, a single
/// token, an error node, and a Ruby method whose fields sit under an unnamed helper rule.
#[test]
fn test_pretty_print() {
    let comment = r#"// Hello"#;
    assert_pretty_print("rust", comment, "(line_comment)", 0, comment.len());

    // A large tree should be indented with fields:
    let function = r#"fn main() {
        println!("Hello, World!");
    }"#;
    assert_pretty_print(
        "rust",
        function,
        concat!(
            "(function_item\n",
            "  name: (identifier)\n",
            "  parameters: (parameters)\n",
            "  body: (block\n",
            "    (expression_statement\n",
            "      (macro_invocation\n",
            "        macro: (identifier)\n",
            "        (token_tree\n",
            "          (string_literal\n",
            "            (string_content)))))))",
        ),
        0,
        function.len(),
    );

    // Selecting a token should print just that token:
    let empty_main = r#"fn main() {}"#;
    assert_pretty_print("rust", empty_main, r#""fn""#, 0, 1);

    // Error nodes are printed as errors:
    let broken = r#"}{"#;
    assert_pretty_print("rust", broken, "(ERROR)", 0, broken.len());

    // Fields broken under unnamed nodes are determined correctly.
    // In the following source, `object` belongs to the `singleton_method`
    // rule but `name` and `body` belong to an unnamed helper `_method_rest`.
    // This can cause a bug with a pretty-printing implementation that
    // uses `Node::field_name_for_child` to determine field names but is
    // fixed when using `tree_sitter::TreeCursor::field_name`.
    let ruby_method = "def self.method_name
      true
    end";
    assert_pretty_print(
        "ruby",
        ruby_method,
        concat!(
            "(singleton_method\n",
            "  object: (self)\n",
            "  name: (identifier)\n",
            "  body: (body_statement\n",
            "    (true)))"
        ),
        0,
        ruby_method.len(),
    );
}
2021-07-02 03:24:22 +08:00
/// Loading an existing runtime query file succeeds; a missing one reports an error.
#[test]
fn test_load_runtime_file() {
    // Test to make sure we can load some data from the runtime directory.
    let indents = load_runtime_file("rust", "indents.scm").unwrap();
    assert!(!indents.is_empty());

    let missing = load_runtime_file("rust", "does-not-exist");
    assert!(missing.is_err());
}
2021-06-06 01:12:59 +08:00
}