2021-07-02 03:24:22 +08:00
use crate ::{
2022-02-25 16:36:54 +08:00
auto_pairs ::AutoPairs ,
2021-07-02 03:24:22 +08:00
chars ::char_is_line_ending ,
2021-12-25 13:32:43 +08:00
diagnostic ::Severity ,
2021-07-02 03:24:22 +08:00
regex ::Regex ,
transaction ::{ ChangeSet , Operation } ,
Rope , RopeSlice , Tendril ,
} ;
2022-01-03 11:52:01 +08:00
use arc_swap ::{ ArcSwap , Guard } ;
2021-11-06 23:21:03 +08:00
use slotmap ::{ DefaultKey as LayerId , HopSlotMap } ;
2021-06-19 19:26:52 +08:00
2021-03-22 11:40:07 +08:00
use std ::{
borrow ::Cow ,
cell ::RefCell ,
2022-01-07 08:19:20 +08:00
collections ::{ HashMap , HashSet , VecDeque } ,
2021-06-07 22:34:19 +08:00
fmt ,
2021-07-02 03:24:22 +08:00
path ::Path ,
2022-02-25 16:36:54 +08:00
str ::FromStr ,
2021-03-22 11:40:07 +08:00
sync ::Arc ,
} ;
2020-09-22 17:23:48 +08:00
2021-03-12 13:46:23 +08:00
use once_cell ::sync ::{ Lazy , OnceCell } ;
2021-03-25 14:26:25 +08:00
use serde ::{ Deserialize , Serialize } ;
2022-02-16 21:57:20 +08:00
use helix_loader ::grammar ::{ get_language , load_runtime_file } ;
2022-02-14 00:42:18 +08:00
2021-09-16 14:47:51 +08:00
fn deserialize_regex < ' de , D > ( deserializer : D ) -> Result < Option < Regex > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
Option ::< String > ::deserialize ( deserializer ) ?
. map ( | buf | Regex ::new ( & buf ) . map_err ( serde ::de ::Error ::custom ) )
. transpose ( )
}
2021-10-08 10:14:12 +08:00
fn deserialize_lsp_config < ' de , D > ( deserializer : D ) -> Result < Option < serde_json ::Value > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
Option ::< toml ::Value > ::deserialize ( deserializer ) ?
. map ( | toml | toml . try_into ( ) . map_err ( serde ::de ::Error ::custom ) )
. transpose ( )
}
2022-02-25 16:36:54 +08:00
pub fn deserialize_auto_pairs < ' de , D > ( deserializer : D ) -> Result < Option < AutoPairs > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
Ok ( Option ::< AutoPairConfig > ::deserialize ( deserializer ) ? . and_then ( AutoPairConfig ::into ) )
}
2022-05-11 08:54:35 +08:00
/// Default for `LanguageServerConfiguration::timeout` when the key is absent
/// (referenced through `#[serde(default = "default_timeout")]`).
fn default_timeout() -> u64 {
    const DEFAULT_TIMEOUT: u64 = 20;
    DEFAULT_TIMEOUT
}
2021-06-07 22:34:19 +08:00
/// Root of the (de)serialized languages configuration.
#[derive(Debug, Serialize, Deserialize)]
pub struct Configuration {
    /// One entry per configured language; consumed by [`Loader::new`].
    pub language: Vec<LanguageConfiguration>,
}
2020-09-22 17:23:48 +08:00
// largely based on tree-sitter/cli/src/loader.rs
/// Settings for a single language.
///
/// Keys are (de)serialized in kebab-case and unknown keys are rejected.
/// The `OnceCell` fields are runtime caches that are never (de)serialized;
/// they are filled lazily by the accessor methods of the same name.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
pub struct LanguageConfiguration {
    /// Language identifier; appears as `name` in the serialized form.
    #[serde(rename = "name")]
    pub language_id: String, // c-sharp, rust
    pub scope: String,           // source.rust
    pub file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc>
    #[serde(default)]
    pub shebangs: Vec<String>, // interpreter(s) associated with language
    pub roots: Vec<String>,        // these indicate project roots <.git, Cargo.toml>
    pub comment_token: Option<String>,
    pub max_line_length: Option<usize>,

    /// Free-form configuration, read as TOML and stored as JSON
    /// (see `deserialize_lsp_config`). Never serialized.
    #[serde(default, skip_serializing, deserialize_with = "deserialize_lsp_config")]
    pub config: Option<serde_json::Value>,

    #[serde(default)]
    pub auto_format: bool,

    #[serde(default)]
    pub diagnostic_severity: Severity,

    pub grammar: Option<String>, // tree-sitter grammar name, defaults to language_id

    // content_regex
    /// Pattern matched against injection strings in
    /// `Loader::language_configuration_for_injection_string`. Never serialized.
    #[serde(default, skip_serializing, deserialize_with = "deserialize_regex")]
    pub injection_regex: Option<Regex>,
    // first_line_regex
    //
    /// Lazily initialised highlight configuration; `Some(None)` once
    /// initialisation ran but produced no configuration.
    #[serde(skip)]
    pub(crate) highlight_config: OnceCell<Option<Arc<HighlightConfiguration>>>,
    // tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583
    #[serde(skip_serializing_if = "Option::is_none")]
    pub language_server: Option<LanguageServerConfiguration>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub indent: Option<IndentationConfiguration>,

    /// Lazily loaded `indents.scm` query (see `indent_query()`).
    #[serde(skip)]
    pub(crate) indent_query: OnceCell<Option<Query>>,
    /// Lazily loaded `textobjects.scm` query (see `textobject_query()`).
    #[serde(skip)]
    pub(crate) textobject_query: OnceCell<Option<TextObjectQuery>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub debugger: Option<DebugAdapterConfig>,

    /// Automatic insertion of pairs to parentheses, brackets,
    /// etc. Defaults to true. Optionally, this can be a list of 2-tuples
    /// to specify a list of characters to pair. This overrides the
    /// global setting.
    #[serde(default, skip_serializing, deserialize_with = "deserialize_auto_pairs")]
    pub auto_pairs: Option<AutoPairs>,

    pub rulers: Option<Vec<u16>>, // if set, override editor's rulers
}
2021-06-07 22:34:19 +08:00
/// Language-server settings attached to a [`LanguageConfiguration`]
/// (kebab-case keys).
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct LanguageServerConfiguration {
    // presumably the executable used to start the server; confirm against the consumer
    pub command: String,
    /// Defaults to an empty list, and is omitted from serialized output when empty.
    #[serde(default)]
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub args: Vec<String>,
    /// Falls back to `default_timeout()` when the key is absent.
    // NOTE(review): the unit is not visible in this file; confirm with the consumer.
    #[serde(default = "default_timeout")]
    pub timeout: u64,
    pub language_id: Option<String>,
}
2021-08-29 19:51:47 +08:00
/// Structured form of a debug-template completion entry
/// (the `Advanced` variant of [`DebugConfigCompletion`]). All fields are optional.
#[derive(Debug, PartialEq, Clone, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct AdvancedCompletion {
    pub name: Option<String>,
    pub completion: Option<String>,
    pub default: Option<String>,
}
/// A completion entry of a [`DebugTemplate`].
///
/// Untagged: the serialized form is either a bare string (`Named`) or a
/// table matching [`AdvancedCompletion`] (`Advanced`).
#[derive(Debug, PartialEq, Clone, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case", untagged)]
pub enum DebugConfigCompletion {
    Named(String),
    Advanced(AdvancedCompletion),
}
2021-10-24 22:24:18 +08:00
/// Value of a single [`DebugTemplate`] argument.
///
/// Untagged: the serialized form is a string, a list of strings, or a boolean.
#[derive(Debug, PartialEq, Clone, Deserialize, Serialize)]
#[serde(untagged)]
pub enum DebugArgumentValue {
    String(String),
    Array(Vec<String>),
    Boolean(bool),
}
2021-08-29 19:51:47 +08:00
/// A named debug template listed in [`DebugAdapterConfig::templates`]
/// (kebab-case keys).
#[derive(Debug, PartialEq, Clone, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct DebugTemplate {
    pub name: String,
    // NOTE(review): presumably the debug-adapter request kind (e.g. launch/attach); confirm
    pub request: String,
    pub completion: Vec<DebugConfigCompletion>,
    /// Argument values keyed by argument name.
    pub args: HashMap<String, DebugArgumentValue>,
}
/// Debug-adapter settings attached to a [`LanguageConfiguration`]
/// (kebab-case keys).
#[derive(Debug, PartialEq, Clone, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct DebugAdapterConfig {
    pub name: String,
    pub transport: String,
    /// Defaults to the empty string when absent.
    #[serde(default)]
    pub command: String,
    /// Defaults to an empty list when absent.
    #[serde(default)]
    pub args: Vec<String>,
    pub port_arg: Option<String>,
    pub templates: Vec<DebugTemplate>,
    /// Adapter-specific workarounds; defaults to `DebuggerQuirks::default()`.
    #[serde(default)]
    pub quirks: DebuggerQuirks,
}
2021-10-17 12:58:11 +08:00
// Different workarounds for adapters' differences
/// Per-adapter workaround switches; every switch defaults to `false`.
#[derive(Debug, Default, PartialEq, Clone, Serialize, Deserialize)]
pub struct DebuggerQuirks {
    // NOTE(review): presumably makes paths sent to the adapter absolute; confirm with the consumer
    #[serde(default)]
    pub absolute_paths: bool,
}
2021-06-07 22:34:19 +08:00
/// Indentation settings for a language (kebab-case keys: `tab-width`, `unit`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct IndentationConfiguration {
    pub tab_width: usize,
    // NOTE(review): presumably the literal text inserted per indent level; confirm
    pub unit: String,
}
2022-02-25 16:36:54 +08:00
/// Configuration for auto pairs
///
/// Untagged: the serialized form is either a boolean (`Enable`) or a
/// character-to-character map (`Pairs`). It is converted to the runtime
/// `Option<AutoPairs>` by the `From` impls on this type.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", deny_unknown_fields, untagged)]
pub enum AutoPairConfig {
    /// Enables or disables auto pairing. False means disabled. True means to use the default pairs.
    Enable(bool),

    /// The mappings of pairs.
    Pairs(HashMap<char, char>),
}
impl Default for AutoPairConfig {
fn default ( ) -> Self {
AutoPairConfig ::Enable ( true )
}
}
impl From < & AutoPairConfig > for Option < AutoPairs > {
fn from ( auto_pair_config : & AutoPairConfig ) -> Self {
match auto_pair_config {
AutoPairConfig ::Enable ( false ) = > None ,
AutoPairConfig ::Enable ( true ) = > Some ( AutoPairs ::default ( ) ) ,
AutoPairConfig ::Pairs ( pairs ) = > Some ( AutoPairs ::new ( pairs . iter ( ) ) ) ,
}
}
}
impl From<AutoPairConfig> for Option<AutoPairs> {
    /// By-value convenience conversion; delegates to the by-reference impl.
    fn from(auto_pairs_config: AutoPairConfig) -> Self {
        Option::<AutoPairs>::from(&auto_pairs_config)
    }
}
impl FromStr for AutoPairConfig {
type Err = std ::str ::ParseBoolError ;
// only do bool parsing for runtime setting
fn from_str ( s : & str ) -> Result < Self , Self ::Err > {
let enable : bool = s . parse ( ) ? ;
2022-03-15 16:04:22 +08:00
Ok ( AutoPairConfig ::Enable ( enable ) )
2022-02-25 16:36:54 +08:00
}
}
2021-10-23 10:41:19 +08:00
/// Wrapper around the query compiled from a language's `textobjects.scm`
/// (built in `LanguageConfiguration::textobject_query`).
#[derive(Debug)]
pub struct TextObjectQuery {
    /// The compiled query that the capture methods run.
    pub query: Query,
}
2022-05-20 09:19:46 +08:00
/// Result of one text-object capture: either a single node or several nodes
/// captured together within the same match.
#[derive(Debug)]
pub enum CapturedNode<'a> {
    /// Exactly one node matched the capture.
    Single(Node<'a>),
    /// Guaranteed to be not empty
    Grouped(Vec<Node<'a>>),
}
impl < ' a > CapturedNode < ' a > {
pub fn start_byte ( & self ) -> usize {
match self {
Self ::Single ( n ) = > n . start_byte ( ) ,
Self ::Grouped ( ns ) = > ns [ 0 ] . start_byte ( ) ,
}
}
pub fn end_byte ( & self ) -> usize {
match self {
Self ::Single ( n ) = > n . end_byte ( ) ,
Self ::Grouped ( ns ) = > ns . last ( ) . unwrap ( ) . end_byte ( ) ,
}
}
pub fn byte_range ( & self ) -> std ::ops ::Range < usize > {
self . start_byte ( ) .. self . end_byte ( )
}
}
2021-10-23 10:41:19 +08:00
impl TextObjectQuery {
/// Run the query on the given node and return sub nodes which match given
/// capture ("function.inside", "class.around", etc).
2022-02-01 21:43:41 +08:00
///
/// Captures may contain multiple nodes by using quantifiers (+, *, etc),
/// and support for this is partial and could use improvement.
///
/// ```query
/// (comment)+ @capture
///
2022-05-20 09:19:46 +08:00
/// ; OR
2022-02-01 21:43:41 +08:00
/// (
2022-05-20 09:19:46 +08:00
/// (comment)*
/// .
2022-02-01 21:43:41 +08:00
/// (function)
/// ) @capture
/// ```
2021-10-23 10:41:19 +08:00
pub fn capture_nodes < ' a > (
& ' a self ,
capture_name : & str ,
node : Node < ' a > ,
slice : RopeSlice < ' a > ,
cursor : & ' a mut QueryCursor ,
2022-02-01 21:43:41 +08:00
) -> Option < impl Iterator < Item = CapturedNode < ' a > > > {
2022-02-11 11:31:52 +08:00
self . capture_nodes_any ( & [ capture_name ] , node , slice , cursor )
}
/// Find the first capture that exists out of all given `capture_names`
/// and return sub nodes that match this capture.
pub fn capture_nodes_any < ' a > (
& ' a self ,
capture_names : & [ & str ] ,
node : Node < ' a > ,
slice : RopeSlice < ' a > ,
cursor : & ' a mut QueryCursor ,
2022-02-01 21:43:41 +08:00
) -> Option < impl Iterator < Item = CapturedNode < ' a > > > {
2022-02-11 11:31:52 +08:00
let capture_idx = capture_names
. iter ( )
. find_map ( | cap | self . query . capture_index_for_name ( cap ) ) ? ;
2022-05-20 09:19:46 +08:00
let nodes = cursor
. captures ( & self . query , node , RopeProvider ( slice ) )
. filter_map ( move | ( mat , _ ) | {
let nodes : Vec < _ > = mat
. captures
. iter ( )
2022-06-11 23:39:21 +08:00
. filter_map ( | cap | ( cap . index = = capture_idx ) . then ( | | cap . node ) )
2022-05-20 09:19:46 +08:00
. collect ( ) ;
if nodes . len ( ) > 1 {
Some ( CapturedNode ::Grouped ( nodes ) )
} else {
nodes . into_iter ( ) . map ( CapturedNode ::Single ) . next ( )
2022-02-01 21:43:41 +08:00
}
2022-05-20 09:19:46 +08:00
} ) ;
2022-02-01 21:43:41 +08:00
Some ( nodes )
2021-10-23 10:41:19 +08:00
}
}
2021-06-04 04:46:56 +08:00
fn read_query ( language : & str , filename : & str ) -> String {
static INHERITS_REGEX : Lazy < Regex > =
2022-05-05 21:47:28 +08:00
Lazy ::new ( | | Regex ::new ( r ";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*" ) . unwrap ( ) ) ;
2021-06-04 04:46:56 +08:00
let query = load_runtime_file ( language , filename ) . unwrap_or_default ( ) ;
2021-04-08 22:18:25 +08:00
// TODO: the collect() is not ideal
let inherits = INHERITS_REGEX
. captures_iter ( & query )
. flat_map ( | captures | {
captures [ 1 ]
. split ( ',' )
. map ( str ::to_owned )
. collect ::< Vec < _ > > ( )
} )
. collect ::< Vec < _ > > ( ) ;
if inherits . is_empty ( ) {
return query ;
}
let mut queries = inherits
. iter ( )
. map ( | language | read_query ( language , filename ) )
. collect ::< Vec < _ > > ( ) ;
queries . push ( query ) ;
queries . concat ( )
}
2020-09-22 17:23:48 +08:00
impl LanguageConfiguration {
2021-06-19 19:26:52 +08:00
fn initialize_highlight ( & self , scopes : & [ String ] ) -> Option < Arc < HighlightConfiguration > > {
2021-07-11 18:36:45 +08:00
let language = self . language_id . to_ascii_lowercase ( ) ;
2021-04-07 17:05:59 +08:00
2021-06-19 19:26:52 +08:00
let highlights_query = read_query ( & language , " highlights.scm " ) ;
// always highlight syntax errors
// highlights_query += "\n(ERROR) @error";
2020-09-22 17:23:48 +08:00
2021-06-19 19:26:52 +08:00
let injections_query = read_query ( & language , " injections.scm " ) ;
2021-07-09 00:11:20 +08:00
let locals_query = read_query ( & language , " locals.scm " ) ;
2020-09-22 17:23:48 +08:00
2021-06-19 19:26:52 +08:00
if highlights_query . is_empty ( ) {
None
} else {
2022-02-16 21:57:20 +08:00
let language = get_language ( self . grammar . as_deref ( ) . unwrap_or ( & self . language_id ) )
. map_err ( | e | log ::info! ( " {} " , e ) )
. ok ( ) ? ;
2021-07-02 03:55:18 +08:00
let config = HighlightConfiguration ::new (
2021-06-19 19:26:52 +08:00
language ,
& highlights_query ,
& injections_query ,
2021-07-09 00:11:20 +08:00
& locals_query ,
2022-01-17 23:53:25 +08:00
)
2022-03-22 18:52:57 +08:00
. unwrap_or_else ( | query_error | panic! ( " Could not parse queries for language {:?} . Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?} " , self . language_id , query_error ) ) ;
2021-09-06 17:13:52 +08:00
2021-06-19 19:26:52 +08:00
config . configure ( scopes ) ;
Some ( Arc ::new ( config ) )
}
}
2021-06-19 19:52:28 +08:00
pub fn reconfigure ( & self , scopes : & [ String ] ) {
if let Some ( Some ( config ) ) = self . highlight_config . get ( ) {
config . configure ( scopes ) ;
2021-06-19 19:26:52 +08:00
}
}
2021-06-19 19:52:28 +08:00
pub fn highlight_config ( & self , scopes : & [ String ] ) -> Option < Arc < HighlightConfiguration > > {
self . highlight_config
. get_or_init ( | | self . initialize_highlight ( scopes ) )
2021-02-24 15:07:39 +08:00
. clone ( )
2020-09-22 17:23:48 +08:00
}
2020-11-05 14:15:19 +08:00
2021-06-19 19:26:52 +08:00
pub fn is_highlight_initialized ( & self ) -> bool {
self . highlight_config . get ( ) . is_some ( )
2020-09-22 17:23:48 +08:00
}
2020-11-05 14:15:19 +08:00
2022-03-30 23:08:07 +08:00
pub fn indent_query ( & self ) -> Option < & Query > {
2021-05-14 18:21:46 +08:00
self . indent_query
. get_or_init ( | | {
2022-03-30 23:08:07 +08:00
let lang_name = self . language_id . to_ascii_lowercase ( ) ;
let query_text = read_query ( & lang_name , " indents.scm " ) ;
2022-04-01 10:27:06 +08:00
if query_text . is_empty ( ) {
return None ;
}
2022-03-30 23:08:07 +08:00
let lang = self . highlight_config . get ( ) ? . as_ref ( ) ? . language ;
Query ::new ( lang , & query_text ) . ok ( )
2021-05-14 18:21:46 +08:00
} )
. as_ref ( )
}
2021-10-23 10:41:19 +08:00
pub fn textobject_query ( & self ) -> Option < & TextObjectQuery > {
self . textobject_query
. get_or_init ( | | -> Option < TextObjectQuery > {
let lang_name = self . language_id . to_ascii_lowercase ( ) ;
let query_text = read_query ( & lang_name , " textobjects.scm " ) ;
let lang = self . highlight_config . get ( ) ? . as_ref ( ) ? . language ;
2022-04-18 23:14:48 +08:00
let query = Query ::new ( lang , & query_text )
. map_err ( | e | log ::error! ( " Failed to parse textobjects.scm queries: {} " , e ) )
. ok ( ) ? ;
2021-10-23 10:41:19 +08:00
Some ( TextObjectQuery { query } )
} )
. as_ref ( )
}
2020-11-05 14:15:19 +08:00
pub fn scope ( & self ) -> & str {
& self . scope
}
2020-09-22 17:23:48 +08:00
}
2022-01-03 11:52:01 +08:00
// Expose loader as Lazy<> global since it's always static?
/// Registry of all language configurations, with lookup tables by file
/// type and by shebang interpreter, plus the currently active scopes.
#[derive(Debug)]
pub struct Loader {
    // highlight_names ?
    /// All configurations; the index in this vector is the id stored in the maps below.
    language_configs: Vec<Arc<LanguageConfiguration>>,
    /// File type (file name or extension) -> index into `language_configs`.
    language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize>
    /// Shebang interpreter name -> index into `language_configs`.
    language_config_ids_by_shebang: HashMap<String, usize>,

    /// Scopes most recently passed to `set_scopes`; swapped atomically.
    scopes: ArcSwap<Vec<String>>,
}
impl Loader {
2021-06-19 19:26:52 +08:00
pub fn new ( config : Configuration ) -> Self {
2021-03-24 13:52:13 +08:00
let mut loader = Self {
2020-09-22 17:23:48 +08:00
language_configs : Vec ::new ( ) ,
language_config_ids_by_file_type : HashMap ::new ( ) ,
2021-11-08 23:19:44 +08:00
language_config_ids_by_shebang : HashMap ::new ( ) ,
2022-01-03 11:52:01 +08:00
scopes : ArcSwap ::from_pointee ( Vec ::new ( ) ) ,
2020-09-22 17:23:48 +08:00
} ;
2021-03-25 14:26:25 +08:00
for config in config . language {
2020-09-22 17:23:48 +08:00
// get the next id
let language_id = loader . language_configs . len ( ) ;
for file_type in & config . file_types {
// entry().or_insert(Vec::new).push(language_id);
loader
. language_config_ids_by_file_type
. insert ( file_type . clone ( ) , language_id ) ;
}
2021-11-08 23:19:44 +08:00
for shebang in & config . shebangs {
loader
. language_config_ids_by_shebang
. insert ( shebang . clone ( ) , language_id ) ;
}
2020-09-22 17:23:48 +08:00
loader . language_configs . push ( Arc ::new ( config ) ) ;
}
loader
}
pub fn language_config_for_file_name ( & self , path : & Path ) -> Option < Arc < LanguageConfiguration > > {
// Find all the language configurations that match this file name
// or a suffix of the file name.
let configuration_id = path
. file_name ( )
. and_then ( | n | n . to_str ( ) )
. and_then ( | file_name | self . language_config_ids_by_file_type . get ( file_name ) )
. or_else ( | | {
path . extension ( )
. and_then ( | extension | extension . to_str ( ) )
. and_then ( | extension | self . language_config_ids_by_file_type . get ( extension ) )
} ) ;
configuration_id . and_then ( | & id | self . language_configs . get ( id ) . cloned ( ) )
// TODO: content_regex handling conflict resolution
}
2020-10-14 17:07:42 +08:00
2021-11-08 23:30:34 +08:00
pub fn language_config_for_shebang ( & self , source : & Rope ) -> Option < Arc < LanguageConfiguration > > {
let line = Cow ::from ( source . line ( 0 ) ) ;
2021-12-03 23:13:24 +08:00
static SHEBANG_REGEX : Lazy < Regex > = Lazy ::new ( | | {
Regex ::new ( r "^#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)" ) . unwrap ( )
} ) ;
2021-11-08 23:19:44 +08:00
let configuration_id = SHEBANG_REGEX
2021-11-08 23:30:34 +08:00
. captures ( & line )
2021-11-08 23:19:44 +08:00
. and_then ( | cap | self . language_config_ids_by_shebang . get ( & cap [ 1 ] ) ) ;
configuration_id . and_then ( | & id | self . language_configs . get ( id ) . cloned ( ) )
}
2020-10-14 17:07:42 +08:00
pub fn language_config_for_scope ( & self , scope : & str ) -> Option < Arc < LanguageConfiguration > > {
self . language_configs
. iter ( )
. find ( | config | config . scope = = scope )
. cloned ( )
}
2021-06-19 19:26:52 +08:00
2022-04-05 08:56:14 +08:00
pub fn language_config_for_language_id ( & self , id : & str ) -> Option < Arc < LanguageConfiguration > > {
self . language_configs
. iter ( )
. find ( | config | config . language_id = = id )
. cloned ( )
}
2021-09-16 14:47:51 +08:00
pub fn language_configuration_for_injection_string (
& self ,
string : & str ,
) -> Option < Arc < LanguageConfiguration > > {
let mut best_match_length = 0 ;
let mut best_match_position = None ;
for ( i , configuration ) in self . language_configs . iter ( ) . enumerate ( ) {
if let Some ( injection_regex ) = & configuration . injection_regex {
if let Some ( mat ) = injection_regex . find ( string ) {
let length = mat . end ( ) - mat . start ( ) ;
if length > best_match_length {
best_match_position = Some ( i ) ;
best_match_length = length ;
}
}
}
}
if let Some ( i ) = best_match_position {
let configuration = & self . language_configs [ i ] ;
return Some ( configuration . clone ( ) ) ;
}
None
}
2022-01-03 11:52:01 +08:00
2022-04-05 08:56:14 +08:00
pub fn language_configs ( & self ) -> impl Iterator < Item = & Arc < LanguageConfiguration > > {
self . language_configs . iter ( )
}
2022-01-03 11:52:01 +08:00
pub fn set_scopes ( & self , scopes : Vec < String > ) {
self . scopes . store ( Arc ::new ( scopes ) ) ;
// Reconfigure existing grammars
for config in self
. language_configs
. iter ( )
. filter ( | cfg | cfg . is_highlight_initialized ( ) )
{
config . reconfigure ( & self . scopes ( ) ) ;
}
}
pub fn scopes ( & self ) -> Guard < Arc < Vec < String > > > {
self . scopes . load ( )
2021-06-19 19:26:52 +08:00
}
2020-09-22 17:23:48 +08:00
}
2021-03-27 11:14:58 +08:00
/// A tree-sitter parser together with a pool of reusable query cursors.
/// One instance lives in the thread-local `PARSER`.
pub struct TsParser {
    /// Parser handed to `LanguageLayer::parse` in `Syntax::update`.
    parser: tree_sitter::Parser,
    /// Cursor pool: users pop a cursor (or create one if empty) and push it back when done.
    pub cursors: Vec<QueryCursor>,
}
2020-09-17 13:57:49 +08:00
2021-03-12 13:46:23 +08:00
// could also just use a pool, or a single instance?
// Per-thread parser state: starts with a fresh parser and an empty cursor pool.
// Accessed through `PARSER.with(..)` and borrowed mutably via the `RefCell`.
thread_local! {
    pub static PARSER: RefCell<TsParser> = RefCell::new(TsParser {
        parser: Parser::new(),
        cursors: Vec::new(),
    })
}
2021-06-07 22:34:19 +08:00
/// Syntax state of a document: one language layer per parsed language
/// region (the root language plus any injected languages).
#[derive(Debug)]
pub struct Syntax {
    /// All language layers, keyed by `LayerId`.
    layers: HopSlotMap<LayerId, LanguageLayer>,
    /// Id of the root layer, which covers the whole document.
    root: LayerId,
    /// Used to resolve injected languages and the active scopes.
    loader: Arc<Loader>,
}
2021-03-16 12:51:22 +08:00
/// Returns the text of `source` within the given byte `range` as a `Cow<str>`.
fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> {
    source.byte_slice(range).into()
}
2020-09-17 13:57:49 +08:00
impl Syntax {
2021-11-06 23:21:03 +08:00
pub fn new ( source : & Rope , config : Arc < HighlightConfiguration > , loader : Arc < Loader > ) -> Self {
let root_layer = LanguageLayer {
tree : None ,
config ,
depth : 0 ,
ranges : vec ! [ Range {
start_byte : 0 ,
end_byte : usize ::MAX ,
start_point : Point ::new ( 0 , 0 ) ,
end_point : Point ::new ( usize ::MAX , usize ::MAX ) ,
} ] ,
} ;
2020-09-17 13:57:49 +08:00
// track scope_descriptor: a Vec of scopes for item in tree
2021-11-06 23:21:03 +08:00
let mut layers = HopSlotMap ::default ( ) ;
let root = layers . insert ( root_layer ) ;
2020-09-17 13:57:49 +08:00
let mut syntax = Self {
2021-11-06 23:21:03 +08:00
root ,
layers ,
loader ,
2020-09-17 13:57:49 +08:00
} ;
2021-11-06 23:21:03 +08:00
syntax
2022-01-23 15:15:27 +08:00
. update ( source , source , & ChangeSet ::new ( source ) )
2021-11-06 23:21:03 +08:00
. unwrap ( ) ;
2020-09-17 13:57:49 +08:00
syntax
}
2020-09-29 00:01:27 +08:00
pub fn update (
& mut self ,
old_source : & Rope ,
source : & Rope ,
changeset : & ChangeSet ,
) -> Result < ( ) , Error > {
2021-11-06 23:21:03 +08:00
let mut queue = VecDeque ::new ( ) ;
2022-01-03 15:20:46 +08:00
queue . push_back ( self . root ) ;
2022-01-09 23:40:02 +08:00
let scopes = self . loader . scopes . load ( ) ;
2021-11-06 23:21:03 +08:00
let injection_callback = | language : & str | {
self . loader
. language_configuration_for_injection_string ( language )
2022-01-09 23:40:02 +08:00
. and_then ( | language_config | language_config . highlight_config ( & scopes ) )
2021-11-06 23:21:03 +08:00
} ;
2022-01-03 15:20:46 +08:00
// Convert the changeset into tree sitter edits.
let edits = generate_edits ( old_source , changeset ) ;
2022-01-09 23:38:29 +08:00
// Use the edits to update all layers markers
2022-01-03 15:20:46 +08:00
if ! edits . is_empty ( ) {
2022-01-07 08:19:20 +08:00
fn point_add ( a : Point , b : Point ) -> Point {
if b . row > 0 {
Point ::new ( a . row . saturating_add ( b . row ) , b . column )
} else {
Point ::new ( 0 , a . column . saturating_add ( b . column ) )
}
}
fn point_sub ( a : Point , b : Point ) -> Point {
if a . row > b . row {
Point ::new ( a . row . saturating_sub ( b . row ) , a . column )
} else {
Point ::new ( 0 , a . column . saturating_sub ( b . column ) )
}
}
2022-01-03 15:20:46 +08:00
for layer in & mut self . layers . values_mut ( ) {
2022-01-08 16:35:32 +08:00
// The root layer always covers the whole range (0..usize::MAX)
if layer . depth = = 0 {
continue ;
}
2022-01-03 15:20:46 +08:00
for range in & mut layer . ranges {
2022-01-08 16:35:32 +08:00
// Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
2022-01-07 08:19:20 +08:00
for edit in edits . iter ( ) . rev ( ) {
let is_pure_insertion = edit . old_end_byte = = edit . start_byte ;
2022-01-03 15:20:46 +08:00
// if edit is after range, skip
if edit . start_byte > range . end_byte {
2022-01-07 08:19:20 +08:00
// TODO: || (is_noop && edit.start_byte == range.end_byte)
2022-01-03 15:20:46 +08:00
continue ;
}
// if edit is before range, shift entire range by len
2022-01-07 08:19:20 +08:00
if edit . old_end_byte < range . start_byte {
range . start_byte =
edit . new_end_byte + ( range . start_byte - edit . old_end_byte ) ;
range . start_point = point_add (
edit . new_end_position ,
point_sub ( range . start_point , edit . old_end_position ) ,
) ;
range . end_byte = edit
. new_end_byte
. saturating_add ( range . end_byte - edit . old_end_byte ) ;
range . end_point = point_add (
edit . new_end_position ,
point_sub ( range . end_point , edit . old_end_position ) ,
) ;
}
// if the edit starts in the space before and extends into the range
else if edit . start_byte < range . start_byte {
range . start_byte = edit . new_end_byte ;
range . start_point = edit . new_end_position ;
range . end_byte = range
. end_byte
. saturating_sub ( edit . old_end_byte )
. saturating_add ( edit . new_end_byte ) ;
range . end_point = point_add (
edit . new_end_position ,
point_sub ( range . end_point , edit . old_end_position ) ,
) ;
}
// If the edit is an insertion at the start of the tree, shift
else if edit . start_byte = = range . start_byte & & is_pure_insertion {
range . start_byte = edit . new_end_byte ;
range . start_point = edit . new_end_position ;
} else {
range . end_byte = range
. end_byte
. saturating_sub ( edit . old_end_byte )
. saturating_add ( edit . new_end_byte ) ;
range . end_point = point_add (
edit . new_end_position ,
point_sub ( range . end_point , edit . old_end_position ) ,
) ;
}
2022-01-03 15:20:46 +08:00
}
}
2021-11-06 23:21:03 +08:00
}
}
2021-03-12 13:46:23 +08:00
PARSER . with ( | ts_parser | {
2021-11-06 23:21:03 +08:00
let ts_parser = & mut ts_parser . borrow_mut ( ) ;
let mut cursor = ts_parser . cursors . pop ( ) . unwrap_or_else ( QueryCursor ::new ) ;
// TODO: might need to set cursor range
2022-01-09 23:42:06 +08:00
cursor . set_byte_range ( 0 .. usize ::MAX ) ;
2020-09-17 13:57:49 +08:00
2022-01-03 14:35:05 +08:00
let source_slice = source . slice ( .. ) ;
2020-09-17 13:57:49 +08:00
2022-01-03 15:20:46 +08:00
let mut touched = HashSet ::new ( ) ;
2020-09-17 13:57:49 +08:00
2022-01-03 15:20:46 +08:00
// TODO: we should be able to avoid editing & parsing layers with ranges earlier in the document before the edit
2021-11-06 23:21:03 +08:00
while let Some ( layer_id ) = queue . pop_front ( ) {
2022-01-07 08:19:20 +08:00
// Mark the layer as touched
touched . insert ( layer_id ) ;
2022-01-03 15:20:46 +08:00
let layer = & mut self . layers [ layer_id ] ;
// If a tree already exists, notify it of changes.
if let Some ( tree ) = & mut layer . tree {
for edit in edits . iter ( ) . rev ( ) {
// Apply the edits in reverse.
// If we applied them in order then edit 1 would disrupt the positioning of edit 2.
tree . edit ( edit ) ;
}
}
2021-11-06 23:21:03 +08:00
// Re-parse the tree.
2022-01-09 23:50:32 +08:00
layer . parse ( & mut ts_parser . parser , source ) ? ;
2021-11-06 23:21:03 +08:00
2022-01-03 15:20:46 +08:00
// Switch to an immutable borrow.
2021-11-06 23:21:03 +08:00
let layer = & self . layers [ layer_id ] ;
// Process injections.
let matches = cursor . matches (
& layer . config . injections_query ,
layer . tree ( ) . root_node ( ) ,
2022-01-03 14:35:05 +08:00
RopeProvider ( source_slice ) ,
2021-11-06 23:21:03 +08:00
) ;
let mut injections = Vec ::new ( ) ;
for mat in matches {
let ( language_name , content_node , include_children ) = injection_for_match (
& layer . config ,
& layer . config . injections_query ,
& mat ,
2022-01-03 14:35:05 +08:00
source_slice ,
2021-11-06 23:21:03 +08:00
) ;
// Explicitly remove this match so that none of its other captures will remain
// in the stream of captures.
2022-01-03 14:35:05 +08:00
mat . remove ( ) ;
2021-11-06 23:21:03 +08:00
// If a language is found with the given name, then add a new language layer
// to the highlighted document.
if let ( Some ( language_name ) , Some ( content_node ) ) = ( language_name , content_node )
{
if let Some ( config ) = ( injection_callback ) ( & language_name ) {
let ranges =
intersect_ranges ( & layer . ranges , & [ content_node ] , include_children ) ;
if ! ranges . is_empty ( ) {
injections . push ( ( config , ranges ) ) ;
}
}
}
}
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
// Process combined injections.
if let Some ( combined_injections_query ) = & layer . config . combined_injections_query {
let mut injections_by_pattern_index =
vec! [ ( None , Vec ::new ( ) , false ) ; combined_injections_query . pattern_count ( ) ] ;
let matches = cursor . matches (
combined_injections_query ,
layer . tree ( ) . root_node ( ) ,
2022-01-03 14:35:05 +08:00
RopeProvider ( source_slice ) ,
2021-11-06 23:21:03 +08:00
) ;
for mat in matches {
let entry = & mut injections_by_pattern_index [ mat . pattern_index ] ;
let ( language_name , content_node , include_children ) = injection_for_match (
& layer . config ,
combined_injections_query ,
& mat ,
2022-01-03 14:35:05 +08:00
source_slice ,
2021-11-06 23:21:03 +08:00
) ;
if language_name . is_some ( ) {
entry . 0 = language_name ;
}
if let Some ( content_node ) = content_node {
entry . 1. push ( content_node ) ;
}
entry . 2 = include_children ;
}
for ( lang_name , content_nodes , includes_children ) in injections_by_pattern_index
{
if let ( Some ( lang_name ) , false ) = ( lang_name , content_nodes . is_empty ( ) ) {
if let Some ( config ) = ( injection_callback ) ( & lang_name ) {
let ranges = intersect_ranges (
& layer . ranges ,
& content_nodes ,
includes_children ,
) ;
if ! ranges . is_empty ( ) {
injections . push ( ( config , ranges ) ) ;
}
}
}
}
}
let depth = layer . depth + 1 ;
// TODO: can't inline this since matches borrows self.layers
for ( config , ranges ) in injections {
2022-01-03 15:20:46 +08:00
// Find an existing layer
let layer = self
. layers
. iter_mut ( )
. find ( | ( _ , layer ) | {
layer . depth = = depth & & // TODO: track parent id instead
layer . config . language = = config . language & & layer . ranges = = ranges
} )
2022-01-23 15:08:36 +08:00
. map ( | ( id , _layer ) | id ) ;
2022-01-03 15:20:46 +08:00
// ...or insert a new one.
let layer_id = layer . unwrap_or_else ( | | {
self . layers . insert ( LanguageLayer {
tree : None ,
config ,
depth ,
ranges ,
} )
2021-11-06 23:21:03 +08:00
} ) ;
2022-01-03 15:20:46 +08:00
2021-11-06 23:21:03 +08:00
queue . push_back ( layer_id ) ;
}
2022-01-03 15:20:46 +08:00
// TODO: pre-process local scopes at this time, rather than highlight?
// would solve problems with locals not working across boundaries
2021-11-06 23:21:03 +08:00
}
// Return the cursor back in the pool.
ts_parser . cursors . push ( cursor ) ;
2022-01-03 15:20:46 +08:00
// Remove all untouched layers
self . layers . retain ( | id , _ | touched . contains ( & id ) ) ;
2021-11-06 23:21:03 +08:00
2022-01-03 15:20:46 +08:00
Ok ( ( ) )
} )
2020-09-17 13:57:49 +08:00
}
2021-03-26 10:02:32 +08:00
/// Returns the syntax tree of the root layer.
///
/// Panics if the root layer has not been parsed yet, because
/// `LanguageLayer::tree` unwraps its optional tree.
pub fn tree(&self) -> &Tree {
    let root_layer = &self.layers[self.root];
    root_layer.tree()
}
/// Iterate over the highlighted regions for a given slice of source code.
pub fn highlight_iter < ' a > (
2021-10-28 09:24:11 +08:00
& ' a self ,
2021-03-16 12:45:32 +08:00
source : RopeSlice < ' a > ,
2020-09-19 10:55:42 +08:00
range : Option < std ::ops ::Range < usize > > ,
2020-09-17 13:57:49 +08:00
cancellation_flag : Option < & ' a AtomicUsize > ,
2021-03-16 12:49:22 +08:00
) -> impl Iterator < Item = Result < HighlightEvent , Error > > + ' a {
2021-11-06 23:21:03 +08:00
let mut layers = self
. layers
. iter ( )
2022-01-09 23:42:53 +08:00
. filter_map ( | ( _ , layer ) | {
2022-01-17 23:53:25 +08:00
// TODO: if range doesn't overlap layer range, skip it
2021-11-06 23:21:03 +08:00
// Reuse a cursor from the pool if available.
let mut cursor = PARSER . with ( | ts_parser | {
let highlighter = & mut ts_parser . borrow_mut ( ) ;
highlighter . cursors . pop ( ) . unwrap_or_else ( QueryCursor ::new )
} ) ;
// The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
// prevents them from being moved. But both of these values are really just
// pointers, so it's actually ok to move them.
let cursor_ref =
unsafe { mem ::transmute ::< _ , & 'static mut QueryCursor > ( & mut cursor ) } ;
// if reusing cursors & no range this resets to whole range
2022-01-09 23:42:06 +08:00
cursor_ref . set_byte_range ( range . clone ( ) . unwrap_or ( 0 .. usize ::MAX ) ) ;
2021-11-06 23:21:03 +08:00
2022-01-03 14:35:32 +08:00
let mut captures = cursor_ref
2021-11-06 23:21:03 +08:00
. captures (
& layer . config . query ,
layer . tree ( ) . root_node ( ) ,
RopeProvider ( source ) ,
)
. peekable ( ) ;
2022-01-03 14:35:32 +08:00
// If there's no captures, skip the layer
2022-01-23 15:15:27 +08:00
captures . peek ( ) ? ;
2022-01-03 14:35:32 +08:00
Some ( HighlightIterLayer {
2021-11-06 23:21:03 +08:00
highlight_end_stack : Vec ::new ( ) ,
scope_stack : vec ! [ LocalScope {
inherits : false ,
range : 0 .. usize ::MAX ,
local_defs : Vec ::new ( ) ,
} ] ,
cursor ,
_tree : None ,
captures ,
2022-01-03 14:35:32 +08:00
config : layer . config . as_ref ( ) , // TODO: just reuse `layer`
depth : layer . depth , // TODO: just reuse `layer`
ranges : & layer . ranges , // TODO: temp
} )
2021-11-06 23:21:03 +08:00
} )
. collect ::< Vec < _ > > ( ) ;
// HAXX: arrange layers by byte range, with deeper layers positioned first
layers . sort_by_key ( | layer | {
(
layer . ranges . first ( ) . cloned ( ) ,
std ::cmp ::Reverse ( layer . depth ) ,
)
2021-06-10 11:49:34 +08:00
} ) ;
2020-09-17 13:57:49 +08:00
let mut result = HighlightIter {
source ,
2022-01-17 23:53:25 +08:00
byte_offset : range . map_or ( 0 , | r | r . start ) ,
2020-09-17 13:57:49 +08:00
cancellation_flag ,
iter_count : 0 ,
2021-11-06 23:21:03 +08:00
layers ,
2020-09-17 13:57:49 +08:00
next_event : None ,
last_highlight_range : None ,
} ;
result . sort_layers ( ) ;
2021-03-16 12:49:22 +08:00
result
2020-09-17 13:57:49 +08:00
}
// Commenting
// comment_strings_for_pos
// is_commented
// Indentation
// suggested_indent_for_line_at_buffer_row
// suggested_indent_for_buffer_row
// indent_level_for_line
// TODO: Folding
}
2020-09-11 13:14:44 +08:00
2021-06-07 22:34:19 +08:00
#[ derive(Debug) ]
2020-09-11 13:14:44 +08:00
pub struct LanguageLayer {
// mode
2020-09-17 13:57:49 +08:00
// grammar
2021-11-06 23:21:03 +08:00
pub config : Arc < HighlightConfiguration > ,
2020-10-09 15:58:43 +08:00
pub ( crate ) tree : Option < Tree > ,
2021-11-06 23:21:03 +08:00
pub ranges : Vec < Range > ,
pub depth : usize ,
2020-09-11 13:14:44 +08:00
}
2020-09-17 13:57:49 +08:00
impl LanguageLayer {
2021-03-26 10:02:32 +08:00
pub fn tree ( & self ) -> & Tree {
2020-09-17 13:57:49 +08:00
// TODO: no unwrap
self . tree . as_ref ( ) . unwrap ( )
}
2022-01-09 23:50:32 +08:00
fn parse ( & mut self , parser : & mut Parser , source : & Rope ) -> Result < ( ) , Error > {
parser . set_included_ranges ( & self . ranges ) . unwrap ( ) ;
2021-10-19 12:08:06 +08:00
2022-01-09 23:50:32 +08:00
parser
2021-11-06 23:21:03 +08:00
. set_language ( self . config . language )
. map_err ( | _ | Error ::InvalidLanguage ) ? ;
// unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) };
2022-01-09 23:50:32 +08:00
let tree = parser
2021-11-06 23:21:03 +08:00
. parse_with (
& mut | byte , _ | {
if byte < = source . len_bytes ( ) {
let ( chunk , start_byte , _ , _ ) = source . chunk_at_byte ( byte ) ;
chunk [ byte - start_byte .. ] . as_bytes ( )
} else {
// out of range
& [ ]
}
} ,
self . tree . as_ref ( ) ,
)
. ok_or ( Error ::Cancelled ) ? ;
// unsafe { ts_parser.parser.set_cancellation_flag(None) };
self . tree = Some ( tree ) ;
2020-09-17 13:57:49 +08:00
Ok ( ( ) )
}
2021-11-06 23:21:03 +08:00
}
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
pub ( crate ) fn generate_edits (
2022-01-03 15:20:46 +08:00
old_text : & Rope ,
2021-11-06 23:21:03 +08:00
changeset : & ChangeSet ,
) -> Vec < tree_sitter ::InputEdit > {
use Operation ::* ;
let mut old_pos = 0 ;
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
let mut edits = Vec ::new ( ) ;
2020-09-17 13:57:49 +08:00
2022-01-03 15:20:46 +08:00
if changeset . changes . is_empty ( ) {
return edits ;
}
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
let mut iter = changeset . changes . iter ( ) . peekable ( ) ;
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
// TODO; this is a lot easier with Change instead of Operation.
2020-09-17 13:57:49 +08:00
2022-01-03 15:20:46 +08:00
fn point_at_pos ( text : & Rope , pos : usize ) -> ( usize , Point ) {
2021-11-06 23:21:03 +08:00
let byte = text . char_to_byte ( pos ) ; // <- attempted to index past end
let line = text . char_to_line ( pos ) ;
let line_start_byte = text . line_to_byte ( line ) ;
let col = byte - line_start_byte ;
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
( byte , Point ::new ( line , col ) )
}
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
fn traverse ( point : Point , text : & Tendril ) -> Point {
let Point {
mut row ,
mut column ,
} = point ;
// TODO: there should be a better way here.
let mut chars = text . chars ( ) . peekable ( ) ;
while let Some ( ch ) = chars . next ( ) {
if char_is_line_ending ( ch ) & & ! ( ch = = '\r' & & chars . peek ( ) = = Some ( & '\n' ) ) {
row + = 1 ;
column = 0 ;
} else {
column + = 1 ;
2020-09-17 13:57:49 +08:00
}
}
2021-11-06 23:21:03 +08:00
Point { row , column }
}
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
while let Some ( change ) = iter . next ( ) {
let len = match change {
Delete ( i ) | Retain ( i ) = > * i ,
Insert ( _ ) = > 0 ,
} ;
let mut old_end = old_pos + len ;
match change {
Retain ( _ ) = > { }
Delete ( _ ) = > {
let ( start_byte , start_position ) = point_at_pos ( old_text , old_pos ) ;
let ( old_end_byte , old_end_position ) = point_at_pos ( old_text , old_end ) ;
// deletion
edits . push ( tree_sitter ::InputEdit {
start_byte , // old_pos to byte
old_end_byte , // old_end to byte
new_end_byte : start_byte , // old_pos to byte
start_position , // old pos to coords
old_end_position , // old_end to coords
new_end_position : start_position , // old pos to coords
} ) ;
}
Insert ( s ) = > {
let ( start_byte , start_position ) = point_at_pos ( old_text , old_pos ) ;
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
// a subsequent delete means a replace, consume it
if let Some ( Delete ( len ) ) = iter . peek ( ) {
old_end = old_pos + len ;
2021-02-18 17:34:22 +08:00
let ( old_end_byte , old_end_position ) = point_at_pos ( old_text , old_end ) ;
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
iter . next ( ) ;
2020-09-17 13:57:49 +08:00
2021-11-06 23:21:03 +08:00
// replacement
2021-02-16 10:03:36 +08:00
edits . push ( tree_sitter ::InputEdit {
2021-11-06 23:21:03 +08:00
start_byte , // old_pos to byte
old_end_byte , // old_end to byte
new_end_byte : start_byte + s . len ( ) , // old_pos to byte + s.len()
start_position , // old pos to coords
old_end_position , // old_end to coords
new_end_position : traverse ( start_position , s ) , // old pos + chars, newlines matter too (iter over)
} ) ;
} else {
// insert
edits . push ( tree_sitter ::InputEdit {
start_byte , // old_pos to byte
old_end_byte : start_byte , // same
new_end_byte : start_byte + s . len ( ) , // old_pos + s.len()
start_position , // old pos to coords
old_end_position : start_position , // same
new_end_position : traverse ( start_position , s ) , // old pos + chars, newlines matter too (iter over)
2021-02-16 10:03:36 +08:00
} ) ;
2020-09-17 13:57:49 +08:00
}
}
}
2021-11-06 23:21:03 +08:00
old_pos = old_end ;
2020-09-17 13:57:49 +08:00
}
2021-11-06 23:21:03 +08:00
edits
2020-09-17 13:57:49 +08:00
}
2020-09-11 13:14:44 +08:00
use std ::sync ::atomic ::{ AtomicUsize , Ordering } ;
use std ::{ iter , mem , ops , str , usize } ;
use tree_sitter ::{
2022-05-20 09:19:46 +08:00
Language as Grammar , Node , Parser , Point , Query , QueryCaptures , QueryCursor , QueryError ,
QueryMatch , Range , TextProvider , Tree ,
2020-09-11 13:14:44 +08:00
} ;
/// Number of `HighlightIter::next` loop iterations between two reads of the
/// cancellation flag (only counted when a flag was supplied).
const CANCELLATION_CHECK_INTERVAL : usize = 100 ;
/// Identifies the highlight to apply to a region of source code.
///
/// The wrapped value is an index into the list of recognized highlight names.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Highlight(pub usize);
/// The reasons parsing or syntax highlighting can fail.
#[derive(Debug, Eq, PartialEq)]
pub enum Error {
    /// The operation was cancelled before it finished.
    Cancelled,
    /// The parser rejected the grammar it was given.
    InvalidLanguage,
    /// Any other failure.
    Unknown,
}
/// One step in rendering a syntax-highlighted document.
#[derive(Clone, Copy, Debug)]
pub enum HighlightEvent {
    /// A run of source text, given as a byte range, to emit with the
    /// currently active highlights.
    Source { start: usize, end: usize },
    /// A highlight becomes active.
    HighlightStart(Highlight),
    /// The most recently started highlight ends.
    HighlightEnd,
}
2022-04-28 03:21:20 +08:00
/// Contains the data needed to highlight code written in a particular language.
2020-09-11 13:14:44 +08:00
///
/// This struct is immutable and can be shared between threads.
2021-06-07 22:34:19 +08:00
#[ derive(Debug) ]
2020-09-11 13:14:44 +08:00
pub struct HighlightConfiguration {
2020-09-22 17:23:48 +08:00
pub language : Grammar ,
2020-09-11 13:14:44 +08:00
pub query : Query ,
2021-11-06 23:21:03 +08:00
injections_query : Query ,
2020-09-11 13:14:44 +08:00
combined_injections_query : Option < Query > ,
highlights_pattern_index : usize ,
2021-06-19 19:26:52 +08:00
highlight_indices : ArcSwap < Vec < Option < Highlight > > > ,
2020-09-11 13:14:44 +08:00
non_local_variable_patterns : Vec < bool > ,
injection_content_capture_index : Option < u32 > ,
injection_language_capture_index : Option < u32 > ,
local_scope_capture_index : Option < u32 > ,
local_def_capture_index : Option < u32 > ,
local_def_value_capture_index : Option < u32 > ,
local_ref_capture_index : Option < u32 > ,
}
#[ derive(Debug) ]
struct LocalDef < ' a > {
2021-03-16 12:45:32 +08:00
name : Cow < ' a , str > ,
2020-09-11 13:14:44 +08:00
value_range : ops ::Range < usize > ,
highlight : Option < Highlight > ,
}
#[ derive(Debug) ]
struct LocalScope < ' a > {
inherits : bool ,
range : ops ::Range < usize > ,
local_defs : Vec < LocalDef < ' a > > ,
}
2021-06-07 22:34:19 +08:00
#[ derive(Debug) ]
2021-11-06 23:21:03 +08:00
struct HighlightIter < ' a > {
2021-03-16 12:45:32 +08:00
source : RopeSlice < ' a > ,
2020-09-11 13:14:44 +08:00
byte_offset : usize ,
cancellation_flag : Option < & ' a AtomicUsize > ,
2021-09-06 12:18:16 +08:00
layers : Vec < HighlightIterLayer < ' a > > ,
2020-09-11 13:14:44 +08:00
iter_count : usize ,
next_event : Option < HighlightEvent > ,
last_highlight_range : Option < ( usize , usize , usize ) > ,
}
2021-09-06 12:18:16 +08:00
// Adapter to convert rope chunks to bytes
2022-03-30 23:08:07 +08:00
pub struct ChunksBytes < ' a > {
2021-09-06 12:18:16 +08:00
chunks : ropey ::iter ::Chunks < ' a > ,
}
impl < ' a > Iterator for ChunksBytes < ' a > {
type Item = & ' a [ u8 ] ;
fn next ( & mut self ) -> Option < Self ::Item > {
self . chunks . next ( ) . map ( str ::as_bytes )
}
}
2022-03-30 23:08:07 +08:00
pub struct RopeProvider < ' a > ( pub RopeSlice < ' a > ) ;
2021-09-06 12:18:16 +08:00
impl < ' a > TextProvider < ' a > for RopeProvider < ' a > {
type I = ChunksBytes < ' a > ;
fn text ( & mut self , node : Node ) -> Self ::I {
2022-01-17 15:28:56 +08:00
let fragment = self . 0. byte_slice ( node . start_byte ( ) .. node . end_byte ( ) ) ;
2021-09-06 12:18:16 +08:00
ChunksBytes {
chunks : fragment . chunks ( ) ,
}
}
}
struct HighlightIterLayer < ' a > {
2020-09-17 13:57:49 +08:00
_tree : Option < Tree > ,
2020-09-11 13:14:44 +08:00
cursor : QueryCursor ,
2021-09-06 12:18:16 +08:00
captures : iter ::Peekable < QueryCaptures < ' a , ' a , RopeProvider < ' a > > > ,
2020-09-11 13:14:44 +08:00
config : & ' a HighlightConfiguration ,
highlight_end_stack : Vec < usize > ,
scope_stack : Vec < LocalScope < ' a > > ,
depth : usize ,
2022-01-03 14:35:32 +08:00
ranges : & ' a [ Range ] ,
2020-09-11 13:14:44 +08:00
}
2021-09-06 12:18:16 +08:00
impl < ' a > fmt ::Debug for HighlightIterLayer < ' a > {
2021-06-06 23:55:05 +08:00
fn fmt ( & self , f : & mut fmt ::Formatter < '_ > ) -> fmt ::Result {
2021-06-07 22:34:19 +08:00
f . debug_struct ( " HighlightIterLayer " ) . finish ( )
2021-06-06 23:55:05 +08:00
}
}
2020-09-11 13:14:44 +08:00
impl HighlightConfiguration {
2020-09-22 17:23:48 +08:00
/// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting
2020-09-11 13:14:44 +08:00
/// queries.
///
/// # Parameters
///
2020-09-22 17:23:48 +08:00
/// * `language` - The Tree-sitter `Grammar` that should be used for parsing.
2020-09-11 13:14:44 +08:00
/// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
/// should be non-empty, otherwise no syntax highlights will be added.
/// * `injections_query` - A string containing tree patterns for injecting other languages
/// into the document. This can be empty if no injections are desired.
/// * `locals_query` - A string containing tree patterns for tracking local variable
/// definitions and references. This can be empty if local variable tracking is not needed.
///
/// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
pub fn new (
2020-09-22 17:23:48 +08:00
language : Grammar ,
2020-09-11 13:14:44 +08:00
highlights_query : & str ,
injection_query : & str ,
locals_query : & str ,
) -> Result < Self , QueryError > {
// Concatenate the query strings, keeping track of the start offset of each section.
let mut query_source = String ::new ( ) ;
query_source . push_str ( locals_query ) ;
let highlights_query_offset = query_source . len ( ) ;
query_source . push_str ( highlights_query ) ;
// Construct a single query by concatenating the three query strings, but record the
// range of pattern indices that belong to each individual string.
2021-11-06 23:21:03 +08:00
let query = Query ::new ( language , & query_source ) ? ;
2020-09-11 13:14:44 +08:00
let mut highlights_pattern_index = 0 ;
for i in 0 .. ( query . pattern_count ( ) ) {
let pattern_offset = query . start_byte_for_pattern ( i ) ;
if pattern_offset < highlights_query_offset {
2021-11-06 23:21:03 +08:00
highlights_pattern_index + = 1 ;
2020-09-11 13:14:44 +08:00
}
}
2021-11-06 23:21:03 +08:00
let mut injections_query = Query ::new ( language , injection_query ) ? ;
2020-09-11 13:14:44 +08:00
// Construct a separate query just for dealing with the 'combined injections'.
// Disable the combined injection patterns in the main query.
let mut combined_injections_query = Query ::new ( language , injection_query ) ? ;
let mut has_combined_queries = false ;
2021-11-06 23:21:03 +08:00
for pattern_index in 0 .. injections_query . pattern_count ( ) {
let settings = injections_query . property_settings ( pattern_index ) ;
2020-09-11 13:14:44 +08:00
if settings . iter ( ) . any ( | s | & * s . key = = " injection.combined " ) {
has_combined_queries = true ;
2021-11-06 23:21:03 +08:00
injections_query . disable_pattern ( pattern_index ) ;
2020-09-11 13:14:44 +08:00
} else {
combined_injections_query . disable_pattern ( pattern_index ) ;
}
}
let combined_injections_query = if has_combined_queries {
Some ( combined_injections_query )
} else {
None
} ;
// Find all of the highlighting patterns that are disabled for nodes that
// have been identified as local variables.
let non_local_variable_patterns = ( 0 .. query . pattern_count ( ) )
. map ( | i | {
query
. property_predicates ( i )
. iter ( )
. any ( | ( prop , positive ) | ! * positive & & prop . key . as_ref ( ) = = " local " )
} )
. collect ( ) ;
// Store the numeric ids for all of the special captures.
let mut injection_content_capture_index = None ;
let mut injection_language_capture_index = None ;
let mut local_def_capture_index = None ;
let mut local_def_value_capture_index = None ;
let mut local_ref_capture_index = None ;
let mut local_scope_capture_index = None ;
for ( i , name ) in query . capture_names ( ) . iter ( ) . enumerate ( ) {
let i = Some ( i as u32 ) ;
match name . as_str ( ) {
" local.definition " = > local_def_capture_index = i ,
" local.definition-value " = > local_def_value_capture_index = i ,
" local.reference " = > local_ref_capture_index = i ,
" local.scope " = > local_scope_capture_index = i ,
_ = > { }
}
}
2021-11-06 23:21:03 +08:00
for ( i , name ) in injections_query . capture_names ( ) . iter ( ) . enumerate ( ) {
let i = Some ( i as u32 ) ;
match name . as_str ( ) {
" injection.content " = > injection_content_capture_index = i ,
" injection.language " = > injection_language_capture_index = i ,
_ = > { }
}
}
2021-06-19 19:26:52 +08:00
let highlight_indices = ArcSwap ::from_pointee ( vec! [ None ; query . capture_names ( ) . len ( ) ] ) ;
2021-05-06 16:20:00 +08:00
Ok ( Self {
2020-09-11 13:14:44 +08:00
language ,
query ,
2021-11-06 23:21:03 +08:00
injections_query ,
2020-09-11 13:14:44 +08:00
combined_injections_query ,
highlights_pattern_index ,
highlight_indices ,
non_local_variable_patterns ,
injection_content_capture_index ,
injection_language_capture_index ,
2021-03-01 16:37:31 +08:00
local_scope_capture_index ,
2020-09-11 13:14:44 +08:00
local_def_capture_index ,
local_def_value_capture_index ,
local_ref_capture_index ,
} )
}
/// Get a slice containing all of the highlight names used in the configuration.
pub fn names ( & self ) -> & [ String ] {
self . query . capture_names ( )
}
/// Set the list of recognized highlight names.
///
/// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
/// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
/// these queries can choose to recognize highlights with different levels of specificity.
/// For example, the string `function.builtin` will match against `function.method.builtin`
/// and `function.builtin.constructor`, but will not match `function.method`.
///
/// When highlighting, results are returned as `Highlight` values, which contain the index
/// of the matched highlight this list of highlight names.
2021-06-19 19:26:52 +08:00
pub fn configure ( & self , recognized_names : & [ String ] ) {
2020-09-11 13:14:44 +08:00
let mut capture_parts = Vec ::new ( ) ;
2021-06-19 19:26:52 +08:00
let indices : Vec < _ > = self
. query
. capture_names ( )
. iter ( )
. map ( move | capture_name | {
2020-09-11 13:14:44 +08:00
capture_parts . clear ( ) ;
capture_parts . extend ( capture_name . split ( '.' ) ) ;
let mut best_index = None ;
let mut best_match_len = 0 ;
for ( i , recognized_name ) in recognized_names . iter ( ) . enumerate ( ) {
2021-06-19 19:26:52 +08:00
let recognized_name = recognized_name ;
2020-09-11 13:14:44 +08:00
let mut len = 0 ;
let mut matches = true ;
for part in recognized_name . split ( '.' ) {
len + = 1 ;
if ! capture_parts . contains ( & part ) {
matches = false ;
break ;
}
}
if matches & & len > best_match_len {
best_index = Some ( i ) ;
best_match_len = len ;
}
}
best_index . map ( Highlight )
2021-06-19 19:26:52 +08:00
} )
. collect ( ) ;
self . highlight_indices . store ( Arc ::new ( indices ) ) ;
2020-09-11 13:14:44 +08:00
}
}
2021-09-06 12:18:16 +08:00
impl < ' a > HighlightIterLayer < ' a > {
2020-09-11 13:14:44 +08:00
// First, sort scope boundaries by their byte offset in the document. At a
// given position, emit scope endings before scope beginnings. Finally, emit
// scope boundaries from deeper layers first.
fn sort_key ( & mut self ) -> Option < ( usize , bool , isize ) > {
let depth = - ( self . depth as isize ) ;
let next_start = self
. captures
. peek ( )
. map ( | ( m , i ) | m . captures [ * i ] . node . start_byte ( ) ) ;
let next_end = self . highlight_end_stack . last ( ) . cloned ( ) ;
match ( next_start , next_end ) {
( Some ( start ) , Some ( end ) ) = > {
if start < end {
Some ( ( start , true , depth ) )
} else {
Some ( ( end , false , depth ) )
}
}
( Some ( i ) , None ) = > Some ( ( i , true , depth ) ) ,
( None , Some ( j ) ) = > Some ( ( j , false , depth ) ) ,
_ = > None ,
}
}
}
2021-11-06 23:21:03 +08:00
// Compute the ranges that should be included when parsing an injection.
// This takes into account three things:
// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
// * `nodes` - Every injection takes place within a set of nodes. The injection ranges
// are the ranges of those nodes.
// * `includes_children` - For some injections, the content nodes' children should be
// excluded from the nested document, so that only the content nodes' *own* content
// is reparsed. For other injections, the content nodes' entire ranges should be
// reparsed, including the ranges of their children.
fn intersect_ranges (
parent_ranges : & [ Range ] ,
nodes : & [ Node ] ,
includes_children : bool ,
) -> Vec < Range > {
let mut cursor = nodes [ 0 ] . walk ( ) ;
let mut result = Vec ::new ( ) ;
let mut parent_range_iter = parent_ranges . iter ( ) ;
let mut parent_range = parent_range_iter
. next ( )
. expect ( " Layers should only be constructed with non-empty ranges vectors " ) ;
for node in nodes . iter ( ) {
let mut preceding_range = Range {
start_byte : 0 ,
start_point : Point ::new ( 0 , 0 ) ,
end_byte : node . start_byte ( ) ,
end_point : node . start_position ( ) ,
} ;
let following_range = Range {
start_byte : node . end_byte ( ) ,
start_point : node . end_position ( ) ,
end_byte : usize ::MAX ,
end_point : Point ::new ( usize ::MAX , usize ::MAX ) ,
} ;
for excluded_range in node
. children ( & mut cursor )
. filter_map ( | child | {
if includes_children {
None
} else {
Some ( child . range ( ) )
}
} )
. chain ( [ following_range ] . iter ( ) . cloned ( ) )
{
let mut range = Range {
start_byte : preceding_range . end_byte ,
start_point : preceding_range . end_point ,
end_byte : excluded_range . start_byte ,
end_point : excluded_range . start_point ,
} ;
preceding_range = excluded_range ;
if range . end_byte < parent_range . start_byte {
continue ;
}
while parent_range . start_byte < = range . end_byte {
if parent_range . end_byte > range . start_byte {
if range . start_byte < parent_range . start_byte {
range . start_byte = parent_range . start_byte ;
range . start_point = parent_range . start_point ;
}
if parent_range . end_byte < range . end_byte {
if range . start_byte < parent_range . end_byte {
result . push ( Range {
start_byte : range . start_byte ,
start_point : range . start_point ,
end_byte : parent_range . end_byte ,
end_point : parent_range . end_point ,
} ) ;
}
range . start_byte = parent_range . end_byte ;
range . start_point = parent_range . end_point ;
} else {
if range . start_byte < range . end_byte {
result . push ( range ) ;
}
break ;
}
}
if let Some ( next_range ) = parent_range_iter . next ( ) {
parent_range = next_range ;
} else {
return result ;
}
}
}
}
result
}
impl < ' a > HighlightIter < ' a > {
2020-09-11 13:14:44 +08:00
fn emit_event (
& mut self ,
offset : usize ,
event : Option < HighlightEvent > ,
) -> Option < Result < HighlightEvent , Error > > {
let result ;
if self . byte_offset < offset {
result = Some ( Ok ( HighlightEvent ::Source {
start : self . byte_offset ,
end : offset ,
} ) ) ;
self . byte_offset = offset ;
self . next_event = event ;
} else {
result = event . map ( Ok ) ;
}
self . sort_layers ( ) ;
result
}
fn sort_layers ( & mut self ) {
while ! self . layers . is_empty ( ) {
if let Some ( sort_key ) = self . layers [ 0 ] . sort_key ( ) {
let mut i = 0 ;
while i + 1 < self . layers . len ( ) {
if let Some ( next_offset ) = self . layers [ i + 1 ] . sort_key ( ) {
if next_offset < sort_key {
i + = 1 ;
continue ;
}
2021-11-06 23:21:03 +08:00
} else {
let layer = self . layers . remove ( i + 1 ) ;
PARSER . with ( | ts_parser | {
let highlighter = & mut ts_parser . borrow_mut ( ) ;
highlighter . cursors . push ( layer . cursor ) ;
} ) ;
2020-09-11 13:14:44 +08:00
}
break ;
}
if i > 0 {
2020-09-12 16:44:57 +08:00
self . layers [ 0 .. ( i + 1 ) ] . rotate_left ( 1 ) ;
2020-09-11 13:14:44 +08:00
}
break ;
} else {
let layer = self . layers . remove ( 0 ) ;
2021-03-13 10:06:02 +08:00
PARSER . with ( | ts_parser | {
let highlighter = & mut ts_parser . borrow_mut ( ) ;
highlighter . cursors . push ( layer . cursor ) ;
} ) ;
2020-09-11 13:14:44 +08:00
}
}
}
}
2021-11-06 23:21:03 +08:00
impl < ' a > Iterator for HighlightIter < ' a > {
2020-09-11 13:14:44 +08:00
type Item = Result < HighlightEvent , Error > ;
fn next ( & mut self ) -> Option < Self ::Item > {
' main : loop {
// If we've already determined the next highlight boundary, just return it.
if let Some ( e ) = self . next_event . take ( ) {
return Some ( Ok ( e ) ) ;
}
// Periodically check for cancellation, returning `Cancelled` error if the
// cancellation flag was flipped.
if let Some ( cancellation_flag ) = self . cancellation_flag {
self . iter_count + = 1 ;
if self . iter_count > = CANCELLATION_CHECK_INTERVAL {
self . iter_count = 0 ;
if cancellation_flag . load ( Ordering ::Relaxed ) ! = 0 {
return Some ( Err ( Error ::Cancelled ) ) ;
}
}
}
// If none of the layers have any more highlight boundaries, terminate.
if self . layers . is_empty ( ) {
2021-03-16 12:45:32 +08:00
let len = self . source . len_bytes ( ) ;
return if self . byte_offset < len {
2020-09-11 13:14:44 +08:00
let result = Some ( Ok ( HighlightEvent ::Source {
start : self . byte_offset ,
2021-03-16 12:45:32 +08:00
end : len ,
2020-09-11 13:14:44 +08:00
} ) ) ;
2021-03-16 12:45:32 +08:00
self . byte_offset = len ;
2020-09-11 13:14:44 +08:00
result
} else {
None
} ;
}
// Get the next capture from whichever layer has the earliest highlight boundary.
let range ;
let layer = & mut self . layers [ 0 ] ;
if let Some ( ( next_match , capture_index ) ) = layer . captures . peek ( ) {
let next_capture = next_match . captures [ * capture_index ] ;
range = next_capture . node . byte_range ( ) ;
// If any previous highlight ends before this node starts, then before
// processing this capture, emit the source code up until the end of the
// previous highlight, and an end event for that highlight.
if let Some ( end_byte ) = layer . highlight_end_stack . last ( ) . cloned ( ) {
if end_byte < = range . start {
layer . highlight_end_stack . pop ( ) ;
return self . emit_event ( end_byte , Some ( HighlightEvent ::HighlightEnd ) ) ;
}
}
}
// If there are no more captures, then emit any remaining highlight end events.
// And if there are none of those, then just advance to the end of the document.
else if let Some ( end_byte ) = layer . highlight_end_stack . last ( ) . cloned ( ) {
layer . highlight_end_stack . pop ( ) ;
return self . emit_event ( end_byte , Some ( HighlightEvent ::HighlightEnd ) ) ;
} else {
2021-11-06 23:21:03 +08:00
return self . emit_event ( self . source . len_bytes ( ) , None ) ;
2020-09-11 13:14:44 +08:00
} ;
let ( mut match_ , capture_index ) = layer . captures . next ( ) . unwrap ( ) ;
let mut capture = match_ . captures [ capture_index ] ;
// Remove from the local scope stack any local scopes that have already ended.
while range . start > layer . scope_stack . last ( ) . unwrap ( ) . range . end {
layer . scope_stack . pop ( ) ;
}
// If this capture is for tracking local variables, then process the
// local variable info.
let mut reference_highlight = None ;
let mut definition_highlight = None ;
while match_ . pattern_index < layer . config . highlights_pattern_index {
// If the node represents a local scope, push a new local scope onto
// the scope stack.
if Some ( capture . index ) = = layer . config . local_scope_capture_index {
definition_highlight = None ;
let mut scope = LocalScope {
inherits : true ,
range : range . clone ( ) ,
local_defs : Vec ::new ( ) ,
} ;
for prop in layer . config . query . property_settings ( match_ . pattern_index ) {
2020-09-12 16:44:57 +08:00
if let " local.scope-inherits " = prop . key . as_ref ( ) {
scope . inherits =
prop . value . as_ref ( ) . map_or ( true , | r | r . as_ref ( ) = = " true " ) ;
2020-09-11 13:14:44 +08:00
}
}
layer . scope_stack . push ( scope ) ;
}
// If the node represents a definition, add a new definition to the
// local scope at the top of the scope stack.
else if Some ( capture . index ) = = layer . config . local_def_capture_index {
reference_highlight = None ;
let scope = layer . scope_stack . last_mut ( ) . unwrap ( ) ;
let mut value_range = 0 .. 0 ;
for capture in match_ . captures {
if Some ( capture . index ) = = layer . config . local_def_value_capture_index {
value_range = capture . node . byte_range ( ) ;
}
}
2021-03-16 12:45:32 +08:00
let name = byte_range_to_str ( range . clone ( ) , self . source ) ;
scope . local_defs . push ( LocalDef {
name ,
value_range ,
highlight : None ,
} ) ;
definition_highlight = scope . local_defs . last_mut ( ) . map ( | s | & mut s . highlight ) ;
2020-09-11 13:14:44 +08:00
}
// If the node represents a reference, then try to find the corresponding
// definition in the scope stack.
2020-09-12 18:36:49 +08:00
else if Some ( capture . index ) = = layer . config . local_ref_capture_index
& & definition_highlight . is_none ( )
{
definition_highlight = None ;
2021-03-16 12:45:32 +08:00
let name = byte_range_to_str ( range . clone ( ) , self . source ) ;
for scope in layer . scope_stack . iter ( ) . rev ( ) {
if let Some ( highlight ) = scope . local_defs . iter ( ) . rev ( ) . find_map ( | def | {
if def . name = = name & & range . start > = def . value_range . end {
Some ( def . highlight )
} else {
None
2020-09-11 13:14:44 +08:00
}
2021-03-16 12:45:32 +08:00
} ) {
reference_highlight = highlight ;
break ;
}
if ! scope . inherits {
break ;
2020-09-11 13:14:44 +08:00
}
}
}
// Continue processing any additional matches for the same node.
if let Some ( ( next_match , next_capture_index ) ) = layer . captures . peek ( ) {
let next_capture = next_match . captures [ * next_capture_index ] ;
if next_capture . node = = capture . node {
capture = next_capture ;
match_ = layer . captures . next ( ) . unwrap ( ) . 0 ;
continue ;
}
}
self . sort_layers ( ) ;
continue 'main ;
}
// Otherwise, this capture must represent a highlight.
// If this exact range has already been highlighted by an earlier pattern, or by
// a different layer, then skip over this one.
if let Some ( ( last_start , last_end , last_depth ) ) = self . last_highlight_range {
if range . start = = last_start & & range . end = = last_end & & layer . depth < last_depth {
self . sort_layers ( ) ;
continue 'main ;
}
}
// If the current node was found to be a local variable, then skip over any
// highlighting patterns that are disabled for local variables.
if definition_highlight . is_some ( ) | | reference_highlight . is_some ( ) {
while layer . config . non_local_variable_patterns [ match_ . pattern_index ] {
if let Some ( ( next_match , next_capture_index ) ) = layer . captures . peek ( ) {
let next_capture = next_match . captures [ * next_capture_index ] ;
if next_capture . node = = capture . node {
capture = next_capture ;
match_ = layer . captures . next ( ) . unwrap ( ) . 0 ;
continue ;
}
}
self . sort_layers ( ) ;
continue 'main ;
}
}
// Once a highlighting pattern is found for the current node, skip over
// any later highlighting patterns that also match this node. Captures
// for a given node are ordered by pattern index, so these subsequent
// captures are guaranteed to be for highlighting, not injections or
// local variables.
while let Some ( ( next_match , next_capture_index ) ) = layer . captures . peek ( ) {
let next_capture = next_match . captures [ * next_capture_index ] ;
if next_capture . node = = capture . node {
layer . captures . next ( ) ;
} else {
break ;
}
}
2021-06-19 19:26:52 +08:00
let current_highlight = layer . config . highlight_indices . load ( ) [ capture . index as usize ] ;
2020-09-11 13:14:44 +08:00
// If this node represents a local definition, then store the current
// highlight value on the local scope entry representing this node.
if let Some ( definition_highlight ) = definition_highlight {
* definition_highlight = current_highlight ;
}
// Emit a scope start event and push the node's end position to the stack.
if let Some ( highlight ) = reference_highlight . or ( current_highlight ) {
self . last_highlight_range = Some ( ( range . start , range . end , layer . depth ) ) ;
layer . highlight_end_stack . push ( range . end ) ;
return self
. emit_event ( range . start , Some ( HighlightEvent ::HighlightStart ( highlight ) ) ) ;
}
self . sort_layers ( ) ;
}
}
}
fn injection_for_match < ' a > (
config : & HighlightConfiguration ,
query : & ' a Query ,
2021-09-06 12:18:16 +08:00
query_match : & QueryMatch < ' a , ' a > ,
2021-03-16 12:45:32 +08:00
source : RopeSlice < ' a > ,
) -> ( Option < Cow < ' a , str > > , Option < Node < ' a > > , bool ) {
2020-09-11 13:14:44 +08:00
let content_capture_index = config . injection_content_capture_index ;
let language_capture_index = config . injection_language_capture_index ;
let mut language_name = None ;
let mut content_node = None ;
for capture in query_match . captures {
let index = Some ( capture . index ) ;
if index = = language_capture_index {
2021-03-16 12:45:32 +08:00
let name = byte_range_to_str ( capture . node . byte_range ( ) , source ) ;
language_name = Some ( name ) ;
2020-09-11 13:14:44 +08:00
} else if index = = content_capture_index {
content_node = Some ( capture . node ) ;
}
}
let mut include_children = false ;
for prop in query . property_settings ( query_match . pattern_index ) {
match prop . key . as_ref ( ) {
// In addition to specifying the language name via the text of a
// captured node, it can also be hard-coded via a `#set!` predicate
// that sets the injection.language key.
" injection.language " = > {
if language_name . is_none ( ) {
2021-03-16 12:45:32 +08:00
language_name = prop . value . as_ref ( ) . map ( | s | s . as_ref ( ) . into ( ) )
2020-09-11 13:14:44 +08:00
}
}
// By default, injections do not include the *children* of an
// `injection.content` node - only the ranges that belong to the
// node itself. This can be changed using a `#set!` predicate that
// sets the `injection.include-children` key.
" injection.include-children " = > include_children = true ,
_ = > { }
}
}
( language_name , content_node , include_children )
}
2021-06-27 18:10:48 +08:00
/// Iterator adaptor that weaves extra highlight spans into a stream of
/// `HighlightEvent`s. Built by [`merge`].
pub struct Merge<I> {
    /// The wrapped event stream.
    iter: I,
    /// Spans still waiting to be merged, as `(highlight index, char range)`
    /// pairs.
    spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>>,
    /// Look-ahead: the next event pulled from `iter` that has not been fully
    /// emitted yet.
    next_event: Option<HighlightEvent>,
    /// Look-ahead: the next span pulled from `spans` that has not been fully
    /// emitted yet.
    next_span: Option<(usize, std::ops::Range<usize>)>,
    /// Events that are already decided but not yet returned. Used as a stack:
    /// the last pushed event is returned first.
    queue: Vec<HighlightEvent>,
}
/// Merge a list of spans into the highlight event stream.
///
/// Each span is a `(highlight index, char range)` pair. The returned
/// [`Merge`] starts with the first event of `iter` and the first entry of
/// `spans` already pulled into its look-ahead slots, and with an empty
/// output queue.
pub fn merge<I: Iterator<Item = HighlightEvent>>(
    iter: I,
    spans: Vec<(usize, std::ops::Range<usize>)>,
) -> Merge<I> {
    let mut events = iter;
    let mut pending_spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>> =
        Box::new(spans.into_iter());

    // Prime the look-ahead: events first, then spans.
    let first_event = events.next();
    let first_span = pending_spans.next();

    Merge {
        iter: events,
        spans: pending_spans,
        next_event: first_event,
        next_span: first_span,
        queue: Vec::new(),
    }
}
impl < I : Iterator < Item = HighlightEvent > > Iterator for Merge < I > {
type Item = HighlightEvent ;
fn next ( & mut self ) -> Option < Self ::Item > {
use HighlightEvent ::* ;
if let Some ( event ) = self . queue . pop ( ) {
return Some ( event ) ;
}
loop {
match ( self . next_event , & self . next_span ) {
// this happens when range is partially or fully offscreen
2021-07-02 03:55:18 +08:00
( Some ( Source { start , .. } ) , Some ( ( span , range ) ) ) if start > range . start = > {
2021-06-27 18:10:48 +08:00
if start > range . end {
self . next_span = self . spans . next ( ) ;
} else {
self . next_span = Some ( ( * span , start .. range . end ) ) ;
} ;
}
_ = > break ,
}
}
match ( self . next_event , & self . next_span ) {
( Some ( HighlightStart ( i ) ) , _ ) = > {
self . next_event = self . iter . next ( ) ;
Some ( HighlightStart ( i ) )
}
( Some ( HighlightEnd ) , _ ) = > {
self . next_event = self . iter . next ( ) ;
Some ( HighlightEnd )
}
2021-07-02 03:55:18 +08:00
( Some ( Source { start , end } ) , Some ( ( _ , range ) ) ) if start < range . start = > {
2021-06-27 18:10:48 +08:00
let intersect = range . start . min ( end ) ;
let event = Source {
start ,
end : intersect ,
} ;
if end = = intersect {
// the event is complete
self . next_event = self . iter . next ( ) ;
} else {
// subslice the event
self . next_event = Some ( Source {
start : intersect ,
end ,
} ) ;
} ;
Some ( event )
}
( Some ( Source { start , end } ) , Some ( ( span , range ) ) ) if start = = range . start = > {
let intersect = range . end . min ( end ) ;
let event = HighlightStart ( Highlight ( * span ) ) ;
// enqueue in reverse order
self . queue . push ( HighlightEnd ) ;
self . queue . push ( Source {
start ,
end : intersect ,
} ) ;
if end = = intersect {
// the event is complete
self . next_event = self . iter . next ( ) ;
} else {
// subslice the event
self . next_event = Some ( Source {
start : intersect ,
end ,
} ) ;
} ;
if intersect = = range . end {
self . next_span = self . spans . next ( ) ;
} else {
self . next_span = Some ( ( * span , intersect .. range . end ) ) ;
}
Some ( event )
}
( Some ( event ) , None ) = > {
self . next_event = self . iter . next ( ) ;
Some ( event )
}
2021-07-02 14:36:09 +08:00
// Can happen if cursor at EOF and/or diagnostic reaches past the end.
// We need to actually emit events for the cursor-at-EOF situation,
// even though the range is past the end of the text. This needs to be
// handled appropriately by the drawing code by not assuming that
// all `Source` events point to valid indices in the rope.
( None , Some ( ( span , range ) ) ) = > {
let event = HighlightStart ( Highlight ( * span ) ) ;
self . queue . push ( HighlightEnd ) ;
self . queue . push ( Source {
start : range . start ,
end : range . end ,
} ) ;
self . next_span = self . spans . next ( ) ;
Some ( event )
2021-06-28 13:50:35 +08:00
}
2021-06-27 18:10:48 +08:00
( None , None ) = > None ,
e = > unreachable! ( " {:?} " , e ) ,
}
}
}
2021-07-02 03:24:22 +08:00
// Unit tests for text-object queries, parsing, edit generation and runtime
// file loading.
#[cfg(test)]
mod test {
    use super::*;
    use crate::{Rope, Transaction};

    /// A quantified capture over consecutive line comments should cover both
    /// comment lines of the sample source.
    #[test]
    fn test_textobject_queries() {
        let query_str = r#"
        (line_comment)+ @quantified_nodes
        ((line_comment)+) @quantified_nodes_grouped
        ((line_comment) (line_comment)) @multiple_nodes_grouped
        "#;
        // The comment lines must stay at column 0 inside the raw string:
        // the expected byte range below depends on it.
        let source = Rope::from_str(
            r#"
/// a comment on
/// multiple lines
        "#,
        );

        let loader = Loader::new(Configuration { language: vec![] });
        let language = get_language("Rust").unwrap();

        let query = Query::new(language, query_str).unwrap();
        let textobject = TextObjectQuery { query };
        let mut cursor = QueryCursor::new();

        let config = HighlightConfiguration::new(language, "", "", "").unwrap();
        let syntax = Syntax::new(&source, Arc::new(config), Arc::new(loader));

        let root = syntax.tree().root_node();
        // Asserts that the first node captured as `capture` spans `range`.
        let mut test = |capture, range| {
            let matches: Vec<_> = textobject
                .capture_nodes(capture, root, source.slice(..), &mut cursor)
                .unwrap()
                .collect();

            assert_eq!(
                matches[0].byte_range(),
                range,
                "@{} expected {:?}",
                capture,
                range
            )
        };

        test("quantified_nodes", 1..36);
        // NOTE: Enable after implementing proper node group capturing
        // test("quantified_nodes_grouped", 1..36);
        // test("multiple_nodes_grouped", 1..36);
    }

    /// Parsing a tiny Rust source should yield the expected syntax tree.
    #[test]
    fn test_parser() {
        let highlight_names: Vec<String> = [
            "attribute",
            "constant",
            "function.builtin",
            "function",
            "keyword",
            "operator",
            "property",
            "punctuation",
            "punctuation.bracket",
            "punctuation.delimiter",
            "string",
            "string.special",
            "tag",
            "type",
            "type.builtin",
            "variable",
            "variable.builtin",
            "variable.parameter",
        ]
        .iter()
        .cloned()
        .map(String::from)
        .collect();

        let loader = Loader::new(Configuration { language: vec![] });

        let language = get_language("Rust").unwrap();
        let config = HighlightConfiguration::new(
            language,
            &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm")
                .unwrap(),
            &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm")
                .unwrap(),
            "", // locals.scm
        )
        .unwrap();
        config.configure(&highlight_names);

        let source = Rope::from_str(
            "
            struct Stuff {}
            fn main() {}
        ",
        );
        let syntax = Syntax::new(&source, Arc::new(config), Arc::new(loader));
        let tree = syntax.tree();
        let root = tree.root_node();
        assert_eq!(root.kind(), "source_file");

        // The pieces are concatenated verbatim, so the trailing spaces of the
        // first two literals are significant.
        assert_eq!(
            root.to_sexp(),
            concat!(
                "(source_file ",
                "(struct_item name: (type_identifier) body: (field_declaration_list)) ",
                "(function_item name: (identifier) parameters: (parameters) body: (block)))"
            )
        );

        let struct_node = root.child(0).unwrap();
        assert_eq!(struct_node.kind(), "struct_item");
    }

    /// `generate_edits` should translate a change set into tree-sitter
    /// `InputEdit`s with matching byte offsets and row/column positions.
    #[test]
    fn test_input_edits() {
        use tree_sitter::InputEdit;

        let doc = Rope::from("hello world!\ntest 123");
        // Replace "world" with "test", then delete "\ntest".
        let transaction = Transaction::change(
            &doc,
            vec![(6, 11, Some("test".into())), (12, 17, None)].into_iter(),
        );
        let edits = generate_edits(&doc, transaction.changes());

        assert_eq!(
            edits,
            &[
                InputEdit {
                    start_byte: 6,
                    old_end_byte: 11,
                    new_end_byte: 10,
                    start_position: Point { row: 0, column: 6 },
                    old_end_position: Point { row: 0, column: 11 },
                    new_end_position: Point { row: 0, column: 10 }
                },
                InputEdit {
                    start_byte: 12,
                    old_end_byte: 17,
                    new_end_byte: 12,
                    start_position: Point { row: 0, column: 12 },
                    old_end_position: Point { row: 1, column: 4 },
                    new_end_position: Point { row: 0, column: 12 }
                }
            ]
        );

        // Testing with the official example from tree-sitter
        let mut doc = Rope::from("fn test() {}");
        let transaction =
            Transaction::change(&doc, vec![(8, 8, Some("a: u32".into()))].into_iter());
        let edits = generate_edits(&doc, transaction.changes());
        transaction.apply(&mut doc);

        assert_eq!(doc, "fn test(a: u32) {}");
        assert_eq!(
            edits,
            &[InputEdit {
                start_byte: 8,
                old_end_byte: 8,
                new_end_byte: 14,
                start_position: Point { row: 0, column: 8 },
                old_end_position: Point { row: 0, column: 8 },
                new_end_position: Point { row: 0, column: 14 }
            }]
        );
    }

    /// Runtime files that exist load with content; missing ones yield an error.
    #[test]
    fn test_load_runtime_file() {
        // Test to make sure we can load some data from the runtime directory.
        let contents = load_runtime_file("rust", "indents.scm").unwrap();
        assert!(!contents.is_empty());

        let results = load_runtime_file("rust", "does-not-exist");
        assert!(results.is_err());
    }
}