From 09bc67ad6d1bfa4368b065719e615d540f5f8dd7 Mon Sep 17 00:00:00 2001
From: Michael Davis
Date: Wed, 14 May 2025 16:29:27 -0400
Subject: [PATCH] syntax: Fix language detection by shebang

The switch to tree-house accidentally dropped some shebang parsing code
from the loader's function to detect by shebang. This change restores
that. The new code is slightly different as it's using a `regex_cursor`
regex on the Rope rather than eagerly converting the text to a `Cow` and
running a regular regex across it.
---
 helix-core/src/syntax.rs | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index e232ee69b..86bb17b3d 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -312,7 +312,22 @@ impl Loader {
     }
 
     pub fn language_for_shebang(&self, text: RopeSlice) -> Option<Language> {
-        let shebang: Cow<str> = text.into();
+        // NOTE: this is slightly different than the one for injection markers in tree-house. It
+        // is anchored at the beginning.
+        use helix_stdx::rope::Regex;
+        use once_cell::sync::Lazy;
+        const SHEBANG: &str = r"^#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
+        static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(SHEBANG).unwrap());
+
+        let marker = SHEBANG_REGEX
+            .captures_iter(regex_cursor::Input::new(text))
+            .map(|cap| text.byte_slice(cap.get_group(1).unwrap().range()))
+            .next()?;
+        self.language_for_shebang_marker(marker)
+    }
+
+    fn language_for_shebang_marker(&self, marker: RopeSlice) -> Option<Language> {
+        let shebang: Cow<str> = marker.into();
         self.languages_by_shebang.get(shebang.as_ref()).copied()
     }
 
@@ -351,7 +366,7 @@ impl LanguageLoader for Loader {
                 let path: Cow<str> = text.into();
                 self.language_for_filename(Path::new(path.as_ref()))
             }
-            InjectionLanguageMarker::Shebang(text) => self.language_for_shebang(text),
+            InjectionLanguageMarker::Shebang(text) => self.language_for_shebang_marker(text),
         }
     }
 