syntax: Fix language detection by shebang

The switch to tree-house accidentally dropped some shebang parsing code
from the loader's function that detects a language by its shebang line.
This change restores that code. The new code is slightly different: it
runs a `regex_cursor` regex directly on the Rope rather than eagerly
converting the text to a `Cow<str>` and running a regular regex across it.
pull/12759/merge
Michael Davis 2025-05-14 16:29:27 -04:00
parent 6be38642f4
commit 09bc67ad6d
No known key found for this signature in database
1 changed file with 17 additions and 2 deletions

View File

@@ -312,7 +312,22 @@ impl Loader {
} }
pub fn language_for_shebang(&self, text: RopeSlice) -> Option<Language> { pub fn language_for_shebang(&self, text: RopeSlice) -> Option<Language> {
let shebang: Cow<str> = text.into(); // NOTE: this is slightly different than the one for injection markers in tree-house. It
// is anchored at the beginning.
use helix_stdx::rope::Regex;
use once_cell::sync::Lazy;
const SHEBANG: &str = r"^#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(SHEBANG).unwrap());
let marker = SHEBANG_REGEX
.captures_iter(regex_cursor::Input::new(text))
.map(|cap| text.byte_slice(cap.get_group(1).unwrap().range()))
.next()?;
self.language_for_shebang_marker(marker)
}
fn language_for_shebang_marker(&self, marker: RopeSlice) -> Option<Language> {
let shebang: Cow<str> = marker.into();
self.languages_by_shebang.get(shebang.as_ref()).copied() self.languages_by_shebang.get(shebang.as_ref()).copied()
} }
@@ -351,7 +366,7 @@ impl LanguageLoader for Loader {
let path: Cow<str> = text.into(); let path: Cow<str> = text.into();
self.language_for_filename(Path::new(path.as_ref())) self.language_for_filename(Path::new(path.as_ref()))
} }
InjectionLanguageMarker::Shebang(text) => self.language_for_shebang(text), InjectionLanguageMarker::Shebang(text) => self.language_for_shebang_marker(text),
} }
} }