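//! Case-conversion helpers (`helix-core/src/case_conversion.rs`).
//!
//! The `into_*` functions append a converted copy of a character stream to a caller-supplied
//! `Tendril`; the `to_*` functions build and return a new `Tendril`.
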
use crate::Tendril;
// TODO: should these conversions operate on grapheme clusters rather than individual `char`s?
pub fn simple_case_conversion(
2024-12-20 19:25:27 +08:00
text: impl Iterator<Item = char>,
buf: &mut Tendril,
transform_char: impl Fn(&char) -> char,
) {
for c in text {
2024-12-20 19:25:27 +08:00
buf.push(transform_char(&c))
}
}
2025-01-12 01:04:04 +08:00
pub fn smart_case_conversion(
2025-01-12 01:43:57 +08:00
chars: impl Iterator<Item = char>,
2024-12-20 18:43:31 +08:00
buf: &mut Tendril,
capitalize_first: bool,
separator: Option<char>,
2024-12-20 18:43:31 +08:00
) {
2024-12-20 20:26:05 +08:00
let mut should_capitalize_current = capitalize_first;
let mut prev: Option<char> = None;
2024-12-20 18:43:31 +08:00
2025-01-12 01:43:57 +08:00
for current in chars.skip_while(|ch| ch.is_whitespace()) {
let mut maybe_add_separator = || {
if let Some(separator) = separator {
2025-01-12 01:43:57 +08:00
// We do not want to add a separator when the previous char is not a separator
// For example, snake__case is invalid
if prev.is_some_and(|ch| ch != separator) {
buf.push(separator);
}
}
2025-01-12 01:43:57 +08:00
};
if current.is_alphanumeric() {
// "camelCase" => transition at 'l' -> 'C'
let has_camel_transition =
current.is_uppercase() && prev.is_some_and(|ch| ch.is_lowercase());
if has_camel_transition {
maybe_add_separator();
2024-12-20 20:26:05 +08:00
should_capitalize_current = true;
2024-12-20 18:43:31 +08:00
}
2024-12-20 20:26:05 +08:00
if should_capitalize_current {
2025-01-12 01:43:57 +08:00
buf.push(current.to_ascii_uppercase());
2024-12-20 20:26:05 +08:00
should_capitalize_current = false;
2024-12-20 18:43:31 +08:00
} else {
2025-01-12 01:43:57 +08:00
buf.push(current.to_ascii_lowercase());
2024-12-20 18:43:31 +08:00
}
} else {
2024-12-20 20:26:05 +08:00
should_capitalize_current = true;
2025-01-12 01:43:57 +08:00
maybe_add_separator();
}
2025-01-12 01:43:57 +08:00
prev = Some(current);
}
2024-12-20 20:04:48 +08:00
*buf = buf.trim_end().into();
}
2024-12-20 19:32:14 +08:00
pub fn separator_case_conversion(
text: impl Iterator<Item = char>,
buf: &mut Tendril,
separator: char,
) {
2024-12-20 19:25:27 +08:00
let mut prev: Option<char> = None;
2024-12-20 18:33:59 +08:00
for c in text.skip_while(|ch| ch.is_whitespace()) {
if !c.is_alphanumeric() {
prev = Some(c);
continue;
}
// "camelCase" => transition at 'l' -> 'C'
let has_camel_transition = prev.is_some_and(|p| p.is_lowercase()) && c.is_uppercase();
// "email@somewhere" => transition at 'l' -> '@'
// first character must not be separator, e.g. @emailSomewhere should not become -email-somewhere
let has_alphanum_transition = !prev.is_some_and(|p| p.is_alphanumeric()) && !buf.is_empty();
if has_camel_transition || has_alphanum_transition {
buf.push(separator);
}
buf.push(c.to_ascii_lowercase());
2024-12-20 19:25:27 +08:00
prev = Some(c);
}
2024-12-20 19:25:27 +08:00
}
2024-12-20 19:32:14 +08:00
/// Appends `text` to `buf` with the case of every letter swapped.
///
/// The swap uses the ASCII-only conversions, so non-ASCII letters and all non-letters are
/// written unchanged.
pub fn into_alternate_case(text: impl Iterator<Item = char>, buf: &mut Tendril) {
    simple_case_conversion(text, buf, |c| {
        if c.is_uppercase() {
            c.to_ascii_lowercase()
        } else if c.is_lowercase() {
            c.to_ascii_uppercase()
        } else {
            *c
        }
    });
}
pub fn into_uppercase(text: impl Iterator<Item = char>, buf: &mut Tendril) {
2024-12-20 19:32:14 +08:00
simple_case_conversion(text, buf, char::to_ascii_uppercase);
2024-12-20 19:25:27 +08:00
}
pub fn into_lowercase(text: impl Iterator<Item = char>, buf: &mut Tendril) {
2024-12-20 19:32:14 +08:00
simple_case_conversion(text, buf, char::to_ascii_lowercase);
2024-12-20 15:47:13 +08:00
}
2024-12-20 19:32:14 +08:00
/// Appends `text` to `buf` in `kebab-case` (lowercase words joined by `-`).
pub fn into_kebab_case(text: impl Iterator<Item = char>, buf: &mut Tendril) {
    const WORD_SEPARATOR: char = '-';
    separator_case_conversion(text, buf, WORD_SEPARATOR);
}
2024-12-20 19:32:14 +08:00
/// Appends `text` to `buf` in `snake_case` (lowercase words joined by `_`).
pub fn into_snake_case(text: impl Iterator<Item = char>, buf: &mut Tendril) {
    const WORD_SEPARATOR: char = '_';
    separator_case_conversion(text, buf, WORD_SEPARATOR);
}
2024-12-20 19:32:14 +08:00
pub fn into_title_case(text: impl Iterator<Item = char>, buf: &mut Tendril) {
2025-01-12 01:04:04 +08:00
smart_case_conversion(text, buf, true, Some(' '));
}
2024-12-20 19:32:14 +08:00
pub fn into_camel_case(text: impl Iterator<Item = char>, buf: &mut Tendril) {
2025-01-12 01:04:04 +08:00
smart_case_conversion(text, buf, false, None);
}
2024-12-20 19:32:14 +08:00
pub fn into_pascal_case(text: impl Iterator<Item = char>, buf: &mut Tendril) {
2025-01-12 01:04:04 +08:00
smart_case_conversion(text, buf, true, None);
2024-12-20 19:25:27 +08:00
}
/// Runs the appending conversion `to_case_with` on `text` against a fresh, empty buffer and
/// returns that buffer.
fn to_case<I: Iterator<Item = char>>(text: I, to_case_with: fn(I, &mut Tendril)) -> Tendril {
    let mut converted = Tendril::new();
    to_case_with(text, &mut converted);
    converted
}
pub fn to_camel_case(text: impl Iterator<Item = char>) -> Tendril {
2024-12-20 19:32:14 +08:00
to_case(text, into_camel_case)
2024-12-20 19:25:27 +08:00
}
pub fn to_lowercase(text: impl Iterator<Item = char>) -> Tendril {
to_case(text, into_lowercase)
2024-12-20 19:25:27 +08:00
}
pub fn to_uppercase(text: impl Iterator<Item = char>) -> Tendril {
to_case(text, into_uppercase)
2024-12-20 19:25:27 +08:00
}
pub fn to_pascal_case(text: impl Iterator<Item = char>) -> Tendril {
2024-12-20 19:32:14 +08:00
to_case(text, into_pascal_case)
2024-12-20 19:25:27 +08:00
}
pub fn to_alternate_case(text: impl Iterator<Item = char>) -> Tendril {
2024-12-20 19:32:14 +08:00
to_case(text, into_alternate_case)
2024-12-20 19:25:27 +08:00
}
pub fn to_title_case(text: impl Iterator<Item = char>) -> Tendril {
2024-12-20 19:32:14 +08:00
to_case(text, into_title_case)
2024-12-20 19:25:27 +08:00
}
pub fn to_kebab_case(text: impl Iterator<Item = char>) -> Tendril {
2024-12-20 19:32:14 +08:00
to_case(text, into_kebab_case)
2024-12-20 19:25:27 +08:00
}
pub fn to_snake_case(text: impl Iterator<Item = char>) -> Tendril {
2024-12-20 19:32:14 +08:00
to_case(text, into_snake_case)
}
// Table-driven tests: each case is an `(input, expected)` pair and every assertion names the
// failing input in its message.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_camel_case_conversion() {
        let tests = [
            ("hello world", "helloWorld"),
            ("Hello World", "helloWorld"),
            ("hello_world", "helloWorld"),
            ("HELLO_WORLD", "helloWorld"),
            ("hello-world", "helloWorld"),
            ("hello world", "helloWorld"),
            (" hello world", "helloWorld"),
            ("hello\tworld", "helloWorld"),
            ("HELLO WORLD", "helloWorld"),
            ("HELLO-world", "helloWorld"),
            ("hello WORLD ", "helloWorld"),
            ("helloWorld", "helloWorld"),
        ];

        for (input, expected) in tests {
            assert_eq!(to_camel_case(input.chars()), expected, "input: {input:?}");
        }
    }

    #[test]
    fn test_lower_case_conversion() {
        let tests = [
            ("HelloWorld", "helloworld"),
            ("HELLO WORLD", "hello world"),
            ("hello_world", "hello_world"),
            ("Hello-World", "hello-world"),
            ("Hello", "hello"),
            ("WORLD", "world"),
            ("hello world", "hello world"),
            ("HELLOworld", "helloworld"),
            ("hello-world", "hello-world"),
            ("hello_world_here", "hello_world_here"),
            ("HELLO_world", "hello_world"),
            ("MixEdCaseString", "mixedcasestring"),
        ];

        for (input, expected) in tests {
            assert_eq!(to_lowercase(input.chars()), expected, "input: {input:?}");
        }
    }

    #[test]
    fn test_upper_case_conversion() {
        let tests = [
            ("helloWorld", "HELLOWORLD"),
            ("hello world", "HELLO WORLD"),
            ("hello_world", "HELLO_WORLD"),
            ("Hello-World", "HELLO-WORLD"),
            ("Hello", "HELLO"),
            ("world", "WORLD"),
            ("hello world", "HELLO WORLD"),
            ("helloworld", "HELLOWORLD"),
            ("hello-world", "HELLO-WORLD"),
            ("hello_world_here", "HELLO_WORLD_HERE"),
            ("hello_WORLD", "HELLO_WORLD"),
            ("mixedCaseString", "MIXEDCASESTRING"),
        ];

        for (input, expected) in tests {
            assert_eq!(to_uppercase(input.chars()), expected, "input: {input:?}");
        }
    }

    #[test]
    fn test_pascal_case_conversion() {
        let tests = [
            ("hello world", "HelloWorld"),
            ("Hello World", "HelloWorld"),
            ("hello_world", "HelloWorld"),
            ("HELLO_WORLD", "HelloWorld"),
            ("hello-world", "HelloWorld"),
            ("hello world", "HelloWorld"),
            (" hello world", "HelloWorld"),
            ("hello\tworld", "HelloWorld"),
            ("HELLO WORLD", "HelloWorld"),
            ("HELLO-world", "HelloWorld"),
            ("hello WORLD ", "HelloWorld"),
            ("helloWorld", "HelloWorld"),
        ];

        for (input, expected) in tests {
            assert_eq!(to_pascal_case(input.chars()), expected, "input: {input:?}");
        }
    }

    #[test]
    fn test_alternate_case_conversion() {
        let tests = [
            ("hello world", "HELLO WORLD"),
            ("Hello World", "hELLO wORLD"),
            ("helLo_woRlD", "HELlO_WOrLd"),
            ("HELLO_world", "hello_WORLD"),
            ("hello-world", "HELLO-WORLD"),
            ("Hello-world", "hELLO-WORLD"),
            ("hello", "HELLO"),
            ("HELLO", "hello"),
            ("hello123", "HELLO123"),
            ("hello WORLD", "HELLO world"),
            ("HELLO123 world", "hello123 WORLD"),
            ("world hello", "WORLD HELLO"),
        ];

        for (input, expected) in tests {
            assert_eq!(to_alternate_case(input.chars()), expected, "input: {input:?}");
        }
    }

    #[test]
    fn test_title_case_conversion() {
        let tests = [
            ("hello world", "Hello World"),
            ("Hello World", "Hello World"),
            ("hello_world", "Hello World"),
            ("HELLO_WORLD", "Hello World"),
            ("hello-world", "Hello World"),
            ("hello world", "Hello World"),
            (" hello world", "Hello World"),
            ("hello\tworld", "Hello World"),
            ("HELLO WORLD", "Hello World"),
            ("HELLO-world", "Hello World"),
            ("hello WORLD ", "Hello World"),
            ("helloWorld", "Hello World"),
        ];

        for (input, expected) in tests {
            assert_eq!(to_title_case(input.chars()), expected, "input: {input:?}");
        }
    }

    #[test]
    fn test_kebab_case_conversion() {
        let tests = [
            ("helloWorld", "hello-world"),
            ("HelloWorld", "hello-world"),
            ("hello_world", "hello-world"),
            ("HELLO_WORLD", "hello-world"),
            ("hello-world", "hello-world"),
            ("hello world", "hello-world"),
            ("hello\tworld", "hello-world"),
            ("HELLO WORLD", "hello-world"),
            ("HELLO-world", "hello-world"),
            ("hello WORLD ", "hello-world"),
            ("helloWorld", "hello-world"),
            ("HelloWorld123", "hello-world123"),
        ];

        for (input, expected) in tests {
            assert_eq!(to_kebab_case(input.chars()), expected, "input: {input:?}");
        }
    }

    #[test]
    fn test_snake_case_conversion() {
        let tests = [
            ("helloWorld", "hello_world"),
            ("HelloWorld", "hello_world"),
            ("hello world", "hello_world"),
            ("HELLO WORLD", "hello_world"),
            ("hello-world", "hello_world"),
            ("hello world", "hello_world"),
            ("hello\tworld", "hello_world"),
            ("HELLO WORLD", "hello_world"),
            ("HELLO-world", "hello_world"),
            ("hello WORLD ", "hello_world"),
            ("helloWorld", "hello_world"),
            ("helloWORLD123", "hello_world123"),
        ];

        for (input, expected) in tests {
            assert_eq!(to_snake_case(input.chars()), expected, "input: {input:?}");
        }
    }
}