// Source: helix/helix-core/src/case_conversion.rs
use crate::Tendril;
// todo: should this be grapheme aware?
2024-12-20 19:37:14 +08:00
/// Converts each character into a different one, with zero context about surrounding characters
2024-12-20 19:32:14 +08:00
pub fn simple_case_conversion(
2024-12-20 19:25:27 +08:00
text: impl Iterator<Item = char>,
buf: &mut Tendril,
transform_char: impl Fn(&char) -> char,
) {
for c in text {
2024-12-20 19:25:27 +08:00
buf.push(transform_char(&c))
}
}
2024-12-20 19:32:14 +08:00
pub fn complex_case_conversion(
2024-12-20 18:43:31 +08:00
text: impl Iterator<Item = char>,
buf: &mut Tendril,
capitalize_first: bool,
separator: Option<char>,
2024-12-20 18:43:31 +08:00
) {
let mut capitalize_next = capitalize_first;
let mut prev: Option<char> = None;
2024-12-20 18:43:31 +08:00
for c in text.skip_while(|ch| ch.is_whitespace()) {
if c.is_alphanumeric() {
if prev.is_some_and(|p| p.is_lowercase()) && c.is_uppercase() {
2024-12-20 18:43:31 +08:00
capitalize_next = true;
}
2024-12-20 19:37:14 +08:00
if capitalize_next {
buf.push(c.to_ascii_uppercase());
2024-12-20 18:43:31 +08:00
capitalize_next = false;
} else {
buf.extend(c.to_lowercase());
}
} else {
capitalize_next = true;
if let Some(separator) = separator {
if prev.is_some_and(|p| p != separator) {
buf.push(separator);
}
2024-12-20 18:41:02 +08:00
}
}
2024-12-20 18:41:02 +08:00
prev = Some(c);
}
}
2024-12-20 19:32:14 +08:00
pub fn separator_case_conversion(
text: impl Iterator<Item = char>,
buf: &mut Tendril,
separator: char,
) {
2024-12-20 19:25:27 +08:00
let mut prev: Option<char> = None;
2024-12-20 18:33:59 +08:00
for c in text.skip_while(|ch| ch.is_whitespace()) {
if c.is_alphanumeric() {
2024-12-20 19:25:27 +08:00
if prev.is_some_and(|p| p.is_lowercase()) && c.is_uppercase()
|| !prev.is_some_and(|p| p.is_alphanumeric()) && !buf.is_empty()
{
buf.push(separator);
}
buf.push(c.to_ascii_lowercase());
}
2024-12-20 19:25:27 +08:00
prev = Some(c);
}
2024-12-20 19:25:27 +08:00
}
2024-12-20 19:32:14 +08:00
pub fn into_alternate_case(text: impl Iterator<Item = char>, buf: &mut Tendril) {
simple_case_conversion(text, buf, |c| {
2024-12-20 19:25:27 +08:00
if c.is_uppercase() {
c.to_ascii_lowercase()
} else if c.is_lowercase() {
c.to_ascii_uppercase()
} else {
*c
}
});
}
2024-12-20 19:32:14 +08:00
pub fn into_upper_case(text: impl Iterator<Item = char>, buf: &mut Tendril) {
simple_case_conversion(text, buf, char::to_ascii_uppercase);
2024-12-20 19:25:27 +08:00
}
2024-12-20 19:32:14 +08:00
pub fn into_lower_case(text: impl Iterator<Item = char>, buf: &mut Tendril) {
simple_case_conversion(text, buf, char::to_ascii_lowercase);
2024-12-20 15:47:13 +08:00
}
2024-12-20 19:32:14 +08:00
pub fn into_kebab_case(text: impl Iterator<Item = char>, buf: &mut Tendril) {
separator_case_conversion(text, buf, '-');
}
2024-12-20 19:32:14 +08:00
pub fn into_snake_case(text: impl Iterator<Item = char>, buf: &mut Tendril) {
separator_case_conversion(text, buf, '_');
}
2024-12-20 19:32:14 +08:00
pub fn into_title_case(text: impl Iterator<Item = char>, buf: &mut Tendril) {
2024-12-20 19:37:14 +08:00
complex_case_conversion(text, buf, true, Some(' '));
}
2024-12-20 19:32:14 +08:00
pub fn into_camel_case(text: impl Iterator<Item = char>, buf: &mut Tendril) {
2024-12-20 19:37:14 +08:00
complex_case_conversion(text, buf, false, None);
}
2024-12-20 19:32:14 +08:00
pub fn into_pascal_case(text: impl Iterator<Item = char>, buf: &mut Tendril) {
2024-12-20 19:37:14 +08:00
complex_case_conversion(text, buf, true, None);
2024-12-20 19:25:27 +08:00
}
/// Runs the buffer-filling conversion `to_case_with` on `text` against a freshly created
/// [`Tendril`] and returns that buffer.
fn to_case<I: Iterator<Item = char>>(text: I, to_case_with: fn(I, &mut Tendril)) -> Tendril {
    let mut converted = Tendril::new();
    to_case_with(text, &mut converted);
    converted
}
pub fn to_camel_case(text: impl Iterator<Item = char>) -> Tendril {
2024-12-20 19:32:14 +08:00
to_case(text, into_camel_case)
2024-12-20 19:25:27 +08:00
}
pub fn to_lower_case(text: impl Iterator<Item = char>) -> Tendril {
2024-12-20 19:32:14 +08:00
to_case(text, into_lower_case)
2024-12-20 19:25:27 +08:00
}
pub fn to_upper_case(text: impl Iterator<Item = char>) -> Tendril {
2024-12-20 19:32:14 +08:00
to_case(text, into_upper_case)
2024-12-20 19:25:27 +08:00
}
pub fn to_pascal_case(text: impl Iterator<Item = char>) -> Tendril {
2024-12-20 19:32:14 +08:00
to_case(text, into_pascal_case)
2024-12-20 19:25:27 +08:00
}
pub fn to_alternate_case(text: impl Iterator<Item = char>) -> Tendril {
2024-12-20 19:32:14 +08:00
to_case(text, into_alternate_case)
2024-12-20 19:25:27 +08:00
}
pub fn to_title_case(text: impl Iterator<Item = char>) -> Tendril {
2024-12-20 19:32:14 +08:00
to_case(text, into_title_case)
2024-12-20 19:25:27 +08:00
}
pub fn to_kebab_case(text: impl Iterator<Item = char>) -> Tendril {
2024-12-20 19:32:14 +08:00
to_case(text, into_kebab_case)
2024-12-20 19:25:27 +08:00
}
pub fn to_snake_case(text: impl Iterator<Item = char>) -> Tendril {
2024-12-20 19:32:14 +08:00
to_case(text, into_snake_case)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `convert` on the input of every `(input, expected)` pair and compares the result
    /// with the expected output, naming the offending input when a comparison fails.
    fn assert_conversions(cases: &[(&str, &str)], convert: impl Fn(&str) -> Tendril) {
        for &(input, expected) in cases {
            assert_eq!(convert(input), expected, "input: {input:?}");
        }
    }

    #[test]
    fn test_camel_case_conversion() {
        let tests = [
            ("hello world", "helloWorld"),
            ("Hello World", "helloWorld"),
            ("hello_world", "helloWorld"),
            ("HELLO_WORLD", "helloWorld"),
            ("hello-world", "helloWorld"),
            ("hello world", "helloWorld"),
            (" hello world", "helloWorld"),
            ("hello\tworld", "helloWorld"),
            ("HELLO WORLD", "helloWorld"),
            ("HELLO-world", "helloWorld"),
            ("hello WORLD ", "helloWorld"),
            ("helloWorld", "helloWorld"),
        ];

        assert_conversions(&tests, |input| to_camel_case(input.chars()));
    }

    #[test]
    fn test_lower_case_conversion() {
        let tests = [
            ("HelloWorld", "helloworld"),
            ("HELLO WORLD", "hello world"),
            ("hello_world", "hello_world"),
            ("Hello-World", "hello-world"),
            ("Hello", "hello"),
            ("WORLD", "world"),
            ("hello world", "hello world"),
            ("HELLOworld", "helloworld"),
            ("hello-world", "hello-world"),
            ("hello_world_here", "hello_world_here"),
            ("HELLO_world", "hello_world"),
            ("MixEdCaseString", "mixedcasestring"),
        ];

        assert_conversions(&tests, |input| to_lower_case(input.chars()));
    }

    #[test]
    fn test_upper_case_conversion() {
        let tests = [
            ("helloWorld", "HELLOWORLD"),
            ("hello world", "HELLO WORLD"),
            ("hello_world", "HELLO_WORLD"),
            ("Hello-World", "HELLO-WORLD"),
            ("Hello", "HELLO"),
            ("world", "WORLD"),
            ("hello world", "HELLO WORLD"),
            ("helloworld", "HELLOWORLD"),
            ("hello-world", "HELLO-WORLD"),
            ("hello_world_here", "HELLO_WORLD_HERE"),
            ("hello_WORLD", "HELLO_WORLD"),
            ("mixedCaseString", "MIXEDCASESTRING"),
        ];

        assert_conversions(&tests, |input| to_upper_case(input.chars()));
    }

    #[test]
    fn test_pascal_case_conversion() {
        let tests = [
            ("hello world", "HelloWorld"),
            ("Hello World", "HelloWorld"),
            ("hello_world", "HelloWorld"),
            ("HELLO_WORLD", "HelloWorld"),
            ("hello-world", "HelloWorld"),
            ("hello world", "HelloWorld"),
            (" hello world", "HelloWorld"),
            ("hello\tworld", "HelloWorld"),
            ("HELLO WORLD", "HelloWorld"),
            ("HELLO-world", "HelloWorld"),
            ("hello WORLD ", "HelloWorld"),
            ("helloWorld", "HelloWorld"),
        ];

        assert_conversions(&tests, |input| to_pascal_case(input.chars()));
    }

    #[test]
    fn test_alternate_case_conversion() {
        let tests = [
            ("hello world", "HELLO WORLD"),
            ("Hello World", "hELLO wORLD"),
            ("helLo_woRlD", "HELlO_WOrLd"),
            ("HELLO_world", "hello_WORLD"),
            ("hello-world", "HELLO-WORLD"),
            ("Hello-world", "hELLO-WORLD"),
            ("hello", "HELLO"),
            ("HELLO", "hello"),
            ("hello123", "HELLO123"),
            ("hello WORLD", "HELLO world"),
            ("HELLO123 world", "hello123 WORLD"),
            ("world hello", "WORLD HELLO"),
        ];

        assert_conversions(&tests, |input| to_alternate_case(input.chars()));
    }

    #[test]
    fn test_title_case_conversion() {
        // NOTE(review): the commented-out cases were already disabled in the original source
        // and are kept disabled here - TODO confirm whether they pass and re-enable them.
        let tests = [
            ("hello world", "Hello World"),
            ("Hello World", "Hello World"),
            ("hello_world", "Hello World"),
            ("HELLO_WORLD", "Hello World"),
            ("hello-world", "Hello World"),
            ("hello world", "Hello World"),
            (" hello world", "Hello World"),
            ("hello\tworld", "Hello World"),
            // ("HELLO WORLD", "Hello World"),
            ("HELLO-world", "Hello World"),
            // ("hello WORLD ", "Hello World"),
            // ("helloWorld", "Hello World"),
        ];

        assert_conversions(&tests, |input| to_title_case(input.chars()));
    }

    #[test]
    fn test_kebab_case_conversion() {
        let tests = [
            ("helloWorld", "hello-world"),
            ("HelloWorld", "hello-world"),
            ("hello_world", "hello-world"),
            ("HELLO_WORLD", "hello-world"),
            ("hello-world", "hello-world"),
            ("hello world", "hello-world"),
            ("hello\tworld", "hello-world"),
            ("HELLO WORLD", "hello-world"),
            ("HELLO-world", "hello-world"),
            ("hello WORLD ", "hello-world"),
            ("helloWorld", "hello-world"),
            ("HelloWorld123", "hello-world123"),
        ];

        assert_conversions(&tests, |input| to_kebab_case(input.chars()));
    }

    #[test]
    fn test_snake_case_conversion() {
        let tests = [
            ("helloWorld", "hello_world"),
            ("HelloWorld", "hello_world"),
            ("hello world", "hello_world"),
            ("HELLO WORLD", "hello_world"),
            ("hello-world", "hello_world"),
            ("hello world", "hello_world"),
            ("hello\tworld", "hello_world"),
            ("HELLO WORLD", "hello_world"),
            ("HELLO-world", "hello_world"),
            ("hello WORLD ", "hello_world"),
            ("helloWorld", "hello_world"),
            ("helloWORLD123", "hello_world123"),
        ];

        assert_conversions(&tests, |input| to_snake_case(input.chars()));
    }
}