stdx: Add `RopeSliceExt::(nth_){next,prev}_grapheme_boundary`

These functions mirror those in `helix_core::graphemes` but operate
directly on byte indices rather than character indices. These are meant
to be used as we transition to Ropey v2 and always use byte indices.
pull/13743/head
Michael Davis 2025-06-10 16:00:42 -04:00
parent 362e97e927
commit f5dc8245ea
No known key found for this signature in database
1 changed files with 157 additions and 0 deletions

View File

@ -151,6 +151,88 @@ pub trait RopeSliceExt<'a>: Sized {
/// assert_eq!(graphemes.as_slice(), &["🖼️", "🏴‍☠️", "😶‍🌫️"]);
/// ```
fn graphemes_rev(self) -> RevRopeGraphemes<'a>;
/// Finds the byte index of the next grapheme boundary after `byte_idx`.
///
/// If the byte index lies on the last grapheme cluster in the slice then this function
/// returns `RopeSlice::len_bytes`.
///
/// # Example
///
/// ```
/// # use ropey::Rope;
/// # use helix_stdx::rope::RopeSliceExt;
/// let text = Rope::from_str("😶‍🌫️🏴‍☠️🖼️");
/// let slice = text.slice(..);
/// let mut byte_idx = 0;
/// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("😶‍🌫️".into()));
/// byte_idx = slice.next_grapheme_boundary(byte_idx);
/// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("🏴‍☠️".into()));
///
/// // If `byte_idx` does not lie on a character or grapheme boundary then this function is
/// // functionally the same as `ceil_grapheme_boundary`.
/// assert_eq!(slice.next_grapheme_boundary(byte_idx - 1), byte_idx);
/// assert_eq!(slice.next_grapheme_boundary(byte_idx - 2), byte_idx);
/// assert_eq!(slice.next_grapheme_boundary(byte_idx + 1), slice.next_grapheme_boundary(byte_idx));
/// assert_eq!(slice.next_grapheme_boundary(byte_idx + 2), slice.next_grapheme_boundary(byte_idx));
///
/// byte_idx = slice.next_grapheme_boundary(byte_idx);
/// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("🖼️".into()));
/// byte_idx = slice.next_grapheme_boundary(byte_idx);
/// assert_eq!(slice.graphemes_at(byte_idx).next(), None);
/// assert_eq!(byte_idx, slice.len_bytes());
/// ```
fn next_grapheme_boundary(self, byte_idx: usize) -> usize {
self.nth_next_grapheme_boundary(byte_idx, 1)
}
/// Finds the byte index of the `n`th grapheme cluster after the given `byte_idx`.
///
/// If there are fewer than `n` grapheme clusters after `byte_idx` in the rope then this
/// function returns `RopeSlice::len_bytes`.
///
/// This is functionally equivalent to calling `next_grapheme_boundary` `n` times but is more
/// efficient.
fn nth_next_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize;
/// Finds the byte index of the previous grapheme boundary before `byte_idx`.
///
/// If the byte index lies on the first grapheme cluster in the slice then this function
/// returns zero.
///
/// # Example
///
/// ```
/// # use ropey::Rope;
/// # use helix_stdx::rope::RopeSliceExt;
/// let text = Rope::from_str("😶‍🌫️🏴‍☠️🖼️");
/// let slice = text.slice(..);
/// let mut byte_idx = text.len_bytes();
/// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("🖼️".into()));
/// byte_idx = slice.prev_grapheme_boundary(byte_idx);
/// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("🏴‍☠️".into()));
///
/// // If `byte_idx` does not lie on a character or grapheme boundary then this function is
/// // functionally the same as `floor_grapheme_boundary`.
/// assert_eq!(slice.prev_grapheme_boundary(byte_idx + 1), byte_idx);
/// assert_eq!(slice.prev_grapheme_boundary(byte_idx + 2), byte_idx);
/// assert_eq!(slice.prev_grapheme_boundary(byte_idx - 1), slice.prev_grapheme_boundary(byte_idx));
/// assert_eq!(slice.prev_grapheme_boundary(byte_idx - 2), slice.prev_grapheme_boundary(byte_idx));
///
/// byte_idx = slice.prev_grapheme_boundary(byte_idx);
/// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("😶‍🌫️".into()));
/// byte_idx = slice.prev_grapheme_boundary(byte_idx);
/// assert_eq!(slice.graphemes_at(byte_idx).prev(), None);
/// assert_eq!(byte_idx, 0);
/// ```
fn prev_grapheme_boundary(self, byte_idx: usize) -> usize {
self.nth_prev_grapheme_boundary(byte_idx, 1)
}
/// Finds the byte index of the `n`th grapheme cluster before the given `byte_idx`.
///
/// If there are fewer than `n` grapheme clusters before `byte_idx` in the rope then this
/// function returns zero.
///
/// This is functionally equivalent to calling `prev_grapheme_boundary` `n` times but is more
/// efficient.
fn nth_prev_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize;
}
impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
@ -360,6 +442,81 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
cursor: GraphemeCursor::new(self.len_bytes(), self.len_bytes(), true),
}
}
fn nth_next_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize {
// Bounds check
assert!(byte_idx <= self.len_bytes());
byte_idx = self.floor_char_boundary(byte_idx);
// Get the chunk with our byte index in it.
let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx);
// Set up the grapheme cursor.
let mut gc = GraphemeCursor::new(byte_idx, self.len_bytes(), true);
// Find the nth next grapheme cluster boundary.
for _ in 0..n {
loop {
match gc.next_boundary(chunk, chunk_byte_idx) {
Ok(None) => return self.len_bytes(),
Ok(Some(boundary)) => {
byte_idx = boundary;
break;
}
Err(GraphemeIncomplete::NextChunk) => {
chunk_byte_idx += chunk.len();
let (a, _, _, _) = self.chunk_at_byte(chunk_byte_idx);
chunk = a;
}
Err(GraphemeIncomplete::PreContext(n)) => {
let ctx_chunk = self.chunk_at_byte(n - 1).0;
gc.provide_context(ctx_chunk, n - ctx_chunk.len());
}
_ => unreachable!(),
}
}
}
byte_idx
}
fn nth_prev_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize {
// Bounds check
assert!(byte_idx <= self.len_bytes());
byte_idx = self.ceil_char_boundary(byte_idx);
// Get the chunk with our byte index in it.
let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx);
// Set up the grapheme cursor.
let mut gc = GraphemeCursor::new(byte_idx, self.len_bytes(), true);
for _ in 0..n {
loop {
match gc.prev_boundary(chunk, chunk_byte_idx) {
Ok(None) => return 0,
Ok(Some(boundary)) => {
byte_idx = boundary;
break;
}
Err(GraphemeIncomplete::PrevChunk) => {
let (a, b, _, _) = self.chunk_at_byte(chunk_byte_idx - 1);
chunk = a;
chunk_byte_idx = b;
}
Err(GraphemeIncomplete::PreContext(n)) => {
let ctx_chunk = self.chunk_at_byte(n - 1).0;
gc.provide_context(ctx_chunk, n - ctx_chunk.len());
}
_ => unreachable!(),
}
}
}
byte_idx
}
}
// copied from std