From ab9e36069b74c6241c66ca78e775e69bf4820830 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Tue, 10 Jun 2025 20:55:47 -0400 Subject: [PATCH] stdx: Unify RopeSlice grapheme cluster iterators This style for RopeGraphemes is identical to Ropey's Chars and Bytes iterators. Being able to move the iterator types like cursors over the bytes/chars/graphemes is useful in some cases. For example see `helix_core::movement::::range_to_target`. This change also adds `RopeSliceExt::graphemes_at` for flexibility. `graphemes` and `graphemes_rev` are now implemented in terms of `graphemes_at` and `RopeGraphemes::reversed`. --- helix-stdx/src/rope.rs | 261 ++++++++++++++++++++++------------------- 1 file changed, 140 insertions(+), 121 deletions(-) diff --git a/helix-stdx/src/rope.rs b/helix-stdx/src/rope.rs index 99ad79be2..d881867e6 100644 --- a/helix-stdx/src/rope.rs +++ b/helix-stdx/src/rope.rs @@ -135,7 +135,9 @@ pub trait RopeSliceExt<'a>: Sized { /// let graphemes: Vec<_> = text.graphemes().collect(); /// assert_eq!(graphemes.as_slice(), &["πŸ˜Άβ€πŸŒ«οΈ", "πŸ΄β€β˜ οΈ", "πŸ–ΌοΈ"]); /// ``` - fn graphemes(self) -> RopeGraphemes<'a>; + fn graphemes(self) -> RopeGraphemes<'a> { + self.graphemes_at(0) + } /// Returns an iterator over the grapheme clusters in the slice, reversed. /// /// The returned iterator starts at the end of the slice and ends at the beginning of the @@ -150,7 +152,23 @@ pub trait RopeSliceExt<'a>: Sized { /// let graphemes: Vec<_> = text.graphemes_rev().collect(); /// assert_eq!(graphemes.as_slice(), &["πŸ–ΌοΈ", "πŸ΄β€β˜ οΈ", "πŸ˜Άβ€πŸŒ«οΈ"]); /// ``` - fn graphemes_rev(self) -> RevRopeGraphemes<'a>; + fn graphemes_rev(self) -> RopeGraphemes<'a>; + /// Returns an iterator over the grapheme clusters in the slice at the given byte index. + /// + /// # Example + /// + /// ``` + /// # use ropey::Rope; + /// # use helix_stdx::rope::RopeSliceExt; + /// let text = Rope::from_str("πŸ˜Άβ€πŸŒ«οΈπŸ΄β€β˜ οΈπŸ–ΌοΈ"); + /// // 14 is the byte index of the pirate flag's starting cluster boundary. + /// let graphemes: Vec<_> = text.slice(..).graphemes_at(14).collect(); + /// assert_eq!(graphemes.as_slice(), &["πŸ΄β€β˜ οΈ", "πŸ–ΌοΈ"]); + /// // 27 is the byte index of the pirate flag's ending cluster boundary. + /// let graphemes: Vec<_> = text.slice(..).graphemes_at(27).reversed().collect(); + /// assert_eq!(graphemes.as_slice(), &["πŸ΄β€β˜ οΈ", "πŸ˜Άβ€πŸŒ«οΈ"]); + /// ``` + fn graphemes_at(self, byte_idx: usize) -> RopeGraphemes<'a>; /// Finds the byte index of the next grapheme boundary after `byte_idx`. /// /// If the byte index lies on the last grapheme cluster in the slice then this function @@ -401,29 +419,21 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> { } } - fn graphemes(self) -> RopeGraphemes<'a> { - let mut chunks = self.chunks(); - let first_chunk = chunks.next().unwrap_or(""); + fn graphemes_rev(self) -> RopeGraphemes<'a> { + self.graphemes_at(self.len_bytes()).reversed() + } + + fn graphemes_at(self, byte_idx: usize) -> RopeGraphemes<'a> { + let (mut chunks, chunk_byte_idx, _, _) = self.chunks_at_byte(byte_idx); + let current_chunk = chunks.next().unwrap_or(""); + RopeGraphemes { text: self, chunks, - cur_chunk: first_chunk, - cur_chunk_start: 0, - cursor: GraphemeCursor::new(0, self.len_bytes(), true), - } - } - - fn graphemes_rev(self) -> RevRopeGraphemes<'a> { - let (mut chunks, mut cur_chunk_start, _, _) = self.chunks_at_byte(self.len_bytes()); - chunks.reverse(); - let first_chunk = chunks.next().unwrap_or(""); - cur_chunk_start -= first_chunk.len(); - RevRopeGraphemes { - text: self, - chunks, - cur_chunk: first_chunk, - cur_chunk_start, - cursor: GraphemeCursor::new(self.len_bytes(), self.len_bytes(), true), + current_chunk, + chunk_byte_idx, + cursor: GraphemeCursor::new(byte_idx, self.len_bytes(), true), + is_reversed: false, } } @@ -511,13 +521,18 @@ const fn is_utf8_char_boundary(b: u8) -> bool { } /// An iterator over the graphemes of a `RopeSlice`. +/// +/// This iterator is cursor-like: rather than implementing DoubleEndedIterator it can be reversed +/// like a cursor. #[derive(Clone)] pub struct RopeGraphemes<'a> { text: RopeSlice<'a>, chunks: Chunks<'a>, - cur_chunk: &'a str, - cur_chunk_start: usize, + current_chunk: &'a str, + /// Byte index of the start of the current chunk. + chunk_byte_idx: usize, cursor: GraphemeCursor, + is_reversed: bool, } impl fmt::Debug for RopeGraphemes<'_> { @@ -525,112 +540,116 @@ impl fmt::Debug for RopeGraphemes<'_> { f.debug_struct("RopeGraphemes") .field("text", &self.text) .field("chunks", &self.chunks) - .field("cur_chunk", &self.cur_chunk) - .field("cur_chunk_start", &self.cur_chunk_start) + .field("current_chunk", &self.current_chunk) + .field("chunk_byte_idx", &self.chunk_byte_idx) // .field("cursor", &self.cursor) + .field("is_reversed", &self.is_reversed) .finish() } } +impl<'a> RopeGraphemes<'a> { + #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> Option> { + if self.is_reversed { + self.prev_impl() + } else { + self.next_impl() + } + } + + pub fn prev(&mut self) -> Option> { + if self.is_reversed { + self.next_impl() + } else { + self.prev_impl() + } + } + + pub fn reverse(&mut self) { + self.is_reversed = !self.is_reversed; + } + + #[must_use] + pub fn reversed(mut self) -> Self { + self.reverse(); + self + } + + fn next_impl(&mut self) -> Option> { + let a = self.cursor.cur_cursor(); + let b; + loop { + match self + .cursor + .next_boundary(self.current_chunk, self.chunk_byte_idx) + { + Ok(None) => return None, + Ok(Some(n)) => { + b = n; + break; + } + Err(GraphemeIncomplete::NextChunk) => { + self.chunk_byte_idx += self.current_chunk.len(); + self.current_chunk = self.chunks.next().unwrap_or(""); + } + Err(GraphemeIncomplete::PreContext(idx)) => { + let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1)); + self.cursor.provide_context(chunk, byte_idx); + } + _ => unreachable!(), + } + } + + if a < self.chunk_byte_idx { + Some(self.text.byte_slice(a..b)) + } else { + let a2 = a - self.chunk_byte_idx; + let b2 = b - self.chunk_byte_idx; + Some((&self.current_chunk[a2..b2]).into()) + } + } + + fn prev_impl(&mut self) -> Option> { + let a = self.cursor.cur_cursor(); + let b; + loop { + match self + .cursor + .prev_boundary(self.current_chunk, self.chunk_byte_idx) + { + Ok(None) => return None, + Ok(Some(n)) => { + b = n; + break; + } + Err(GraphemeIncomplete::PrevChunk) => { + self.current_chunk = self.chunks.prev().unwrap_or(""); + self.chunk_byte_idx -= self.current_chunk.len(); + } + Err(GraphemeIncomplete::PreContext(idx)) => { + let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1)); + self.cursor.provide_context(chunk, byte_idx); + } + _ => unreachable!(), + } + } + + if a >= self.chunk_byte_idx + self.current_chunk.len() { + Some(self.text.byte_slice(b..a)) + } else { + let a2 = a - self.chunk_byte_idx; + let b2 = b - self.chunk_byte_idx; + Some((&self.current_chunk[b2..a2]).into()) + } + } +} + impl<'a> Iterator for RopeGraphemes<'a> { type Item = RopeSlice<'a>; fn next(&mut self) -> Option { - let a = self.cursor.cur_cursor(); - let b; - loop { - match self - .cursor - .next_boundary(self.cur_chunk, self.cur_chunk_start) - { - Ok(None) => { - return None; - } - Ok(Some(n)) => { - b = n; - break; - } - Err(GraphemeIncomplete::NextChunk) => { - self.cur_chunk_start += self.cur_chunk.len(); - self.cur_chunk = self.chunks.next().unwrap_or(""); - } - Err(GraphemeIncomplete::PreContext(idx)) => { - let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1)); - self.cursor.provide_context(chunk, byte_idx); - } - _ => unreachable!(), - } - } - - if a < self.cur_chunk_start { - Some(self.text.byte_slice(a..b)) - } else { - let a2 = a - self.cur_chunk_start; - let b2 = b - self.cur_chunk_start; - Some((&self.cur_chunk[a2..b2]).into()) - } - } -} - -/// An iterator over the graphemes of a `RopeSlice` in reverse. -#[derive(Clone)] -pub struct RevRopeGraphemes<'a> { - text: RopeSlice<'a>, - chunks: Chunks<'a>, - cur_chunk: &'a str, - cur_chunk_start: usize, - cursor: GraphemeCursor, -} - -impl fmt::Debug for RevRopeGraphemes<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("RevRopeGraphemes") - .field("text", &self.text) - .field("chunks", &self.chunks) - .field("cur_chunk", &self.cur_chunk) - .field("cur_chunk_start", &self.cur_chunk_start) - // .field("cursor", &self.cursor) - .finish() - } -} - -impl<'a> Iterator for RevRopeGraphemes<'a> { - type Item = RopeSlice<'a>; - - fn next(&mut self) -> Option { - let a = self.cursor.cur_cursor(); - let b; - loop { - match self - .cursor - .prev_boundary(self.cur_chunk, self.cur_chunk_start) - { - Ok(None) => { - return None; - } - Ok(Some(n)) => { - b = n; - break; - } - Err(GraphemeIncomplete::PrevChunk) => { - self.cur_chunk = self.chunks.next().unwrap_or(""); - self.cur_chunk_start -= self.cur_chunk.len(); - } - Err(GraphemeIncomplete::PreContext(idx)) => { - let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1)); - self.cursor.provide_context(chunk, byte_idx); - } - _ => unreachable!(), - } - } - - if a >= self.cur_chunk_start + self.cur_chunk.len() { - Some(self.text.byte_slice(b..a)) - } else { - let a2 = a - self.cur_chunk_start; - let b2 = b - self.cur_chunk_start; - Some((&self.cur_chunk[b2..a2]).into()) - } + RopeGraphemes::next(self) } }