Michael Davis 2025-06-16 10:17:25 -05:00 committed by GitHub
commit 27f15c1969
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 359 additions and 107 deletions

View File

@ -135,7 +135,9 @@ pub trait RopeSliceExt<'a>: Sized {
/// let graphemes: Vec<_> = text.graphemes().collect();
/// assert_eq!(graphemes.as_slice(), &["😶‍🌫️", "🏴‍☠️", "🖼️"]);
/// ```
fn graphemes(self) -> RopeGraphemes<'a>;
fn graphemes(self) -> RopeGraphemes<'a> {
self.graphemes_at(0)
}
/// Returns an iterator over the grapheme clusters in the slice, reversed.
///
/// The returned iterator starts at the end of the slice and ends at the beginning of the
@ -150,7 +152,111 @@ pub trait RopeSliceExt<'a>: Sized {
/// let graphemes: Vec<_> = text.graphemes_rev().collect();
/// assert_eq!(graphemes.as_slice(), &["🖼️", "🏴‍☠️", "😶‍🌫️"]);
/// ```
fn graphemes_rev(self) -> RevRopeGraphemes<'a>;
fn graphemes_rev(self) -> RopeGraphemes<'a>;
/// Returns an iterator over the grapheme clusters in the slice at the given byte index.
///
/// # Example
///
/// ```
/// # use ropey::Rope;
/// # use helix_stdx::rope::RopeSliceExt;
/// let text = Rope::from_str("😶‍🌫️🏴‍☠️🖼️");
/// // 14 is the byte index of the pirate flag's starting cluster boundary.
/// let graphemes: Vec<_> = text.slice(..).graphemes_at(14).collect();
/// assert_eq!(graphemes.as_slice(), &["🏴‍☠️", "🖼️"]);
/// // 27 is the byte index of the pirate flag's ending cluster boundary.
/// let graphemes: Vec<_> = text.slice(..).graphemes_at(27).reversed().collect();
/// assert_eq!(graphemes.as_slice(), &["🏴‍☠️", "😶‍🌫️"]);
/// ```
fn graphemes_at(self, byte_idx: usize) -> RopeGraphemes<'a>;
/// Returns an iterator over the grapheme clusters in a rope and the byte index where each
/// grapheme cluster starts.
///
/// # Example
///
/// ```
/// # use ropey::Rope;
/// # use helix_stdx::rope::RopeSliceExt;
/// let text = Rope::from_str("😶‍🌫️🏴‍☠️🖼️");
/// let slice = text.slice(..);
/// let graphemes: Vec<_> = slice.grapheme_indices_at(0).collect();
/// assert_eq!(
/// graphemes.as_slice(),
/// &[(0, "😶‍🌫️".into()), (14, "🏴‍☠️".into()), (27, "🖼️".into())]
/// );
/// let graphemes: Vec<_> = slice.grapheme_indices_at(slice.len_bytes()).reversed().collect();
/// assert_eq!(
/// graphemes.as_slice(),
/// &[(27, "🖼️".into()), (14, "🏴‍☠️".into()), (0, "😶‍🌫️".into())]
/// );
/// ```
fn grapheme_indices_at(self, byte_idx: usize) -> RopeGraphemeIndices<'a>;
/// Finds the byte index of the next grapheme boundary after `byte_idx`.
///
/// If the byte index lies on the last grapheme cluster in the slice then this function
/// returns `RopeSlice::len_bytes`.
///
/// # Example
///
/// ```
/// # use ropey::Rope;
/// # use helix_stdx::rope::RopeSliceExt;
/// let text = Rope::from_str("😶‍🌫️🏴‍☠️🖼️");
/// let slice = text.slice(..);
/// let mut byte_idx = 0;
/// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("😶‍🌫️".into()));
/// byte_idx = slice.next_grapheme_boundary(byte_idx);
/// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("🏴‍☠️".into()));
/// byte_idx = slice.next_grapheme_boundary(byte_idx);
/// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("🖼️".into()));
/// byte_idx = slice.next_grapheme_boundary(byte_idx);
/// assert_eq!(slice.graphemes_at(byte_idx).next(), None);
/// assert_eq!(byte_idx, slice.len_bytes());
/// ```
fn next_grapheme_boundary(self, byte_idx: usize) -> usize {
self.nth_next_grapheme_boundary(byte_idx, 1)
}
/// Finds the byte index of the `n`th grapheme cluster after the given `byte_idx`.
///
/// If there are more than `n` grapheme clusters after `byte_idx` in the rope then this
/// function returns `RopeSlice::len_bytes`.
///
/// This is functionally equivalent to calling `next_grapheme_boundary` `n` times but is more
/// efficient.
fn nth_next_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize;
/// Finds the byte index of the previous grapheme boundary before `byte_idx`.
///
/// If the byte index lies on the first grapheme cluster in the slice then this function
/// returns zero.
///
/// # Example
///
/// ```
/// # use ropey::Rope;
/// # use helix_stdx::rope::RopeSliceExt;
/// let text = Rope::from_str("😶‍🌫️🏴‍☠️🖼️");
/// let slice = text.slice(..);
/// let mut byte_idx = text.len_bytes();
/// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("🖼️".into()));
/// byte_idx = slice.prev_grapheme_boundary(byte_idx);
/// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("🏴‍☠️".into()));
/// byte_idx = slice.prev_grapheme_boundary(byte_idx);
/// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("😶‍🌫️".into()));
/// byte_idx = slice.prev_grapheme_boundary(byte_idx);
/// assert_eq!(slice.graphemes_at(byte_idx).prev(), None);
/// assert_eq!(byte_idx, 0);
/// ```
fn prev_grapheme_boundary(self, byte_idx: usize) -> usize {
self.nth_prev_grapheme_boundary(byte_idx, 1)
}
/// Finds the byte index of the `n`th grapheme cluster before the given `byte_idx`.
///
/// If there are fewer than `n` grapheme clusters before `byte_idx` in the rope then this
/// function returns zero.
///
/// This is functionally equivalent to calling `prev_grapheme_boundary` `n` times but is more
/// efficient.
fn nth_prev_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize;
}
impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
@ -335,31 +441,106 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
}
}
fn graphemes(self) -> RopeGraphemes<'a> {
let mut chunks = self.chunks();
let first_chunk = chunks.next().unwrap_or("");
fn graphemes_rev(self) -> RopeGraphemes<'a> {
self.graphemes_at(self.len_bytes()).reversed()
}
fn graphemes_at(self, byte_idx: usize) -> RopeGraphemes<'a> {
let (mut chunks, chunk_byte_idx, _, _) = self.chunks_at_byte(byte_idx);
let current_chunk = chunks.next().unwrap_or("");
RopeGraphemes {
text: self,
chunks,
cur_chunk: first_chunk,
cur_chunk_start: 0,
cursor: GraphemeCursor::new(0, self.len_bytes(), true),
current_chunk,
chunk_byte_idx,
cursor: GraphemeCursor::new(byte_idx, self.len_bytes(), true),
is_reversed: false,
}
}
fn graphemes_rev(self) -> RevRopeGraphemes<'a> {
let (mut chunks, mut cur_chunk_start, _, _) = self.chunks_at_byte(self.len_bytes());
chunks.reverse();
let first_chunk = chunks.next().unwrap_or("");
cur_chunk_start -= first_chunk.len();
RevRopeGraphemes {
text: self,
chunks,
cur_chunk: first_chunk,
cur_chunk_start,
cursor: GraphemeCursor::new(self.len_bytes(), self.len_bytes(), true),
fn grapheme_indices_at(self, byte_idx: usize) -> RopeGraphemeIndices<'a> {
RopeGraphemeIndices {
front_offset: byte_idx,
iter: self.graphemes_at(byte_idx),
is_reversed: false,
}
}
fn nth_next_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize {
// Bounds check
debug_assert!(byte_idx <= self.len_bytes());
byte_idx = self.floor_char_boundary(byte_idx);
// Get the chunk with our byte index in it.
let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx);
// Set up the grapheme cursor.
let mut gc = GraphemeCursor::new(byte_idx, self.len_bytes(), true);
// Find the nth next grapheme cluster boundary.
for _ in 0..n {
loop {
match gc.next_boundary(chunk, chunk_byte_idx) {
Ok(None) => return self.len_bytes(),
Ok(Some(n)) => {
byte_idx = n;
break;
}
Err(GraphemeIncomplete::NextChunk) => {
chunk_byte_idx += chunk.len();
let (a, _, _, _) = self.chunk_at_byte(chunk_byte_idx);
chunk = a;
}
Err(GraphemeIncomplete::PreContext(n)) => {
let ctx_chunk = self.chunk_at_byte(n - 1).0;
gc.provide_context(ctx_chunk, n - ctx_chunk.len());
}
_ => unreachable!(),
}
}
}
byte_idx
}
fn nth_prev_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize {
// Bounds check
debug_assert!(byte_idx <= self.len_bytes());
byte_idx = self.ceil_char_boundary(byte_idx);
// Get the chunk with our byte index in it.
let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx);
// Set up the grapheme cursor.
let mut gc = GraphemeCursor::new(byte_idx, self.len_bytes(), true);
for _ in 0..n {
loop {
match gc.prev_boundary(chunk, chunk_byte_idx) {
Ok(None) => return 0,
Ok(Some(n)) => {
byte_idx = n;
break;
}
Err(GraphemeIncomplete::PrevChunk) => {
let (a, b, _, _) = self.chunk_at_byte(chunk_byte_idx - 1);
chunk = a;
chunk_byte_idx = b;
}
Err(GraphemeIncomplete::PreContext(n)) => {
let ctx_chunk = self.chunk_at_byte(n - 1).0;
gc.provide_context(ctx_chunk, n - ctx_chunk.len());
}
_ => unreachable!(),
}
}
}
byte_idx
}
}
// copied from std
@ -370,13 +551,18 @@ const fn is_utf8_char_boundary(b: u8) -> bool {
}
/// An iterator over the graphemes of a `RopeSlice`.
///
/// This iterator is cursor-like: rather than implementing DoubleEndedIterator it can be reversed
/// like a cursor.
#[derive(Clone)]
pub struct RopeGraphemes<'a> {
text: RopeSlice<'a>,
chunks: Chunks<'a>,
cur_chunk: &'a str,
cur_chunk_start: usize,
current_chunk: &'a str,
/// Byte index of the start of the current chunk.
chunk_byte_idx: usize,
cursor: GraphemeCursor,
is_reversed: bool,
}
impl fmt::Debug for RopeGraphemes<'_> {
@ -384,112 +570,178 @@ impl fmt::Debug for RopeGraphemes<'_> {
f.debug_struct("RopeGraphemes")
.field("text", &self.text)
.field("chunks", &self.chunks)
.field("cur_chunk", &self.cur_chunk)
.field("cur_chunk_start", &self.cur_chunk_start)
.field("current_chunk", &self.current_chunk)
.field("chunk_byte_idx", &self.chunk_byte_idx)
// .field("cursor", &self.cursor)
.field("is_reversed", &self.is_reversed)
.finish()
}
}
impl<'a> RopeGraphemes<'a> {
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> Option<RopeSlice<'a>> {
if self.is_reversed {
self.prev_impl()
} else {
self.next_impl()
}
}
pub fn prev(&mut self) -> Option<RopeSlice<'a>> {
if self.is_reversed {
self.next_impl()
} else {
self.prev_impl()
}
}
pub fn reverse(&mut self) {
self.is_reversed = !self.is_reversed;
}
#[must_use]
pub fn reversed(mut self) -> Self {
self.reverse();
self
}
fn next_impl(&mut self) -> Option<RopeSlice<'a>> {
let a = self.cursor.cur_cursor();
let b;
loop {
match self
.cursor
.next_boundary(self.current_chunk, self.chunk_byte_idx)
{
Ok(None) => return None,
Ok(Some(n)) => {
b = n;
break;
}
Err(GraphemeIncomplete::NextChunk) => {
self.chunk_byte_idx += self.current_chunk.len();
self.current_chunk = self.chunks.next().unwrap_or("");
}
Err(GraphemeIncomplete::PreContext(idx)) => {
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
self.cursor.provide_context(chunk, byte_idx);
}
_ => unreachable!(),
}
}
if a < self.chunk_byte_idx {
Some(self.text.byte_slice(a..b))
} else {
let a2 = a - self.chunk_byte_idx;
let b2 = b - self.chunk_byte_idx;
Some((&self.current_chunk[a2..b2]).into())
}
}
fn prev_impl(&mut self) -> Option<RopeSlice<'a>> {
let a = self.cursor.cur_cursor();
let b;
loop {
match self
.cursor
.prev_boundary(self.current_chunk, self.chunk_byte_idx)
{
Ok(None) => return None,
Ok(Some(n)) => {
b = n;
break;
}
Err(GraphemeIncomplete::PrevChunk) => {
self.current_chunk = self.chunks.prev().unwrap_or("");
self.chunk_byte_idx -= self.current_chunk.len();
}
Err(GraphemeIncomplete::PreContext(idx)) => {
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
self.cursor.provide_context(chunk, byte_idx);
}
_ => unreachable!(),
}
}
if a >= self.chunk_byte_idx + self.current_chunk.len() {
Some(self.text.byte_slice(b..a))
} else {
let a2 = a - self.chunk_byte_idx;
let b2 = b - self.chunk_byte_idx;
Some((&self.current_chunk[b2..a2]).into())
}
}
}
impl<'a> Iterator for RopeGraphemes<'a> {
type Item = RopeSlice<'a>;
fn next(&mut self) -> Option<Self::Item> {
let a = self.cursor.cur_cursor();
let b;
loop {
match self
.cursor
.next_boundary(self.cur_chunk, self.cur_chunk_start)
{
Ok(None) => {
return None;
}
Ok(Some(n)) => {
b = n;
break;
}
Err(GraphemeIncomplete::NextChunk) => {
self.cur_chunk_start += self.cur_chunk.len();
self.cur_chunk = self.chunks.next().unwrap_or("");
}
Err(GraphemeIncomplete::PreContext(idx)) => {
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
self.cursor.provide_context(chunk, byte_idx);
}
_ => unreachable!(),
}
RopeGraphemes::next(self)
}
}
if a < self.cur_chunk_start {
Some(self.text.byte_slice(a..b))
/// An iterator over the grapheme clusters in a rope and the byte indices where each grapheme
/// cluster starts.
///
/// This iterator wraps `RopeGraphemes` and is also cursor-like. Use `reverse` or `reversed` to
/// toggle the direction of the iterator.
#[derive(Debug, Clone)]
pub struct RopeGraphemeIndices<'a> {
front_offset: usize,
iter: RopeGraphemes<'a>,
is_reversed: bool,
}
impl<'a> RopeGraphemeIndices<'a> {
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> Option<(usize, RopeSlice<'a>)> {
if self.is_reversed {
self.prev_impl()
} else {
let a2 = a - self.cur_chunk_start;
let b2 = b - self.cur_chunk_start;
Some((&self.cur_chunk[a2..b2]).into())
self.next_impl()
}
}
pub fn prev(&mut self) -> Option<(usize, RopeSlice<'a>)> {
if self.is_reversed {
self.next_impl()
} else {
self.prev_impl()
}
}
pub fn reverse(&mut self) {
self.is_reversed = !self.is_reversed;
}
#[must_use]
pub fn reversed(mut self) -> Self {
self.reverse();
self
}
fn next_impl(&mut self) -> Option<(usize, RopeSlice<'a>)> {
let slice = self.iter.next()?;
let idx = self.front_offset;
self.front_offset += slice.len_bytes();
Some((idx, slice))
}
fn prev_impl(&mut self) -> Option<(usize, RopeSlice<'a>)> {
let slice = self.iter.prev()?;
self.front_offset -= slice.len_bytes();
Some((self.front_offset, slice))
}
}
/// An iterator over the graphemes of a `RopeSlice` in reverse.
#[derive(Clone)]
pub struct RevRopeGraphemes<'a> {
text: RopeSlice<'a>,
chunks: Chunks<'a>,
cur_chunk: &'a str,
cur_chunk_start: usize,
cursor: GraphemeCursor,
}
impl fmt::Debug for RevRopeGraphemes<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("RevRopeGraphemes")
.field("text", &self.text)
.field("chunks", &self.chunks)
.field("cur_chunk", &self.cur_chunk)
.field("cur_chunk_start", &self.cur_chunk_start)
// .field("cursor", &self.cursor)
.finish()
}
}
impl<'a> Iterator for RevRopeGraphemes<'a> {
type Item = RopeSlice<'a>;
impl<'a> Iterator for RopeGraphemeIndices<'a> {
type Item = (usize, RopeSlice<'a>);
fn next(&mut self) -> Option<Self::Item> {
let a = self.cursor.cur_cursor();
let b;
loop {
match self
.cursor
.prev_boundary(self.cur_chunk, self.cur_chunk_start)
{
Ok(None) => {
return None;
}
Ok(Some(n)) => {
b = n;
break;
}
Err(GraphemeIncomplete::PrevChunk) => {
self.cur_chunk = self.chunks.next().unwrap_or("");
self.cur_chunk_start -= self.cur_chunk.len();
}
Err(GraphemeIncomplete::PreContext(idx)) => {
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
self.cursor.provide_context(chunk, byte_idx);
}
_ => unreachable!(),
}
}
if a >= self.cur_chunk_start + self.cur_chunk.len() {
Some(self.text.byte_slice(b..a))
} else {
let a2 = a - self.cur_chunk_start;
let b2 = b - self.cur_chunk_start;
Some((&self.cur_chunk[b2..a2]).into())
}
RopeGraphemeIndices::next(self)
}
}