mirror of https://github.com/helix-editor/helix
stdx: Unify RopeSlice grapheme cluster iterators
This style for RopeGraphemes is identical to Ropey's Chars and Bytes iterators. Being able to move the iterator types like cursors over the bytes/chars/graphemes is useful in some cases. For example see `helix_core::movement::<Chars as CharHelpers>::range_to_target`. This change also adds `RopeSliceExt::graphemes_at` for flexibility. `graphemes` and `graphemes_rev` are now implemented in terms of `graphemes_at` and `RopeGraphemes::reversed`.
parent
c68016081a
commit
ab9e36069b
|
@ -135,7 +135,9 @@ pub trait RopeSliceExt<'a>: Sized {
|
||||||
/// let graphemes: Vec<_> = text.graphemes().collect();
|
/// let graphemes: Vec<_> = text.graphemes().collect();
|
||||||
/// assert_eq!(graphemes.as_slice(), &["😶🌫️", "🏴☠️", "🖼️"]);
|
/// assert_eq!(graphemes.as_slice(), &["😶🌫️", "🏴☠️", "🖼️"]);
|
||||||
/// ```
|
/// ```
|
||||||
fn graphemes(self) -> RopeGraphemes<'a>;
|
fn graphemes(self) -> RopeGraphemes<'a> {
|
||||||
|
self.graphemes_at(0)
|
||||||
|
}
|
||||||
/// Returns an iterator over the grapheme clusters in the slice, reversed.
|
/// Returns an iterator over the grapheme clusters in the slice, reversed.
|
||||||
///
|
///
|
||||||
/// The returned iterator starts at the end of the slice and ends at the beginning of the
|
/// The returned iterator starts at the end of the slice and ends at the beginning of the
|
||||||
|
@ -150,7 +152,23 @@ pub trait RopeSliceExt<'a>: Sized {
|
||||||
/// let graphemes: Vec<_> = text.graphemes_rev().collect();
|
/// let graphemes: Vec<_> = text.graphemes_rev().collect();
|
||||||
/// assert_eq!(graphemes.as_slice(), &["🖼️", "🏴☠️", "😶🌫️"]);
|
/// assert_eq!(graphemes.as_slice(), &["🖼️", "🏴☠️", "😶🌫️"]);
|
||||||
/// ```
|
/// ```
|
||||||
fn graphemes_rev(self) -> RevRopeGraphemes<'a>;
|
fn graphemes_rev(self) -> RopeGraphemes<'a>;
|
||||||
|
/// Returns an iterator over the grapheme clusters in the slice at the given byte index.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use ropey::Rope;
|
||||||
|
/// # use helix_stdx::rope::RopeSliceExt;
|
||||||
|
/// let text = Rope::from_str("😶🌫️🏴☠️🖼️");
|
||||||
|
/// // 14 is the byte index of the pirate flag's starting cluster boundary.
|
||||||
|
/// let graphemes: Vec<_> = text.slice(..).graphemes_at(14).collect();
|
||||||
|
/// assert_eq!(graphemes.as_slice(), &["🏴☠️", "🖼️"]);
|
||||||
|
/// // 27 is the byte index of the pirate flag's ending cluster boundary.
|
||||||
|
/// let graphemes: Vec<_> = text.slice(..).graphemes_at(27).reversed().collect();
|
||||||
|
/// assert_eq!(graphemes.as_slice(), &["🏴☠️", "😶🌫️"]);
|
||||||
|
/// ```
|
||||||
|
fn graphemes_at(self, byte_idx: usize) -> RopeGraphemes<'a>;
|
||||||
/// Finds the byte index of the next grapheme boundary after `byte_idx`.
|
/// Finds the byte index of the next grapheme boundary after `byte_idx`.
|
||||||
///
|
///
|
||||||
/// If the byte index lies on the last grapheme cluster in the slice then this function
|
/// If the byte index lies on the last grapheme cluster in the slice then this function
|
||||||
|
@ -401,29 +419,21 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn graphemes(self) -> RopeGraphemes<'a> {
|
fn graphemes_rev(self) -> RopeGraphemes<'a> {
|
||||||
let mut chunks = self.chunks();
|
self.graphemes_at(self.len_bytes()).reversed()
|
||||||
let first_chunk = chunks.next().unwrap_or("");
|
}
|
||||||
|
|
||||||
|
fn graphemes_at(self, byte_idx: usize) -> RopeGraphemes<'a> {
|
||||||
|
let (mut chunks, chunk_byte_idx, _, _) = self.chunks_at_byte(byte_idx);
|
||||||
|
let current_chunk = chunks.next().unwrap_or("");
|
||||||
|
|
||||||
RopeGraphemes {
|
RopeGraphemes {
|
||||||
text: self,
|
text: self,
|
||||||
chunks,
|
chunks,
|
||||||
cur_chunk: first_chunk,
|
current_chunk,
|
||||||
cur_chunk_start: 0,
|
chunk_byte_idx,
|
||||||
cursor: GraphemeCursor::new(0, self.len_bytes(), true),
|
cursor: GraphemeCursor::new(byte_idx, self.len_bytes(), true),
|
||||||
}
|
is_reversed: false,
|
||||||
}
|
|
||||||
|
|
||||||
fn graphemes_rev(self) -> RevRopeGraphemes<'a> {
|
|
||||||
let (mut chunks, mut cur_chunk_start, _, _) = self.chunks_at_byte(self.len_bytes());
|
|
||||||
chunks.reverse();
|
|
||||||
let first_chunk = chunks.next().unwrap_or("");
|
|
||||||
cur_chunk_start -= first_chunk.len();
|
|
||||||
RevRopeGraphemes {
|
|
||||||
text: self,
|
|
||||||
chunks,
|
|
||||||
cur_chunk: first_chunk,
|
|
||||||
cur_chunk_start,
|
|
||||||
cursor: GraphemeCursor::new(self.len_bytes(), self.len_bytes(), true),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -511,13 +521,18 @@ const fn is_utf8_char_boundary(b: u8) -> bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An iterator over the graphemes of a `RopeSlice`.
|
/// An iterator over the graphemes of a `RopeSlice`.
|
||||||
|
///
|
||||||
|
/// This iterator is cursor-like: rather than implementing DoubleEndedIterator it can be reversed
|
||||||
|
/// like a cursor.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct RopeGraphemes<'a> {
|
pub struct RopeGraphemes<'a> {
|
||||||
text: RopeSlice<'a>,
|
text: RopeSlice<'a>,
|
||||||
chunks: Chunks<'a>,
|
chunks: Chunks<'a>,
|
||||||
cur_chunk: &'a str,
|
current_chunk: &'a str,
|
||||||
cur_chunk_start: usize,
|
/// Byte index of the start of the current chunk.
|
||||||
|
chunk_byte_idx: usize,
|
||||||
cursor: GraphemeCursor,
|
cursor: GraphemeCursor,
|
||||||
|
is_reversed: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for RopeGraphemes<'_> {
|
impl fmt::Debug for RopeGraphemes<'_> {
|
||||||
|
@ -525,112 +540,116 @@ impl fmt::Debug for RopeGraphemes<'_> {
|
||||||
f.debug_struct("RopeGraphemes")
|
f.debug_struct("RopeGraphemes")
|
||||||
.field("text", &self.text)
|
.field("text", &self.text)
|
||||||
.field("chunks", &self.chunks)
|
.field("chunks", &self.chunks)
|
||||||
.field("cur_chunk", &self.cur_chunk)
|
.field("current_chunk", &self.current_chunk)
|
||||||
.field("cur_chunk_start", &self.cur_chunk_start)
|
.field("chunk_byte_idx", &self.chunk_byte_idx)
|
||||||
// .field("cursor", &self.cursor)
|
// .field("cursor", &self.cursor)
|
||||||
|
.field("is_reversed", &self.is_reversed)
|
||||||
.finish()
|
.finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'a> RopeGraphemes<'a> {
|
||||||
|
#[allow(clippy::should_implement_trait)]
|
||||||
|
pub fn next(&mut self) -> Option<RopeSlice<'a>> {
|
||||||
|
if self.is_reversed {
|
||||||
|
self.prev_impl()
|
||||||
|
} else {
|
||||||
|
self.next_impl()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn prev(&mut self) -> Option<RopeSlice<'a>> {
|
||||||
|
if self.is_reversed {
|
||||||
|
self.next_impl()
|
||||||
|
} else {
|
||||||
|
self.prev_impl()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn reverse(&mut self) {
|
||||||
|
self.is_reversed = !self.is_reversed;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub fn reversed(mut self) -> Self {
|
||||||
|
self.reverse();
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next_impl(&mut self) -> Option<RopeSlice<'a>> {
|
||||||
|
let a = self.cursor.cur_cursor();
|
||||||
|
let b;
|
||||||
|
loop {
|
||||||
|
match self
|
||||||
|
.cursor
|
||||||
|
.next_boundary(self.current_chunk, self.chunk_byte_idx)
|
||||||
|
{
|
||||||
|
Ok(None) => return None,
|
||||||
|
Ok(Some(n)) => {
|
||||||
|
b = n;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Err(GraphemeIncomplete::NextChunk) => {
|
||||||
|
self.chunk_byte_idx += self.current_chunk.len();
|
||||||
|
self.current_chunk = self.chunks.next().unwrap_or("");
|
||||||
|
}
|
||||||
|
Err(GraphemeIncomplete::PreContext(idx)) => {
|
||||||
|
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
|
||||||
|
self.cursor.provide_context(chunk, byte_idx);
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if a < self.chunk_byte_idx {
|
||||||
|
Some(self.text.byte_slice(a..b))
|
||||||
|
} else {
|
||||||
|
let a2 = a - self.chunk_byte_idx;
|
||||||
|
let b2 = b - self.chunk_byte_idx;
|
||||||
|
Some((&self.current_chunk[a2..b2]).into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn prev_impl(&mut self) -> Option<RopeSlice<'a>> {
|
||||||
|
let a = self.cursor.cur_cursor();
|
||||||
|
let b;
|
||||||
|
loop {
|
||||||
|
match self
|
||||||
|
.cursor
|
||||||
|
.prev_boundary(self.current_chunk, self.chunk_byte_idx)
|
||||||
|
{
|
||||||
|
Ok(None) => return None,
|
||||||
|
Ok(Some(n)) => {
|
||||||
|
b = n;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Err(GraphemeIncomplete::PrevChunk) => {
|
||||||
|
self.current_chunk = self.chunks.prev().unwrap_or("");
|
||||||
|
self.chunk_byte_idx -= self.current_chunk.len();
|
||||||
|
}
|
||||||
|
Err(GraphemeIncomplete::PreContext(idx)) => {
|
||||||
|
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
|
||||||
|
self.cursor.provide_context(chunk, byte_idx);
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if a >= self.chunk_byte_idx + self.current_chunk.len() {
|
||||||
|
Some(self.text.byte_slice(b..a))
|
||||||
|
} else {
|
||||||
|
let a2 = a - self.chunk_byte_idx;
|
||||||
|
let b2 = b - self.chunk_byte_idx;
|
||||||
|
Some((&self.current_chunk[b2..a2]).into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<'a> Iterator for RopeGraphemes<'a> {
|
impl<'a> Iterator for RopeGraphemes<'a> {
|
||||||
type Item = RopeSlice<'a>;
|
type Item = RopeSlice<'a>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
let a = self.cursor.cur_cursor();
|
RopeGraphemes::next(self)
|
||||||
let b;
|
|
||||||
loop {
|
|
||||||
match self
|
|
||||||
.cursor
|
|
||||||
.next_boundary(self.cur_chunk, self.cur_chunk_start)
|
|
||||||
{
|
|
||||||
Ok(None) => {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
Ok(Some(n)) => {
|
|
||||||
b = n;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
Err(GraphemeIncomplete::NextChunk) => {
|
|
||||||
self.cur_chunk_start += self.cur_chunk.len();
|
|
||||||
self.cur_chunk = self.chunks.next().unwrap_or("");
|
|
||||||
}
|
|
||||||
Err(GraphemeIncomplete::PreContext(idx)) => {
|
|
||||||
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
|
|
||||||
self.cursor.provide_context(chunk, byte_idx);
|
|
||||||
}
|
|
||||||
_ => unreachable!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if a < self.cur_chunk_start {
|
|
||||||
Some(self.text.byte_slice(a..b))
|
|
||||||
} else {
|
|
||||||
let a2 = a - self.cur_chunk_start;
|
|
||||||
let b2 = b - self.cur_chunk_start;
|
|
||||||
Some((&self.cur_chunk[a2..b2]).into())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// An iterator over the graphemes of a `RopeSlice` in reverse.
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct RevRopeGraphemes<'a> {
|
|
||||||
text: RopeSlice<'a>,
|
|
||||||
chunks: Chunks<'a>,
|
|
||||||
cur_chunk: &'a str,
|
|
||||||
cur_chunk_start: usize,
|
|
||||||
cursor: GraphemeCursor,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Debug for RevRopeGraphemes<'_> {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
f.debug_struct("RevRopeGraphemes")
|
|
||||||
.field("text", &self.text)
|
|
||||||
.field("chunks", &self.chunks)
|
|
||||||
.field("cur_chunk", &self.cur_chunk)
|
|
||||||
.field("cur_chunk_start", &self.cur_chunk_start)
|
|
||||||
// .field("cursor", &self.cursor)
|
|
||||||
.finish()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Iterator for RevRopeGraphemes<'a> {
|
|
||||||
type Item = RopeSlice<'a>;
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
let a = self.cursor.cur_cursor();
|
|
||||||
let b;
|
|
||||||
loop {
|
|
||||||
match self
|
|
||||||
.cursor
|
|
||||||
.prev_boundary(self.cur_chunk, self.cur_chunk_start)
|
|
||||||
{
|
|
||||||
Ok(None) => {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
Ok(Some(n)) => {
|
|
||||||
b = n;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
Err(GraphemeIncomplete::PrevChunk) => {
|
|
||||||
self.cur_chunk = self.chunks.next().unwrap_or("");
|
|
||||||
self.cur_chunk_start -= self.cur_chunk.len();
|
|
||||||
}
|
|
||||||
Err(GraphemeIncomplete::PreContext(idx)) => {
|
|
||||||
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
|
|
||||||
self.cursor.provide_context(chunk, byte_idx);
|
|
||||||
}
|
|
||||||
_ => unreachable!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if a >= self.cur_chunk_start + self.cur_chunk.len() {
|
|
||||||
Some(self.text.byte_slice(b..a))
|
|
||||||
} else {
|
|
||||||
let a2 = a - self.cur_chunk_start;
|
|
||||||
let b2 = b - self.cur_chunk_start;
|
|
||||||
Some((&self.cur_chunk[b2..a2]).into())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue