diff --git a/Cargo.lock b/Cargo.lock index 4f3536ebc..450abd9b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1417,7 +1417,7 @@ dependencies = [ "parking_lot", "quickcheck", "regex", - "regex-cursor 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-cursor", "ropey 2.0.0-alpha.3", "serde", "serde_json", @@ -1526,7 +1526,7 @@ dependencies = [ "etcetera", "once_cell", "regex-automata", - "regex-cursor 0.1.5 (git+https://github.com/cessen/regex-cursor.git?branch=ropey2)", + "regex-cursor", "ropey 2.0.0-alpha.3", "rustix 1.0.7", "tempfile", @@ -2322,19 +2322,6 @@ dependencies = [ "regex-syntax", ] -[[package]] -name = "regex-cursor" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0497c781d2f982ae8284d2932aee6a877e58a4541daa5e8fadc18cc75c23a61d" -dependencies = [ - "log", - "memchr", - "regex-automata", - "regex-syntax", - "ropey 1.6.1", -] - [[package]] name = "regex-cursor" version = "0.1.5" @@ -2831,7 +2818,7 @@ dependencies = [ "kstring", "once_cell", "regex", - "regex-cursor 0.1.5 (git+https://github.com/cessen/regex-cursor.git?branch=ropey2)", + "regex-cursor", "ropey 2.0.0-alpha.3", "slab", "tree-house-bindings", @@ -2844,7 +2831,7 @@ source = "git+https://github.com/helix-editor/tree-house?branch=ropey2#d24f15c08 dependencies = [ "cc", "libloading", - "regex-cursor 0.1.5 (git+https://github.com/cessen/regex-cursor.git?branch=ropey2)", + "regex-cursor", "ropey 2.0.0-alpha.3", "thiserror 2.0.12", ] diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml index c2f9d5118..a2e8d465d 100644 --- a/helix-core/Cargo.toml +++ b/helix-core/Cargo.toml @@ -58,7 +58,7 @@ textwrap = "0.16.2" nucleo.workspace = true parking_lot.workspace = true globset = "0.4.16" -regex-cursor = "0.1.5" +regex-cursor.workspace = true [dev-dependencies] quickcheck = { version = "1", default-features = false } diff --git a/helix-core/src/graphemes.rs b/helix-core/src/graphemes.rs index 4cbb57464..10c8fc7df 100644 --- 
a/helix-core/src/graphemes.rs +++ b/helix-core/src/graphemes.rs @@ -1,8 +1,6 @@ //! Utility functions to traverse the unicode graphemes of a `Rope`'s text contents. //! //! Based on -use ropey::{str_utils::byte_to_char_idx, RopeSlice}; -use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete}; use unicode_width::UnicodeWidthStr; use std::borrow::Cow; @@ -119,129 +117,6 @@ pub fn grapheme_width(g: &str) -> usize { } } -// NOTE: for byte indexing versions of these functions see `RopeSliceExt`'s -// `floor_grapheme_boundary` and `ceil_grapheme_boundary` and the rope grapheme iterators. - -#[must_use] -pub fn nth_prev_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize { - // Bounds check - debug_assert!(char_idx <= slice.len_chars()); - - // We work with bytes for this, so convert. - let mut byte_idx = slice.char_to_byte(char_idx); - - // Get the chunk with our byte index in it. - let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx); - - // Set up the grapheme cursor. - let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true); - - // Find the previous grapheme cluster boundary. - for _ in 0..n { - loop { - match gc.prev_boundary(chunk, chunk_byte_idx) { - Ok(None) => return 0, - Ok(Some(n)) => { - byte_idx = n; - break; - } - Err(GraphemeIncomplete::PrevChunk) => { - let (a, b, c, _) = slice.chunk_at_byte(chunk_byte_idx - 1); - chunk = a; - chunk_byte_idx = b; - chunk_char_idx = c; - } - Err(GraphemeIncomplete::PreContext(n)) => { - let ctx_chunk = slice.chunk_at_byte(n - 1).0; - gc.provide_context(ctx_chunk, n - ctx_chunk.len()); - } - _ => unreachable!(), - } - } - } - let tmp = byte_to_char_idx(chunk, byte_idx - chunk_byte_idx); - chunk_char_idx + tmp -} - -/// Finds the previous grapheme boundary before the given char position. 
-#[must_use] -#[inline(always)] -pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize { - nth_prev_grapheme_boundary(slice, char_idx, 1) -} - -#[must_use] -pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize { - // Bounds check - debug_assert!(char_idx <= slice.len_chars()); - - // We work with bytes for this, so convert. - let mut byte_idx = slice.char_to_byte(char_idx); - - // Get the chunk with our byte index in it. - let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx); - - // Set up the grapheme cursor. - let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true); - - // Find the nth next grapheme cluster boundary. - for _ in 0..n { - loop { - match gc.next_boundary(chunk, chunk_byte_idx) { - Ok(None) => return slice.len_chars(), - Ok(Some(n)) => { - byte_idx = n; - break; - } - Err(GraphemeIncomplete::NextChunk) => { - chunk_byte_idx += chunk.len(); - let (a, _, c, _) = slice.chunk_at_byte(chunk_byte_idx); - chunk = a; - chunk_char_idx = c; - } - Err(GraphemeIncomplete::PreContext(n)) => { - let ctx_chunk = slice.chunk_at_byte(n - 1).0; - gc.provide_context(ctx_chunk, n - ctx_chunk.len()); - } - _ => unreachable!(), - } - } - } - let tmp = byte_to_char_idx(chunk, byte_idx - chunk_byte_idx); - chunk_char_idx + tmp -} - -/// Finds the next grapheme boundary after the given char position. -#[must_use] -#[inline(always)] -pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize { - nth_next_grapheme_boundary(slice, char_idx, 1) -} - -/// Returns the passed char index if it's already a grapheme boundary, -/// or the next grapheme boundary char index if not. 
-#[must_use] -#[inline] -pub fn ensure_grapheme_boundary_next(slice: RopeSlice, char_idx: usize) -> usize { - if char_idx == 0 { - char_idx - } else { - next_grapheme_boundary(slice, char_idx - 1) - } -} - -/// Returns the passed char index if it's already a grapheme boundary, -/// or the prev grapheme boundary char index if not. -#[must_use] -#[inline] -pub fn ensure_grapheme_boundary_prev(slice: RopeSlice, char_idx: usize) -> usize { - if char_idx == slice.len_chars() { - char_idx - } else { - prev_grapheme_boundary(slice, char_idx + 1) - } -} - /// A highly compressed Cow<'a, str> that holds /// atmost u31::MAX bytes and is readonly pub struct GraphemeStr<'a> { diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 09865ca40..a05686ba6 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -45,7 +45,7 @@ pub use helix_loader::find_workspace; mod rope_reader; pub use rope_reader::RopeReader; -pub use ropey::{self, str_utils, Rope, RopeBuilder, RopeSlice}; +pub use ropey::{self, Rope, RopeBuilder, RopeSlice}; // pub use tendril::StrTendril as Tendril; pub use smartstring::SmartString; diff --git a/helix-core/src/object.rs b/helix-core/src/object.rs index e0c02d0a9..af16b4799 100644 --- a/helix-core/src/object.rs +++ b/helix-core/src/object.rs @@ -1,12 +1,11 @@ -use crate::{movement::Direction, syntax::TreeCursor, Range, RopeSlice, Selection, Syntax}; +use crate::{movement::Direction, syntax::TreeCursor, Range, Selection, Syntax}; -pub fn expand_selection(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { +pub fn expand_selection(syntax: &Syntax, selection: Selection) -> Selection { let cursor = &mut syntax.walk(); selection.transform(|range| { - let from = text.char_to_byte(range.from()) as u32; - let to = text.char_to_byte(range.to()) as u32; - + let from = range.from() as u32; + let to = range.to() as u32; let byte_range = from..to; cursor.reset_to_byte_range(from, to); @@ -17,17 +16,14 @@ pub fn expand_selection(syntax: 
&Syntax, text: RopeSlice, selection: Selection) } let node = cursor.node(); - let from = text.byte_to_char(node.start_byte() as usize); - let to = text.byte_to_char(node.end_byte() as usize); - - Range::new(to, from).with_direction(range.direction()) + Range::new(node.start_byte() as usize, node.end_byte() as usize) + .with_direction(range.direction()) }) } -pub fn shrink_selection(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { +pub fn shrink_selection(syntax: &Syntax, selection: Selection) -> Selection { select_node_impl( syntax, - text, selection, |cursor| { cursor.goto_first_child(); @@ -36,10 +32,9 @@ pub fn shrink_selection(syntax: &Syntax, text: RopeSlice, selection: Selection) ) } -pub fn select_next_sibling(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { +pub fn select_next_sibling(syntax: &Syntax, selection: Selection) -> Selection { select_node_impl( syntax, - text, selection, |cursor| { while !cursor.goto_next_sibling() { @@ -52,34 +47,36 @@ pub fn select_next_sibling(syntax: &Syntax, text: RopeSlice, selection: Selectio ) } -pub fn select_all_siblings(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { +pub fn select_all_siblings(syntax: &Syntax, selection: Selection) -> Selection { let mut cursor = syntax.walk(); selection.transform_iter(move |range| { - let (from, to) = range.into_byte_range(text); + let from = range.from(); + let to = range.to(); cursor.reset_to_byte_range(from as u32, to as u32); if !cursor.goto_parent_with(|parent| parent.child_count() > 1) { return vec![range].into_iter(); } - select_children(&mut cursor, text, range).into_iter() + select_children(&mut cursor, range).into_iter() }) } -pub fn select_all_children(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { +pub fn select_all_children(syntax: &Syntax, selection: Selection) -> Selection { let mut cursor = syntax.walk(); selection.transform_iter(move |range| { - let (from, to) = 
range.into_byte_range(text); + let from = range.from(); + let to = range.to(); cursor.reset_to_byte_range(from as u32, to as u32); - select_children(&mut cursor, text, range).into_iter() + select_children(&mut cursor, range).into_iter() }) } -fn select_children(cursor: &mut TreeCursor, text: RopeSlice, range: Range) -> Vec<Range> { +fn select_children(cursor: &mut TreeCursor, range: Range) -> Vec<Range> { let children = cursor .children() .filter(|child| child.is_named()) - .map(|child| Range::from_node(child, text, range.direction())) + .map(|child| Range::from_node(child, range.direction())) .collect::<Vec<_>>(); if !children.is_empty() { @@ -89,10 +86,9 @@ fn select_children(cursor: &mut TreeCursor, text: RopeSlice, range: Range) -> Ve } } -pub fn select_prev_sibling(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { +pub fn select_prev_sibling(syntax: &Syntax, selection: Selection) -> Selection { select_node_impl( syntax, - text, selection, |cursor| { while !cursor.goto_previous_sibling() { @@ -107,7 +103,6 @@ pub fn select_prev_sibling(syntax: &Syntax, text: RopeSlice, selection: Selectio fn select_node_impl<F>( syntax: &Syntax, - text: RopeSlice, selection: Selection, motion: F, direction: Option<Direction>, @@ -118,17 +113,12 @@ where let cursor = &mut syntax.walk(); selection.transform(|range| { - let from = text.char_to_byte(range.from()) as u32; - let to = text.char_to_byte(range.to()) as u32; - - cursor.reset_to_byte_range(from, to); + cursor.reset_to_byte_range(range.from() as u32, range.to() as u32); motion(cursor); let node = cursor.node(); - let from = text.byte_to_char(node.start_byte() as usize); - let to = text.byte_to_char(node.end_byte() as usize); - - Range::new(from, to).with_direction(direction.unwrap_or_else(|| range.direction())) + Range::new(node.start_byte() as usize, node.end_byte() as usize) + .with_direction(direction.unwrap_or_else(|| range.direction())) }) } diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs index
5bde08e31..7433a89aa 100644 --- a/helix-core/src/selection.rs +++ b/helix-core/src/selection.rs @@ -3,17 +3,11 @@ //! //! All positioning is done via `char` offsets into the buffer. use crate::{ - graphemes::{ - ensure_grapheme_boundary_next, ensure_grapheme_boundary_prev, next_grapheme_boundary, - prev_grapheme_boundary, - }, - line_ending::get_line_ending, - movement::Direction, - tree_sitter::Node, - Assoc, ChangeSet, RopeSlice, + line_ending::get_line_ending, movement::Direction, tree_sitter::Node, Assoc, ChangeSet, + RopeSlice, }; -use helix_stdx::range::is_subset; use helix_stdx::rope::{self, RopeSliceExt}; +use helix_stdx::{range::is_subset, rope::LINE_TYPE}; use smallvec::{smallvec, SmallVec}; use std::{borrow::Cow, iter, slice}; @@ -25,9 +19,9 @@ use std::{borrow::Cow, iter, slice}; /// can be in any order, or even share the same position. /// /// The anchor and head positions use gap indexing, meaning -/// that their indices represent the gaps *between* `char`s -/// rather than the `char`s themselves. For example, 1 -/// represents the position between the first and second `char`. +/// that their indices represent the gaps *between* bytes +/// rather than the bytes themselves. For example, 1 +/// represents the position between the first and second byte. /// /// Below are some examples of `Range` configurations. /// The anchor and head indices are shown as "(anchor, head)" @@ -75,10 +69,9 @@ impl Range { Self::new(head, head) } - pub fn from_node(node: Node, text: RopeSlice, direction: Direction) -> Self { - let from = text.byte_to_char(node.start_byte() as usize); - let to = text.byte_to_char(node.end_byte() as usize); - Range::new(from, to).with_direction(direction) + pub fn from_node(node: Node, direction: Direction) -> Self { + let range = node.byte_range(); + Range::new(range.start as usize, range.end as usize).with_direction(direction) } /// Start of the range. 
@@ -110,10 +103,13 @@ impl Range { let to = if self.is_empty() { self.to() } else { - prev_grapheme_boundary(text, self.to()).max(from) + text.prev_grapheme_boundary(self.to()).max(from) }; - (text.char_to_line(from), text.char_to_line(to)) + ( + text.byte_to_line_idx(from, LINE_TYPE), + text.byte_to_line_idx(to, LINE_TYPE), + ) } /// `true` when head and anchor are at the same position. @@ -277,16 +273,16 @@ impl Range { use std::cmp::Ordering; let (new_anchor, new_head) = match self.anchor.cmp(&self.head) { Ordering::Equal => { - let pos = ensure_grapheme_boundary_prev(slice, self.anchor); + let pos = slice.floor_grapheme_boundary(self.anchor); (pos, pos) } Ordering::Less => ( - ensure_grapheme_boundary_prev(slice, self.anchor), - ensure_grapheme_boundary_next(slice, self.head), + slice.floor_grapheme_boundary(self.anchor), + slice.ceil_grapheme_boundary(self.head), ), Ordering::Greater => ( - ensure_grapheme_boundary_next(slice, self.anchor), - ensure_grapheme_boundary_prev(slice, self.head), + slice.ceil_grapheme_boundary(self.anchor), + slice.floor_grapheme_boundary(self.head), ), }; Range { @@ -318,7 +314,7 @@ impl Range { if self.anchor == self.head { Range { anchor: self.anchor, - head: next_grapheme_boundary(slice, self.head), + head: slice.next_grapheme_boundary(self.head), old_visual_position: self.old_visual_position, } } else { @@ -334,39 +330,39 @@ impl Range { #[inline] pub fn cursor(self, text: RopeSlice) -> usize { if self.head > self.anchor { - prev_grapheme_boundary(text, self.head) + text.prev_grapheme_boundary(self.head) } else { self.head } } - /// Puts the left side of the block cursor at `char_idx`, optionally extending. + /// Puts the left side of the block cursor at `byte_idx`, optionally extending.
/// /// This follows "1-width" semantics, and therefore does a combination of anchor /// and head moves to behave as if both the front and back of the range are 1-width /// blocks /// - /// This method assumes that the range and `char_idx` are already properly + /// This method assumes that the range and `byte_idx` are already properly /// grapheme-aligned. #[must_use] #[inline] - pub fn put_cursor(self, text: RopeSlice, char_idx: usize, extend: bool) -> Range { + pub fn put_cursor(self, text: RopeSlice, byte_idx: usize, extend: bool) -> Range { if extend { - let anchor = if self.head >= self.anchor && char_idx < self.anchor { - next_grapheme_boundary(text, self.anchor) - } else if self.head < self.anchor && char_idx >= self.anchor { - prev_grapheme_boundary(text, self.anchor) + let anchor = if self.head >= self.anchor && byte_idx < self.anchor { + text.next_grapheme_boundary(self.anchor) + } else if self.head < self.anchor && byte_idx >= self.anchor { + text.prev_grapheme_boundary(self.anchor) } else { self.anchor }; - if anchor <= char_idx { - Range::new(anchor, next_grapheme_boundary(text, char_idx)) + if anchor <= byte_idx { + Range::new(anchor, text.next_grapheme_boundary(byte_idx)) } else { - Range::new(anchor, char_idx) + Range::new(anchor, byte_idx) } } else { - Range::point(char_idx) + Range::point(byte_idx) } } @@ -374,7 +370,7 @@ impl Range { #[inline] #[must_use] pub fn cursor_line(&self, text: RopeSlice) -> usize { - text.char_to_line(self.cursor(text)) + text.byte_to_line_idx(self.cursor(text), LINE_TYPE) } /// Returns true if this Range covers a single grapheme in the given text @@ -384,12 +380,6 @@ impl Range { let second = graphemes.next(); first.is_some() && second.is_none() } - - /// Converts this char range into an in order byte range, discarding - /// direction. 
- pub fn into_byte_range(&self, text: RopeSlice) -> (usize, usize) { - (text.char_to_byte(self.from()), text.char_to_byte(self.to())) - } } impl From<(usize, usize)> for Range { @@ -772,7 +762,9 @@ pub fn keep_or_remove_matches( ) -> Option { let result: SmallVec<_> = selection .iter() - .filter(|range| regex.is_match(text.regex_input_at(range.from()..range.to())) ^ remove) + .filter(|range| { + regex.is_match(text.regex_input_at_bytes(range.from()..range.to())) ^ remove + }) .copied() .collect(); @@ -792,13 +784,10 @@ pub fn select_on_matches( let mut result = SmallVec::with_capacity(selection.len()); for sel in selection { - for mat in regex.find_iter(text.regex_input_at(sel.from()..sel.to())) { + for mat in regex.find_iter(text.regex_input_at_bytes(sel.from()..sel.to())) { // TODO: retain range direction - let start = text.byte_to_char(mat.start()); - let end = text.byte_to_char(mat.end()); - - let range = Range::new(start, end); + let range = Range::new(mat.start(), mat.end()); // Make sure the match is not right outside of the selection. 
// These invalid matches can come from using RegEx anchors like `^`, `$` if range != Range::point(sel.to()) { @@ -830,7 +819,7 @@ pub fn split_on_newline(text: RopeSlice, selection: &Selection) -> Selection { let mut start = sel_start; - for line in sel.slice(text).lines() { + for line in sel.slice(text).lines(LINE_TYPE) { let Some(line_ending) = get_line_ending(&line) else { break; }; @@ -863,11 +852,11 @@ pub fn split_on_matches(text: RopeSlice, selection: &Selection, regex: &rope::Re let sel_end = sel.to(); let mut start = sel_start; - for mat in regex.find_iter(text.regex_input_at(sel_start..sel_end)) { + for mat in regex.find_iter(text.regex_input_at_bytes(sel_start..sel_end)) { // TODO: retain range direction - let end = text.byte_to_char(mat.start()); + let end = mat.start(); result.push(Range::new(start, end)); - start = text.byte_to_char(mat.end()); + start = mat.end(); } if start < sel_end { diff --git a/helix-core/src/snippets/elaborate.rs b/helix-core/src/snippets/elaborate.rs index 012d1db77..09eee7e51 100644 --- a/helix-core/src/snippets/elaborate.rs +++ b/helix-core/src/snippets/elaborate.rs @@ -325,14 +325,14 @@ impl Transform { let mut buf = Tendril::new(); let it = self .regex - .captures_iter(doc.regex_input_at(range)) + .captures_iter(doc.regex_input_at_bytes(range)) .enumerate(); doc = doc.slice(range); let mut last_match = 0; for (_, cap) in it { // unwrap on 0 is OK because captures only reports matches let m = cap.get_group(0).unwrap(); - buf.extend(doc.byte_slice(last_match..m.start).chunks()); + buf.extend(doc.slice(last_match..m.start).chunks()); last_match = m.end; for fmt in &*self.replacement { match *fmt { @@ -341,12 +341,12 @@ impl Transform { } FormatItem::Capture(i) => { if let Some(cap) = cap.get_group(i) { - buf.extend(doc.byte_slice(cap.range()).chunks()); + buf.extend(doc.slice(cap.range()).chunks()); } } FormatItem::CaseChange(i, change) => { if let Some(cap) = cap.get_group(i).filter(|i| !i.is_empty()) { - let mut chars = 
doc.byte_slice(cap.range()).chars(); + let mut chars = doc.slice(cap.range()).chars(); match change { CaseChange::Upcase => to_upper_case_with(chars, &mut buf), CaseChange::Downcase => to_lower_case_with(chars, &mut buf), @@ -373,7 +373,7 @@ impl Transform { break; } } - buf.extend(doc.byte_slice(last_match..).chunks()); + buf.extend(doc.slice(last_match..).chunks()); buf } } diff --git a/helix-stdx/src/rope.rs b/helix-stdx/src/rope.rs index 78eb66292..e4dedd186 100644 --- a/helix-stdx/src/rope.rs +++ b/helix-stdx/src/rope.rs @@ -6,6 +6,8 @@ use regex_cursor::Input as RegexInput; use ropey::{ChunkCursor, RopeSlice}; use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete}; +pub const LINE_TYPE: ropey::LineType = ropey::LineType::LF_CR; + pub trait RopeSliceExt<'a>: Sized { fn ends_with(self, text: &str) -> bool; fn starts_with(self, text: &str) -> bool; @@ -39,6 +41,10 @@ pub trait RopeSliceExt<'a>: Sized { /// assert_eq!(text.floor_grapheme_boundary(2), 2); /// ``` fn floor_grapheme_boundary(self, byte_idx: usize) -> usize; + fn prev_grapheme_boundary(self, byte_idx: usize) -> usize { + self.nth_prev_grapheme_boundary(byte_idx, 1) + } + fn nth_prev_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize; /// Finds the closest byte index not exceeding `byte_idx` which lies on a grapheme cluster /// boundary. /// @@ -60,6 +66,10 @@ pub trait RopeSliceExt<'a>: Sized { /// assert_eq!(text.ceil_grapheme_boundary(2), 2); /// ``` fn ceil_grapheme_boundary(self, byte_idx: usize) -> usize; + fn next_grapheme_boundary(self, byte_idx: usize) -> usize { + self.nth_next_grapheme_boundary(byte_idx, 1) + } + fn nth_next_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize; /// Checks whether the `byte_idx` lies on a grapheme cluster boundary. 
/// /// # Example @@ -185,6 +195,31 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> { } } + fn nth_prev_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize { + byte_idx = self.floor_char_boundary(byte_idx); + + let mut chunk_cursor = self.chunk_cursor_at(byte_idx); + let mut cursor = GraphemeCursor::new(byte_idx, self.len(), true); + for _ in 0..n { + loop { + match cursor.prev_boundary(chunk_cursor.chunk(), chunk_cursor.byte_offset()) { + Ok(None) => return 0, + Ok(Some(boundary)) => { + byte_idx = boundary; + break; + } + Err(GraphemeIncomplete::PrevChunk) => assert!(chunk_cursor.prev()), + Err(GraphemeIncomplete::PreContext(n)) => { + let ctx_chunk = self.chunk(n - 1).0; + cursor.provide_context(ctx_chunk, n - ctx_chunk.len()); + } + _ => unreachable!(), + } + } + } + byte_idx + } + fn ceil_grapheme_boundary(self, mut byte_idx: usize) -> usize { if byte_idx >= self.len() { return self.len(); @@ -212,6 +247,31 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> { } } + fn nth_next_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize { + byte_idx = self.ceil_char_boundary(byte_idx); + + let mut chunk_cursor = self.chunk_cursor_at(byte_idx); + let mut cursor = GraphemeCursor::new(byte_idx, self.len(), true); + for _ in 0..n { + loop { + match cursor.next_boundary(chunk_cursor.chunk(), chunk_cursor.byte_offset()) { + Ok(None) => return self.len(), + Ok(Some(boundary)) => { + byte_idx = boundary; + break; + } + Err(GraphemeIncomplete::NextChunk) => assert!(chunk_cursor.next()), + Err(GraphemeIncomplete::PreContext(n)) => { + let ctx_chunk = self.chunk(n - 1).0; + cursor.provide_context(ctx_chunk, n - ctx_chunk.len()); + } + _ => unreachable!(), + } + } + } + byte_idx + } + fn is_grapheme_boundary(self, byte_idx: usize) -> bool { // The byte must lie on a character boundary to lie on a grapheme cluster boundary. if !self.is_char_boundary(byte_idx) {