From d3fb8fc9b83a928c86c99a764ebe0a61563805ab Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Thu, 19 Jun 2025 10:27:31 -0400 Subject: [PATCH] Fix prompt truncation for non-ASCII lines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prompt was previously assuming that each grapheme cluster in the line was single-width and single-byte. Lines like the one in the new integration test would cause panics because the anchor attempted to slice into a character. This change rewrites the anchor and truncation code in the prompt to account for Unicode segmentation and width. Now multi-width graphemes can be hidden by multiple consecutive ellipses - for example "十" is hidden by "……" (2-width). Co-authored-by: Narazaki, Shuji --- helix-term/src/ui/prompt.rs | 66 ++++++++++++++++++++++++------- helix-term/tests/test/commands.rs | 22 +++++++++++ helix-tui/src/buffer.rs | 47 +++++++++++----------- 3 files changed, 97 insertions(+), 38 deletions(-) diff --git a/helix-term/src/ui/prompt.rs b/helix-term/src/ui/prompt.rs index ee5c46e76..ff4ca1fcc 100644 --- a/helix-term/src/ui/prompt.rs +++ b/helix-term/src/ui/prompt.rs @@ -12,7 +12,9 @@ use tui::text::Span; use tui::widgets::{Block, Widget}; use helix_core::{ - unicode::segmentation::GraphemeCursor, unicode::width::UnicodeWidthStr, Position, + unicode::segmentation::{GraphemeCursor, UnicodeSegmentation}, + unicode::width::UnicodeWidthStr, + Position, }; use helix_view::{ graphics::{CursorKind, Margin, Rect}, @@ -535,21 +537,51 @@ impl Prompt { .into(); text.render(self.line_area, surface, cx); } else { - if self.line.len() < self.line_area.width as usize { + let line_width = self.line_area.width as usize; + + if self.line.width() < line_width { self.anchor = 0; - } else if self.cursor < self.anchor { - self.anchor = self.cursor; - } else if self.cursor - self.anchor > self.line_area.width as usize { - self.anchor = self.cursor - self.line_area.width as usize; + } else if self.cursor 
<= self.anchor { + // Ensure the grapheme under the cursor is in view. + self.anchor = self.line[..self.cursor] + .grapheme_indices(true) + .next_back() + .map(|(i, _)| i) + .unwrap_or_default(); + } else if self.line[self.anchor..self.cursor].width() > line_width { + // Set the anchor to the last grapheme cluster before the width is exceeded. + let mut width = 0; + self.anchor = self.line[..self.cursor] + .grapheme_indices(true) + .rev() + .find_map(|(idx, g)| { + width += g.width(); + if width > line_width { + Some(idx + g.len()) + } else { + None + } + }) + .unwrap(); } self.truncate_start = self.anchor > 0; - self.truncate_end = self.line.len() - self.anchor > self.line_area.width as usize; + self.truncate_end = self.line[self.anchor..].width() > line_width; // if we keep inserting characters just before the end elipsis, we move the anchor // so that those new characters are displayed - if self.truncate_end && self.cursor - self.anchor >= self.line_area.width as usize { - self.anchor += 1; + if self.truncate_end && self.line[self.anchor..self.cursor].width() >= line_width { + // Move the anchor forward by one non-zero-width grapheme. + self.anchor += self.line[self.anchor..] 
+ .grapheme_indices(true) + .find_map(|(idx, g)| { + if g.width() > 0 { + Some(idx + g.len()) + } else { + None + } + }) + .unwrap(); } surface.set_string_anchored( @@ -558,7 +590,7 @@ impl Prompt { self.truncate_start, self.truncate_end, &self.line.as_str()[self.anchor..], - self.line_area.width as usize - self.truncate_end as usize, + line_width, |_| prompt_color, ); } @@ -734,17 +766,21 @@ impl Component for Prompt { .clip_left(self.prompt.len() as u16) .clip_right(if self.prompt.is_empty() { 2 } else { 0 }); - let anchor = self.anchor.min(self.line.len().saturating_sub(1)); - let mut col = area.left() as usize - + UnicodeWidthStr::width(&self.line[anchor..self.cursor.max(anchor)]); + let mut col = area.left() as usize + self.line[self.anchor..self.cursor].width(); // ensure the cursor does not go beyond elipses - if self.truncate_end && self.cursor - self.anchor >= self.line_area.width as usize { + if self.truncate_end + && self.line[self.anchor..self.cursor].width() >= self.line_area.width as usize + { col -= 1; } if self.truncate_start && self.cursor == self.anchor { - col += 1; + col += self.line[self.cursor..] + .graphemes(true) + .next() + .unwrap() + .width(); } let line = area.height as usize - 1; diff --git a/helix-term/tests/test/commands.rs b/helix-term/tests/test/commands.rs index 29f76cfb8..20e8ac9a7 100644 --- a/helix-term/tests/test/commands.rs +++ b/helix-term/tests/test/commands.rs @@ -820,3 +820,25 @@ async fn macro_play_within_macro_record() -> anyhow::Result<()> { Ok(()) } + +#[tokio::test(flavor = "multi_thread")] +async fn global_search_with_multibyte_chars() -> anyhow::Result<()> { + // Assert that `helix_term::commands::global_search` handles multibyte characters correctly. + test(( + indoc! {"\ + // Hello world! + // #[| + ]# + "}, + // start global search + " /«十分に長い マルチバイトキャラクター列» で検索", + indoc! {"\ + // Hello world! 
+ // #[| + ]# + "}, + )) + .await?; + + Ok(()) +} diff --git a/helix-tui/src/buffer.rs b/helix-tui/src/buffer.rs index bfcf35ac5..2cd30324b 100644 --- a/helix-tui/src/buffer.rs +++ b/helix-tui/src/buffer.rs @@ -326,43 +326,44 @@ impl Buffer { return (x, y); } - let max_offset = min( - self.area.right() as usize - 1, - width.saturating_add(x as usize), - ); - let mut start_index = self.index_of(x, y); - let mut end_index = self.index_of(max_offset as u16, y); - - if truncate_end { - self.content[end_index].set_symbol("…"); - end_index -= 1; - } + let mut index = self.index_of(x, y); + let mut rendered_width = 0; + let mut graphemes = string.grapheme_indices(true); if truncate_start { - self.content[start_index].set_symbol("…"); - start_index += 1; + for _ in 0..graphemes.next().map(|(_, g)| g.width()).unwrap_or_default() { + self.content[index].set_symbol("…"); + index += 1; + rendered_width += 1; + } } - let graphemes = string.grapheme_indices(true); - - for (byte_offset, s) in graphemes.skip(truncate_start as usize) { - if start_index > end_index { + for (byte_offset, s) in graphemes { + let grapheme_width = s.width(); + if truncate_end && rendered_width + grapheme_width >= width { break; } - let width = s.width(); - if width == 0 { + if grapheme_width == 0 { continue; } - self.content[start_index].set_symbol(s); - self.content[start_index].set_style(style(byte_offset)); + self.content[index].set_symbol(s); + self.content[index].set_style(style(byte_offset)); // Reset following cells if multi-width (they would be hidden by the grapheme): - for i in start_index + 1..start_index + width { + for i in index + 1..index + grapheme_width { self.content[i].reset(); } - start_index += width; + index += grapheme_width; + rendered_width += grapheme_width; + } + + if truncate_end { + for _ in 0..width.saturating_sub(rendered_width) { + self.content[index].set_symbol("…"); + index += 1; + } } (x, y)