Fix prompt truncation for non-ASCII lines

The prompt was previously assuming that each grapheme cluster in the
line was single-width and single-byte. Lines like the one in the new
integration test would cause panics because the anchor attempted to
slice into the middle of a multi-byte character.

This change rewrites the anchor and truncation code in the prompt to
account for Unicode segmentation and width. Now multi-width graphemes
can be hidden by multiple consecutive ellipses - for example "十" is
hidden by "……" (2-width).

Co-authored-by: Narazaki, Shuji <shujinarazaki@protonmail.com>
pull/12229/merge
Michael Davis 2025-06-19 10:27:31 -04:00
parent 684e108fd0
commit d3fb8fc9b8
No known key found for this signature in database
3 changed files with 97 additions and 38 deletions

View File

@ -12,7 +12,9 @@ use tui::text::Span;
use tui::widgets::{Block, Widget};
use helix_core::{
unicode::segmentation::GraphemeCursor, unicode::width::UnicodeWidthStr, Position,
unicode::segmentation::{GraphemeCursor, UnicodeSegmentation},
unicode::width::UnicodeWidthStr,
Position,
};
use helix_view::{
graphics::{CursorKind, Margin, Rect},
@ -535,21 +537,51 @@ impl Prompt {
.into();
text.render(self.line_area, surface, cx);
} else {
if self.line.len() < self.line_area.width as usize {
let line_width = self.line_area.width as usize;
if self.line.width() < line_width {
self.anchor = 0;
} else if self.cursor < self.anchor {
self.anchor = self.cursor;
} else if self.cursor - self.anchor > self.line_area.width as usize {
self.anchor = self.cursor - self.line_area.width as usize;
} else if self.cursor <= self.anchor {
// Ensure the grapheme under the cursor is in view.
self.anchor = self.line[..self.cursor]
.grapheme_indices(true)
.next_back()
.map(|(i, _)| i)
.unwrap_or_default();
} else if self.line[self.anchor..self.cursor].width() > line_width {
// Set the anchor to the last grapheme cluster before the width is exceeded.
let mut width = 0;
self.anchor = self.line[..self.cursor]
.grapheme_indices(true)
.rev()
.find_map(|(idx, g)| {
width += g.width();
if width > line_width {
Some(idx + g.len())
} else {
None
}
})
.unwrap();
}
self.truncate_start = self.anchor > 0;
self.truncate_end = self.line.len() - self.anchor > self.line_area.width as usize;
self.truncate_end = self.line[self.anchor..].width() > line_width;
// if we keep inserting characters just before the end ellipsis, we move the anchor
// so that those new characters are displayed
if self.truncate_end && self.cursor - self.anchor >= self.line_area.width as usize {
self.anchor += 1;
if self.truncate_end && self.line[self.anchor..self.cursor].width() >= line_width {
// Move the anchor forward by one non-zero-width grapheme.
self.anchor += self.line[self.anchor..]
.grapheme_indices(true)
.find_map(|(idx, g)| {
if g.width() > 0 {
Some(idx + g.len())
} else {
None
}
})
.unwrap();
}
surface.set_string_anchored(
@ -558,7 +590,7 @@ impl Prompt {
self.truncate_start,
self.truncate_end,
&self.line.as_str()[self.anchor..],
self.line_area.width as usize - self.truncate_end as usize,
line_width,
|_| prompt_color,
);
}
@ -734,17 +766,21 @@ impl Component for Prompt {
.clip_left(self.prompt.len() as u16)
.clip_right(if self.prompt.is_empty() { 2 } else { 0 });
let anchor = self.anchor.min(self.line.len().saturating_sub(1));
let mut col = area.left() as usize
+ UnicodeWidthStr::width(&self.line[anchor..self.cursor.max(anchor)]);
let mut col = area.left() as usize + self.line[self.anchor..self.cursor].width();
// ensure the cursor does not go beyond ellipses
if self.truncate_end && self.cursor - self.anchor >= self.line_area.width as usize {
if self.truncate_end
&& self.line[self.anchor..self.cursor].width() >= self.line_area.width as usize
{
col -= 1;
}
if self.truncate_start && self.cursor == self.anchor {
col += 1;
col += self.line[self.cursor..]
.graphemes(true)
.next()
.unwrap()
.width();
}
let line = area.height as usize - 1;

View File

@ -820,3 +820,25 @@ async fn macro_play_within_macro_record() -> anyhow::Result<()> {
Ok(())
}
// Regression test for prompt handling of non-ASCII input: a query made of multi-byte
// characters is typed into the prompt opened by the key sequence below.
#[tokio::test(flavor = "multi_thread")]
async fn global_search_with_multibyte_chars() -> anyhow::Result<()> {
// Assert that `helix_term::commands::global_search` handles multibyte characters correctly.
test((
// Initial buffer text with the selection markers.
indoc! {"\
// Hello world!
// #[|
]#
"},
// start global search
// The query is deliberately long and multi-byte; `<ret><esc>` presumably submits the
// prompt and then dismisses whatever it opens -- NOTE(review): confirm against keymap.
" /«十分に長い マルチバイトキャラクター列» で検索<ret><esc>",
// Expected buffer text: the same literal as the initial state, so the test only
// requires that the key sequence completes and leaves the buffer untouched.
indoc! {"\
// Hello world!
// #[|
]#
"},
))
.await?;
Ok(())
}

View File

@ -326,43 +326,44 @@ impl Buffer {
return (x, y);
}
let max_offset = min(
self.area.right() as usize - 1,
width.saturating_add(x as usize),
);
let mut start_index = self.index_of(x, y);
let mut end_index = self.index_of(max_offset as u16, y);
if truncate_end {
self.content[end_index].set_symbol("");
end_index -= 1;
}
let mut index = self.index_of(x, y);
let mut rendered_width = 0;
let mut graphemes = string.grapheme_indices(true);
if truncate_start {
self.content[start_index].set_symbol("");
start_index += 1;
for _ in 0..graphemes.next().map(|(_, g)| g.width()).unwrap_or_default() {
self.content[index].set_symbol("");
index += 1;
rendered_width += 1;
}
}
let graphemes = string.grapheme_indices(true);
for (byte_offset, s) in graphemes.skip(truncate_start as usize) {
if start_index > end_index {
for (byte_offset, s) in graphemes {
let grapheme_width = s.width();
if truncate_end && rendered_width + grapheme_width >= width {
break;
}
let width = s.width();
if width == 0 {
if grapheme_width == 0 {
continue;
}
self.content[start_index].set_symbol(s);
self.content[start_index].set_style(style(byte_offset));
self.content[index].set_symbol(s);
self.content[index].set_style(style(byte_offset));
// Reset following cells if multi-width (they would be hidden by the grapheme):
for i in start_index + 1..start_index + width {
for i in index + 1..index + grapheme_width {
self.content[i].reset();
}
start_index += width;
index += grapheme_width;
rendered_width += grapheme_width;
}
if truncate_end {
for _ in 0..width.saturating_sub(rendered_width) {
self.content[index].set_symbol("");
index += 1;
}
}
(x, y)