From d781d69a06e7d4eef3334e44a25b02d05bad1e2d Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 12 Sep 2025 21:13:40 -0400 Subject: [PATCH] Fix prism error messages with multibyte truncation When a line that contains multibyte characters is going to be displayed in an error message, we need to respect the encoding of the source and truncate only at a character boundary, as opposed to a raw byte boundary. Fixes [Bug #21528] --- prism_compile.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/prism_compile.c b/prism_compile.c index 9a6197f7cb..578e6f240f 100644 --- a/prism_compile.c +++ b/prism_compile.c @@ -10627,7 +10627,26 @@ pm_parse_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t * // Here we determine if we should truncate the end of the line. bool truncate_end = false; if ((column_end != 0) && ((end - (start + column_end)) >= PM_ERROR_TRUNCATE)) { - end = start + column_end + PM_ERROR_TRUNCATE; + const uint8_t *end_candidate = start + column_end + PM_ERROR_TRUNCATE; + + for (const uint8_t *ptr = start; ptr < end_candidate;) { + size_t char_width = parser->encoding->char_width(ptr, parser->end - ptr); + + // If we failed to decode a character, then just bail out and + // truncate at the fixed width. + if (char_width == 0) break; + + // If this next character would go past the end candidate, + // then we need to truncate before it. + if (ptr + char_width > end_candidate) { + end_candidate = ptr; + break; + } + + ptr += char_width; + } + + end = end_candidate; truncate_end = true; }