[ruby/json] Fix a regression in parsing of unicode surrogate pairs

Fix: https://github.com/ruby/json/issues/912

In the case of surrogate pairs we consume two backslashes, so
`json_next_backslash` needs to ensure it's not sending us backward in
the stream.

https://github.com/ruby/json/commit/0fce370c41
This commit is contained in:
Jean Boussier 2025-12-04 09:06:10 +01:00 committed by git
parent 932762f294
commit d58a45d32f
2 changed files with 10 additions and 1 deletions

View File

@ -651,7 +651,9 @@ static inline const char *json_next_backslash(const char *pe, const char *string
positions->size--;
const char *next_position = positions->positions[0];
positions->positions++;
return next_position;
if (next_position >= pe) {
return next_position;
}
}
if (positions->has_more) {

View File

@ -325,6 +325,13 @@ class JSONParserTest < Test::Unit::TestCase
assert_raise(JSON::ParserError) { parse('"\u111___"') }
end
def test_unicode_followed_by_newline
  # Regression guard for https://github.com/ruby/json/issues/912:
  # a \uXXXX\uXXXX surrogate-pair escape directly followed by another
  # escape sequence (\n) must decode to the emoji plus a newline.
  source = '"\ud83c\udf0c\n"'
  expected = "🌌\n"

  # Compare raw bytes first so a failure shows exactly which bytes differ.
  assert_equal expected.bytes, JSON.parse(source).bytes
  assert_equal expected, JSON.parse(source)
  # The decoded string must also carry a valid encoding.
  assert_predicate JSON.parse(source), :valid_encoding?
end
def test_invalid_surogates
assert_raise(JSON::ParserError) { parse('"\\uD800"') }
assert_raise(JSON::ParserError) { parse('"\\uD800_________________"') }