[ruby/json] Fix a regression in parsing of Unicode surrogate pairs

Fix: https://github.com/ruby/json/issues/912 In the case of surrogate pairs we consume two backslashes, so `json_next_backslash` needs to ensure it's not sending us back in the stream. https://github.com/ruby/json/commit/0fce370c41
2026-01-27 04:24:23 +00:00 · 2025-12-04 09:06:10 +01:00 · 2025-12-04 09:06:10 +01:00 · d58a45d32f
commit d58a45d32f
parent 932762f294
2 changed files with 10 additions and 1 deletions
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@ -651,7 +651,9 @@ static inline const char *json_next_backslash(const char *pe, const char *string
        positions->size--;
        const char *next_position = positions->positions[0];
        positions->positions++;
-        return next_position;
+        if (next_position >= pe) {
+            return next_position;
+        }
    }

    if (positions->has_more) {
--- a/test/json/json_parser_test.rb
+++ b/test/json/json_parser_test.rb
@ -325,6 +325,13 @@ class JSONParserTest < Test::Unit::TestCase
    assert_raise(JSON::ParserError) { parse('"\u111___"') }
  end

+  def test_unicode_followed_by_newline
+    # Ref: https://github.com/ruby/json/issues/912
+    assert_equal "🌌\n".bytes, JSON.parse('"\ud83c\udf0c\n"').bytes
+    assert_equal "🌌\n", JSON.parse('"\ud83c\udf0c\n"')
+    assert_predicate JSON.parse('"\ud83c\udf0c\n"'), :valid_encoding?
+  end
+
  def test_invalid_surogates
    assert_raise(JSON::ParserError) { parse('"\\uD800"') }
    assert_raise(JSON::ParserError) { parse('"\\uD800_________________"') }