[DOC] Tweaks for String#dump and String#undump

Merged: https://github.com/ruby/ruby/pull/15180 Merged-By: peterzhu2118 <peter@peterzhu.ca>
2026-01-26 12:14:51 +00:00 · 2025-11-18 20:56:14 -06:00 · 2025-11-18 20:56:14 -06:00 · 319001192d · 2025-11-19 02:56:41 +00:00
commit 319001192d
parent 3ee08c8df8
3 changed files with 88 additions and 50 deletions
--- a/doc/string.rb
+++ b/doc/string.rb
@ -322,8 +322,7 @@
 # _Substitution_
 #
 # - #dump: Returns a printable version of +self+, enclosed in double-quotes.
-# - #undump: Returns a copy of +self+ with all <tt>\xNN</tt> notations replaced by <tt>\uNNNN</tt> notations
-#   and all escaped characters unescaped.
+# - #undump: Inverse of #dump; returns a copy of +self+ with changes of the kinds made by #dump "undone."
 # - #sub: Returns a copy of +self+ with the first substring matching a given pattern
 #   replaced with a given replacement string.
 # - #gsub: Returns a copy of +self+ with each substring that matches a given pattern
--- a/doc/string/dump.rdoc
+++ b/doc/string/dump.rdoc
@ -1,52 +1,97 @@
-Returns a printable version of +self+, enclosed in double-quotes:
+For an ordinary string, this method, +String#dump+,
+returns a printable ASCII-only version of +self+, enclosed in double-quotes.

-  'hello'.dump     # => "\"hello\""
+For a dumped string, method String#undump is the inverse of +String#dump+;
+it returns a "restored" version of +self+,
+where all the dumping changes have been undone.

-Certain special characters are rendered with escapes:
+In the simplest case, the dumped string contains the original string,
+enclosed in double-quotes;
+this example is done in +irb+ (interactive Ruby), which uses method +inspect+ to render the results:

-  '"'.dump  # => "\"\\\"\""
-  '\\'.dump # => "\"\\\\\""
+  s = 'hello'   # => "hello"
+  s.dump        # => "\"hello\""
+  s.dump.undump # => "hello"

-Non-printing characters are rendered with escapes:
+Keep in mind that in the second line above:
+
+- The outer double-quotes are put on by +inspect+,
+  and _are_ _not_ part of the output of #dump.
+- The inner double-quotes _are_ part of the output of +dump+,
+  and are escaped by +inspect+ because they are within the outer double-quotes.
+
+To avoid confusion, we'll use this helper method to omit the outer double-quotes:
+
+  def dump(s)
+    print "String:    ", s, "\n"
+    print "Dumped:    ", s.dump, "\n"
+    print "Undumped:  ", s.dump.undump, "\n"
+  end
+
+So that for string <tt>'hello'</tt>, we'll see:
+
+  String:    hello
+  Dumped:    "hello"
+  Undumped:  hello
+
+In a dump, certain special characters are escaped:
+
+  String:    "
+  Dumped:    "\""
+  Undumped:  "
+
+  String:    \
+  Dumped:    "\\"
+  Undumped:  \
+
+In a dump, unprintable characters are replaced by printable escape sequences;
+among them are the whitespace characters (other than space itself), alert, and backspace;
+here we see the ordinals for those characters, together with explanatory text:
+
+  h = {
+     7 => 'Alert (BEL)',
+     8 => 'Backspace (BS)',
+     9 => 'Horizontal tab (HT)',
+    10 => 'Linefeed (LF)',
+    11 => 'Vertical tab (VT)',
+    12 => 'Formfeed (FF)',
+    13 => 'Carriage return (CR)'
+  }
+
+In this example, the dumped output is printed by method #inspect,
+and so contains both outer double-quotes and escaped inner double-quotes:

  s = ''
-  s << 7   # Alarm (bell).
-  s << 8   # Back space.
-  s << 9   # Horizontal tab.
-  s << 10  # Line feed.
-  s << 11  # Vertical tab.
-  s << 12  # Form feed.
-  s << 13  # Carriage return.
-  s        # => "\a\b\t\n\v\f\r"
-  s.dump   # => "\"\\a\\b\\t\\n\\v\\f\\r\""
+  h.keys.each {|i| s << i } # => [7, 8, 9, 10, 11, 12, 13]
+  s                         # => "\a\b\t\n\v\f\r"
+  s.dump                    # => "\"\\a\\b\\t\\n\\v\\f\\r\""

-If +self+ is encoded in UTF-8 and contains Unicode characters, renders Unicode
-characters in Unicode escape sequence:
+If +self+ is encoded in UTF-8 and contains Unicode characters,
+each Unicode character is dumped as a Unicode escape sequence:

-  'тест'.dump     # => "\"\\u0442\\u0435\\u0441\\u0442\""
-  'こんにちは'.dump # => "\"\\u3053\\u3093\\u306B\\u3061\\u306F\""
+  String:    тест
+  Dumped:    "\u0442\u0435\u0441\u0442"
+  Undumped:  тест

-If the encoding of +self+ is not ASCII-compatible (i.e., +self.encoding.ascii_compatible?+
-returns +false+), renders all ASCII-compatible bytes as ASCII characters and all
-other bytes as hexadecimal. Appends <tt>.dup.force_encoding(\"encoding\")</tt>, where
-<tt><encoding></tt> is +self.encoding.name+:
+  String:    こんにちは
+  Dumped:    "\u3053\u3093\u306B\u3061\u306F"
+  Undumped:  こんにちは

-  s = 'hello'
-  s.encoding                # => #<Encoding:UTF-8>
-  s.dump                    # => "\"hello\""
-  s.encode('utf-16').dump   # => "\"\\xFE\\xFF\\x00h\\x00e\\x00l\\x00l\\x00o\".dup.force_encoding(\"UTF-16\")"
-  s.encode('utf-16le').dump # => "\"h\\x00e\\x00l\\x00l\\x00o\\x00\".dup.force_encoding(\"UTF-16LE\")"
+If the encoding of +self+ is not ASCII-compatible
+(i.e., if <tt>self.encoding.ascii_compatible?</tt> returns +false+),
+each ASCII-compatible byte is dumped as an ASCII character,
+and all other bytes are dumped as hexadecimal;
+also appends <tt>.dup.force_encoding(\"encoding\")</tt>,
+where <tt><encoding></tt> is <tt>self.encoding.name</tt> (each string below was first encoded with <tt>encode('utf-16')</tt>):

-  s = 'тест'
-  s.encoding                # => #<Encoding:UTF-8>
-  s.dump                    # => "\"\\u0442\\u0435\\u0441\\u0442\""
-  s.encode('utf-16').dump   # => "\"\\xFE\\xFF\\x04B\\x045\\x04A\\x04B\".dup.force_encoding(\"UTF-16\")"
-  s.encode('utf-16le').dump # => "\"B\\x045\\x04A\\x04B\\x04\".dup.force_encoding(\"UTF-16LE\")"
+  String:    hello
+  Dumped:    "\xFE\xFF\x00h\x00e\x00l\x00l\x00o".dup.force_encoding("UTF-16")
+  Undumped:  hello

-  s = 'こんにちは'
-  s.encoding                # => #<Encoding:UTF-8>
-  s.dump                    # => "\"\\u3053\\u3093\\u306B\\u3061\\u306F\""
-  s.encode('utf-16').dump   # => "\"\\xFE\\xFF0S0\\x930k0a0o\".dup.force_encoding(\"UTF-16\")"
-  s.encode('utf-16le').dump # => "\"S0\\x930k0a0o0\".dup.force_encoding(\"UTF-16LE\")"
+  String:    тест
+  Dumped:    "\xFE\xFF\x04B\x045\x04A\x04B".dup.force_encoding("UTF-16")
+  Undumped:  тест

-Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String].
+  String:    こんにちは
+  Dumped:    "\xFE\xFF0S0\x930k0a0o".dup.force_encoding("UTF-16")
+  Undumped:  こんにちは
--- a/string.c
+++ b/string.c
@ -7628,17 +7628,11 @@ static VALUE rb_str_is_ascii_only_p(VALUE str);

 /*
 *  call-seq:
- *    undump -> string
+ *    undump -> new_string
 *
- *  Returns an unescaped version of +self+:
- *
- *    s_orig = "\f\x00\xff\\\""    # => "\f\u0000\xFF\\\""
- *    s_dumped = s_orig.dump       # => "\"\\f\\x00\\xFF\\\\\\\"\""
- *    s_undumped = s_dumped.undump # => "\f\u0000\xFF\\\""
- *    s_undumped == s_orig         # => true
- *
- *  Related: String#dump (inverse of String#undump).
+ *  Inverse of String#dump; returns a copy of +self+ with changes of the kinds made by String#dump "undone."
 *
+ *  Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String].
 */

 static VALUE