[DOC] Use Japanese for multi-byte characters (#15745)

This commit is contained in:
Burdette Lamar 2025-12-26 14:06:21 -06:00 committed by GitHub
parent 7b3b1a1442
commit dedde99676
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
Notes: git 2025-12-26 20:06:48 +00:00
Merged-By: BurdetteLamar <BurdetteLamar@Yahoo.com>
2 changed files with 34 additions and 24 deletions

View File

@ -7,8 +7,8 @@ text = <<~EOT
Fifth line Fifth line
EOT EOT
# Russian text. # Japanese text.
russian = "\u{442 435 441 442}" # => "тест" japanese = 'こんにちは'
# Binary data. # Binary data.
data = "\u9990\u9991\u9992\u9993\u9994" data = "\u9990\u9991\u9992\u9993\u9994"
@ -16,8 +16,8 @@ data = "\u9990\u9991\u9992\u9993\u9994"
# Text file. # Text file.
File.write('t.txt', text) File.write('t.txt', text)
# File with Russian text. # File with Japanese text.
File.write('t.rus', russian) File.write('t.ja', japanese)
# File with binary data. # File with binary data.
f = File.new('t.dat', 'wb:UTF-16') f = File.new('t.dat', 'wb:UTF-16')

50
io.c
View File

@ -4720,10 +4720,11 @@ rb_io_each_line(int argc, VALUE *argv, VALUE io)
* Calls the given block with each byte (0..255) in the stream; returns +self+. * Calls the given block with each byte (0..255) in the stream; returns +self+.
* See {Byte IO}[rdoc-ref:IO@Byte+IO]. * See {Byte IO}[rdoc-ref:IO@Byte+IO].
* *
* f = File.new('t.rus') * File.read('t.ja') # => "こんにちは"
* f = File.new('t.ja')
* a = [] * a = []
* f.each_byte {|b| a << b } * f.each_byte {|b| a << b }
* a # => [209, 130, 208, 181, 209, 129, 209, 130] * a # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175]
* f.close * f.close
* *
* Returns an Enumerator if no block is given. * Returns an Enumerator if no block is given.
@ -4868,10 +4869,11 @@ io_getc(rb_io_t *fptr, rb_encoding *enc)
* Calls the given block with each character in the stream; returns +self+. * Calls the given block with each character in the stream; returns +self+.
* See {Character IO}[rdoc-ref:IO@Character+IO]. * See {Character IO}[rdoc-ref:IO@Character+IO].
* *
* f = File.new('t.rus') * File.read('t.ja') # => "こんにちは"
* f = File.new('t.ja')
* a = [] * a = []
* f.each_char {|c| a << c.ord } * f.each_char {|c| a << c.ord }
* a # => [1090, 1077, 1089, 1090] * a # => [12371, 12435, 12395, 12385, 12399]
* f.close * f.close
* *
* Returns an Enumerator if no block is given. * Returns an Enumerator if no block is given.
@ -4906,10 +4908,11 @@ rb_io_each_char(VALUE io)
* *
* Calls the given block with each codepoint in the stream; returns +self+: * Calls the given block with each codepoint in the stream; returns +self+:
* *
* f = File.new('t.rus') * File.read('t.ja') # => "こんにちは"
* f = File.new('t.ja')
* a = [] * a = []
* f.each_codepoint {|c| a << c } * f.each_codepoint {|c| a << c }
* a # => [1090, 1077, 1089, 1090] * a # => [12371, 12435, 12395, 12385, 12399]
* f.close * f.close
* *
* Returns an Enumerator if no block is given. * Returns an Enumerator if no block is given.
@ -5023,8 +5026,9 @@ rb_io_each_codepoint(VALUE io)
* f = File.open('t.txt') * f = File.open('t.txt')
* f.getc # => "F" * f.getc # => "F"
* f.close * f.close
* f = File.open('t.rus') * File.read('t.ja') # => "こんにちは"
* f.getc.ord # => 1090 * f = File.open('t.ja')
* f.getc.ord # => 12371
* f.close * f.close
* *
* Related: IO#readchar (may raise EOFError). * Related: IO#readchar (may raise EOFError).
@ -5056,8 +5060,9 @@ rb_io_getc(VALUE io)
* f = File.open('t.txt') * f = File.open('t.txt')
* f.readchar # => "F" * f.readchar # => "F"
* f.close * f.close
* f = File.open('t.rus') * File.read('t.ja') # => "こんにちは"
* f.readchar.ord # => 1090 * f = File.open('t.ja')
* f.readchar.ord # => 12371
* f.close * f.close
* *
* Related: IO#getc (will not raise EOFError). * Related: IO#getc (will not raise EOFError).
@ -5086,8 +5091,9 @@ rb_io_readchar(VALUE io)
* f = File.open('t.txt') * f = File.open('t.txt')
* f.getbyte # => 70 * f.getbyte # => 70
* f.close * f.close
* f = File.open('t.rus') * File.read('t.ja') # => "こんにちは"
* f.getbyte # => 209 * f = File.open('t.ja')
* f.getbyte # => 227
* f.close * f.close
* *
* Related: IO#readbyte (may raise EOFError). * Related: IO#readbyte (may raise EOFError).
@ -5130,8 +5136,9 @@ rb_io_getbyte(VALUE io)
* f = File.open('t.txt') * f = File.open('t.txt')
* f.readbyte # => 70 * f.readbyte # => 70
* f.close * f.close
* f = File.open('t.rus') * File.read('t.ja') # => "こんにちは"
* f.readbyte # => 209 * f = File.open('t.ja')
* f.readbyte # => 227
* f.close * f.close
* *
* Related: IO#getbyte (will not raise EOFError). * Related: IO#getbyte (will not raise EOFError).
@ -9492,7 +9499,8 @@ static VALUE io_initialize(VALUE io, VALUE fnum, VALUE vmode, VALUE opt);
* The new \IO object does not inherit encoding * The new \IO object does not inherit encoding
* (because the integer file descriptor does not have an encoding): * (because the integer file descriptor does not have an encoding):
* *
* fd = IO.sysopen('t.rus', 'rb') * File.read('t.ja') # => "こんにちは"
* fd = IO.sysopen('t.ja', 'rb')
* io = IO.new(fd) * io = IO.new(fd)
* io.external_encoding # => #<Encoding:UTF-8> # Not ASCII-8BIT. * io.external_encoding # => #<Encoding:UTF-8> # Not ASCII-8BIT.
* *
@ -15304,11 +15312,13 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
* File.open('t.txt') {|f| f.gets(11) } # => "First line\n" * File.open('t.txt') {|f| f.gets(11) } # => "First line\n"
* File.open('t.txt') {|f| f.gets(12) } # => "First line\n" * File.open('t.txt') {|f| f.gets(12) } # => "First line\n"
* *
* # Text with 2-byte characters, which will not be split. * # Text with 3-byte characters, which will not be split.
* File.open('t.rus') {|f| f.gets(1).size } # => 1 * File.read('t.ja') # => "こんにちは"
* File.open('t.rus') {|f| f.gets(2).size } # => 1 * File.open('t.ja') {|f| f.gets(1).size } # => 1
* File.open('t.rus') {|f| f.gets(3).size } # => 2 * File.open('t.ja') {|f| f.gets(2).size } # => 1
* File.open('t.rus') {|f| f.gets(4).size } # => 2 * File.open('t.ja') {|f| f.gets(3).size } # => 1
* File.open('t.ja') {|f| f.gets(4).size } # => 2
* File.open('t.ja') {|f| f.gets(5).size } # => 2
* *
* ===== Line Separator and Line Limit * ===== Line Separator and Line Limit
* *