mirror of
https://github.com/ruby/ruby.git
synced 2026-01-26 20:19:19 +00:00
Reapply "[ruby/prism] Add Ripper :on_sp events for Prism.lex_compat and Prism::Translation::Ripper"
This reverts commit 58f1127b51cf4fbb1f334f8701a041f40701dca2.
This commit is contained in:
parent
20113a228d
commit
2842e61c92
Notes:
git
2026-01-20 13:19:02 +00:00
@ -61,8 +61,7 @@ module Prism
|
||||
# Prism::lex_compat(source, **options) -> LexCompat::Result
|
||||
#
|
||||
# Returns a parse result whose value is an array of tokens that closely
|
||||
# resembles the return value of Ripper::lex. The main difference is that the
|
||||
# `:on_sp` token is not emitted.
|
||||
# resembles the return value of Ripper::lex.
|
||||
#
|
||||
# For supported options, see Prism::parse.
|
||||
def self.lex_compat(source, **options)
|
||||
@ -72,9 +71,8 @@ module Prism
|
||||
# :call-seq:
|
||||
# Prism::lex_ripper(source) -> Array
|
||||
#
|
||||
# This lexes with the Ripper lex. It drops any space events but otherwise
|
||||
# returns the same tokens. Raises SyntaxError if the syntax in source is
|
||||
# invalid.
|
||||
# This wraps the result of Ripper.lex. It produces almost exactly the
|
||||
# same tokens. Raises SyntaxError if the syntax in source is invalid.
|
||||
def self.lex_ripper(source)
|
||||
LexRipper.new(source).result # steep:ignore
|
||||
end
|
||||
|
||||
@ -226,7 +226,7 @@ module Prism
|
||||
end
|
||||
|
||||
# Tokens where state should be ignored
|
||||
# used for :on_comment, :on_heredoc_end, :on_embexpr_end
|
||||
# used for :on_sp, :on_comment, :on_heredoc_end, :on_embexpr_end
|
||||
class IgnoreStateToken < Token
|
||||
def ==(other) # :nodoc:
|
||||
self[0...-1] == other[0...-1]
|
||||
@ -611,10 +611,10 @@ module Prism
|
||||
BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
|
||||
private_constant :BOM_FLUSHED
|
||||
|
||||
attr_reader :source, :options
|
||||
attr_reader :options
|
||||
|
||||
def initialize(source, **options)
|
||||
@source = source
|
||||
def initialize(code, **options)
|
||||
@code = code
|
||||
@options = options
|
||||
end
|
||||
|
||||
@ -624,12 +624,14 @@ module Prism
|
||||
state = :default
|
||||
heredoc_stack = [[]] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]]
|
||||
|
||||
result = Prism.lex(source, **options)
|
||||
result = Prism.lex(@code, **options)
|
||||
source = result.source
|
||||
result_value = result.value
|
||||
previous_state = nil #: State?
|
||||
last_heredoc_end = nil #: Integer?
|
||||
eof_token = nil
|
||||
|
||||
bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
|
||||
bom = source.slice(0, 3) == "\xEF\xBB\xBF"
|
||||
|
||||
result_value.each_with_index do |(token, lex_state), index|
|
||||
lineno = token.location.start_line
|
||||
@ -741,6 +743,7 @@ module Prism
|
||||
|
||||
Token.new([[lineno, column], event, value, lex_state])
|
||||
when :on_eof
|
||||
eof_token = token
|
||||
previous_token = result_value[index - 1][0]
|
||||
|
||||
# If we're at the end of the file and the previous token was a
|
||||
@ -763,7 +766,7 @@ module Prism
|
||||
end_offset += 3
|
||||
end
|
||||
|
||||
tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
|
||||
tokens << Token.new([[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state])
|
||||
end
|
||||
end
|
||||
|
||||
@ -857,7 +860,89 @@ module Prism
|
||||
# We sort by location to compare against Ripper's output
|
||||
tokens.sort_by!(&:location)
|
||||
|
||||
Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.for(source))
|
||||
# Add :on_sp tokens
|
||||
tokens = add_on_sp_tokens(tokens, source, result.data_loc, bom, eof_token)
|
||||
|
||||
Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, source)
|
||||
end
|
||||
|
||||
# Returns a new array containing every token from +tokens+ (in the same
# order) with :on_sp tokens inserted wherever the source has bytes that are
# not covered by a token.
#
# tokens::    tokens responding to +location+ (a [line, column] pair),
#             +value+ and +state+, already ordered by position.
# source::    object used to map between byte offsets and line/column
#             (+line_to_byte_offset+, +line+, +column+) and to read bytes
#             (+slice+).
# data_loc::  when truthy, no trailing :on_sp token is emitted.
# bom::       true when the source starts with a 3-byte UTF-8 BOM.
# eof_token:: the EOF token; its +location.end_offset+ bounds the trailing
#             :on_sp token.
#
# Every inserted :on_sp token carries the state of the token that precedes
# it (EXPR_BEG when it precedes the first token).
def add_on_sp_tokens(tokens, source, data_loc, bom, eof_token)
  new_tokens = []

  prev_token_state = Translation::Ripper::Lexer::State.cached(Translation::Ripper::EXPR_BEG)
  # The BOM itself is never reported as whitespace, so start right after it.
  prev_token_end = bom ? 3 : 0

  tokens.each do |token|
    line, column = token.location
    start_offset = source.line_to_byte_offset(line) + column
    # Ripper reports columns on line 1 without counting the BOM, so we adjust to get the real offset
    start_offset += 3 if line == 1 && bom

    if start_offset > prev_token_end
      sp_value = source.slice(prev_token_end, start_offset - prev_token_end)
      sp_line = source.line(prev_token_end)
      sp_column = source.column(prev_token_end)
      # Ripper reports columns on line 1 without counting the BOM
      sp_column -= 3 if sp_line == 1 && bom

      # String#index is used instead of String#byteindex because the latter
      # only exists on Ruby >= 3.2. It also keeps the index in the same unit
      # (characters) as the slicing below.
      continuation_index = sp_value.index("\\")

      # ripper emits up to three :on_sp tokens when line continuations are used
      if continuation_index
        # The continuation is the backslash plus its line ending ("\n" or "\r\n").
        next_whitespace_index = continuation_index + 1
        next_whitespace_index += 1 if sp_value[next_whitespace_index] == "\r"
        next_whitespace_index += 1

        first_whitespace = sp_value[0...continuation_index]
        continuation = sp_value[continuation_index...next_whitespace_index]
        # Slicing past the end of the string yields nil; treat that as "no
        # whitespace after the continuation".
        second_whitespace = sp_value[next_whitespace_index..] || ""

        unless first_whitespace.empty?
          new_tokens << IgnoreStateToken.new([
            [sp_line, sp_column],
            :on_sp,
            first_whitespace,
            prev_token_state
          ])
        end

        # Columns are byte based, so advance by the byte size of the leading
        # whitespace rather than by a character index.
        new_tokens << IgnoreStateToken.new([
          [sp_line, sp_column + first_whitespace.bytesize],
          :on_sp,
          continuation,
          prev_token_state
        ])

        unless second_whitespace.empty?
          new_tokens << IgnoreStateToken.new([
            [sp_line + 1, 0],
            :on_sp,
            second_whitespace,
            prev_token_state
          ])
        end
      else
        new_tokens << IgnoreStateToken.new([
          [sp_line, sp_column],
          :on_sp,
          sp_value,
          prev_token_state
        ])
      end
    end

    new_tokens << token
    prev_token_state = token.state
    prev_token_end = start_offset + token.value.bytesize
  end

  unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl
    end_offset = eof_token.location.end_offset
    if prev_token_end < end_offset
      trailing_line = source.line(prev_token_end)
      trailing_column = source.column(prev_token_end)
      # Same BOM adjustment as for whitespace between tokens: Ripper reports
      # columns on line 1 without counting the BOM.
      trailing_column -= 3 if trailing_line == 1 && bom

      new_tokens << IgnoreStateToken.new([
        [trailing_line, trailing_column],
        :on_sp,
        source.slice(prev_token_end, end_offset - prev_token_end),
        prev_token_state
      ])
    end
  end

  new_tokens
end
|
||||
end
|
||||
|
||||
|
||||
@ -19,8 +19,6 @@ module Prism
|
||||
|
||||
lex(source).each do |token|
|
||||
case token[1]
|
||||
when :on_sp
|
||||
# skip
|
||||
when :on_tstring_content
|
||||
if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
|
||||
previous[2] << token[2]
|
||||
|
||||
1
test/prism/fixtures/bom_leading_space.txt
Normal file
1
test/prism/fixtures/bom_leading_space.txt
Normal file
@ -0,0 +1 @@
|
||||
p (42)
|
||||
1
test/prism/fixtures/bom_spaces.txt
Normal file
1
test/prism/fixtures/bom_spaces.txt
Normal file
@ -0,0 +1 @@
|
||||
p ( 42 )
|
||||
@ -39,6 +39,8 @@ module Prism
|
||||
|
||||
# Skip these tests that we haven't implemented yet.
|
||||
omitted_sexp_raw = [
|
||||
"bom_leading_space.txt",
|
||||
"bom_spaces.txt",
|
||||
"dos_endings.txt",
|
||||
"heredocs_with_fake_newlines.txt",
|
||||
"heredocs_with_ignored_newlines.txt",
|
||||
@ -92,7 +94,7 @@ module Prism
|
||||
assert_equal(expected, lexer.parse[0].to_a)
|
||||
assert_equal(lexer.parse[0].to_a, lexer.scan[0].to_a)
|
||||
|
||||
assert_equal(%i[on_int on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
|
||||
assert_equal(%i[on_int on_sp on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
|
||||
assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) }
|
||||
end
|
||||
|
||||
@ -121,15 +123,17 @@ module Prism
|
||||
def assert_ripper_lex(source)
|
||||
prism = Translation::Ripper.lex(source)
|
||||
ripper = Ripper.lex(source)
|
||||
ripper.reject! { |elem| elem[1] == :on_sp } # Prism doesn't emit on_sp
|
||||
ripper.sort_by! { |elem| elem[0] } # Prism emits tokens by their order in the code, not in parse order
|
||||
|
||||
# Prism emits tokens by their order in the code, not in parse order
|
||||
ripper.sort_by! { |elem| elem[0] }
|
||||
|
||||
[prism.size, ripper.size].max.times do |i|
|
||||
expected = ripper[i]
|
||||
actual = prism[i]
|
||||
|
||||
# Since tokens related to heredocs are not emitted in the same order,
|
||||
# the state also doesn't line up.
|
||||
if expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
|
||||
if expected && actual && expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
|
||||
expected[3] = actual[3] = nil
|
||||
end
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user