Revert "[ruby/prism] Add Ripper :on_sp events for Prism.lex_compat and Prism::Translation::Ripper"

This reverts commit 35a7b5159f39de2cac848c072674e5350cc41aa4, which broke syntax_suggest. See the CI job: https://github.com/ruby/ruby/actions/runs/21167011751/job/60874111912
2026-01-27 04:24:23 +00:00 · 2026-01-20 19:10:16 +09:00 · 2026-01-20 19:10:16 +09:00 · 58f1127b51
commit 58f1127b51
parent 35a7b5159f
6 changed files with 19 additions and 106 deletions
--- a/lib/prism.rb
+++ b/lib/prism.rb
@ -61,7 +61,8 @@ module Prism
  #   Prism::lex_compat(source, **options) -> LexCompat::Result
  #
  # Returns a parse result whose value is an array of tokens that closely
-  # resembles the return value of Ripper::lex.
+  # resembles the return value of Ripper::lex. The main difference is that the
+  # `:on_sp` token is not emitted.
  #
  # For supported options, see Prism::parse.
  def self.lex_compat(source, **options)
@ -71,8 +72,9 @@ module Prism
  # :call-seq:
  #   Prism::lex_ripper(source) -> Array
  #
-  # This wraps the result of Ripper.lex. It produces almost exactly the
-  # same tokens. Raises SyntaxError if the syntax in source is invalid.
+  # This lexes the source with Ripper.lex. It drops any :on_sp (space) events
+  # but otherwise returns the same tokens. Raises SyntaxError if the syntax in
+  # source is invalid.
  def self.lex_ripper(source)
    LexRipper.new(source).result # steep:ignore
  end
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@ -226,7 +226,7 @@ module Prism
    end

    # Tokens where state should be ignored
-    # used for :on_sp, :on_comment, :on_heredoc_end, :on_embexpr_end
+    # used for :on_comment, :on_heredoc_end, :on_embexpr_end
    class IgnoreStateToken < Token
      def ==(other) # :nodoc:
        self[0...-1] == other[0...-1]
@ -611,10 +611,10 @@ module Prism
    BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
    private_constant :BOM_FLUSHED

-    attr_reader :options
+    attr_reader :source, :options

-    def initialize(code, **options)
-      @code = code
+    def initialize(source, **options)
+      @source = source
      @options = options
    end

@ -624,14 +624,12 @@ module Prism
      state = :default
      heredoc_stack = [[]] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]]

-      result = Prism.lex(@code, **options)
-      source = result.source
+      result = Prism.lex(source, **options)
      result_value = result.value
      previous_state = nil #: State?
      last_heredoc_end = nil #: Integer?
-      eof_token = nil

-      bom = source.slice(0, 3) == "\xEF\xBB\xBF"
+      bom = source.byteslice(0..2) == "\xEF\xBB\xBF"

      result_value.each_with_index do |(token, lex_state), index|
        lineno = token.location.start_line
@ -743,7 +741,6 @@ module Prism

            Token.new([[lineno, column], event, value, lex_state])
          when :on_eof
-            eof_token = token
            previous_token = result_value[index - 1][0]

            # If we're at the end of the file and the previous token was a
@ -766,7 +763,7 @@ module Prism
                  end_offset += 3
                end

-                tokens << Token.new([[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state])
+                tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
              end
            end

@ -860,89 +857,7 @@ module Prism
      # We sort by location to compare against Ripper's output
      tokens.sort_by!(&:location)

-      # Add :on_sp tokens
-      tokens = add_on_sp_tokens(tokens, source, result.data_loc, bom, eof_token)
-
-      Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, source)
-    end
-
-    def add_on_sp_tokens(tokens, source, data_loc, bom, eof_token)
-      new_tokens = []
-
-      prev_token_state = Translation::Ripper::Lexer::State.cached(Translation::Ripper::EXPR_BEG)
-      prev_token_end = bom ? 3 : 0
-
-      tokens.each do |token|
-        line, column = token.location
-        start_offset = source.line_to_byte_offset(line) + column
-        # Ripper reports columns on line 1 without counting the BOM, so we adjust to get the real offset
-        start_offset += 3 if line == 1 && bom
-
-        if start_offset > prev_token_end
-          sp_value = source.slice(prev_token_end, start_offset - prev_token_end)
-          sp_line = source.line(prev_token_end)
-          sp_column = source.column(prev_token_end)
-          # Ripper reports columns on line 1 without counting the BOM
-          sp_column -= 3 if sp_line == 1 && bom
-          continuation_index = sp_value.byteindex("\\")
-
-          # ripper emits up to three :on_sp tokens when line continuations are used
-          if continuation_index
-            next_whitespace_index = continuation_index + 1
-            next_whitespace_index += 1 if sp_value.byteslice(next_whitespace_index) == "\r"
-            next_whitespace_index += 1
-            first_whitespace = sp_value[0...continuation_index]
-            continuation = sp_value[continuation_index...next_whitespace_index]
-            second_whitespace = sp_value[next_whitespace_index..]
-
-            new_tokens << IgnoreStateToken.new([
-              [sp_line, sp_column],
-              :on_sp,
-              first_whitespace,
-              prev_token_state
-            ]) unless first_whitespace.empty?
-
-            new_tokens << IgnoreStateToken.new([
-              [sp_line, sp_column + continuation_index],
-              :on_sp,
-              continuation,
-              prev_token_state
-            ])
-
-            new_tokens << IgnoreStateToken.new([
-              [sp_line + 1, 0],
-              :on_sp,
-              second_whitespace,
-              prev_token_state
-            ]) unless second_whitespace.empty?
-          else
-            new_tokens << IgnoreStateToken.new([
-              [sp_line, sp_column],
-              :on_sp,
-              sp_value,
-              prev_token_state
-            ])
-          end
-        end
-
-        new_tokens << token
-        prev_token_state = token.state
-        prev_token_end = start_offset + token.value.bytesize
-      end
-
-      unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl
-        end_offset = eof_token.location.end_offset
-        if prev_token_end < end_offset
-          new_tokens << IgnoreStateToken.new([
-            [source.line(prev_token_end), source.column(prev_token_end)],
-            :on_sp,
-            source.slice(prev_token_end, end_offset - prev_token_end),
-            prev_token_state
-          ])
-        end
-      end
-
-      new_tokens
+      Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.for(source))
    end
  end

--- a/lib/prism/lex_ripper.rb
+++ b/lib/prism/lex_ripper.rb
@ -19,6 +19,8 @@ module Prism

      lex(source).each do |token|
        case token[1]
+        when :on_sp
+          # skip
        when :on_tstring_content
          if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
            previous[2] << token[2]
--- a/test/prism/fixtures/bom_leading_space.txt
+++ b/test/prism/fixtures/bom_leading_space.txt
@ -1 +0,0 @@
- p (42)
--- a/test/prism/fixtures/bom_spaces.txt
+++ b/test/prism/fixtures/bom_spaces.txt
@ -1 +0,0 @@
-p ( 42 )
--- a/test/prism/ruby/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@ -39,8 +39,6 @@ module Prism

    # Skip these tests that we haven't implemented yet.
    omitted_sexp_raw = [
-      "bom_leading_space.txt",
-      "bom_spaces.txt",
      "dos_endings.txt",
      "heredocs_with_fake_newlines.txt",
      "heredocs_with_ignored_newlines.txt",
@ -94,7 +92,7 @@ module Prism
      assert_equal(expected, lexer.parse[0].to_a)
      assert_equal(lexer.parse[0].to_a, lexer.scan[0].to_a)

-      assert_equal(%i[on_int on_sp on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
+      assert_equal(%i[on_int on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
      assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) }
    end

@ -123,17 +121,15 @@ module Prism
    def assert_ripper_lex(source)
      prism = Translation::Ripper.lex(source)
      ripper = Ripper.lex(source)
-
-      # Prism emits tokens by their order in the code, not in parse order
-      ripper.sort_by! { |elem| elem[0] }
+      ripper.reject! { |elem| elem[1] == :on_sp } # Prism doesn't emit on_sp
+      ripper.sort_by! { |elem| elem[0] } # Prism emits tokens by their order in the code, not in parse order

      [prism.size, ripper.size].max.times do |i|
        expected = ripper[i]
        actual = prism[i]
-
        # Since tokens related to heredocs are not emitted in the same order,
        # the state also doesn't line up.
-        if expected && actual && expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
+        if expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
          expected[3] = actual[3] = nil
        end