Speed up the lexer for Ruby 3.4+ (#1832)

* Speed up lexing

* Bump minimum supported Ruby version to 3.0 (from 2.7)

* Normalize test for ruby-head compat

* Fix bug when parsing negative numbers
This commit is contained in:
Ian Ker-Seymer 2024-10-23 14:15:33 -04:00 committed by GitHub
parent b233b3d081
commit b3553787c8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 246 additions and 82 deletions

View File

@ -11,10 +11,10 @@ jobs:
strategy:
matrix:
entry:
- { ruby: 2.7, allowed-failure: false } # minimum supported
- { ruby: 3.0, allowed-failure: false } # minimum supported
- { ruby: 3.2, allowed-failure: false }
- { ruby: 3.3, allowed-failure: false } # latest
- { ruby: ruby-head, allowed-failure: true }
- { ruby: ruby-head, allowed-failure: false }
name: Test Ruby ${{ matrix.entry.ruby }}
steps:
- uses: actions/checkout@v3

1
.gitignore vendored
View File

@ -6,3 +6,4 @@ pkg
.rvmrc
.bundle
.byebug_history
Gemfile.lock

View File

@ -1 +1 @@
3.3.0
3.3.4

View File

@ -1,75 +0,0 @@
GIT
remote: https://github.com/Shopify/liquid-c.git
revision: 5a786af7284df55e013ea20551c4b688d02e8326
ref: main
specs:
liquid-c (4.2.0)
liquid (>= 5.0.1)
PATH
remote: .
specs:
liquid (5.6.0.alpha)
GEM
remote: https://rubygems.org/
specs:
ast (2.4.2)
base64 (0.2.0)
benchmark-ips (2.13.0)
json (2.7.2)
language_server-protocol (3.17.0.3)
memory_profiler (1.0.1)
minitest (5.22.3)
parallel (1.24.0)
parser (3.3.0.5)
ast (~> 2.4.1)
racc
racc (1.7.3)
rainbow (3.1.1)
rake (13.2.1)
regexp_parser (2.9.0)
rexml (3.2.6)
rubocop (1.61.0)
json (~> 2.3)
language_server-protocol (>= 3.17.0)
parallel (~> 1.10)
parser (>= 3.3.0.2)
rainbow (>= 2.2.2, < 4.0)
regexp_parser (>= 1.8, < 3.0)
rexml (>= 3.2.5, < 4.0)
rubocop-ast (>= 1.30.0, < 2.0)
ruby-progressbar (~> 1.7)
unicode-display_width (>= 2.4.0, < 3.0)
rubocop-ast (1.31.2)
parser (>= 3.3.0.4)
rubocop-performance (1.19.1)
rubocop (>= 1.7.0, < 2.0)
rubocop-ast (>= 0.4.0)
rubocop-shopify (2.12.0)
rubocop (~> 1.44)
ruby-progressbar (1.13.0)
stackprof (0.2.26)
terminal-table (3.0.2)
unicode-display_width (>= 1.1.1, < 3)
unicode-display_width (2.5.0)
PLATFORMS
ruby
DEPENDENCIES
base64
benchmark-ips
liquid!
liquid-c!
memory_profiler
minitest
rake (~> 13.0)
rubocop (~> 1.61.0)
rubocop-performance
rubocop-shopify (~> 2.12.0)
stackprof
terminal-table
BUNDLED WITH
2.5.7

View File

@ -81,6 +81,14 @@ namespace :benchmark do
task :strict do
ruby "./performance/benchmark.rb strict"
end
desc "Run unit benchmarks"
task :unit do
  # Collect every script in performance/unit whose name ends in _benchmark.rb
  # and hand each one, in turn, to the `ruby` helper.
  benchmark_files = Dir["./performance/unit/*_benchmark.rb"]
  benchmark_files.each do |benchmark_file|
    puts "🧪 Running #{benchmark_file}"
    ruby benchmark_file
  end
end
end
namespace :profile do

View File

@ -1,8 +1,9 @@
# frozen_string_literal: true
require "strscan"
module Liquid
class Lexer
class Lexer1
SPECIALS = {
'|' => :pipe,
'.' => :dot,
@ -58,4 +59,157 @@ module Liquid
@output << [:end_of_string]
end
end
# Byte-dispatching lexer for Liquid expressions.
#
# Rather than trying a list of regular expressions at every position, the
# tokenizer peeks at the next byte and indexes into precomputed tables to pick
# the token (or the single regex) that applies. It requires
# StringScanner#peek_byte and StringScanner#scan_byte.
#
# Fixed punctuation and comparison tokens are frozen arrays shared between
# calls, so callers must not mutate the token arrays they receive.
class Lexer2
  CLOSE_ROUND = [:close_round, ")"].freeze
  CLOSE_SQUARE = [:close_square, "]"].freeze
  COLON = [:colon, ":"].freeze
  COMMA = [:comma, ","].freeze
  COMPARISON_NOT_EQUAL = [:comparison, "!="].freeze
  # Misspelled name kept so existing references continue to resolve.
  COMPARISION_NOT_EQUAL = COMPARISON_NOT_EQUAL
  COMPARISON_CONTAINS = [:comparison, "contains"].freeze
  COMPARISON_EQUAL = [:comparison, "=="].freeze
  COMPARISON_GREATER_THAN = [:comparison, ">"].freeze
  COMPARISON_GREATER_THAN_OR_EQUAL = [:comparison, ">="].freeze
  COMPARISON_LESS_THAN = [:comparison, "<"].freeze
  COMPARISON_LESS_THAN_OR_EQUAL = [:comparison, "<="].freeze
  COMPARISON_NOT_EQUAL_ALT = [:comparison, "<>"].freeze
  CONTAINS = /contains(?=\s)/
  DASH = [:dash, "-"].freeze
  DOT = [:dot, "."].freeze
  DOTDOT = [:dotdot, ".."].freeze
  DOT_ORD = ".".ord
  DOUBLE_STRING_LITERAL = /"[^\"]*"/
  EOS = [:end_of_string].freeze
  IDENTIFIER = /[a-zA-Z_][\w-]*\??/
  NUMBER_LITERAL = /-?\d+(\.\d+)?/
  OPEN_ROUND = [:open_round, "("].freeze
  OPEN_SQUARE = [:open_square, "["].freeze
  PIPE = [:pipe, "|"].freeze
  QUESTION = [:question, "?"].freeze
  # No longer consulted by the tables below; kept for backward compatibility.
  RUBY_WHITESPACE = [" ", "\t", "\r", "\n", "\f"].freeze
  SINGLE_STRING_LITERAL = /'[^\']*'/
  WHITESPACE = /\s/
  WHITESPACE_OR_NOTHING = /\s*/

  # First byte => (second byte => two-character comparison token).
  COMPARISON_JUMP_TABLE = [].tap do |table|
    table["=".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISON_EQUAL
      sub_table.freeze
    end
    table["!".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISON_NOT_EQUAL
      sub_table.freeze
    end
    table["<".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISON_LESS_THAN_OR_EQUAL
      sub_table[">".ord] = COMPARISON_NOT_EQUAL_ALT
      sub_table.freeze
    end
    table[">".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISON_GREATER_THAN_OR_EQUAL
      sub_table.freeze
    end
    table.freeze
  end

  # Comparison bytes that are also a complete token on their own. "=" and "!"
  # are deliberately absent: alone they are syntax errors.
  SINGLE_COMPARISON_TABLE = [].tap do |table|
    table["<".ord] = COMPARISON_LESS_THAN
    table[">".ord] = COMPARISON_GREATER_THAN
    table.freeze
  end

  # First byte => [token type, regex to scan with].
  NEXT_MATCHER_JUMP_TABLE = [].tap do |table|
    "a".upto("z") do |c|
      table[c.ord] = [:id, IDENTIFIER].freeze
      table[c.upcase.ord] = [:id, IDENTIFIER].freeze
    end
    table["_".ord] = [:id, IDENTIFIER].freeze
    "0".upto("9") do |c|
      table[c.ord] = [:number, NUMBER_LITERAL].freeze
    end
    # tokenize consults SPECIAL_TABLE first, so "-" is normally handled there.
    table["-".ord] = [:number, NUMBER_LITERAL].freeze
    table["'".ord] = [:string, SINGLE_STRING_LITERAL].freeze
    table["\"".ord] = [:string, DOUBLE_STRING_LITERAL].freeze
    table.freeze
  end

  # Byte => single-character punctuation token.
  SPECIAL_TABLE = [].tap do |table|
    table["|".ord] = PIPE
    table[".".ord] = DOT
    table[":".ord] = COLON
    table[",".ord] = COMMA
    table["[".ord] = OPEN_SQUARE
    table["]".ord] = CLOSE_SQUARE
    table["(".ord] = OPEN_ROUND
    table[")".ord] = CLOSE_ROUND
    table["?".ord] = QUESTION
    table["-".ord] = DASH
    table.freeze
  end

  # Byte => true for ASCII digits.
  NUMBER_TABLE = [].tap do |table|
    "0".upto("9") do |c|
      table[c.ord] = true
    end
    table.freeze
  end

  # @param input [String] the expression source to tokenize
  def initialize(input)
    @ss = StringScanner.new(input)
  end

  # Tokenizes the input given to the constructor.
  #
  # @return [Array<Array>] [type, value] pairs, terminated by [:end_of_string]
  # @raise [SyntaxError] when a byte cannot start any known token
  # rubocop:disable Metrics/BlockNesting
  def tokenize
    @output = []

    until @ss.eos?
      @ss.skip(WHITESPACE_OR_NOTHING)
      break if @ss.eos?

      peeked = @ss.peek_byte

      if (special = SPECIAL_TABLE[peeked])
        @ss.scan_byte
        # nil at end of input; must not be used as an Array index.
        following = @ss.peek_byte

        if special == DOT && following == DOT_ORD
          # Special case for ".."
          @ss.scan_byte
          @output << DOTDOT
        elsif special == DASH && following && NUMBER_TABLE[following]
          # Special case for negative numbers: step back onto the "-" so the
          # number regex consumes the sign together with the digits.
          @ss.pos -= 1
          @output << [:number, @ss.scan(NUMBER_LITERAL)]
        else
          @output << special
        end
      elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
        @ss.scan_byte
        following = @ss.peek_byte

        if following && (found = sub_table[following])
          @output << found
          @ss.scan_byte
        elsif (single = SINGLE_COMPARISON_TABLE[peeked])
          # "<" or ">" followed by anything else (or by end of input) is a
          # one-character comparison; the next byte is lexed on its own.
          @output << single
        else
          raise SyntaxError, "Unexpected character #{peeked.chr}"
        end
      else
        type, pattern = NEXT_MATCHER_JUMP_TABLE[peeked]

        if type && (t = @ss.scan(pattern))
          # "contains" is only an operator when whitespace follows it, as the
          # CONTAINS pattern requires; otherwise it is a plain identifier.
          @output << if type == :id && t == "contains" && @ss.match?(WHITESPACE)
            COMPARISON_CONTAINS
          else
            [type, t]
          end
        else
          raise SyntaxError, "Unexpected character #{peeked.chr}"
        end
      end
    end

    @output << EOS
  end
  # rubocop:enable Metrics/BlockNesting
end
Lexer = StringScanner.instance_methods.include?(:scan_byte) ? Lexer2 : Lexer1
end

View File

@ -53,7 +53,7 @@ module Liquid
str = consume
str << variable_lookups
when :open_square
str = consume
str = consume.dup
str << expression
str << consume(:close_square)
str << variable_lookups

View File

@ -17,7 +17,7 @@ Gem::Specification.new do |s|
s.license = "MIT"
# s.description = "A secure, non-evaling end user template engine with aesthetic markup."
s.required_ruby_version = ">= 2.7.0"
s.required_ruby_version = ">= 3.0.0"
s.required_rubygems_version = ">= 1.3.7"
s.metadata['allowed_push_host'] = 'https://rubygems.org'
@ -28,6 +28,9 @@ Gem::Specification.new do |s|
s.require_path = "lib"
s.add_dependency("strscan")
s.add_dependency("bigdecimal")
s.add_development_dependency('rake', '~> 13.0')
s.add_development_dependency('minitest')
end

View File

@ -3,6 +3,7 @@
require 'benchmark/ips'
require_relative 'theme_runner'
RubyVM::YJIT.enable if defined?(RubyVM::YJIT)
Liquid::Template.error_mode = ARGV.first.to_sym if ARGV.first
profiler = ThemeRunner.new

View File

@ -0,0 +1,62 @@
# frozen_string_literal: true

# Unit benchmark for Liquid lexing: first checks that Lexer1 and Lexer2 produce
# identical tokens for every sample expression, then compares their speed.

require "benchmark/ips"
require 'liquid'

# Enable YJIT only where the runtime offers it, so the script still runs on
# Rubies built without YJIT or predating RubyVM::YJIT.enable.
RubyVM::YJIT.enable if defined?(RubyVM::YJIT) && RubyVM::YJIT.respond_to?(:enable)

EXPRESSIONS = [
  "foo[1..2].baz",
  "12.0",
  "foo.bar.based",
  "21 - 62",
  "foo.bar.baz",
  "foo > 12",
  "foo < 12",
  "foo <= 12",
  "foo >= 12",
  "foo <> 12",
  "foo == 12",
  "foo != 12",
  "foo contains 12",
  "foo contains 'bar'",
  "foo != 'bar'",
  "'foo' contains 'bar'",
  '234089',
  "foo | default: -1",
].freeze

# Sanity check: refuse to benchmark if the two lexers disagree on any sample.
EXPRESSIONS.each do |expr|
  lexer_1_result = Liquid::Lexer1.new(expr).tokenize
  lexer_2_result = Liquid::Lexer2.new(expr).tokenize

  next if lexer_1_result == lexer_2_result

  warn "Lexer1 and Lexer2 results are different for expression: #{expr}"
  warn "expected: #{lexer_1_result}"
  warn "got: #{lexer_2_result}"
  abort
end

Benchmark.ips do |x|
  x.config(time: 10, warmup: 5)

  x.report("Liquid::Lexer1#tokenize") do
    EXPRESSIONS.each do |expr|
      l = Liquid::Lexer1.new(expr)
      l.tokenize
    end
  end

  x.report("Liquid::Lexer2#tokenize") do
    EXPRESSIONS.each do |expr|
      l = Liquid::Lexer2.new(expr)
      l.tokenize
    end
  end

  x.compare!
end

View File

@ -32,7 +32,7 @@ class TestDrop < Liquid::Drop
attr_reader :value
def registers
{ @value => @context.registers[@value] }
"{#{@value.inspect}=>#{@context.registers[@value].inspect}}"
end
end

View File

@ -50,4 +50,14 @@ class LexerUnitTest < Minitest::Test
Lexer.new("%").tokenize
end
end
# A dash directly followed by a digit lexes as a single negative number token.
def test_negative_numbers
  expected = [
    [:id, 'foo'],
    [:pipe, '|'],
    [:id, 'default'],
    [:colon, ":"],
    [:number, '-1'],
    [:end_of_string],
  ]

  assert_equal(expected, Lexer.new("foo | default: -1").tokenize)
end
# A ">" comparison followed by a multi-digit number keeps the number whole.
def test_greater_than_two_digits
  lexed = Lexer.new("foo > 12").tokenize
  expected = [[:id, 'foo'], [:comparison, '>'], [:number, '12'], [:end_of_string]]

  assert_equal(expected, lexed)
end
end