mirror of
https://github.com/Shopify/liquid.git
synced 2026-01-26 12:14:58 +00:00
Speed up the lexer for Ruby 3.4+ (#1832)
* Speed up lexing * Bump msrv to 3.0 (from 2.7) * Normalize test for ruby-head compat * Fix bug when parsing negative numbers
This commit is contained in:
parent
b233b3d081
commit
b3553787c8
4
.github/workflows/liquid.yml
vendored
4
.github/workflows/liquid.yml
vendored
@ -11,10 +11,10 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
entry:
|
||||
- { ruby: 2.7, allowed-failure: false } # minimum supported
|
||||
- { ruby: 3.0, allowed-failure: false } # minimum supported
|
||||
- { ruby: 3.2, allowed-failure: false }
|
||||
- { ruby: 3.3, allowed-failure: false } # latest
|
||||
- { ruby: ruby-head, allowed-failure: true }
|
||||
- { ruby: ruby-head, allowed-failure: false }
|
||||
name: Test Ruby ${{ matrix.entry.ruby }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@ -6,3 +6,4 @@ pkg
|
||||
.rvmrc
|
||||
.bundle
|
||||
.byebug_history
|
||||
Gemfile.lock
|
||||
|
||||
@ -1 +1 @@
|
||||
3.3.0
|
||||
3.3.4
|
||||
|
||||
75
Gemfile.lock
75
Gemfile.lock
@ -1,75 +0,0 @@
|
||||
GIT
|
||||
remote: https://github.com/Shopify/liquid-c.git
|
||||
revision: 5a786af7284df55e013ea20551c4b688d02e8326
|
||||
ref: main
|
||||
specs:
|
||||
liquid-c (4.2.0)
|
||||
liquid (>= 5.0.1)
|
||||
|
||||
PATH
|
||||
remote: .
|
||||
specs:
|
||||
liquid (5.6.0.alpha)
|
||||
|
||||
GEM
|
||||
remote: https://rubygems.org/
|
||||
specs:
|
||||
ast (2.4.2)
|
||||
base64 (0.2.0)
|
||||
benchmark-ips (2.13.0)
|
||||
json (2.7.2)
|
||||
language_server-protocol (3.17.0.3)
|
||||
memory_profiler (1.0.1)
|
||||
minitest (5.22.3)
|
||||
parallel (1.24.0)
|
||||
parser (3.3.0.5)
|
||||
ast (~> 2.4.1)
|
||||
racc
|
||||
racc (1.7.3)
|
||||
rainbow (3.1.1)
|
||||
rake (13.2.1)
|
||||
regexp_parser (2.9.0)
|
||||
rexml (3.2.6)
|
||||
rubocop (1.61.0)
|
||||
json (~> 2.3)
|
||||
language_server-protocol (>= 3.17.0)
|
||||
parallel (~> 1.10)
|
||||
parser (>= 3.3.0.2)
|
||||
rainbow (>= 2.2.2, < 4.0)
|
||||
regexp_parser (>= 1.8, < 3.0)
|
||||
rexml (>= 3.2.5, < 4.0)
|
||||
rubocop-ast (>= 1.30.0, < 2.0)
|
||||
ruby-progressbar (~> 1.7)
|
||||
unicode-display_width (>= 2.4.0, < 3.0)
|
||||
rubocop-ast (1.31.2)
|
||||
parser (>= 3.3.0.4)
|
||||
rubocop-performance (1.19.1)
|
||||
rubocop (>= 1.7.0, < 2.0)
|
||||
rubocop-ast (>= 0.4.0)
|
||||
rubocop-shopify (2.12.0)
|
||||
rubocop (~> 1.44)
|
||||
ruby-progressbar (1.13.0)
|
||||
stackprof (0.2.26)
|
||||
terminal-table (3.0.2)
|
||||
unicode-display_width (>= 1.1.1, < 3)
|
||||
unicode-display_width (2.5.0)
|
||||
|
||||
PLATFORMS
|
||||
ruby
|
||||
|
||||
DEPENDENCIES
|
||||
base64
|
||||
benchmark-ips
|
||||
liquid!
|
||||
liquid-c!
|
||||
memory_profiler
|
||||
minitest
|
||||
rake (~> 13.0)
|
||||
rubocop (~> 1.61.0)
|
||||
rubocop-performance
|
||||
rubocop-shopify (~> 2.12.0)
|
||||
stackprof
|
||||
terminal-table
|
||||
|
||||
BUNDLED WITH
|
||||
2.5.7
|
||||
8
Rakefile
8
Rakefile
@ -81,6 +81,14 @@ namespace :benchmark do
|
||||
task :strict do
|
||||
ruby "./performance/benchmark.rb strict"
|
||||
end
|
||||
|
||||
desc "Run unit benchmarks"
task :unit do
  # Every script matching performance/unit/*_benchmark.rb is announced and
  # then handed to Rake's `ruby` helper, one after another.
  unit_benchmarks = Dir.glob("./performance/unit/*_benchmark.rb")

  unit_benchmarks.each do |file|
    puts "🧪 Running #{file}"
    ruby file
  end
end
|
||||
end
|
||||
|
||||
namespace :profile do
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
require "strscan"
|
||||
|
||||
module Liquid
|
||||
class Lexer
|
||||
class Lexer1
|
||||
SPECIALS = {
|
||||
'|' => :pipe,
|
||||
'.' => :dot,
|
||||
@ -58,4 +59,157 @@ module Liquid
|
||||
@output << [:end_of_string]
|
||||
end
|
||||
end
|
||||
|
||||
# Byte-dispatched tokenizer for Liquid expressions.
#
# Instead of trying a list of regular expressions at every position, the
# lexer peeks at the next byte and uses it as an index into precomputed
# lookup tables to decide how the token must be read. It relies on
# StringScanner#peek_byte and StringScanner#scan_byte.
#
# Tokens are two-element arrays of [type, text]; the token stream always
# ends with [:end_of_string].
class Lexer2
  CLOSE_ROUND = [:close_round, ")"].freeze
  CLOSE_SQUARE = [:close_square, "]"].freeze
  COLON = [:colon, ":"].freeze
  COMMA = [:comma, ","].freeze
  # NOTE: the constant name is misspelled ("COMPARISION"); it is kept as-is so
  # that any code referring to it keeps working.
  COMPARISION_NOT_EQUAL = [:comparison, "!="].freeze
  COMPARISON_CONTAINS = [:comparison, "contains"].freeze
  COMPARISON_EQUAL = [:comparison, "=="].freeze
  COMPARISON_GREATER_THAN = [:comparison, ">"].freeze
  COMPARISON_GREATER_THAN_OR_EQUAL = [:comparison, ">="].freeze
  COMPARISON_LESS_THAN = [:comparison, "<"].freeze
  COMPARISON_LESS_THAN_OR_EQUAL = [:comparison, "<="].freeze
  COMPARISON_NOT_EQUAL_ALT = [:comparison, "<>"].freeze
  # Documents the rule applied in #tokenize: "contains" is an operator only
  # when whitespace follows it.
  CONTAINS = /contains(?=\s)/
  DASH = [:dash, "-"].freeze
  DOT = [:dot, "."].freeze
  DOTDOT = [:dotdot, ".."].freeze
  DOT_ORD = ".".ord
  DOUBLE_STRING_LITERAL = /"[^\"]*"/
  EOS = [:end_of_string].freeze
  IDENTIFIER = /[a-zA-Z_][\w-]*\??/
  NUMBER_LITERAL = /-?\d+(\.\d+)?/
  OPEN_ROUND = [:open_round, "("].freeze
  OPEN_SQUARE = [:open_square, "["].freeze
  PIPE = [:pipe, "|"].freeze
  QUESTION = [:question, "?"].freeze
  # No longer consulted by the jump tables (a bare "<" or ">" is now accepted
  # whatever follows it); retained so existing references stay valid.
  RUBY_WHITESPACE = [" ", "\t", "\r", "\n", "\f"].freeze
  SINGLE_STRING_LITERAL = /'[^\']*'/
  WHITESPACE = /\s/
  WHITESPACE_OR_NOTHING = /\s*/

  # First byte of a comparison operator => table mapping the second byte to
  # the two-character operator token.
  COMPARISON_JUMP_TABLE = [].tap do |table|
    table["=".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISON_EQUAL
      sub_table.freeze
    end
    table["!".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISION_NOT_EQUAL
      sub_table.freeze
    end
    table["<".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISON_LESS_THAN_OR_EQUAL
      sub_table[">".ord] = COMPARISON_NOT_EQUAL_ALT
      sub_table.freeze
    end
    table[">".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISON_GREATER_THAN_OR_EQUAL
      sub_table.freeze
    end
    table.freeze
  end

  # Operators that are complete tokens on their own when the following byte
  # does not extend them ("<" and ">"). "=" and "!" are deliberately absent:
  # they are only valid as the first half of "==" / "!=".
  BARE_COMPARISON_TABLE = [].tap do |table|
    table["<".ord] = COMPARISON_LESS_THAN
    table[">".ord] = COMPARISON_GREATER_THAN
    table.freeze
  end

  # First byte => [token type, pattern used to scan the whole token].
  NEXT_MATCHER_JUMP_TABLE = [].tap do |table|
    "a".upto("z") do |c|
      table[c.ord] = [:id, IDENTIFIER].freeze
      table[c.upcase.ord] = [:id, IDENTIFIER].freeze
    end
    table["_".ord] = [:id, IDENTIFIER].freeze

    "0".upto("9") do |c|
      table[c.ord] = [:number, NUMBER_LITERAL].freeze
    end
    # #tokenize consults SPECIAL_TABLE first, so a leading "-" is handled by
    # the dash branch there; this entry is kept for table completeness.
    table["-".ord] = [:number, NUMBER_LITERAL].freeze

    table["'".ord] = [:string, SINGLE_STRING_LITERAL].freeze
    table["\"".ord] = [:string, DOUBLE_STRING_LITERAL].freeze
    table.freeze
  end

  # Single-byte punctuation => its (shared, frozen) token.
  SPECIAL_TABLE = [].tap do |table|
    table["|".ord] = PIPE
    table[".".ord] = DOT
    table[":".ord] = COLON
    table[",".ord] = COMMA
    table["[".ord] = OPEN_SQUARE
    table["]".ord] = CLOSE_SQUARE
    table["(".ord] = OPEN_ROUND
    table[")".ord] = CLOSE_ROUND
    table["?".ord] = QUESTION
    table["-".ord] = DASH
    table.freeze
  end

  # Truthy for the bytes "0".."9".
  NUMBER_TABLE = [].tap do |table|
    "0".upto("9") do |c|
      table[c.ord] = true
    end
    table.freeze
  end

  # input - the String holding the expression to tokenize.
  def initialize(input)
    @ss = StringScanner.new(input)
  end

  # Consumes the scanner and returns the Array of tokens, terminated by
  # [:end_of_string].
  #
  # Raises SyntaxError when a byte cannot start (or complete) any token.
  def tokenize
    @output = []

    until @ss.eos?
      @ss.skip(WHITESPACE_OR_NOTHING)

      break if @ss.eos?

      peeked = @ss.peek_byte

      if (special = SPECIAL_TABLE[peeked])
        @ss.scan_byte
        # nil when the punctuation was the last byte of the input; it must
        # never be used as an Array index (that raises TypeError).
        following = @ss.peek_byte

        if special.equal?(DOT) && following == DOT_ORD
          # Special case for ".."
          @ss.scan_byte
          @output << DOTDOT
        elsif special.equal?(DASH) && following && NUMBER_TABLE[following]
          # Special case for negative numbers: step back onto the "-" so the
          # number pattern consumes the sign together with the digits.
          @ss.pos -= 1
          @output << [:number, @ss.scan(NUMBER_LITERAL)]
        else
          @output << special
        end
      elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
        @ss.scan_byte
        following = @ss.peek_byte

        if following && (found = sub_table[following])
          @ss.scan_byte
          @output << found
        elsif (bare = BARE_COMPARISON_TABLE[peeked])
          # "<" or ">" followed by anything else (or by end of input) is a
          # complete operator; the next byte is left for the next iteration.
          @output << bare
        else
          raise SyntaxError, "Unexpected character #{peeked.chr}"
        end
      else
        type, pattern = NEXT_MATCHER_JUMP_TABLE[peeked]

        if type && (t = @ss.scan(pattern))
          # Special case for "contains": it is the comparison operator only
          # when followed by whitespace (see CONTAINS); otherwise it is an
          # ordinary identifier, e.g. the property in `a.contains`.
          @output << if type == :id && t == "contains" && @ss.match?(WHITESPACE)
            COMPARISON_CONTAINS
          else
            [type, t]
          end
        else
          raise SyntaxError, "Unexpected character #{peeked.chr}"
        end
      end
    end

    @output << EOS
  end
end
|
||||
|
||||
# Pick the byte-oriented Lexer2 only when the loaded StringScanner provides
# #scan_byte; otherwise fall back to Lexer1.
Lexer = StringScanner.method_defined?(:scan_byte) ? Lexer2 : Lexer1
|
||||
end
|
||||
|
||||
@ -53,7 +53,7 @@ module Liquid
|
||||
str = consume
|
||||
str << variable_lookups
|
||||
when :open_square
|
||||
str = consume
|
||||
str = consume.dup
|
||||
str << expression
|
||||
str << consume(:close_square)
|
||||
str << variable_lookups
|
||||
|
||||
@ -17,7 +17,7 @@ Gem::Specification.new do |s|
|
||||
s.license = "MIT"
|
||||
# s.description = "A secure, non-evaling end user template engine with aesthetic markup."
|
||||
|
||||
s.required_ruby_version = ">= 2.7.0"
|
||||
s.required_ruby_version = ">= 3.0.0"
|
||||
s.required_rubygems_version = ">= 1.3.7"
|
||||
|
||||
s.metadata['allowed_push_host'] = 'https://rubygems.org'
|
||||
@ -28,6 +28,9 @@ Gem::Specification.new do |s|
|
||||
|
||||
s.require_path = "lib"
|
||||
|
||||
s.add_dependency("strscan")
|
||||
s.add_dependency("bigdecimal")
|
||||
|
||||
s.add_development_dependency('rake', '~> 13.0')
|
||||
s.add_development_dependency('minitest')
|
||||
end
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
require 'benchmark/ips'
|
||||
require_relative 'theme_runner'
|
||||
|
||||
RubyVM::YJIT.enable if defined?(RubyVM::YJIT)
|
||||
Liquid::Template.error_mode = ARGV.first.to_sym if ARGV.first
|
||||
profiler = ThemeRunner.new
|
||||
|
||||
|
||||
62
performance/unit/lexer_benchmark.rb
Normal file
62
performance/unit/lexer_benchmark.rb
Normal file
@ -0,0 +1,62 @@
|
||||
# frozen_string_literal: true

require "benchmark/ips"

# Benchmark Liquid lexing: compares Liquid::Lexer1 against Liquid::Lexer2 on a
# fixed set of expressions, after first checking that both produce the same
# tokens for every expression.

require 'liquid'

# YJIT is optional: the constant may be absent, and RubyVM::YJIT.enable is not
# available on every Ruby that ships YJIT, so only call it when it exists.
RubyVM::YJIT.enable if defined?(RubyVM::YJIT) && RubyVM::YJIT.respond_to?(:enable)

# Liquid::Lexer2 calls StringScanner#scan_byte; without it there is nothing to
# compare, so skip instead of crashing with NoMethodError.
unless StringScanner.method_defined?(:scan_byte)
  warn "Skipping lexer benchmark: StringScanner#scan_byte is not available on this Ruby"
  exit
end

EXPRESSIONS = [
  "foo[1..2].baz",
  "12.0",
  "foo.bar.based",
  "21 - 62",
  "foo.bar.baz",
  "foo > 12",
  "foo < 12",
  "foo <= 12",
  "foo >= 12",
  "foo <> 12",
  "foo == 12",
  "foo != 12",
  "foo contains 12",
  "foo contains 'bar'",
  "foo != 'bar'",
  "'foo' contains 'bar'",
  '234089',
  "foo | default: -1",
]

# Sanity check: a speed comparison is meaningless if the lexers disagree.
EXPRESSIONS.each do |expr|
  lexer_1_result = Liquid::Lexer1.new(expr).tokenize
  lexer_2_result = Liquid::Lexer2.new(expr).tokenize

  next if lexer_1_result == lexer_2_result

  warn "Lexer1 and Lexer2 results are different for expression: #{expr}"
  warn "expected: #{lexer_1_result}"
  warn "got: #{lexer_2_result}"
  abort
end

Benchmark.ips do |x|
  x.config(time: 10, warmup: 5)

  x.report("Liquid::Lexer1#tokenize") do
    EXPRESSIONS.each do |expr|
      l = Liquid::Lexer1.new(expr)
      l.tokenize
    end
  end

  x.report("Liquid::Lexer2#tokenize") do
    EXPRESSIONS.each do |expr|
      l = Liquid::Lexer2.new(expr)
      l.tokenize
    end
  end

  x.compare!
end
|
||||
@ -32,7 +32,7 @@ class TestDrop < Liquid::Drop
|
||||
attr_reader :value
|
||||
|
||||
def registers
|
||||
{ @value => @context.registers[@value] }
|
||||
"{#{@value.inspect}=>#{@context.registers[@value].inspect}}"
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@ -50,4 +50,14 @@ class LexerUnitTest < Minitest::Test
|
||||
Lexer.new("%").tokenize
|
||||
end
|
||||
end
|
||||
|
||||
# A "-" immediately followed by a digit is lexed as one negative number token.
def test_negative_numbers
  expected = [
    [:id, 'foo'],
    [:pipe, '|'],
    [:id, 'default'],
    [:colon, ":"],
    [:number, '-1'],
    [:end_of_string],
  ]

  assert_equal(expected, Lexer.new("foo | default: -1").tokenize)
end
|
||||
|
||||
# ">" followed by a space and a multi-digit number yields a comparison token
# and a single number token.
def test_greater_than_two_digits
  expected = [
    [:id, 'foo'],
    [:comparison, '>'],
    [:number, '12'],
    [:end_of_string],
  ]

  assert_equal(expected, Lexer.new("foo > 12").tokenize)
end
|
||||
end
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user