Merge pull request #1905 from Shopify/catlee/invalid_utf8

Raise SyntaxError on invalid UTF8 strings in lexer/tokenizer
This commit is contained in:
Chris AtLee 2025-02-13 09:24:11 -05:00 committed by GitHub
commit da4afd4156
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 35 additions and 0 deletions

View File

@ -2,6 +2,8 @@
## 5.7.3 (unreleased)
* Raise Liquid::SyntaxError when parsing invalidly encoded strings
## 5.7.2 2025-01-31
* Fix array filters to not support nested properties

View File

@ -161,6 +161,12 @@ module Liquid
end
# rubocop:enable Metrics/BlockNesting
output << EOS
rescue ::ArgumentError => e
if e.message == "invalid byte sequence in #{ss.string.encoding}"
raise SyntaxError, "Invalid byte sequence in #{ss.string.encoding}"
else
raise
end
end
def raise_syntax_error(start_pos, ss)

View File

@ -103,6 +103,12 @@ module Liquid
pos = @ss.pos -= 2
@source.byteslice(start, pos - start)
rescue ::ArgumentError => e
if e.message == "invalid byte sequence in #{@ss.string.encoding}"
raise SyntaxError, "Invalid byte sequence in #{@ss.string.encoding}"
else
raise
end
end
def next_variable_token

View File

@ -131,6 +131,16 @@ class LexerUnitTest < Minitest::Test
assert_equal([[:id, "false"], [:number, "1"], [:end_of_string]], tokenize("false 1"))
end
# Feeding the tokenize helper a string that contains the byte 0xFF must raise
# SyntaxError with the "Invalid byte sequence in UTF-8" message.
def test_error_with_invalid_utf8
  raised = assert_raises(SyntaxError) { tokenize("\x00\xff") }
  expected_message = 'Liquid syntax error: Invalid byte sequence in UTF-8'

  assert_equal(expected_message, raised.message)
end
private
def tokenize(input)

View File

@ -35,4 +35,15 @@ class TemplateUnitTest < Minitest::Test
# Parsing "foo" through TemplateSubclass and rendering the result should give
# back "foo".
def test_template_inheritance
  rendered = TemplateSubclass.parse("foo").render

  assert_equal("foo", rendered)
end
# Building a Liquid::Tokenizer over a source that contains the byte 0xFF is
# expected to raise SyntaxError with the "Invalid byte sequence in UTF-8"
# message.
def test_invalid_utf8
  malformed = "\xff\x00"

  raised = assert_raises(SyntaxError) do
    # The scanner is created inside the block so that any error it raises is
    # also subject to the assertion.
    scanner = StringScanner.new(malformed)
    Liquid::Tokenizer.new(source: malformed, string_scanner: scanner)
  end

  assert_equal('Liquid syntax error: Invalid byte sequence in UTF-8', raised.message)
end
end