Raise SyntaxError on invalid encoding symbol (#10967)

[Bug #20280]

Backport of #10014.
This commit is contained in:
Peter Zhu 2024-06-11 14:01:29 -04:00 committed by GitHub
parent 40251ed0df
commit 4c50d23245
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 49 additions and 21 deletions

18
parse.y
View File

@ -12846,9 +12846,22 @@ new_defined(struct parser_params *p, NODE *expr, const YYLTYPE *loc)
return NEW_DEFINED(n, loc);
}
/*
 * Validate a string literal that is about to be interned as a symbol.
 *
 * Returns `lit` untouched unless rb_enc_str_coderange() reports
 * ENC_CODERANGE_BROKEN for it.  In that case an "invalid symbol" error is
 * reported at `loc` through yyerror1() and the value produced by STR_NEW0()
 * is returned in its place, so the caller still receives a string it can
 * intern (presumably an empty one -- confirm against the STR_NEW0
 * definition).
 *
 * `p` has no explicit use in the body.  NOTE(review): the helper macros
 * invoked here appear to pick it up implicitly -- confirm before renaming
 * or dropping the parameter.
 */
static VALUE
str_to_sym_check(struct parser_params *p, VALUE lit, const YYLTYPE *loc)
{
    const int coderange = rb_enc_str_coderange(lit);

    if (coderange != ENC_CODERANGE_BROKEN) {
        return lit;
    }

    yyerror1(loc, "invalid symbol");
    return STR_NEW0();
}
static NODE*
symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol)
{
VALUE lit;
enum node_type type = nd_type(symbol);
switch (type) {
case NODE_DSTR:
@ -12856,7 +12869,8 @@ symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol)
break;
case NODE_STR:
nd_set_type(symbol, NODE_LIT);
RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_LIT(symbol)->nd_lit = rb_str_intern(RNODE_LIT(symbol)->nd_lit));
lit = str_to_sym_check(p, RNODE_LIT(symbol)->nd_lit, &RNODE(symbol)->nd_loc);
RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_LIT(symbol)->nd_lit = rb_str_intern(lit));
break;
default:
compile_error(p, "unexpected node as symbol: %s", parser_node_name(type));
@ -14553,7 +14567,7 @@ dsym_node(struct parser_params *p, NODE *node, const YYLTYPE *loc)
nd_set_loc(node, loc);
break;
case NODE_STR:
lit = RNODE_STR(node)->nd_lit;
lit = str_to_sym_check(p, RNODE_STR(node)->nd_lit, &RNODE(node)->nd_loc);
RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_STR(node)->nd_lit = ID2SYM(rb_intern_str(lit)));
nd_set_type(node, NODE_LIT);
nd_set_loc(node, loc);

View File

@ -679,12 +679,14 @@ rb_parser_config_initialize(rb_parser_config_t *config)
config->enc_isspace = enc_isspace;
config->enc_coderange_7bit = ENC_CODERANGE_7BIT;
config->enc_coderange_unknown = ENC_CODERANGE_UNKNOWN;
config->enc_coderange_broken = ENC_CODERANGE_BROKEN;
config->enc_compatible = enc_compatible;
config->enc_from_encoding = enc_from_encoding;
config->encoding_get = encoding_get;
config->encoding_set = encoding_set;
config->encoding_is_ascii8bit = encoding_is_ascii8bit;
config->usascii_encoding = usascii_encoding;
config->enc_str_coderange = rb_enc_str_coderange;
config->ractor_make_shareable = rb_ractor_make_shareable;

View File

@ -1295,12 +1295,14 @@ typedef struct rb_parser_config_struct {
int (*enc_isspace)(OnigCodePoint c, rb_encoding *enc);
int enc_coderange_7bit;
int enc_coderange_unknown;
int enc_coderange_broken;
rb_encoding *(*enc_compatible)(VALUE str1, VALUE str2);
VALUE (*enc_from_encoding)(rb_encoding *enc);
int (*encoding_get)(VALUE obj);
void (*encoding_set)(VALUE obj, int encindex);
int (*encoding_is_ascii8bit)(VALUE obj);
rb_encoding *(*usascii_encoding)(void);
int (*enc_str_coderange)(VALUE str);
/* Ractor */
VALUE (*ractor_make_shareable)(VALUE obj);

View File

@ -191,20 +191,22 @@ describe "Hash literal" do
usascii_hash.keys.first.encoding.should == Encoding::US_ASCII
end
it "raises an EncodingError at parse time when Symbol key with invalid bytes" do
ScratchPad.record []
-> {
eval 'ScratchPad << 1; {:"\xC3" => 1}'
}.should raise_error(EncodingError, 'invalid symbol in encoding UTF-8 :"\xC3"')
ScratchPad.recorded.should == []
end
ruby_bug "#20280", ""..."3.3" do
it "raises a SyntaxError at parse time when Symbol key with invalid bytes" do
ScratchPad.record []
-> {
eval 'ScratchPad << 1; {:"\xC3" => 1}'
}.should raise_error(SyntaxError, /invalid symbol/)
ScratchPad.recorded.should == []
end
it "raises an EncodingError at parse time when Symbol key with invalid bytes and 'key: value' syntax used" do
ScratchPad.record []
-> {
eval 'ScratchPad << 1; {"\xC3": 1}'
}.should raise_error(EncodingError, 'invalid symbol in encoding UTF-8 :"\xC3"')
ScratchPad.recorded.should == []
it "raises a SyntaxError at parse time when Symbol key with invalid bytes and 'key: value' syntax used" do
ScratchPad.record []
-> {
eval 'ScratchPad << 1; {"\xC3": 1}'
}.should raise_error(SyntaxError, /invalid symbol/)
ScratchPad.recorded.should == []
end
end
end

View File

@ -96,11 +96,13 @@ describe "A Symbol literal" do
%I{a b #{"c"}}.should == [:a, :b, :c]
end
it "raises an EncodingError at parse time when Symbol with invalid bytes" do
ScratchPad.record []
-> {
eval 'ScratchPad << 1; :"\xC3"'
}.should raise_error(EncodingError, 'invalid symbol in encoding UTF-8 :"\xC3"')
ScratchPad.recorded.should == []
ruby_bug "#20280", ""..."3.3" do
  # The evaluated source would append 1 to ScratchPad before reaching the
  # symbol literal. An empty ScratchPad afterwards therefore shows that the
  # SyntaxError was raised before any of the source was executed.
  it "raises a SyntaxError at parse time when Symbol with invalid bytes" do
    ScratchPad.record []
    source = 'ScratchPad << 1; :"\xC3"'
    -> { eval source }.should raise_error(SyntaxError, /invalid symbol/)
    ScratchPad.recorded.should == []
  end
end
end

View File

@ -1354,6 +1354,10 @@ eom
assert_valid_syntax 'p :foo, {proc do end => proc do end, b: proc do end}', bug13073
end
# A hash key written with the "key": value syntax whose string contains an
# invalid byte sequence must be rejected with an "invalid symbol" syntax error.
def test_invalid_encoding_symbol
  source = '{"\xC3": 1}'
  assert_syntax_error(source, "invalid symbol")
end
def test_do_after_local_variable
obj = Object.new
def obj.m; yield; end

View File

@ -283,12 +283,14 @@ struct rb_imemo_tmpbuf_struct {
/*
 * Redirect encoding-related names to the members of the parser's config
 * table.  Every macro below expands to an expression on `p->config`, so a
 * variable named `p` with a `config` member must be in scope wherever one
 * of these names is used.
 */
#define rb_enc_isspace p->config->enc_isspace
#define ENC_CODERANGE_7BIT p->config->enc_coderange_7bit
#define ENC_CODERANGE_UNKNOWN p->config->enc_coderange_unknown
#define ENC_CODERANGE_BROKEN p->config->enc_coderange_broken
#define rb_enc_compatible p->config->enc_compatible
#define rb_enc_from_encoding p->config->enc_from_encoding
#define ENCODING_GET p->config->encoding_get
#define ENCODING_SET p->config->encoding_set
#define ENCODING_IS_ASCII8BIT p->config->encoding_is_ascii8bit
#define rb_usascii_encoding p->config->usascii_encoding
#define rb_enc_str_coderange p->config->enc_str_coderange
#define rb_ractor_make_shareable p->config->ractor_make_shareable