mirror of
https://github.com/ruby/ruby.git
synced 2026-01-29 05:24:23 +00:00
[ruby/prism] Serialize the newline_list to avoid recomputing it again later
* Fixes https://github.com/ruby/prism/issues/2380 https://github.com/ruby/prism/commit/4eaaa90114
This commit is contained in:
parent
65f5435540
commit
f0f6ffef42
@ -12,15 +12,13 @@ module Prism
|
||||
attr_accessor :start_line
|
||||
|
||||
# The list of newline byte offsets in the source code.
|
||||
attr_reader :offsets
|
||||
attr_accessor :offsets
|
||||
|
||||
# Create a new source object with the given source code and newline byte
|
||||
# offsets. If no newline byte offsets are given, they will be computed from
|
||||
# the source code.
|
||||
def initialize(source, start_line = 1, offsets = compute_offsets(source))
|
||||
# Create a new source object with the given source code.
|
||||
def initialize(source)
|
||||
@source = source
|
||||
@start_line = start_line
|
||||
@offsets = offsets
|
||||
@start_line = 1 # set after parsing is done
|
||||
@offsets = [] # set after parsing is done
|
||||
end
|
||||
|
||||
# Perform a byteslice on the source code using the given byte offset and
|
||||
@ -94,14 +92,6 @@ module Prism
|
||||
|
||||
left - 1
|
||||
end
|
||||
|
||||
# Find all of the newlines in the source code and return their byte offsets
|
||||
# from the start of the string an array.
|
||||
def compute_offsets(code)
|
||||
offsets = [0]
|
||||
code.b.scan("\n") { offsets << $~.end(0) }
|
||||
offsets
|
||||
end
|
||||
end
|
||||
|
||||
# This represents a location in the source.
|
||||
|
||||
@ -542,9 +542,9 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
|
||||
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
|
||||
pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
|
||||
|
||||
VALUE offsets = rb_ary_new();
|
||||
VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
|
||||
VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
|
||||
VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input));
|
||||
VALUE source_argv[] = { source_string };
|
||||
VALUE source = rb_class_new_instance(1, source_argv, rb_cPrismSource);
|
||||
|
||||
parse_lex_data_t parse_lex_data = {
|
||||
.source = source,
|
||||
@ -561,17 +561,18 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
|
||||
parser.lex_callback = &lex_callback;
|
||||
pm_node_t *node = pm_parse(&parser);
|
||||
|
||||
// Here we need to update the source range to have the correct newline
|
||||
// offsets. We do it here because we've already created the object and given
|
||||
// it over to all of the tokens.
|
||||
for (size_t index = 0; index < parser.newline_list.size; index++) {
|
||||
rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
|
||||
}
|
||||
// Here we need to update the Source object to have the correct
|
||||
// encoding for the source string and the correct newline offsets.
|
||||
// We do it here because we've already created the Source object and given
|
||||
// it over to all of the tokens, and both of these are only set after pm_parse().
|
||||
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
|
||||
rb_enc_associate(source_string, encoding);
|
||||
pm_source_init(source, &parser);
|
||||
|
||||
VALUE value;
|
||||
if (return_nodes) {
|
||||
value = rb_ary_new_capa(2);
|
||||
rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding));
|
||||
rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source));
|
||||
rb_ary_push(value, parse_lex_data.tokens);
|
||||
} else {
|
||||
value = parse_lex_data.tokens;
|
||||
@ -650,7 +651,7 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
|
||||
|
||||
VALUE source = pm_source_new(&parser, encoding);
|
||||
VALUE result_argv[] = {
|
||||
pm_ast_new(&parser, node, encoding),
|
||||
pm_ast_new(&parser, node, encoding, source),
|
||||
parser_comments(&parser, source),
|
||||
parser_magic_comments(&parser, source),
|
||||
parser_data_loc(&parser, source),
|
||||
|
||||
@ -8,8 +8,9 @@
|
||||
#include "prism.h"
|
||||
|
||||
VALUE pm_source_new(pm_parser_t *parser, rb_encoding *encoding);
|
||||
void pm_source_init(VALUE source, pm_parser_t *parser);
|
||||
VALUE pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source);
|
||||
VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding);
|
||||
VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding, VALUE source);
|
||||
|
||||
void Init_prism_api_node(void);
|
||||
void Init_prism_pack(void);
|
||||
|
||||
@ -36,18 +36,26 @@ pm_string_new(pm_string_t *string, rb_encoding *encoding) {
|
||||
return rb_enc_str_new((const char *) pm_string_source(string), pm_string_length(string), encoding);
|
||||
}
|
||||
|
||||
// Create a Prism::Source object from the given parser.
|
||||
// Create a Prism::Source object from the given parser, after pm_parse() was called.
|
||||
VALUE
|
||||
pm_source_new(pm_parser_t *parser, rb_encoding *encoding) {
|
||||
VALUE source = rb_enc_str_new((const char *) parser->start, parser->end - parser->start, encoding);
|
||||
VALUE source_string = rb_enc_str_new((const char *) parser->start, parser->end - parser->start, encoding);
|
||||
VALUE source_argv[] = { source_string };
|
||||
VALUE source = rb_class_new_instance(1, source_argv, rb_cPrismSource);
|
||||
|
||||
pm_source_init(source, parser);
|
||||
return source;
|
||||
}
|
||||
|
||||
void
|
||||
pm_source_init(VALUE source, pm_parser_t *parser) {
|
||||
rb_funcall(source, rb_intern("start_line="), 1, LONG2NUM(parser->start_line));
|
||||
|
||||
VALUE offsets = rb_ary_new_capa(parser->newline_list.size);
|
||||
|
||||
for (size_t index = 0; index < parser->newline_list.size; index++) {
|
||||
rb_ary_push(offsets, INT2FIX(parser->newline_list.offsets[index]));
|
||||
rb_ary_push(offsets, ULONG2NUM(parser->newline_list.offsets[index]));
|
||||
}
|
||||
|
||||
VALUE source_argv[] = { source, LONG2NUM(parser->start_line), offsets };
|
||||
return rb_class_new_instance(3, source_argv, rb_cPrismSource);
|
||||
rb_funcall(source, rb_intern("offsets="), 1, offsets);
|
||||
}
|
||||
|
||||
typedef struct pm_node_stack_node {
|
||||
@ -77,8 +85,7 @@ pm_node_stack_pop(pm_node_stack_node_t **stack) {
|
||||
}
|
||||
|
||||
VALUE
|
||||
pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
|
||||
VALUE source = pm_source_new(parser, encoding);
|
||||
pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding, VALUE source) {
|
||||
ID *constants = calloc(parser->constant_pool.size, sizeof(ID));
|
||||
|
||||
for (uint32_t index = 0; index < parser->constant_pool.size; index++) {
|
||||
|
||||
@ -82,6 +82,10 @@ module Prism
|
||||
source.start_line = load_varsint
|
||||
end
|
||||
|
||||
def load_line_offsets
|
||||
source.offsets = load_varuint.times.map { load_varuint }
|
||||
end
|
||||
|
||||
def load_comments
|
||||
load_varuint.times.map do
|
||||
case load_varuint
|
||||
@ -118,6 +122,7 @@ module Prism
|
||||
tokens = load_tokens
|
||||
encoding = load_encoding
|
||||
load_start_line
|
||||
load_line_offsets
|
||||
comments, magic_comments, data_loc, errors, warnings = load_metadata
|
||||
tokens.each { |token,| token.value.force_encoding(encoding) }
|
||||
|
||||
@ -129,6 +134,7 @@ module Prism
|
||||
load_header
|
||||
load_encoding
|
||||
load_start_line
|
||||
load_line_offsets
|
||||
|
||||
comments, magic_comments, data_loc, errors, warnings = load_metadata
|
||||
|
||||
|
||||
@ -128,6 +128,17 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pm_serialize_newline_list(pm_newline_list_t *list, pm_buffer_t *buffer) {
|
||||
uint32_t size = pm_sizet_to_u32(list->size);
|
||||
pm_buffer_append_varuint(buffer, size);
|
||||
|
||||
for (uint32_t i = 0; i < size; i++) {
|
||||
uint32_t offset = pm_sizet_to_u32(list->offsets[i]);
|
||||
pm_buffer_append_varuint(buffer, offset);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *buffer) {
|
||||
// serialize type
|
||||
@ -214,14 +225,11 @@ pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer) {
|
||||
pm_buffer_append_string(buffer, encoding->name, encoding_length);
|
||||
}
|
||||
|
||||
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
|
||||
/**
|
||||
* Serialize the encoding, metadata, nodes, and constant pool.
|
||||
*/
|
||||
void
|
||||
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
||||
static void
|
||||
pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) {
|
||||
pm_serialize_encoding(parser->encoding, buffer);
|
||||
pm_buffer_append_varsint(buffer, parser->start_line);
|
||||
pm_serialize_newline_list(&parser->newline_list, buffer);
|
||||
<%- unless Prism::SERIALIZE_ONLY_SEMANTICS_FIELDS -%>
|
||||
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
|
||||
<%- end -%>
|
||||
@ -229,6 +237,15 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
|
||||
pm_serialize_data_loc(parser, buffer);
|
||||
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
|
||||
pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
|
||||
}
|
||||
|
||||
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
|
||||
/**
|
||||
* Serialize the metadata, nodes, and constant pool.
|
||||
*/
|
||||
void
|
||||
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
||||
pm_serialize_metadata(parser, buffer);
|
||||
|
||||
// Here we're going to leave space for the offset of the constant pool in
|
||||
// the buffer.
|
||||
@ -319,13 +336,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const
|
||||
// Append 0 to mark end of tokens.
|
||||
pm_buffer_append_byte(buffer, 0);
|
||||
|
||||
pm_serialize_encoding(parser.encoding, buffer);
|
||||
pm_buffer_append_varsint(buffer, parser.start_line);
|
||||
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
|
||||
pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
|
||||
pm_serialize_data_loc(&parser, buffer);
|
||||
pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
|
||||
pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);
|
||||
pm_serialize_metadata(&parser, buffer);
|
||||
|
||||
pm_node_destroy(&parser, node);
|
||||
pm_parser_free(&parser);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user