mirror of
https://github.com/ruby/ruby.git
synced 2026-01-26 20:19:19 +00:00
StringScanner#scan_integer support base 16 integers (#116)
Followup: https://github.com/ruby/strscan/pull/115 `scan_integer` is now implemented in Ruby so as to efficiently handle keyword arguments without allocating a Hash. Given that the goal of `scan_integer` is to more efficiently parse integers without having to allocate an intermediary object, using `rb_scan_args` would defeat the purpose. Additionally, the C implementation now uses `rb_isdigit` and `rb_isxdigit`, because on Windows `isdigit` is locale dependent.
This commit is contained in:
parent
5514485e13
commit
79cc3d26ed
25
ext/strscan/lib/strscan/strscan.rb
Normal file
25
ext/strscan/lib/strscan/strscan.rb
Normal file
@ -0,0 +1,25 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
class StringScanner
  # call-seq:
  #   scan_integer(base: 10)
  #
  # Scans an optionally signed integer at the current position and returns it
  # as an Integer, or returns nil when nothing matches.
  #
  # With `base` omitted or `10`, this is equivalent to calling `#scan` with a
  # `[+-]?\d+` pattern.
  #
  # With `base` set to `16`, this is equivalent to calling `#scan` with a
  # `[+-]?(0x)?[0-9a-fA-F]+` pattern.
  #
  # Any other `base` raises ArgumentError.
  #
  # The scanned string must be encoded with an ASCII compatible encoding,
  # otherwise Encoding::CompatibilityError will be raised.
  def scan_integer(base: 10)
    if base == 10
      scan_base10_integer
    elsif base == 16
      scan_base16_integer
    else
      raise ArgumentError, "Unsupported integer base: #{base.inspect}, expected 10 or 16"
    end
  end
end
|
||||
@ -20,7 +20,6 @@
|
||||
extern size_t onig_region_memsize(const struct re_registers *regs);
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#define STRSCAN_VERSION "3.1.1.dev"
|
||||
@ -116,7 +115,7 @@ static VALUE strscan_get_byte _((VALUE self));
|
||||
static VALUE strscan_getbyte _((VALUE self));
|
||||
static VALUE strscan_peek _((VALUE self, VALUE len));
|
||||
static VALUE strscan_peep _((VALUE self, VALUE len));
|
||||
static VALUE strscan_scan_integer _((VALUE self));
|
||||
static VALUE strscan_scan_base10_integer _((VALUE self));
|
||||
static VALUE strscan_unscan _((VALUE self));
|
||||
static VALUE strscan_bol_p _((VALUE self));
|
||||
static VALUE strscan_eos_p _((VALUE self));
|
||||
@ -1268,21 +1267,26 @@ strscan_peep(VALUE self, VALUE vlen)
|
||||
return strscan_peek(self, vlen);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* scan_integer
|
||||
*
|
||||
* Equivalent to #scan with a [+-]?\d+ pattern, and returns an Integer or nil.
|
||||
*
|
||||
* The scanned string must be encoded with an ASCII compatible encoding, otherwise
|
||||
* Encoding::CompatibilityError will be raised.
|
||||
*/
|
||||
static VALUE
|
||||
strscan_scan_integer(VALUE self)
|
||||
strscan_parse_integer(struct strscanner *p, int base, long len)
|
||||
{
|
||||
char *ptr, *buffer;
|
||||
long len = 0;
|
||||
VALUE buffer_v, integer;
|
||||
|
||||
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
||||
|
||||
MEMCPY(buffer, CURPTR(p), char, len);
|
||||
buffer[len] = '\0';
|
||||
integer = rb_cstr2inum(buffer, base);
|
||||
RB_ALLOCV_END(buffer_v);
|
||||
p->curr += len;
|
||||
return integer;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
strscan_scan_base10_integer(VALUE self)
|
||||
{
|
||||
char *ptr;
|
||||
long len = 0;
|
||||
struct strscanner *p;
|
||||
|
||||
GET_SCANNER(self, p);
|
||||
@ -1302,25 +1306,60 @@ strscan_scan_integer(VALUE self)
|
||||
len++;
|
||||
}
|
||||
|
||||
if (!isdigit(ptr[len])) {
|
||||
if (!rb_isdigit(ptr[len])) {
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
MATCHED(p);
|
||||
p->prev = p->curr;
|
||||
|
||||
while (len < remaining_len && isdigit(ptr[len])) {
|
||||
while (len < remaining_len && rb_isdigit(ptr[len])) {
|
||||
len++;
|
||||
}
|
||||
|
||||
buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
||||
return strscan_parse_integer(p, 10, len);
|
||||
}
|
||||
|
||||
MEMCPY(buffer, CURPTR(p), char, len);
|
||||
buffer[len] = '\0';
|
||||
integer = rb_cstr2inum(buffer, 10);
|
||||
RB_ALLOCV_END(buffer_v);
|
||||
p->curr += len;
|
||||
return integer;
|
||||
static VALUE
|
||||
strscan_scan_base16_integer(VALUE self)
|
||||
{
|
||||
char *ptr;
|
||||
long len = 0;
|
||||
struct strscanner *p;
|
||||
|
||||
GET_SCANNER(self, p);
|
||||
CLEAR_MATCH_STATUS(p);
|
||||
|
||||
rb_must_asciicompat(p->str);
|
||||
|
||||
ptr = CURPTR(p);
|
||||
|
||||
long remaining_len = S_RESTLEN(p);
|
||||
|
||||
if (remaining_len <= 0) {
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
if (ptr[len] == '-' || ptr[len] == '+') {
|
||||
len++;
|
||||
}
|
||||
|
||||
if ((remaining_len >= (len + 2)) && ptr[len] == '0' && ptr[len + 1] == 'x') {
|
||||
len += 2;
|
||||
}
|
||||
|
||||
if (len >= remaining_len || !rb_isxdigit(ptr[len])) {
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
MATCHED(p);
|
||||
p->prev = p->curr;
|
||||
|
||||
while (len < remaining_len && rb_isxdigit(ptr[len])) {
|
||||
len++;
|
||||
}
|
||||
|
||||
return strscan_parse_integer(p, 16, len);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2261,7 +2300,8 @@ Init_strscan(void)
|
||||
rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
|
||||
rb_define_method(StringScanner, "peep", strscan_peep, 1);
|
||||
|
||||
rb_define_method(StringScanner, "scan_integer", strscan_scan_integer, 0);
|
||||
rb_define_private_method(StringScanner, "scan_base10_integer", strscan_scan_base10_integer, 0);
|
||||
rb_define_private_method(StringScanner, "scan_base16_integer", strscan_scan_base16_integer, 0);
|
||||
|
||||
rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
|
||||
|
||||
@ -2290,4 +2330,6 @@ Init_strscan(void)
|
||||
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
|
||||
|
||||
rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0);
|
||||
|
||||
rb_require("strscan/strscan");
|
||||
}
|
||||
|
||||
@ -19,14 +19,17 @@ Gem::Specification.new do |s|
|
||||
files = [
|
||||
"COPYING",
|
||||
"LICENSE.txt",
|
||||
"lib/strscan/strscan.rb"
|
||||
]
|
||||
|
||||
s.require_paths = %w{lib}
|
||||
|
||||
if RUBY_ENGINE == "jruby"
|
||||
s.require_paths = %w{ext/jruby/lib lib}
|
||||
files << "ext/jruby/lib/strscan.rb"
|
||||
files << "lib/strscan.jar"
|
||||
files << "ext/jruby/lib/strscan.rb"
|
||||
s.require_paths += %w{ext/jruby/lib}
|
||||
s.platform = "java"
|
||||
else
|
||||
s.require_paths = %w{lib}
|
||||
files << "ext/strscan/extconf.rb"
|
||||
files << "ext/strscan/strscan.c"
|
||||
s.rdoc_options << "-idoc"
|
||||
|
||||
@ -945,6 +945,81 @@ module StringScannerTests
|
||||
s.scan_integer
|
||||
end
|
||||
end
|
||||
|
||||
# Exercises scan_integer(base: 16): bare hex digits, the optional "0x"
# prefix, optional sign, rejection of a prefix with no digits, and a value
# large enough to need a multi-word Integer.
#
# Signed literal arguments are wrapped in parentheses so that Ruby does not
# emit the "ambiguous first argument" warning for `assert_equal -0x...`.
def test_scan_integer_base_16
  omit "scan_integer isn't implemented on TruffleRuby yet" if RUBY_ENGINE == "truffleruby"

  # Bare hex digits, no prefix.
  s = create_string_scanner('0')
  assert_equal 0x0, s.scan_integer(base: 16)
  assert_equal 1, s.pos
  assert_predicate s, :matched?

  s = create_string_scanner('abc')
  assert_equal 0xabc, s.scan_integer(base: 16)
  assert_equal 3, s.pos
  assert_predicate s, :matched?

  s = create_string_scanner('123abc')
  assert_equal 0x123abc, s.scan_integer(base: 16)
  assert_equal 6, s.pos
  assert_predicate s, :matched?

  # "0x" prefix, lower- and upper-case digits.
  s = create_string_scanner('0x123abc')
  assert_equal 0x123abc, s.scan_integer(base: 16)
  assert_equal 8, s.pos
  assert_predicate s, :matched?

  s = create_string_scanner('0x123ABC')
  assert_equal 0x123abc, s.scan_integer(base: 16)
  assert_equal 8, s.pos
  assert_predicate s, :matched?

  # Sign followed by prefix.
  s = create_string_scanner('-0x123ABC')
  assert_equal(-0x123abc, s.scan_integer(base: 16))
  assert_equal 9, s.pos
  assert_predicate s, :matched?

  s = create_string_scanner('+0x123ABC')
  assert_equal(+0x123abc, s.scan_integer(base: 16))
  assert_equal 9, s.pos
  assert_predicate s, :matched?

  # A prefix with no digits after it does not match and does not advance.
  s = create_string_scanner('0x')
  assert_nil s.scan_integer(base: 16)
  assert_equal 0, s.pos
  refute_predicate s, :matched?

  s = create_string_scanner('-0x')
  assert_nil s.scan_integer(base: 16)
  assert_equal 0, s.pos
  refute_predicate s, :matched?

  s = create_string_scanner('+0x')
  assert_nil s.scan_integer(base: 16)
  assert_equal 0, s.pos
  refute_predicate s, :matched?

  # Sign without prefix.
  s = create_string_scanner('-123abc')
  assert_equal(-0x123abc, s.scan_integer(base: 16))
  assert_equal 7, s.pos
  assert_predicate s, :matched?

  s = create_string_scanner('+123')
  assert_equal 0x123, s.scan_integer(base: 16)
  assert_equal 4, s.pos
  assert_predicate s, :matched?

  s = create_string_scanner('-abc')
  assert_equal(-0xabc, s.scan_integer(base: 16))
  assert_equal 4, s.pos
  assert_predicate s, :matched?

  # Very long input.
  huge_integer = 'F' * 2_000
  s = create_string_scanner(huge_integer)
  assert_equal huge_integer.to_i(16), s.scan_integer(base: 16)
  assert_equal 2_000, s.pos
  assert_predicate s, :matched?
end
|
||||
end
|
||||
|
||||
class TestStringScanner < Test::Unit::TestCase
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user