mirror of
https://github.com/netwide-assembler/nasm.git
synced 2026-01-26 16:09:24 +00:00
preproc: implement %hs2b() and %b2hs() functions for compact binary data
Convenience preprocessor functions that allow for efficient packing of binary data in source code. Move some functions that were previously local but are more generally useful into more accessible places. Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
This commit is contained in:
parent
856ac7b7fb
commit
f6166e571a
13
asm/floats.c
13
asm/floats.c
@ -495,17 +495,6 @@ static bool ieee_round(bool minus, fp_limb *mant, int bits)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Convert a hexadecimal digit character to its numeric value.
 *
 * Returns 0-15 for '0'-'9', 'a'-'f' and 'A'-'F'.
 * Returns a value >= 16 if not a valid hex digit.
 */
static unsigned int hexval(char c)
{
    unsigned int v = (unsigned char) c;

    if (v >= '0' && v <= '9')
        return v - '0';

    /*
     * Fold ASCII upper case onto lower case and rebase to 'a'. The range
     * has to be checked before 10 is added: '@' and '`' both fold to
     * 'a' - 1, which wraps around as an unsigned value and would come
     * back as 9 once the 10 is added.
     */
    v = (v | 0x20) - 'a';
    return v < 6 ? v + 10 : 16;
}
|
||||
|
||||
/* Handle floating-point numbers with radix 2^bits and binary exponent */
|
||||
static bool ieee_flconvert_bin(const char *string, int bits,
|
||||
fp_limb *mant, int32_t *exponent)
|
||||
@ -535,7 +524,7 @@ static bool ieee_flconvert_bin(const char *string, int bits,
|
||||
nasm_nonfatal("too many periods in floating-point constant");
|
||||
return false;
|
||||
}
|
||||
} else if ((v = hexval(c)) < (unsigned int)radix) {
|
||||
} else if ((v = nasm_hexval(c)) < (unsigned int)radix) {
|
||||
if (!seendigit && v) {
|
||||
int l = log2tbl[v];
|
||||
|
||||
|
||||
119
asm/preproc.c
119
asm/preproc.c
@ -7688,6 +7688,7 @@ stdmac_strcat(const SMacro *s, Token **params, int nparams)
|
||||
int i;
|
||||
size_t len = 0;
|
||||
char *str, *p;
|
||||
Token *t;
|
||||
|
||||
(void)s;
|
||||
|
||||
@ -7696,14 +7697,112 @@ stdmac_strcat(const SMacro *s, Token **params, int nparams)
|
||||
len += params[i]->len;
|
||||
}
|
||||
|
||||
nasm_newn(str, len+1);
|
||||
p = str;
|
||||
p = str = nasm_malloc(len+1);
|
||||
|
||||
for (i = 0; i < nparams; i++) {
|
||||
p = mempcpy(p, tok_text(params[i]), params[i]->len);
|
||||
}
|
||||
*p = '\0';
|
||||
|
||||
return make_tok_qstr_len(NULL, str, len);
|
||||
t = make_tok_qstr_len(NULL, str, p - str);
|
||||
nasm_free(str);
|
||||
return t;
|
||||
}
|
||||
|
||||
/* %hs2b() function */
|
||||
static Token *
|
||||
stdmac_hs2b(const SMacro *s, Token **params, int nparams)
|
||||
{
|
||||
int i;
|
||||
size_t len = 0;
|
||||
char *str, *q;
|
||||
Token *t;
|
||||
|
||||
(void)s;
|
||||
|
||||
for (i = 0; i < nparams; i++) {
|
||||
unquote_token(params[i]);
|
||||
len += (params[i]->len + 1) >> 1; /* Maximum possible */
|
||||
}
|
||||
|
||||
q = str = nasm_malloc(len+1);
|
||||
|
||||
for (i = 0; i < nparams; i++) {
|
||||
const char *p = tok_text(params[i]);
|
||||
unsigned int j;
|
||||
unsigned int len = params[i]->len;
|
||||
int v = -1;
|
||||
|
||||
for (j = 0; j < len; j++) {
|
||||
unsigned int hv = nasm_hexval(*p++);
|
||||
if (hv > 15) {
|
||||
/* Separator character or end of string */
|
||||
if (v >= 0)
|
||||
*q++ = v;
|
||||
v = -1;
|
||||
} else {
|
||||
if (v >= 0) {
|
||||
*q++ = (v << 4) + hv;
|
||||
v = -1;
|
||||
} else {
|
||||
v = hv;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Partial byte at the end? */
|
||||
if (v >= 0)
|
||||
*q++ = v;
|
||||
}
|
||||
*q = '\0';
|
||||
|
||||
t = make_tok_qstr_len(NULL, str, q - str);
|
||||
nasm_free(str);
|
||||
return t;
|
||||
}
|
||||
|
||||
/* %b2hs() function */
|
||||
static Token *
|
||||
stdmac_b2hs(const SMacro *s, Token **params, int nparams)
|
||||
{
|
||||
const char * const dchars = nasm_digit_chars(false);
|
||||
const char *p;
|
||||
const char *sep;
|
||||
uint8_t b;
|
||||
char *str, *q;
|
||||
size_t bytes, len, seplen;
|
||||
size_t i;
|
||||
Token *t;
|
||||
|
||||
(void)s;
|
||||
(void)nparams;
|
||||
|
||||
p = unquote_token(params[0]);
|
||||
|
||||
if (!params[0]->len)
|
||||
return make_tok_qstr_len(NULL, "", 0);
|
||||
|
||||
sep = unquote_token(params[1]);
|
||||
bytes = params[0]->len;
|
||||
seplen = params[1]->len;
|
||||
len = (bytes << 1) + (seplen * (bytes-1));
|
||||
|
||||
q = str = nasm_malloc(len+1);
|
||||
|
||||
b = *p++;
|
||||
*q++ = dchars[b >> 4];
|
||||
*q++ = dchars[b & 15];
|
||||
for (i = 1; i < bytes; i++) {
|
||||
if (seplen)
|
||||
q = mempcpy(q, sep, seplen);
|
||||
b = *p++;
|
||||
*q++ = dchars[b >> 4];
|
||||
*q++ = dchars[b & 15];
|
||||
}
|
||||
*q = '\0';
|
||||
|
||||
t = make_tok_qstr_len(NULL, str, q - str);
|
||||
nasm_free(str);
|
||||
return t;
|
||||
}
|
||||
|
||||
/* %substr() function */
|
||||
@ -8223,12 +8322,11 @@ static void pp_add_magic_simple(void)
|
||||
{ "__?PTR?__", true, 0, 0, stdmac_ptr },
|
||||
{ "__?DEFAULT?__", true, 0, 0, stdmac_default },
|
||||
{ "%abs", false, 1, SPARM_EVAL, stdmac_abs },
|
||||
// { "%b2hs", false, 1, SPARM_STR|SPARM_CONDQUOTE, stdmac_b2hs },
|
||||
{ "%chr", false, 1, SPARM_EVAL|SPARM_OPTIONAL|SPARM_VARADIC, stdmac_chr },
|
||||
{ "%count", false, 1, SPARM_VARADIC, stdmac_count },
|
||||
{ "%depend", false, 1, SPARM_PLAIN, stdmac_depend },
|
||||
{ "%eval", false, 1, SPARM_EVAL|SPARM_VARADIC, stdmac_join },
|
||||
// { "%hs2b", false, 1, SPARM_STR|SPARM_CONDQUOTE, stdmac_hs2b },
|
||||
{ "%hs2b", false, 1, SPARM_STR|SPARM_CONDQUOTE|SPARM_VARADIC, stdmac_hs2b },
|
||||
{ "%map", false, 1, SPARM_VARADIC, stdmac_map },
|
||||
{ "%null", false, 1, SPARM_GREEDY, stdmac_null },
|
||||
{ "%pathsearch", false, 1, SPARM_PLAIN, stdmac_pathsearch },
|
||||
@ -8381,6 +8479,17 @@ static void pp_add_magic_miscfunc(void)
|
||||
tmpl.params[2].def = make_tok_num(NULL, 1);
|
||||
define_magic("%ord", false, &tmpl);
|
||||
|
||||
/* %b2hs() function */
|
||||
nasm_zero(tmpl);
|
||||
tmpl.nparam = 2;
|
||||
tmpl.expand = stdmac_b2hs;
|
||||
tmpl.recursive = true;
|
||||
nasm_newn(tmpl.params, tmpl.nparam);
|
||||
tmpl.params[0].flags = SPARM_STR|SPARM_CONDQUOTE;
|
||||
tmpl.params[1].flags = SPARM_STR|SPARM_CONDQUOTE|SPARM_OPTIONAL;
|
||||
tmpl.params[1].def = make_tok_qstr_len(NULL, "", 0);
|
||||
define_magic("%b2hs", false, &tmpl);
|
||||
|
||||
/* %find[i]() functions */
|
||||
for (i = 0; i < 2; i++) {
|
||||
static const char * const names[] = { "%findi", "%find" };
|
||||
|
||||
@ -17,9 +17,9 @@ It is the production version of NASM since 2025.
|
||||
\b Add support for the APX and AVX10 instruction sets, and various
|
||||
miscellaneous new instructions.
|
||||
|
||||
\b Add new preprocessor functions: \c{%chr()}, \c{%depend()},
|
||||
\c{%find()}, \c{%findi()}, \c{%null()}, \c{%ord()},
|
||||
\c{%pathsearch()}, and \c{%realpath()}. See \k{ppfunc}.
|
||||
\b Add new preprocessor functions: \c{%b2hs()}, \c{%chr()},
|
||||
\c{%depend()}, \c{%find()}, \c{%findi()}, \c{%hs2b()}, \c{%null()},
|
||||
\c{%ord()}, \c{%pathsearch()}, and \c{%realpath()}. See \k{ppfunc}.
|
||||
|
||||
\b New preprocessor directive \c{%note} to insert a note in the list
|
||||
file, without issuing an external diagnostic. Unlike a comment, it
|
||||
|
||||
@ -714,6 +714,15 @@ single token containing a decimal number; no minus sign will be
|
||||
emitted even if the input value is the maximum negative number.
|
||||
|
||||
|
||||
\S{f_b2hs} \i\c{%b2hs()} Function
|
||||
|
||||
The \c{%b2hs()} function takes a quoted string and an optional
|
||||
separator string, and expands to a quoted string containing a packed
|
||||
hexadecimal form of the bytes of the first string, separated by the
|
||||
separator string if applicable. This is the inverse of the \c{%hs2b()}
|
||||
function, see \k{f_hs2b}.
|
||||
|
||||
|
||||
\S{f_chr} \i\c{%chr()} Function
|
||||
|
||||
The \c{%chr()} function evaluates its arguments as integers, then
|
||||
@ -759,6 +768,7 @@ This is the function equivalent of the \c{%depend} directive, see
|
||||
|
||||
See also the \c{%pathsearch()} function (\k{f_pathsearch}).
|
||||
|
||||
|
||||
\S{f_eval} \i\c{%eval()} Function
|
||||
|
||||
The \c{%eval()} function evaluates its argument as a numeric
|
||||
@ -805,6 +815,23 @@ Equivalent to \i\c\{%eval()}, except that the results generated are
|
||||
given as unsigned hexadecimal, with a \c{0x} prefix.
|
||||
|
||||
|
||||
\S{f_hs2b} \i\c{%hs2b()} Function
|
||||
|
||||
The \c{%hs2b()} function takes one or more quoted strings containing
|
||||
hexadecimal numbers and optional separators (any character that is not
|
||||
a valid hexadecimal digit is considered a separator) and expands to a
|
||||
quoted string containing the bytes encoded in the hexadecimal
|
||||
string. Every pair of hexadecimal digits encodes a byte, but a
|
||||
separator will always terminate the encoding of a byte. Thus, these
|
||||
two statements will produce the same output:
|
||||
|
||||
\c db 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09
|
||||
\c db %hs2b("00010203 4 0506 07 8","9")
|
||||
|
||||
This can be used to compactly encode long strings of binary data in
|
||||
source code.
|
||||
|
||||
|
||||
\S{f_is} \i\c{%is()}\I\c{%isn()} Family Functions
|
||||
|
||||
Each \c{%if} conditional assembly family directive (see \k{condasm})
|
||||
|
||||
@ -292,6 +292,12 @@ int64_t readstrnum(char *str, int length, bool *warn);
|
||||
int numstr(char *buf, size_t buflen, uint64_t n,
|
||||
int digits, unsigned int base, bool ucase);
|
||||
|
||||
extern const char * const nasmlib_digit_chars[2];
|
||||
static inline const char *nasm_digit_chars(bool ucase)
|
||||
{
|
||||
return nasmlib_digit_chars[ucase];
|
||||
}
|
||||
|
||||
/*
|
||||
* seg_alloc: allocate a hitherto unused segment number.
|
||||
*/
|
||||
|
||||
@ -124,4 +124,15 @@ static inline void nasm_ctype_tasm_mode(void)
|
||||
/* No differences at the present moment */
|
||||
}
|
||||
|
||||
/*
 * Convert a hexadecimal digit character to its numeric value.
 *
 * Returns 0-15 for '0'-'9', 'a'-'f' and 'A'-'F'.
 * Returns a value >= 16 if not a valid hex digit.
 */
static inline unsigned int nasm_hexval(char c)
{
    unsigned int v = (unsigned char) c;

    if (v >= '0' && v <= '9')
        return v - '0';

    /*
     * Fold ASCII upper case onto lower case and rebase to 'a'. The range
     * has to be checked before 10 is added: '@' and '`' both fold to
     * 'a' - 1, which wraps around as an unsigned value and would come
     * back as 9 once the 10 is added.
     */
    v = (v | 0x20) - 'a';
    return v < 6 ? v + 10 : 16;
}
|
||||
|
||||
#endif /* NASM_NCTYPE_H */
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/* ----------------------------------------------------------------------- *
|
||||
*
|
||||
* Copyright 2023 The NASM Authors - All Rights Reserved
|
||||
* Copyright 2023-2025 The NASM Authors - All Rights Reserved
|
||||
* See the file AUTHORS included with the NASM distribution for
|
||||
* the specific copyright holders.
|
||||
*
|
||||
@ -33,6 +33,20 @@
|
||||
|
||||
#include "nasmlib.h"
|
||||
|
||||
const char * const nasmlib_digit_chars[2] = {
|
||||
/* Lower case version */
|
||||
"0123456789"
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
"@_",
|
||||
|
||||
/* Upper case version */
|
||||
"0123456789"
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
"@_"
|
||||
};
|
||||
|
||||
/*
|
||||
* Produce an unsigned integer string from a number with a specified
|
||||
* base, digits and signedness.
|
||||
@ -40,21 +54,7 @@
|
||||
int numstr(char *buf, size_t buflen, uint64_t n,
|
||||
int digits, unsigned int base, bool ucase)
|
||||
{
|
||||
static const char digit_chars[2][NUMSTR_MAXBASE+1] =
|
||||
{
|
||||
/* Lower case version */
|
||||
"0123456789"
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
"@_",
|
||||
|
||||
/* Upper case version */
|
||||
"0123456789"
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
"@_"
|
||||
};
|
||||
const char * const dchars = digit_chars[ucase];
|
||||
const char * const dchars = nasm_digit_chars(ucase);
|
||||
bool moredigits = digits <= 0;
|
||||
char *p;
|
||||
int len;
|
||||
|
||||
@ -2,3 +2,8 @@
|
||||
db %ord("Hello, World!")
|
||||
db %ord("Hello, World!",1,-1)
|
||||
db %chr()
|
||||
db %b2hs("Hello, World!")
|
||||
db %b2hs("Hello, World!",':')
|
||||
db %hs2b("303132 33 34 35 3 6 3 78 9", "abcd")
|
||||
db 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09
|
||||
db %hs2b("00010203 4 0506 07 8","9")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user