preproc: implement %hs2b() and %b2hs() functions for compact binary data

Convenience preprocessor functions that allow for efficient packing
of binary data in source code.

Move some functions that have previously been local but are more
generally useful into more accessible places.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
This commit is contained in:
H. Peter Anvin 2025-09-15 23:01:59 -07:00
parent 856ac7b7fb
commit f6166e571a
8 changed files with 183 additions and 36 deletions

View File

@ -495,17 +495,6 @@ static bool ieee_round(bool minus, fp_limb *mant, int bits)
return false;
}
/*
 * Convert one character to its hexadecimal digit value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F'.
 * Returns a value >= 16 if not a valid hex digit.
 */
static unsigned int hexval(char c)
{
    unsigned int v = (unsigned char) c;

    if (v >= '0' && v <= '9')
        return v - '0';

    v |= 0x20;                  /* Fold upper case onto lower case */

    /*
     * Anything below 'a' must be rejected explicitly: the unsigned
     * subtraction below would wrap around, and for '@' and '`'
     * (both 0x60 after case folding) it would wrap to exactly 9,
     * making them look like valid digits.
     */
    if (v < 'a')
        return 16;

    return v - 'a' + 10;
}
/* Handle floating-point numbers with radix 2^bits and binary exponent */
static bool ieee_flconvert_bin(const char *string, int bits,
fp_limb *mant, int32_t *exponent)
@ -535,7 +524,7 @@ static bool ieee_flconvert_bin(const char *string, int bits,
nasm_nonfatal("too many periods in floating-point constant");
return false;
}
} else if ((v = hexval(c)) < (unsigned int)radix) {
} else if ((v = nasm_hexval(c)) < (unsigned int)radix) {
if (!seendigit && v) {
int l = log2tbl[v];

View File

@ -7688,6 +7688,7 @@ stdmac_strcat(const SMacro *s, Token **params, int nparams)
int i;
size_t len = 0;
char *str, *p;
Token *t;
(void)s;
@ -7696,14 +7697,112 @@ stdmac_strcat(const SMacro *s, Token **params, int nparams)
len += params[i]->len;
}
nasm_newn(str, len+1);
p = str;
p = str = nasm_malloc(len+1);
for (i = 0; i < nparams; i++) {
p = mempcpy(p, tok_text(params[i]), params[i]->len);
}
*p = '\0';
return make_tok_qstr_len(NULL, str, len);
t = make_tok_qstr_len(NULL, str, p - str);
nasm_free(str);
return t;
}
/*
 * %hs2b() function
 *
 * Packs the hexadecimal digits found in the string arguments into a
 * single quoted string of bytes. Two consecutive digits form one byte;
 * any non-hex character acts as a separator and terminates the byte
 * being assembled, as does the end of each argument.
 */
static Token *
stdmac_hs2b(const SMacro *s, Token **params, int nparams)
{
    size_t maxbytes = 0;
    char *buf, *out;
    Token *result;
    int n;

    (void)s;

    /* Each argument can yield at most one byte per two characters, rounded up */
    for (n = 0; n < nparams; n++) {
        unquote_token(params[n]);
        maxbytes += (params[n]->len + 1) >> 1;
    }

    out = buf = nasm_malloc(maxbytes+1);

    for (n = 0; n < nparams; n++) {
        const char *in = tok_text(params[n]);
        unsigned int left = params[n]->len;
        int pending = -1;       /* High nibble waiting for a partner, or -1 */

        while (left--) {
            unsigned int digit = nasm_hexval(*in++);

            if (digit <= 15 && pending < 0) {
                /* First digit of a new byte */
                pending = digit;
                continue;
            }

            if (digit <= 15) {
                /* Second digit: the byte is complete */
                *out++ = (pending << 4) + digit;
            } else if (pending >= 0) {
                /* Separator after a lone digit: emit it as a byte */
                *out++ = pending;
            }
            pending = -1;
        }

        /* A lone digit at the end of the argument is a byte too */
        if (pending >= 0)
            *out++ = pending;
    }
    *out = '\0';

    result = make_tok_qstr_len(NULL, buf, out - buf);
    nasm_free(buf);
    return result;
}
/*
 * %b2hs() function
 *
 * Expands to a quoted string holding two lower-case hexadecimal digits
 * for every byte of the first argument, with the second argument (the
 * separator string, possibly empty) inserted between consecutive bytes.
 * An empty first argument yields an empty quoted string.
 */
static Token *
stdmac_b2hs(const SMacro *s, Token **params, int nparams)
{
    const char * const hexdigits = nasm_digit_chars(false);
    const char *src, *sepstr;
    char *buf, *out;
    size_t nbytes, seplen, outlen;
    size_t n;
    Token *result;

    (void)s;
    (void)nparams;

    src = unquote_token(params[0]);
    if (!params[0]->len)
        return make_tok_qstr_len(NULL, "", 0);

    sepstr = unquote_token(params[1]);
    nbytes = params[0]->len;
    seplen = params[1]->len;

    /* Two digits per byte, plus one separator between each pair of bytes */
    outlen = (nbytes << 1) + (seplen * (nbytes-1));
    out = buf = nasm_malloc(outlen+1);

    for (n = 0; n < nbytes; n++) {
        const uint8_t byte = *src++;

        /* No separator before the very first byte */
        if (n && seplen)
            out = mempcpy(out, sepstr, seplen);

        *out++ = hexdigits[byte >> 4];
        *out++ = hexdigits[byte & 15];
    }
    *out = '\0';

    result = make_tok_qstr_len(NULL, buf, out - buf);
    nasm_free(buf);
    return result;
}
/* %substr() function */
@ -8223,12 +8322,11 @@ static void pp_add_magic_simple(void)
{ "__?PTR?__", true, 0, 0, stdmac_ptr },
{ "__?DEFAULT?__", true, 0, 0, stdmac_default },
{ "%abs", false, 1, SPARM_EVAL, stdmac_abs },
// { "%b2hs", false, 1, SPARM_STR|SPARM_CONDQUOTE, stdmac_b2hs },
{ "%chr", false, 1, SPARM_EVAL|SPARM_OPTIONAL|SPARM_VARADIC, stdmac_chr },
{ "%count", false, 1, SPARM_VARADIC, stdmac_count },
{ "%depend", false, 1, SPARM_PLAIN, stdmac_depend },
{ "%eval", false, 1, SPARM_EVAL|SPARM_VARADIC, stdmac_join },
// { "%hs2b", false, 1, SPARM_STR|SPARM_CONDQUOTE, stdmac_hs2b },
{ "%hs2b", false, 1, SPARM_STR|SPARM_CONDQUOTE|SPARM_VARADIC, stdmac_hs2b },
{ "%map", false, 1, SPARM_VARADIC, stdmac_map },
{ "%null", false, 1, SPARM_GREEDY, stdmac_null },
{ "%pathsearch", false, 1, SPARM_PLAIN, stdmac_pathsearch },
@ -8381,6 +8479,17 @@ static void pp_add_magic_miscfunc(void)
tmpl.params[2].def = make_tok_num(NULL, 1);
define_magic("%ord", false, &tmpl);
/* %b2hs() function */
nasm_zero(tmpl);
tmpl.nparam = 2;
tmpl.expand = stdmac_b2hs;
tmpl.recursive = true;
nasm_newn(tmpl.params, tmpl.nparam);
tmpl.params[0].flags = SPARM_STR|SPARM_CONDQUOTE;
tmpl.params[1].flags = SPARM_STR|SPARM_CONDQUOTE|SPARM_OPTIONAL;
tmpl.params[1].def = make_tok_qstr_len(NULL, "", 0);
define_magic("%b2hs", false, &tmpl);
/* %find[i]() functions */
for (i = 0; i < 2; i++) {
static const char * const names[] = { "%findi", "%find" };

View File

@ -17,9 +17,9 @@ It is the production version of NASM since 2025.
\b Add support for the APX and AVX10 instruction sets, and various
miscellaneous new instructions.
\b Add new preprocessor functions: \c{%chr()}, \c{%depend()},
\c{%find()}, \c{%findi()}, \c{%null()}, \c{%ord()},
\c{%pathsearch()}, and \c{%realpath()}. See \k{ppfunc}.
\b Add new preprocessor functions: \c{%b2hs()}, \c{%chr()},
\c{%depend()}, \c{%find()}, \c{%findi()}, \c{%hs2b()}, \c{%null()},
\c{%ord()}, \c{%pathsearch()}, and \c{%realpath()}. See \k{ppfunc}.
\b New preprocessor directive \c{%note} to insert a note in the list
file, without issuing an external diagnostic. Unlike a comment, it

View File

@ -714,6 +714,15 @@ single token containing a decimal number; no minus sign will be
emitted even if the input value is the maximum negative number.
\S{f_b2hs} \i\c{%b2hs()} Function
The \c{%b2hs()} function takes a quoted string and an optional
separator string, and expands to a quoted string containing a packed
hexadecimal form of the bytes of the first string, separated by the
separator string if applicable. This is the inverse of the \c{%hs2b()}
function, see \k{f_hs2b}.
\S{f_chr} \i\c{%chr()} Function
The \c{%chr()} function evaluates its arguments as integers, then
@ -759,6 +768,7 @@ This is the function equivalent of the \c{%depend} directive, see
See also the \c{%pathsearch()} function (\k{f_pathsearch}).
\S{f_eval} \i\c{%eval()} Function
The \c{%eval()} function evaluates its argument as a numeric
@ -805,6 +815,23 @@ Equivalent to \i\c\{%eval()}, except that the results generated are
given as unsigned hexadecimal, with a \c{0x} prefix.
\S{f_hs2b} \i\c{%hs2b()} Function
The \c{%hs2b()} function takes one or more quoted strings containing
hexadecimal numbers and optional separators (any character that is not
a valid hexadecimal digit is considered a separator) and expands to a
quoted string containing the bytes encoded in the hexadecimal
string. Every pair of hexadecimal digits encodes a byte, but a
separator will always terminate the encoding of a byte. Thus, these
two statements will produce the same output:
\c db 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09
\c db %hs2b("00010203 4 0506 07 8","9")
This can be used to compactly encode long strings of binary data in
source code.
\S{f_is} \i\c{%is()}\I\c{%isn()} Family Functions
Each \c{%if} conditional assembly family directive (see \k{condasm})

View File

@ -292,6 +292,12 @@ int64_t readstrnum(char *str, int length, bool *warn);
int numstr(char *buf, size_t buflen, uint64_t n,
int digits, unsigned int base, bool ucase);
/* Digit alphabets: entry 0 is the lower case version, entry 1 the upper case one */
extern const char * const nasmlib_digit_chars[2];

/*
 * Return the digit alphabet to use for number formatting: the upper
 * case version if ucase is true, otherwise the lower case version.
 */
static inline const char *nasm_digit_chars(bool ucase)
{
    return ucase ? nasmlib_digit_chars[1] : nasmlib_digit_chars[0];
}
/*
* seg_alloc: allocate a hitherto unused segment number.
*/

View File

@ -124,4 +124,15 @@ static inline void nasm_ctype_tasm_mode(void)
/* No differences at the present moment */
}
/*
 * Convert one character to its hexadecimal digit value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F'.
 * Returns a value >= 16 if not a valid hex digit.
 */
static inline unsigned int nasm_hexval(char c)
{
    unsigned int v = (unsigned char) c;

    if (v >= '0' && v <= '9')
        return v - '0';

    v |= 0x20;                  /* Fold upper case onto lower case */

    /*
     * Anything below 'a' must be rejected explicitly: the unsigned
     * subtraction below would wrap around, and for '@' and '`'
     * (both 0x60 after case folding) it would wrap to exactly 9,
     * making them look like valid digits.
     */
    if (v < 'a')
        return 16;

    return v - 'a' + 10;
}
#endif /* NASM_NCTYPE_H */

View File

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 2023 The NASM Authors - All Rights Reserved
* Copyright 2023-2025 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@ -33,6 +33,20 @@
#include "nasmlib.h"
/*
 * Digit alphabets for converting numbers to text. Each alphabet is
 * 64 characters long: the decimal digits, the letters in the preferred
 * case, the letters in the other case, and finally '@' and '_'.
 */
const char * const nasmlib_digit_chars[2] = {
    /* Lower case preferred */
    [0] = "0123456789"
          "abcdefghijklmnopqrstuvwxyz"
          "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
          "@_",

    /* Upper case preferred */
    [1] = "0123456789"
          "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
          "abcdefghijklmnopqrstuvwxyz"
          "@_"
};
/*
* Produce an unsigned integer string from a number with a specified
* base, digits and signedness.
@ -40,21 +54,7 @@
int numstr(char *buf, size_t buflen, uint64_t n,
int digits, unsigned int base, bool ucase)
{
static const char digit_chars[2][NUMSTR_MAXBASE+1] =
{
/* Lower case version */
"0123456789"
"abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"@_",
/* Upper case version */
"0123456789"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"@_"
};
const char * const dchars = digit_chars[ucase];
const char * const dchars = nasm_digit_chars(ucase);
bool moredigits = digits <= 0;
char *p;
int len;

View File

@ -2,3 +2,8 @@
db %ord("Hello, World!")
db %ord("Hello, World!",1,-1)
db %chr()
db %b2hs("Hello, World!")
db %b2hs("Hello, World!",':')
db %hs2b("303132 33 34 35 3 6 3 78 9", "abcd")
db 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09
db %hs2b("00010203 4 0506 07 8","9")