diff --git a/asm/floats.c b/asm/floats.c index 25541aea..662d49f8 100644 --- a/asm/floats.c +++ b/asm/floats.c @@ -495,17 +495,6 @@ static bool ieee_round(bool minus, fp_limb *mant, int bits) return false; } -/* Returns a value >= 16 if not a valid hex digit */ -static unsigned int hexval(char c) -{ - unsigned int v = (unsigned char) c; - - if (v >= '0' && v <= '9') - return v - '0'; - else - return (v|0x20) - 'a' + 10; -} - /* Handle floating-point numbers with radix 2^bits and binary exponent */ static bool ieee_flconvert_bin(const char *string, int bits, fp_limb *mant, int32_t *exponent) @@ -535,7 +524,7 @@ static bool ieee_flconvert_bin(const char *string, int bits, nasm_nonfatal("too many periods in floating-point constant"); return false; } - } else if ((v = hexval(c)) < (unsigned int)radix) { + } else if ((v = nasm_hexval(c)) < (unsigned int)radix) { if (!seendigit && v) { int l = log2tbl[v]; diff --git a/asm/preproc.c b/asm/preproc.c index 0d425a3e..63b30bc3 100644 --- a/asm/preproc.c +++ b/asm/preproc.c @@ -7688,6 +7688,7 @@ stdmac_strcat(const SMacro *s, Token **params, int nparams) int i; size_t len = 0; char *str, *p; + Token *t; (void)s; @@ -7696,14 +7697,112 @@ stdmac_strcat(const SMacro *s, Token **params, int nparams) len += params[i]->len; } - nasm_newn(str, len+1); - p = str; + p = str = nasm_malloc(len+1); for (i = 0; i < nparams; i++) { p = mempcpy(p, tok_text(params[i]), params[i]->len); } + *p = '\0'; - return make_tok_qstr_len(NULL, str, len); + t = make_tok_qstr_len(NULL, str, p - str); + nasm_free(str); + return t; +} + +/* %hs2b() function */ +static Token * +stdmac_hs2b(const SMacro *s, Token **params, int nparams) +{ + int i; + size_t len = 0; + char *str, *q; + Token *t; + + (void)s; + + for (i = 0; i < nparams; i++) { + unquote_token(params[i]); + len += (params[i]->len + 1) >> 1; /* Maximum possible */ + } + + q = str = nasm_malloc(len+1); + + for (i = 0; i < nparams; i++) { + const char *p = tok_text(params[i]); + unsigned int j; + 
unsigned int len = params[i]->len; + int v = -1; + + for (j = 0; j < len; j++) { + unsigned int hv = nasm_hexval(*p++); + if (hv > 15) { + /* Separator character or end of string */ + if (v >= 0) + *q++ = v; + v = -1; + } else { + if (v >= 0) { + *q++ = (v << 4) + hv; + v = -1; + } else { + v = hv; + } + } + } + /* Partial byte at the end? */ + if (v >= 0) + *q++ = v; + } + *q = '\0'; + + t = make_tok_qstr_len(NULL, str, q - str); + nasm_free(str); + return t; +} + +/* %b2hs() function */ +static Token * +stdmac_b2hs(const SMacro *s, Token **params, int nparams) +{ + const char * const dchars = nasm_digit_chars(false); + const char *p; + const char *sep; + uint8_t b; + char *str, *q; + size_t bytes, len, seplen; + size_t i; + Token *t; + + (void)s; + (void)nparams; + + p = unquote_token(params[0]); + + if (!params[0]->len) + return make_tok_qstr_len(NULL, "", 0); + + sep = unquote_token(params[1]); + bytes = params[0]->len; + seplen = params[1]->len; + len = (bytes << 1) + (seplen * (bytes-1)); + + q = str = nasm_malloc(len+1); + + b = *p++; + *q++ = dchars[b >> 4]; + *q++ = dchars[b & 15]; + for (i = 1; i < bytes; i++) { + if (seplen) + q = mempcpy(q, sep, seplen); + b = *p++; + *q++ = dchars[b >> 4]; + *q++ = dchars[b & 15]; + } + *q = '\0'; + + t = make_tok_qstr_len(NULL, str, q - str); + nasm_free(str); + return t; } /* %substr() function */ @@ -8223,12 +8322,11 @@ static void pp_add_magic_simple(void) { "__?PTR?__", true, 0, 0, stdmac_ptr }, { "__?DEFAULT?__", true, 0, 0, stdmac_default }, { "%abs", false, 1, SPARM_EVAL, stdmac_abs }, -// { "%b2hs", false, 1, SPARM_STR|SPARM_CONDQUOTE, stdmac_b2hs }, { "%chr", false, 1, SPARM_EVAL|SPARM_OPTIONAL|SPARM_VARADIC, stdmac_chr }, { "%count", false, 1, SPARM_VARADIC, stdmac_count }, { "%depend", false, 1, SPARM_PLAIN, stdmac_depend }, { "%eval", false, 1, SPARM_EVAL|SPARM_VARADIC, stdmac_join }, -// { "%hs2b", false, 1, SPARM_STR|SPARM_CONDQUOTE, stdmac_hs2b }, + { "%hs2b", false, 1, 
SPARM_STR|SPARM_CONDQUOTE|SPARM_VARADIC, stdmac_hs2b }, { "%map", false, 1, SPARM_VARADIC, stdmac_map }, { "%null", false, 1, SPARM_GREEDY, stdmac_null }, { "%pathsearch", false, 1, SPARM_PLAIN, stdmac_pathsearch }, @@ -8381,6 +8479,17 @@ static void pp_add_magic_miscfunc(void) tmpl.params[2].def = make_tok_num(NULL, 1); define_magic("%ord", false, &tmpl); + /* %b2hs() function */ + nasm_zero(tmpl); + tmpl.nparam = 2; + tmpl.expand = stdmac_b2hs; + tmpl.recursive = true; + nasm_newn(tmpl.params, tmpl.nparam); + tmpl.params[0].flags = SPARM_STR|SPARM_CONDQUOTE; + tmpl.params[1].flags = SPARM_STR|SPARM_CONDQUOTE|SPARM_OPTIONAL; + tmpl.params[1].def = make_tok_qstr_len(NULL, "", 0); + define_magic("%b2hs", false, &tmpl); + /* %find[i]() functions */ for (i = 0; i < 2; i++) { static const char * const names[] = { "%findi", "%find" }; diff --git a/doc/changes.src b/doc/changes.src index fe06f171..fd5bff90 100644 --- a/doc/changes.src +++ b/doc/changes.src @@ -17,9 +17,9 @@ It is the production version of NASM since 2025. \b Add support for the APX and AVX10 instruction sets, and various miscellaneous new instructions. -\b Add new preprocessor functions: \c{%chr()}, \c{%depend()}, - \c{%find()}, \c{%findi()}, \c{%null()}, \c{%ord()}, - \c{%pathsearch()}, and \c{%realpath()}. See \k{ppfunc}. +\b Add new preprocessor functions: \c{%b2hs()}, \c{%chr()}, + \c{%depend()}, \c{%find()}, \c{%findi()}, \c{%hs2b()}, \c{%null()}, + \c{%ord()}, \c{%pathsearch()}, and \c{%realpath()}. See \k{ppfunc}. \b New preprocessor directive \c{%note} to insert a note in the list file, without issuing an external diagnosic. Unlike a comment, it diff --git a/doc/preproc.src b/doc/preproc.src index 5caa6e79..c88fb157 100644 --- a/doc/preproc.src +++ b/doc/preproc.src @@ -714,6 +714,15 @@ single token containing a decimal number; no minus sign will be emitted even if the input value is the maximum negative number. 
+\S{f_b2hs} \i\c{%b2hs()} Function + +The \c{%b2hs()} function takes a quoted string and an optional +separator string, and expands to a quoted string containing a packed +hexadecimal form of the bytes of the first string, separated by the +separator string if applicable. This is the inverse of the \c{%hs2b()} +function, see \k{f_hs2b}. + + \S{f_chr} \i\c{%chr()} Function The \c{%chr()} function evaluates its arguments as integers, then @@ -759,6 +768,7 @@ This is the function equivalent of the \c{%depend} directive, see See also the \c{%pathsearch()} function (\k{f_pathsearch}). + \S{f_eval} \i\c{%eval()} Function The \c{%eval()} function evaluates its argument as a numeric @@ -805,6 +815,23 @@ Equivalent to \i\c\{%eval()}, except that the results generated are given as unsigned hexadecimal, with a \c{0x} prefix. +\S{f_hs2b} \i\c{%hs2b()} Function + +The \c{%hs2b()} function takes one or more quoted strings containing +hexadecimal numbers and optional separators (any character that is not +a valid hexadecimal digit is considered a separator) and expands to a +quoted string containing the bytes encoded in the hexadecimal +string. Every pair of hexadecimal digits encodes a byte, but a +separator will always terminate the encoding of a byte. Thus, these +two statements will produce the same output: + +\c db 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09 +\c db %hs2b("00010203 4 0506 07 8","9") + +This can be used to compactly encode long strings of binary data in +source code. 
+ + \S{f_is} \i\c{%is()}\I\c{%isn()} Family Functions Each \c{%if} conditional assembly family directive (see \k{condasm}) diff --git a/include/nasmlib.h b/include/nasmlib.h index 9602c92b..e8777094 100644 --- a/include/nasmlib.h +++ b/include/nasmlib.h @@ -292,6 +292,12 @@ int64_t readstrnum(char *str, int length, bool *warn); int numstr(char *buf, size_t buflen, uint64_t n, int digits, unsigned int base, bool ucase); +extern const char * const nasmlib_digit_chars[2]; +static inline const char *nasm_digit_chars(bool ucase) +{ + return nasmlib_digit_chars[ucase]; +} + /* * seg_alloc: allocate a hitherto unused segment number. */ diff --git a/include/nctype.h b/include/nctype.h index 97852faa..b0a3e85e 100644 --- a/include/nctype.h +++ b/include/nctype.h @@ -124,4 +124,15 @@ static inline void nasm_ctype_tasm_mode(void) /* No differences at the present moment */ } +/* Returns a value >= 16 if not a valid hex digit */ +static inline unsigned int nasm_hexval(char c) +{ + unsigned int v = (unsigned char) c; + + if (v >= '0' && v <= '9') + return v - '0'; + else + return (v|0x20) - 'a' + 10; +} + #endif /* NASM_NCTYPE_H */ diff --git a/nasmlib/numstr.c b/nasmlib/numstr.c index f47d0cb6..40d4e574 100644 --- a/nasmlib/numstr.c +++ b/nasmlib/numstr.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- * * - * Copyright 2023 The NASM Authors - All Rights Reserved + * Copyright 2023-2025 The NASM Authors - All Rights Reserved * See the file AUTHORS included with the NASM distribution for * the specific copyright holders. * @@ -33,6 +33,20 @@ #include "nasmlib.h" +const char * const nasmlib_digit_chars[2] = { + /* Lower case version */ + "0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "@_", + + /* Upper case version */ + "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "@_" +}; + /* * Produce an unsigned integer string from a number with a specified * base, digits and signedness. 
@@ -40,21 +54,7 @@ int numstr(char *buf, size_t buflen, uint64_t n, int digits, unsigned int base, bool ucase) { - static const char digit_chars[2][NUMSTR_MAXBASE+1] = - { - /* Lower case version */ - "0123456789" - "abcdefghijklmnopqrstuvwxyz" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "@_", - - /* Upper case version */ - "0123456789" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "@_" - }; - const char * const dchars = digit_chars[ucase]; + const char * const dchars = nasm_digit_chars(ucase); bool moredigits = digits <= 0; char *p; int len; diff --git a/test/chrord.asm b/test/chrord.asm index 140a4b05..87cc1f3d 100644 --- a/test/chrord.asm +++ b/test/chrord.asm @@ -2,3 +2,8 @@ db %ord("Hello, World!") db %ord("Hello, World!",1,-1) db %chr() + db %b2hs("Hello, World!") + db %b2hs("Hello, World!",':') + db %hs2b("303132 33 34 35 3 6 3 78 9", "abcd") + db 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09 + db %hs2b("00010203 4 0506 07 8","9")