perl/regcomp_debug.c

#ifdef PERL_EXT_RE_BUILD
#include "re_top.h"
#endif

#include "EXTERN.h"
#define PERL_IN_REGEX_ENGINE
#define PERL_IN_REGCOMP_ANY
#define PERL_IN_REGCOMP_DEBUG_C
#include "perl.h"

#ifdef PERL_IN_XSUB_RE
#  include "re_comp.h"
#else
#  include "regcomp.h"
#endif

#include "invlist_inline.h"
#include "unicode_constants.h"
#include "regcomp_internal.h"

#ifdef PERL_RE_BUILD_DEBUG
int
Perl_re_printf(pTHX_ const char *fmt, ...)
{
    va_list ap;
    int result;
    PerlIO *f= Perl_debug_log;
    PERL_ARGS_ASSERT_RE_PRINTF;
    va_start(ap, fmt);
    result = PerlIO_vprintf(f, fmt, ap);
    va_end(ap);
    return result;
}

int
Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...)
{
    va_list ap;
    int result;
    PerlIO *f= Perl_debug_log;
    PERL_ARGS_ASSERT_RE_INDENTF;
    va_start(ap, depth);
    PerlIO_printf(f, "%*s", ( (int)depth % 20 ) * 2, "");
    result = PerlIO_vprintf(f, fmt, ap);
    va_end(ap);
    return result;
}

void
Perl_debug_show_study_flags(pTHX_ U32 flags, const char *open_str,
                                    const char *close_str)
{
    PERL_ARGS_ASSERT_DEBUG_SHOW_STUDY_FLAGS;
    if (!flags)
        return;

    Perl_re_printf( aTHX_  "%s", open_str);
    DEBUG_SHOW_STUDY_FLAG(flags, SF_BEFORE_SEOL);
    DEBUG_SHOW_STUDY_FLAG(flags, SF_BEFORE_MEOL);
    DEBUG_SHOW_STUDY_FLAG(flags, SF_IS_INF);
    DEBUG_SHOW_STUDY_FLAG(flags, SF_HAS_PAR);
    DEBUG_SHOW_STUDY_FLAG(flags, SF_IN_PAR);
    DEBUG_SHOW_STUDY_FLAG(flags, SF_HAS_EVAL);
    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_SUBSTR);
    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS_AND);
    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS_OR);
    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS);
    DEBUG_SHOW_STUDY_FLAG(flags, SCF_WHILEM_VISITED_POS);
    DEBUG_SHOW_STUDY_FLAG(flags, SCF_TRIE_RESTUDY);
    DEBUG_SHOW_STUDY_FLAG(flags, SCF_SEEN_ACCEPT);
    DEBUG_SHOW_STUDY_FLAG(flags, SCF_TRIE_DOING_RESTUDY);
    DEBUG_SHOW_STUDY_FLAG(flags, SCF_IN_DEFINE);
    Perl_re_printf( aTHX_  "%s", close_str);
}

void
Perl_debug_studydata(pTHX_ const char *where, scan_data_t *data,
                    U32 depth, int is_inf,
                    SSize_t min, SSize_t stopmin, SSize_t delta)
{
    PERL_ARGS_ASSERT_DEBUG_STUDYDATA;
    DECLARE_AND_GET_RE_DEBUG_FLAGS;

    DEBUG_OPTIMISE_MORE_r({
        if (!data) {
            Perl_re_indentf(aTHX_  "%s: NO DATA",
                depth,
                where);
            return;
        }
        Perl_re_indentf(aTHX_  "%s: M/S/D: %" IVdf "/%" IVdf "/%" IVdf " Pos:%" IVdf "/%" IVdf " Flags: 0x%" UVXf,
            depth,
            where,
            min, stopmin, delta,
            (IV)data->pos_min,
            (IV)data->pos_delta,
            (UV)data->flags
        );

        Perl_debug_show_study_flags(aTHX_ data->flags," [","]");

        Perl_re_printf( aTHX_
            " Whilem_c: %" IVdf " Lcp: %" IVdf " %s",
            (IV)data->whilem_c,
            (IV)(data->last_closep ? *((data)->last_closep) : -1),
            is_inf ? "INF " : ""
        );

        if (data->last_found) {
            int i;
            Perl_re_printf(aTHX_
                "Last:'%s' %" IVdf ":%" IVdf "/%" IVdf,
                    SvPVX_const(data->last_found),
                    (IV)data->last_end,
                    (IV)data->last_start_min,
                    (IV)data->last_start_max
            );

            for (i = 0; i < 2; i++) {
                Perl_re_printf(aTHX_
                    " %s%s: '%s' @ %" IVdf "/%" IVdf,
                    data->cur_is_floating == i ? "*" : "",
                    i ? "Float" : "Fixed",
                    SvPVX_const(data->substrs[i].str),
                    (IV)data->substrs[i].min_offset,
                    (IV)data->substrs[i].max_offset
                );
                Perl_debug_show_study_flags(aTHX_ data->substrs[i].flags," [","]");
            }
        }

        Perl_re_printf( aTHX_ "\n");
    });
}


void
Perl_debug_peep(pTHX_ const char *str, const RExC_state_t *pRExC_state,
                regnode *scan, U32 depth, U32 flags)
{
    PERL_ARGS_ASSERT_DEBUG_PEEP;
    DECLARE_AND_GET_RE_DEBUG_FLAGS;

    DEBUG_OPTIMISE_r({
        regnode *Next;

        if (!scan)
            return;
        Next = regnext(scan);
        regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state);
        Perl_re_indentf( aTHX_   "%s>%3d: %s (%d)",
            depth,
            str,
            REG_NODE_NUM(scan), SvPV_nolen_const(RExC_mysv),
            Next ? (REG_NODE_NUM(Next)) : 0 );
        Perl_debug_show_study_flags(aTHX_ flags," [ ","]");
        Perl_re_printf( aTHX_  "\n");
   });
}

const regnode *
Perl_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
            const regnode *last, const regnode *plast,
            SV* sv, I32 indent, U32 depth)
{
    const regnode *next;
    const regnode *optstart= NULL;

    RXi_GET_DECL(r, ri);
    DECLARE_AND_GET_RE_DEBUG_FLAGS;

    PERL_ARGS_ASSERT_DUMPUNTIL;

#ifdef DEBUG_DUMPUNTIL
    Perl_re_printf( aTHX_  "--- %d : %d - %d - %d\n", indent, node-start,
        last ? last-start : 0, plast ? plast-start : 0);
#endif

    if (plast && plast < last)
        last = plast;

    while (node && (!last || node < last)) {
        const U8 op = OP(node);

        if (op == CLOSE || op == SRCLOSE || op == WHILEM)
            indent--;
        next = regnext((regnode *)node);
        const regnode *after = regnode_after((regnode *)node,0);

        /* Where, what. */
        if (op == OPTIMIZED) {
            if (!optstart && RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE))
                optstart = node;
            else
                goto after_print;
        } else
            CLEAR_OPTSTART;

        regprop(r, sv, node, NULL, NULL);
        Perl_re_printf( aTHX_  "%4" IVdf ":%*s%s", (IV)(node - start),
                      (int)(2*indent + 1), "", SvPVX_const(sv));

        if (op != OPTIMIZED) {
            if (next == NULL)           /* Next ptr. */
                Perl_re_printf( aTHX_  " (0)");
            else if (REGNODE_TYPE(op) == BRANCH
                     && REGNODE_TYPE(OP(next)) != BRANCH )
                Perl_re_printf( aTHX_  " (FAIL)");
            else
                Perl_re_printf( aTHX_  " (%" IVdf ")", (IV)(next - start));
            Perl_re_printf( aTHX_ "\n");
        }

      after_print:
        if (REGNODE_TYPE(op) == BRANCHJ) {
            assert(next);
            const regnode *nnode = (OP(next) == LONGJMP
                                   ? regnext((regnode *)next)
                                   : next);
            if (last && nnode > last)
                nnode = last;
            DUMPUNTIL(after, nnode);
        }
        else if (REGNODE_TYPE(op) == BRANCH) {
            assert(next);
            DUMPUNTIL(after, next);
        }
        else if ( REGNODE_TYPE(op)  == TRIE ) {
            const regnode *this_trie = node;
            const U32 n = ARG1u(node);
            const reg_ac_data * const ac = op >= AHOCORASICK ?
               (reg_ac_data *)ri->data->data[n] :
               NULL;
            const reg_trie_data * const trie =
                (reg_trie_data*)ri->data->data[op < AHOCORASICK ? n : ac->trie];
#ifdef DEBUGGING
            AV *const trie_words
                           = MUTABLE_AV(ri->data->data[n + TRIE_WORDS_OFFSET]);
#endif
            const regnode *nextbranch= NULL;
            I32 word_idx;
            SvPVCLEAR(sv);
            for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
                SV ** const elem_ptr = av_fetch_simple(trie_words, word_idx, 0);

                Perl_re_indentf( aTHX_  "%s ",
                    indent+3,
                    elem_ptr
                    ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr),
                                SvCUR(*elem_ptr), PL_dump_re_max_len,
                                PL_colors[0], PL_colors[1],
                                (SvUTF8(*elem_ptr)
                                 ? PERL_PV_ESCAPE_UNI
                                 : 0)
                                | PERL_PV_PRETTY_ELLIPSES
                                | PERL_PV_PRETTY_LTGT
                            )
                    : "???"
                );
                if (trie->jump) {
                    U16 dist = trie->jump[word_idx+1];
                    Perl_re_printf( aTHX_  "(%" UVuf ")\n",
                               (UV)((dist ? this_trie + dist : next) - start));
                    if (dist) {
                        if (!nextbranch)
                            nextbranch = this_trie + trie->jump[0];
                        DUMPUNTIL(this_trie + dist, nextbranch);
                    }
                    if (nextbranch && REGNODE_TYPE(OP(nextbranch))==BRANCH)
                        nextbranch = regnext((regnode *)nextbranch);
                } else {
                    Perl_re_printf( aTHX_  "\n");
                }
            }
            if (last && next > last)
                node = last;
            else
                node = next;
        }
        else if ( op == CURLY ) {   /* "next" might be very big: optimizer */
            DUMPUNTIL(after, after + 1); /* +1 is NOT a REGNODE_AFTER */
        }
        else if (REGNODE_TYPE(op) == CURLY && op != CURLYX) {
            assert(next);
            DUMPUNTIL(after, next);
        }
        else if ( op == PLUS || op == STAR) {
            DUMPUNTIL(after, after + 1); /* +1 NOT a REGNODE_AFTER */
        }
        else if (REGNODE_TYPE(op) == EXACT || op == ANYOFHs) {
            /* Literal string, where present. */
            node = (const regnode *)REGNODE_AFTER_varies(node);
        }
        else {
            node = REGNODE_AFTER_opcode(node,op);
        }
        if (op == CURLYX || op == OPEN || op == SROPEN)
            indent++;
        if (REGNODE_TYPE(op) == END)
            break;
    }
    CLEAR_OPTSTART;
#ifdef DEBUG_DUMPUNTIL
    Perl_re_printf( aTHX_  "--- %d\n", (int)indent);
#endif
    return node;
}

#endif  /* PERL_RE_BUILD_DEBUG */

/*
 - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
 */
#ifdef DEBUGGING
static void
S_regdump_intflags(pTHX_ const char *lead, const U32 flags)
{
    int bit;
    int set = 0;

    STATIC_ASSERT_STMT(REG_INTFLAGS_NAME_SIZE <= sizeof(flags) * CHARBITS);

    for (bit = 0; bit < REG_INTFLAGS_NAME_SIZE; bit++) {
        if (flags & (1 << bit)) {
            if (!set++ && lead)
                Perl_re_printf( aTHX_  "%s", lead);
            Perl_re_printf( aTHX_  "%s ", PL_reg_intflags_name[bit]);
        }
    }
    if (lead)  {
        if (set)
            Perl_re_printf( aTHX_  "\n");
        else
            Perl_re_printf( aTHX_  "%s[none-set]\n", lead);
    }
}

static void
S_regdump_extflags(pTHX_ const char *lead, const U32 flags)
{
    int bit;
    int set = 0;
    regex_charset cs;

    STATIC_ASSERT_STMT(REG_EXTFLAGS_NAME_SIZE <= sizeof(flags) * CHARBITS);

    for (bit = 0; bit < REG_EXTFLAGS_NAME_SIZE; bit++) {
        if (flags & (1U << bit)) {
            if ((1U << bit) & RXf_PMf_CHARSET) {  /* Output separately, below */
                continue;
            }
            if (!set++ && lead)
                Perl_re_printf( aTHX_  "%s", lead);
            Perl_re_printf( aTHX_  "%s ", PL_reg_extflags_name[bit]);
        }
    }
    if ((cs = get_regex_charset(flags)) != REGEX_DEPENDS_CHARSET) {
            if (!set++ && lead) {
                Perl_re_printf( aTHX_  "%s", lead);
            }
            switch (cs) {
                case REGEX_UNICODE_CHARSET:
                    Perl_re_printf( aTHX_  "UNICODE");
                    break;
                case REGEX_LOCALE_CHARSET:
                    Perl_re_printf( aTHX_  "LOCALE");
                    break;
                case REGEX_ASCII_RESTRICTED_CHARSET:
                    Perl_re_printf( aTHX_  "ASCII-RESTRICTED");
                    break;
                case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
                    Perl_re_printf( aTHX_  "ASCII-MORE_RESTRICTED");
                    break;
                default:
                    Perl_re_printf( aTHX_  "UNKNOWN CHARACTER SET");
                    break;
            }
    }
    if (lead)  {
        if (set)
            Perl_re_printf( aTHX_  "\n");
        else
            Perl_re_printf( aTHX_  "%s[none-set]\n", lead);
    }
}
#endif

void
Perl_regdump(pTHX_ const regexp *r)
{
#ifdef DEBUGGING
    int i;
    SV * const sv = sv_newmortal();
    SV *dsv= sv_newmortal();
    RXi_GET_DECL(r, ri);
    DECLARE_AND_GET_RE_DEBUG_FLAGS;

    PERL_ARGS_ASSERT_REGDUMP;

    (void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);

    /* Header fields of interest. */
    for (i = 0; i < 2; i++) {
        if (r->substrs->data[i].substr) {
            RE_PV_QUOTED_DECL(s, 0, dsv,
                            SvPVX_const(r->substrs->data[i].substr),
                            RE_SV_DUMPLEN(r->substrs->data[i].substr),
                            PL_dump_re_max_len);
            Perl_re_printf( aTHX_
                          "%s %s%s at %" IVdf "..%" UVuf " ",
                          i ? "floating" : "anchored",
                          s,
                          RE_SV_TAIL(r->substrs->data[i].substr),
                          (IV)r->substrs->data[i].min_offset,
                          (UV)r->substrs->data[i].max_offset);
        }
        else if (r->substrs->data[i].utf8_substr) {
            RE_PV_QUOTED_DECL(s, 1, dsv,
                            SvPVX_const(r->substrs->data[i].utf8_substr),
                            RE_SV_DUMPLEN(r->substrs->data[i].utf8_substr),
                            30);
            Perl_re_printf( aTHX_
                          "%s utf8 %s%s at %" IVdf "..%" UVuf " ",
                          i ? "floating" : "anchored",
                          s,
                          RE_SV_TAIL(r->substrs->data[i].utf8_substr),
                          (IV)r->substrs->data[i].min_offset,
                          (UV)r->substrs->data[i].max_offset);
        }
    }

    if (r->check_substr || r->check_utf8)
        Perl_re_printf( aTHX_
                      (const char *)
                      (   r->check_substr == r->substrs->data[1].substr
                       && r->check_utf8   == r->substrs->data[1].utf8_substr
                       ? "(checking floating" : "(checking anchored"));
    if (r->intflags & PREGf_NOSCAN)
        Perl_re_printf( aTHX_  " noscan");
    if (r->extflags & RXf_CHECK_ALL)
        Perl_re_printf( aTHX_  " isall");
    if (r->check_substr || r->check_utf8)
        Perl_re_printf( aTHX_  ") ");

    if (ri->regstclass) {
        regprop(r, sv, ri->regstclass, NULL, NULL);
        Perl_re_printf( aTHX_  "stclass %s ", SvPVX_const(sv));
    }
    if (r->intflags & PREGf_ANCH) {
        Perl_re_printf( aTHX_  "anchored");
        if (r->intflags & PREGf_ANCH_MBOL)
            Perl_re_printf( aTHX_  "(MBOL)");
        if (r->intflags & PREGf_ANCH_SBOL)
            Perl_re_printf( aTHX_  "(SBOL)");
        if (r->intflags & PREGf_ANCH_GPOS)
            Perl_re_printf( aTHX_  "(GPOS)");
        Perl_re_printf( aTHX_ " ");
    }
    if (r->intflags & PREGf_GPOS_SEEN)
        Perl_re_printf( aTHX_  "GPOS:%" UVuf " ", (UV)r->gofs);
    if (r->intflags & PREGf_SKIP)
        Perl_re_printf( aTHX_  "plus ");
    if (r->intflags & PREGf_IMPLICIT)
        Perl_re_printf( aTHX_  "implicit ");
    Perl_re_printf( aTHX_  "minlen %" IVdf " ", (IV)r->minlen);
    if (r->extflags & RXf_EVAL_SEEN)
        Perl_re_printf( aTHX_  "with eval ");
    Perl_re_printf( aTHX_  "\n");
    DEBUG_FLAGS_r({
        regdump_extflags("r->extflags: ", r->extflags);
        regdump_intflags("r->intflags: ", r->intflags);
    });
#else
    PERL_ARGS_ASSERT_REGDUMP;
    PERL_UNUSED_CONTEXT;
    PERL_UNUSED_ARG(r);
#endif  /* DEBUGGING */
}

/* Should be synchronized with ANYOF_ #defines in regcomp.h */
#ifdef DEBUGGING

#  if   CC_WORDCHAR_ != 0 || CC_DIGIT_ != 1        || CC_ALPHA_ != 2    \
     || CC_LOWER_ != 3    || CC_UPPER_ != 4        || CC_PUNCT_ != 5    \
     || CC_PRINT_ != 6    || CC_ALPHANUMERIC_ != 7 || CC_GRAPH_ != 8    \
     || CC_CASED_ != 9    || CC_SPACE_ != 10       || CC_BLANK_ != 11   \
     || CC_XDIGIT_ != 12  || CC_CNTRL_ != 13       || CC_ASCII_ != 14   \
     || CC_VERTSPACE_ != 15
#   error Need to adjust order of anyofs[]
#  endif
static const char * const anyofs[] = {
    "\\w",
    "\\W",
    "\\d",
    "\\D",
    "[:alpha:]",
    "[:^alpha:]",
    "[:lower:]",
    "[:^lower:]",
    "[:upper:]",
    "[:^upper:]",
    "[:punct:]",
    "[:^punct:]",
    "[:print:]",
    "[:^print:]",
    "[:alnum:]",
    "[:^alnum:]",
    "[:graph:]",
    "[:^graph:]",
    "[:cased:]",
    "[:^cased:]",
    "\\s",
    "\\S",
    "[:blank:]",
    "[:^blank:]",
    "[:xdigit:]",
    "[:^xdigit:]",
    "[:cntrl:]",
    "[:^cntrl:]",
    "[:ascii:]",
    "[:^ascii:]",
    "\\v",
    "\\V"
};
#endif

/*
- regprop - printable representation of opcode, with run time support
*/

void
Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_info *reginfo, const RExC_state_t *pRExC_state)
{
#ifdef DEBUGGING
    U8 k;
    const U8 op = OP(o);
    RXi_GET_DECL(prog, progi);
    DECLARE_AND_GET_RE_DEBUG_FLAGS;

    PERL_ARGS_ASSERT_REGPROP;

    SvPVCLEAR(sv);

    if (op > REGNODE_MAX) {          /* regnode.type is unsigned */
        if (pRExC_state) {  /* This gives more info, if we have it */
            FAIL3("panic: corrupted regexp opcode %d > %d",
                  (int)op, (int)REGNODE_MAX);
        }
        else {
            croak("panic: corrupted regexp opcode %d > %d",
                             (int)op, (int)REGNODE_MAX);
        }
    }
    sv_catpv(sv, REGNODE_NAME(op)); /* Take off const! */

    k = REGNODE_TYPE(op);
    if (op == BRANCH) {
        sv_catpvf(sv, " (buf:%" IVdf "/%" IVdf ")", (IV)ARG1a(o),(IV)ARG1b(o));
    }
    else if (op == BRANCHJ) {
        sv_catpvf(sv, " (buf:%" IVdf "/%" IVdf ")", (IV)ARG2a(o),(IV)ARG2b(o));
    }
    else if (k == EXACT) {
        sv_catpvs(sv, " ");
        /* Using is_utf8_string() (via PERL_PV_UNI_DETECT)
         * is a crude hack but it may be the best for now since
         * we have no flag "this EXACTish node was UTF-8"
         * --jhi */
        pv_pretty(sv, STRING(o), STR_LEN(o), PL_dump_re_max_len,
                  PL_colors[0], PL_colors[1],
                  PERL_PV_ESCAPE_UNI_DETECT |
                  PERL_PV_ESCAPE_NONASCII   |
                  PERL_PV_PRETTY_ELLIPSES   |
                  PERL_PV_PRETTY_LTGT       |
                  PERL_PV_PRETTY_NOCLEAR
                  );
    } else if (k == TRIE) {
        /* print the details of the trie in dumpuntil instead, as
         * progi->data isn't available here */
        const U32 n = ARG1u(o);
        const reg_ac_data * const ac = IS_TRIE_AC(op) ?
               (reg_ac_data *)progi->data->data[n] :
               NULL;
        const reg_trie_data * const trie
            = (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie];

        sv_catpvf(sv, "-%s", REGNODE_NAME(FLAGS(o)));
        DEBUG_TRIE_COMPILE_r({
          if (trie->jump)
            sv_catpvs(sv, "(JUMP)");
          sv_catpvf(sv,
            "<S:%" UVuf "/%" IVdf " W:%" UVuf " L:%" UVuf "/%" UVuf " C:%" UVuf "/%" UVuf ">",
            (UV)trie->startstate,
            (IV)trie->statecount-1, /* -1 because of the unused 0 element */
            (UV)trie->wordcount,
            (UV)trie->minlen,
            (UV)trie->maxlen,
            (UV)TRIE_CHARCOUNT(trie),
            (UV)trie->uniquecharcount
          );
        });
        if ( IS_ANYOF_TRIE(op) || trie->bitmap ) {
            sv_catpvs(sv, "[");
            (void) put_charclass_bitmap_innards(sv,
                                                ((IS_ANYOF_TRIE(op))
                                                 ? ANYOF_BITMAP(o)
                                                 : TRIE_BITMAP(trie)),
                                                NULL,
                                                NULL,
                                                NULL,
                                                0,
                                                false
                                               );
            sv_catpvs(sv, "]");
        }
        if (trie->before_paren || trie->after_paren)
            sv_catpvf(sv, " (buf:%" IVdf "/%" IVdf ")",
                    (IV)trie->before_paren,(IV)trie->after_paren);
    } else if (k == CURLY) {
        U32 lo = ARG1i(o), hi = ARG2i(o);
        if (ARG3u(o)) /* check both ARG3a and ARG3b at the same time */
            sv_catpvf(sv, "<%d:%d>", ARG3a(o),ARG3b(o)); /* paren before, paren after */
        if (op == CURLYM || op == CURLYN || op == CURLYX)
            sv_catpvf(sv, "[%d]", FLAGS(o)); /* Parenth number */
        sv_catpvf(sv, "{%u,", (unsigned) lo);
        if (hi == REG_INFTY)
            sv_catpvs(sv, "INFTY");
        else
            sv_catpvf(sv, "%u", (unsigned) hi);
        sv_catpvs(sv, "}");
    }
    else if (k == WHILEM && FLAGS(o))                   /* Ordinal/of */
        sv_catpvf(sv, "[%d/%d]", FLAGS(o) & 0xf, FLAGS(o)>>4);
    else if (k == REF || k == OPEN || k == CLOSE
             || k == GROUPP || op == ACCEPT)
    {
        AV *name_list= NULL;
        U32 parno = (op == ACCEPT)              ? ARG2u(o) :
                   (op == OPEN || op == CLOSE) ? PARNO(o) :
                                                 ARG1u(o);
        if ( RXp_PAREN_NAMES(prog) ) {
            name_list = MUTABLE_AV(progi->data->data[progi->name_list_idx]);
        } else if ( pRExC_state ) {
            name_list = RExC_paren_name_list;
        }
        if ( name_list ) {
            if ( k != REF || (op < REFN)) {
                UV logical_parno = parno;
                if (prog->parno_to_logical)
                    logical_parno = prog->parno_to_logical[parno];

                sv_catpvf(sv, "%" UVuf, (UV)logical_parno);     /* Parenth number */
                if (parno != logical_parno)
                    sv_catpvf(sv, "/%" UVuf, (UV)parno);        /* Parenth number */

                SV **name= av_fetch_simple(name_list, parno, 0 );
                if (name)
                    sv_catpvf(sv, " '%" SVf "'", SVfARG(*name));
            }
            else
            if (parno > 0) {
                /* parno must always be larger than 0 for this block
                 * as it represents a slot into the data array, which
                 * has the 0 slot reserved for a placeholder so any valid
                 * index into it is always true, eg non-zero
                 * see the '%' "what" type and the implementation of
                 * S_reg_add_data()
                 */
                SV *sv_dat= MUTABLE_SV(progi->data->data[ parno ]);
                I32 *nums = (I32*)SvPVX(sv_dat);
                SV **name= av_fetch_simple(name_list, nums[0], 0 );
                I32 n;
                if (name) {
                    for ( n = 0; n < SvIVX(sv_dat); n++ ) {
                        sv_catpvf(sv, "%s%" IVdf,
                                    (n ? "," : ""), (IV)nums[n]);
                    }
                    sv_catpvf(sv, " '%" SVf "'", SVfARG(*name));
                }
            }
        } else if (parno > 0) {
            UV logical_parno = parno;
            if (prog->parno_to_logical)
                logical_parno = prog->parno_to_logical[parno];

            sv_catpvf(sv, "%" UVuf, (UV)logical_parno);     /* Parenth number */
            if (logical_parno != parno)
                sv_catpvf(sv, "/%" UVuf, (UV)parno);     /* Parenth number */

        }
        if ( k == REF ) {
            sv_catpvf(sv, " <%" IVdf ">", (IV)ARG2i(o));
        }
        if ( k == REF && reginfo) {
            U32 n = ARG1u(o);  /* which paren pair */
            I32 ln = RXp_OFFS_START(prog,n);
            if (RXp_LASTPAREN(prog) < n || ln == -1 || RXp_OFFS_END(prog,n) == -1)
                sv_catpvf(sv, ": FAIL");
            else if (ln == RXp_OFFS_END(prog,n))
                sv_catpvf(sv, ": ACCEPT - EMPTY STRING");
            else {
                const char *s = reginfo->strbeg + ln;
                sv_catpvf(sv, ": ");
                Perl_pv_pretty( aTHX_ sv, s, RXp_OFFS_END(prog,n) - RXp_OFFS_START(prog,n), 32, 0, 0,
                    PERL_PV_ESCAPE_UNI_DETECT|PERL_PV_PRETTY_NOCLEAR|PERL_PV_PRETTY_ELLIPSES|PERL_PV_PRETTY_QUOTE );
            }
        }
    } else if (k == GOSUB) {
        AV *name_list= NULL;
        IV parno = ARG1u(o);
        IV logical_parno = (parno && prog->parno_to_logical)
                         ? prog->parno_to_logical[parno]
                         : parno;
        if ( RXp_PAREN_NAMES(prog) ) {
            name_list = MUTABLE_AV(progi->data->data[progi->name_list_idx]);
        } else if ( pRExC_state ) {
            name_list = RExC_paren_name_list;
        }

        /* Paren and offset */
        sv_catpvf(sv, "%" IVdf, logical_parno);
        if (logical_parno != parno)
            sv_catpvf(sv, "/%" IVdf, parno);

        sv_catpvf(sv, "[%+d:%d]", (int)ARG2i(o),
                (int)((o + (int)ARG2i(o)) - progi->program) );
        if (name_list) {
            SV **name= av_fetch_simple(name_list, ARG1u(o), 0 );
            if (name)
                sv_catpvf(sv, " '%" SVf "'", SVfARG(*name));
        }
    }
    else if (k == LOGICAL)
        /* 2: embedded, otherwise 1 */
        sv_catpvf(sv, "[%d]", FLAGS(o));
    else if (k == ANYOF || k == ANYOFH || k == ANYOFR) {
        U8 flags;
        char * bitmap;
        U8 do_sep = 0;    /* Do we need to separate various components of the
                             output? */
        /* Set if there is still an unresolved user-defined property */
        SV *unresolved                = NULL;

        /* Things that are ignored except when the runtime locale is UTF-8 */
        SV *only_utf8_locale_invlist = NULL;

        /* Code points that don't fit in the bitmap */
        SV *nonbitmap_invlist = NULL;

        /* And things that aren't in the bitmap, but are small enough to be */
        SV* bitmap_range_not_in_bitmap = NULL;

        bool inverted;

        if (k != ANYOF) {
            flags = 0;
            bitmap = NULL;
        }
        else {
            flags = ANYOF_FLAGS(o);
            bitmap = ANYOF_BITMAP(o);
        }

        if (op == ANYOFL || op == ANYOFPOSIXL) {
            if ((flags & ANYOFL_UTF8_LOCALE_REQD)) {
                sv_catpvs(sv, "{utf8-locale-reqd}");
            }
            if (flags & ANYOFL_FOLD) {
                sv_catpvs(sv, "{i}");
            }
        }

        inverted = flags & ANYOF_INVERT;

        /* If there is stuff outside the bitmap, get it */
        if (k == ANYOFR) {

            /* For a single range, split into the parts inside vs outside the
             * bitmap. */
            UV start = ANYOFRbase(o);
            UV end   = ANYOFRbase(o) + ANYOFRdelta(o);

            if (start < NUM_ANYOF_CODE_POINTS) {
                if (end < NUM_ANYOF_CODE_POINTS) {
                    bitmap_range_not_in_bitmap
                          = _add_range_to_invlist(bitmap_range_not_in_bitmap,
                                                  start, end);
                }
                else {
                    bitmap_range_not_in_bitmap
                          = _add_range_to_invlist(bitmap_range_not_in_bitmap,
                                                  start, NUM_ANYOF_CODE_POINTS);
                    start = NUM_ANYOF_CODE_POINTS;
                }
            }

            if (start >= NUM_ANYOF_CODE_POINTS) {
                nonbitmap_invlist = _add_range_to_invlist(nonbitmap_invlist,
                                                ANYOFRbase(o),
                                                ANYOFRbase(o) + ANYOFRdelta(o));
            }
        }
        else if (ANYOF_MATCHES_ALL_OUTSIDE_BITMAP(o)) {
            nonbitmap_invlist = _add_range_to_invlist(nonbitmap_invlist,
                                                      NUM_ANYOF_CODE_POINTS,
                                                      UV_MAX);
        }
        else if (ANYOF_HAS_AUX(o)) {
                (void) GET_REGCLASS_AUX_DATA(prog, o, false,
                                                &unresolved,
                                                &only_utf8_locale_invlist,
                                                &nonbitmap_invlist);

            /* The aux data may contain stuff that could fit in the bitmap.
             * This could come from a user-defined property being finally
             * resolved when this call was done; or much more likely because
             * there are matches that require UTF-8 to be valid, and so aren't
             * in the bitmap (or ANYOFR).  This is teased apart later */
            _invlist_intersection(nonbitmap_invlist,
                                  PL_InBitmap,
                                  &bitmap_range_not_in_bitmap);
            /* Leave just the things that don't fit into the bitmap */
            _invlist_subtract(nonbitmap_invlist,
                              PL_InBitmap,
                              &nonbitmap_invlist);
        }

        /* Ready to start outputting.  First, the initial left bracket */
        sv_catpvf(sv, "[%s", PL_colors[0]);

        if (   bitmap
            || bitmap_range_not_in_bitmap
            || only_utf8_locale_invlist
            || unresolved)
        {
            /* Then all the things that could fit in the bitmap */
            do_sep = put_charclass_bitmap_innards(
                                    sv,
                                    bitmap,
                                    bitmap_range_not_in_bitmap,
                                    only_utf8_locale_invlist,
                                    o,
                                    flags,

                                    /* Can't try inverting for a
                                                   * better display if there
                                                   * are things that haven't
                                                   * been resolved */
                                    (unresolved != NULL || k == ANYOFR));
            SvREFCNT_dec(bitmap_range_not_in_bitmap);

            /* If there are user-defined properties which haven't been defined
             * yet, output them.  If the result is not to be inverted, it is
             * clearest to output them in a separate [] from the bitmap range
             * stuff.  If the result is to be complemented, we have to show
             * everything in one [], as the inversion applies to the whole
             * thing.  Use {braces} to separate them from anything in the
             * bitmap and anything above the bitmap. */
            if (unresolved) {
                if (inverted) {
                    if (! do_sep) { /* If didn't output anything in the bitmap
                                     */
                        sv_catpvs(sv, "^");
                    }
                    sv_catpvs(sv, "{");
                }
                else if (do_sep) {
                    sv_catpvf(sv,"%s][%s", PL_colors[1],
                                                      PL_colors[0]);
                }
                sv_catsv(sv, unresolved);
                if (inverted) {
                    sv_catpvs(sv, "}");
                }
                do_sep = ! inverted;
            }
            else if (     do_sep == 2
                     && ! nonbitmap_invlist
                     &&   ANYOF_MATCHES_NONE_OUTSIDE_BITMAP(o))
            {
                /* Here, the display shows the class as inverted, and
                 * everything above the lower display should also match, but
                 * there is no indication of that.  Add this range so the code
                 * below will add it to the display */
                _invlist_union_complement_2nd(nonbitmap_invlist,
                                              PL_InBitmap,
                                              &nonbitmap_invlist);
            }
        }

        /* And, finally, add the above-the-bitmap stuff */
        if (nonbitmap_invlist && _invlist_len(nonbitmap_invlist)) {
            SV* contents;

            /* See if truncation size is overridden */
            const STRLEN dump_len = (PL_dump_re_max_len > 256)
                                    ? PL_dump_re_max_len
                                    : 256;

            /* This is output in a separate [] */
            if (do_sep) {
                sv_catpvf(sv,"%s][%s", PL_colors[1], PL_colors[0]);
            }

            /* And, for easy of understanding, it is shown in the
             * uncomplemented form if possible.  The one exception being if
             * there are unresolved items, where the inversion has to be
             * delayed until runtime */
            if (inverted && ! unresolved) {
                _invlist_invert(nonbitmap_invlist);
                _invlist_subtract(nonbitmap_invlist, PL_InBitmap, &nonbitmap_invlist);
            }

            contents = invlist_contents(nonbitmap_invlist,
                                        false /* output suitable for catsv */
                                       );

            /* If the output is shorter than the permissible maximum, just do it. */
            if (SvCUR(contents) <= dump_len) {
                sv_catsv(sv, contents);
            }
            else {
                const char * contents_string = SvPVX(contents);
                STRLEN i = dump_len;

                /* Otherwise, start at the permissible max and work back to the
                 * first break possibility */
                while (i > 0 && contents_string[i] != ' ') {
                    i--;
                }
                if (i == 0) {       /* Fail-safe.  Use the max if we couldn't
                                       find a legal break */
                    i = dump_len;
                }

                sv_catpvn(sv, contents_string, i);
                sv_catpvs(sv, "...");
            }

            SvREFCNT_dec_NN(contents);
            SvREFCNT_dec_NN(nonbitmap_invlist);
        }

        /* And finally the matching, closing ']' */
        sv_catpvf(sv, "%s]", PL_colors[1]);

        if (op == ANYOFHs) {
            sv_catpvf(sv, " (Leading UTF-8 bytes = %s",
                _byte_dump_string((U8 *) ((struct regnode_anyofhs *) o)->string,
                FLAGS(o), 1));
        }
        else if (REGNODE_TYPE(op) != ANYOF) {
            U8 lowest = (op != ANYOFHr)
                         ? FLAGS(o)
                         : LOWEST_ANYOF_HRx_BYTE(FLAGS(o));
            U8 highest = (op == ANYOFHr)
                         ? HIGHEST_ANYOF_HRx_BYTE(FLAGS(o))
                         : (op == ANYOFH || op == ANYOFR)
                           ? 0xFF
                           : lowest;
#ifndef EBCDIC
            if (op != ANYOFR || ! isASCII(ANYOFRbase(o) + ANYOFRdelta(o)))
#endif
            {
                sv_catpvf(sv, " (First UTF-8 byte = %02X", lowest);
                if (lowest != highest) {
                    sv_catpvf(sv, "-%02X", highest);
                }
                sv_catpvf(sv, ")");
            }
        }

        SvREFCNT_dec(unresolved);
    }
    else if (k == ANYOFM) {
        SV * cp_list = get_ANYOFM_contents(o);

        sv_catpvf(sv, "[%s", PL_colors[0]);
        if (op == NANYOFM) {
            _invlist_invert(cp_list);
        }

        put_charclass_bitmap_innards(sv, NULL, cp_list, NULL, NULL, 0, true);
        sv_catpvf(sv, "%s]", PL_colors[1]);

        SvREFCNT_dec(cp_list);
    }
    else if (k == ANYOFHbbm) {
        SV * cp_list = get_ANYOFHbbm_contents(o);
        sv_catpvf(sv, "[%s", PL_colors[0]);

        sv_catsv(sv, invlist_contents(cp_list,
                                      false /* output suitable for catsv */
                                     ));
        sv_catpvf(sv, "%s]", PL_colors[1]);

        SvREFCNT_dec(cp_list);
    }
    else if (k == POSIXD || k == NPOSIXD) {
        U8 index = FLAGS(o) * 2;
        if (index < C_ARRAY_LENGTH(anyofs)) {
            if (*anyofs[index] != '[')  {
                sv_catpvs(sv, "[");
            }
            sv_catpv(sv, anyofs[index]);
            if (*anyofs[index] != '[')  {
                sv_catpvs(sv, "]");
            }
        }
        else {
            sv_catpvf(sv, "[illegal type = %d])", index);
        }
    }
    else if (k == BOUND || k == NBOUND) {
        /* Must be synced with order of 'bound_type' in regcomp.h */
        const char * const bounds[] = {
            "",      /* Traditional */
            "{gcb}",
            "{lb}",
            "{sb}",
            "{wb}"
        };
        assert(FLAGS(o) < C_ARRAY_LENGTH(bounds));
        sv_catpv(sv, bounds[FLAGS(o)]);
    }
    else if (k == BRANCHJ && (op == UNLESSM || op == IFMATCH)) {
        sv_catpvf(sv, "[%d", -(FLAGS(o)));
        if (NEXT_OFF(o)) {
            sv_catpvf(sv, "..-%d", FLAGS(o) - NEXT_OFF(o));
        }
        sv_catpvf(sv, "]");
    }
    else if (op == SBOL)
        sv_catpvf(sv, " /%s/", FLAGS(o) ? "\\A" : "^");
    else if (op == EVAL) {
        if (FLAGS(o) & EVAL_OPTIMISTIC_FLAG)
            sv_catpvf(sv, " optimistic");
    }

    /* add on the verb argument if there is one */
    if ( ( k == VERB || op == ACCEPT || op == OPFAIL ) && FLAGS(o)) {
        if ( ARG1u(o) )
            sv_catpvf(sv, ":%" SVf,
                       SVfARG((MUTABLE_SV(progi->data->data[ ARG1u( o ) ]))));
        else
            sv_catpvs(sv, ":NULL");
    }
#else
    PERL_UNUSED_CONTEXT;
    PERL_UNUSED_ARG(sv);
    PERL_UNUSED_ARG(o);
    PERL_UNUSED_ARG(prog);
    PERL_UNUSED_ARG(reginfo);
    PERL_UNUSED_ARG(pRExC_state);
#endif  /* DEBUGGING */
}


#ifdef DEBUGGING
STATIC void
S_put_code_point(pTHX_ SV *sv, UV c)
{
    PERL_ARGS_ASSERT_PUT_CODE_POINT;

    if (c > 255) {
        sv_catpvf(sv, "\\x{%04" UVXf "}", c);
    }
    else if (isPRINT(c)) {
        const char string = (char) c;

        /* We use {phrase} as metanotation in the class, so also escape literal
         * braces */
        if (isBACKSLASHED_PUNCT(c) || c == '{' || c == '}')
            sv_catpvs(sv, "\\");
        sv_catpvn(sv, &string, 1);
    }
    else if (isMNEMONIC_CNTRL(c)) {
        sv_catpvf(sv, "%s", cntrl_to_mnemonic((U8) c));
    }
    else {
        sv_catpvf(sv, "\\x%02X", (U8) c);
    }
}

STATIC void
S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
{
    /* Appends to 'sv' a displayable version of the range of code points from
     * 'start' to 'end'.  Mnemonics (like '\r') are used for the few controls
     * that have them, when they occur at the beginning or end of the range.
     * It uses hex to output the remaining code points, unless 'allow_literals'
     * is true, in which case the printable ASCII ones are output as-is (though
     * some of these will be escaped by put_code_point()).
     *
     * NOTE:  This is designed only for printing ranges of code points that fit
     *        inside an ANYOF bitmap.  Higher code points are simply suppressed
     */

    const unsigned int min_range_count = 3;

    assert(start <= end);

    PERL_ARGS_ASSERT_PUT_RANGE;

    while (start <= end) {
        UV this_end;
        const char * format;

        if (    end - start < min_range_count
            && (end - start <= 2 || (isPRINT_A(start) && isPRINT_A(end))))
        {
            /* Output a range of 1 or 2 chars individually, or longer ranges
             * when printable */
            for (; start <= end; start++) {
                put_code_point(sv, start);
            }
            break;
        }

        /* If permitted by the input options, and there is a possibility that
         * this range contains a printable literal, look to see if there is
         * one. */
        if (allow_literals && start <= MAX_PRINT_A) {

            /* If the character at the beginning of the range isn't an ASCII
             * printable, effectively split the range into two parts:
             *  1) the portion before the first such printable,
             *  2) the rest
             * and output them separately. */
            if (! isPRINT_A(start)) {
                UV temp_end = start + 1;

                /* There is no point looking beyond the final possible
                 * printable, in MAX_PRINT_A */
                UV max = MIN(end, MAX_PRINT_A);

                while (temp_end <= max && ! isPRINT_A(temp_end)) {
                    temp_end++;
                }

                /* Here, temp_end points to one beyond the first printable if
                 * found, or to one beyond 'max' if not.  If none found, make
                 * sure that we use the entire range */
                if (temp_end > MAX_PRINT_A) {
                    temp_end = end + 1;
                }

                /* Output the first part of the split range: the part that
                 * doesn't have printables, with the parameter set to not look
                 * for literals (otherwise we would infinitely recurse) */
                put_range(sv, start, temp_end - 1, false);

                /* The 2nd part of the range (if any) starts here. */
                start = temp_end;

                /* We do a continue, instead of dropping down, because even if
                 * the 2nd part is non-empty, it could be so short that we want
                 * to output it as individual characters, as tested for at the
                 * top of this loop.  */
                continue;
            }

            /* Here, 'start' is a printable ASCII.  If it is an alphanumeric,
             * output a sub-range of just the digits or letters, then process
             * the remaining portion as usual. */
            if (isALPHANUMERIC_A(start)) {
                UV mask = (isDIGIT_A(start))
                           ? CC_DIGIT_
                             : isUPPER_A(start)
                               ? CC_UPPER_
                               : CC_LOWER_;
                UV temp_end = start + 1;

                /* Find the end of the sub-range that includes just the
                 * characters in the same class as the first character in it */
                while (temp_end <= end && generic_isCC_A_(temp_end, mask)) {
                    temp_end++;
                }
                temp_end--;

                /* For short ranges, don't duplicate the code above to output
                 * them; just call recursively */
                if (temp_end - start < min_range_count) {
                    put_range(sv, start, temp_end, false);
                }
                else {  /* Output as a range */
                    put_code_point(sv, start);
                    sv_catpvs(sv, "-");
                    put_code_point(sv, temp_end);
                }
                start = temp_end + 1;
                continue;
            }

            /* We output any other printables as individual characters */
            if (isPUNCT_A(start) || isSPACE_A(start)) {
                while (start <= end && (isPUNCT_A(start)
                                        || isSPACE_A(start)))
                {
                    put_code_point(sv, start);
                    start++;
                }
                continue;
            }
        } /* End of looking for literals */

        /* Here is not to output as a literal.  Some control characters have
         * mnemonic names.  Split off any of those at the beginning and end of
         * the range to print mnemonically.  It isn't possible for many of
         * these to be in a row, so this won't overwhelm with output */
        if (   start <= end
            && (isMNEMONIC_CNTRL(start) || isMNEMONIC_CNTRL(end)))
        {
            while (isMNEMONIC_CNTRL(start) && start <= end) {
                put_code_point(sv, start);
                start++;
            }

            /* If this didn't take care of the whole range ... */
            if (start <= end) {

                /* Look backwards from the end to find the final non-mnemonic
                 * */
                UV temp_end = end;
                while (isMNEMONIC_CNTRL(temp_end)) {
                    temp_end--;
                }

                /* And separately output the interior range that doesn't start
                 * or end with mnemonics */
                put_range(sv, start, temp_end, false);

                /* Then output the mnemonic trailing controls */
                start = temp_end + 1;
                while (start <= end) {
                    put_code_point(sv, start);
                    start++;
                }
                break;
            }
        }

        /* As a final resort, output the range or subrange as hex. */

        if (start >= NUM_ANYOF_CODE_POINTS) {
            this_end = end;
        }
        else {  /* Have to split range at the bitmap boundary */
            this_end = (end < NUM_ANYOF_CODE_POINTS)
                        ? end
                        : NUM_ANYOF_CODE_POINTS - 1;
        }
#if NUM_ANYOF_CODE_POINTS > 256
        format = (this_end < 256)
                 ? "\\x%02" UVXf "-\\x%02" UVXf
                 : "\\x{%04" UVXf "}-\\x{%04" UVXf "}";
#else
        format = "\\x%02" UVXf "-\\x%02" UVXf;
#endif
        GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
        sv_catpvf(sv, format, start, this_end);
        GCC_DIAG_RESTORE_STMT;
        break;
    }
}

STATIC void
S_put_charclass_bitmap_innards_invlist(pTHX_ SV *sv, SV* invlist)
{
    /* Concatenate onto the PV in 'sv' a displayable form of the inversion list
     * 'invlist' */

    UV start, end;
    bool allow_literals = true;

    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_INVLIST;

    /* Generally, it is more readable if printable characters are output as
     * literals, but if a range (nearly) spans all of them, it's best to output
     * it as a single range.  This code will use a single range if all but 2
     * ASCII printables are in it */
    invlist_iterinit(invlist);
    while (invlist_iternext(invlist, &start, &end)) {

        /* If the range starts beyond the final printable, it doesn't have any
         * in it */
        if (start > MAX_PRINT_A) {
            break;
        }

        /* In both ASCII and EBCDIC, a SPACE is the lowest printable.  To span
         * all but two, the range must start and end no later than 2 from
         * either end */
        if (start < ' ' + 2 && end > MAX_PRINT_A - 2) {
            if (end > MAX_PRINT_A) {
                end = MAX_PRINT_A;
            }
            if (start < ' ') {
                start = ' ';
            }
            if (end - start >= MAX_PRINT_A - ' ' - 2) {
                allow_literals = false;
            }
            break;
        }
    }
    invlist_iterfinish(invlist);

    /* Here we have figured things out.  Output each range */
    invlist_iterinit(invlist);
    while (invlist_iternext(invlist, &start, &end)) {
        if (start >= NUM_ANYOF_CODE_POINTS) {
            break;
        }
        put_range(sv, start, end, allow_literals);
    }
    invlist_iterfinish(invlist);

    return;
}

STATIC SV*
S_put_charclass_bitmap_innards_common(pTHX_
        SV* invlist,            /* The bitmap */
        SV* posixes,            /* Under /l, things like [:word:], \S */
        SV* only_utf8,          /* Under /d, matches iff the target is UTF-8 */
        SV* not_utf8,           /* /d, matches iff the target isn't UTF-8 */
        SV* only_utf8_locale,   /* Under /l, matches if the locale is UTF-8 */
        const bool invert       /* Is the result to be inverted? */
)
{
    /* Create and return an SV containing a displayable version of the bitmap
     * and associated information determined by the input parameters.  If the
     * output would have been only the inversion indicator '^', NULL is instead
     * returned. */

    SV * output;

    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_COMMON;

    if (invert) {
        output = newSVpvs("^");
    }
    else {
        output = newSVpvs("");
    }

    /* First, the code points in the bitmap that are unconditionally there */
    put_charclass_bitmap_innards_invlist(output, invlist);

    /* Traditionally, these have been placed after the main code points */
    if (posixes) {
        sv_catsv(output, posixes);
    }

    if (only_utf8 && _invlist_len(only_utf8)) {
        sv_catpvf(output, "%s{utf8}%s", PL_colors[1], PL_colors[0]);
        put_charclass_bitmap_innards_invlist(output, only_utf8);
    }

    if (not_utf8 && _invlist_len(not_utf8)) {
        sv_catpvf(output, "%s{not utf8}%s", PL_colors[1], PL_colors[0]);
        put_charclass_bitmap_innards_invlist(output, not_utf8);
    }

    if (only_utf8_locale && _invlist_len(only_utf8_locale)) {
        sv_catpvf(output, "%s{utf8 locale}%s", PL_colors[1], PL_colors[0]);
        put_charclass_bitmap_innards_invlist(output, only_utf8_locale);

        /* This is the only list in this routine that can legally contain code
         * points outside the bitmap range.  The call just above to
         * 'put_charclass_bitmap_innards_invlist' will simply suppress them, so
         * output them here.  There's about a half-dozen possible, and none in
         * contiguous ranges longer than 2 */
        if (invlist_highest(only_utf8_locale) >= NUM_ANYOF_CODE_POINTS) {
            UV start, end;
            SV* above_bitmap = NULL;

            _invlist_subtract(only_utf8_locale, PL_InBitmap, &above_bitmap);

            invlist_iterinit(above_bitmap);
            while (invlist_iternext(above_bitmap, &start, &end)) {
                UV i;

                for (i = start; i <= end; i++) {
                    put_code_point(output, i);
                }
            }
            invlist_iterfinish(above_bitmap);
            SvREFCNT_dec_NN(above_bitmap);
        }
    }

    if (invert && SvCUR(output) == 1) {
        return NULL;
    }

    return output;
}

STATIC U8
S_put_charclass_bitmap_innards(pTHX_ SV *sv,
                                     char *bitmap,
                                     SV *nonbitmap_invlist,
                                     SV *only_utf8_locale_invlist,
                                     const regnode * const node,
                                     const U8 flags,
                                     const bool force_as_is_display)
{
    /* Appends to 'sv' a displayable version of the innards of the bracketed
     * character class defined by the other arguments:
     *  'bitmap' points to the bitmap, or NULL if to ignore that.
     *  'nonbitmap_invlist' is an inversion list of the code points that are in
     *      the bitmap range, but for some reason aren't in the bitmap; NULL if
     *      none.  The reasons for this could be that they require some
     *      condition such as the target string being or not being in UTF-8
     *      (under /d), or because they came from a user-defined property that
     *      was not resolved at the time of the regex compilation (under /u)
     *  'only_utf8_locale_invlist' is an inversion list of the code points that
     *      are valid only if the runtime locale is a UTF-8 one; NULL if none
     *  'node' is the regex pattern ANYOF node.  It is needed only when the
     *      above two parameters are not null, and is passed so that this
     *      routine can tease apart the various reasons for them.
     *  'flags' is the flags field of 'node'
     *  'force_as_is_display' is true if this routine should definitely NOT try
     *      to invert things to see if that leads to a cleaner display.  If
     *      false, this routine is free to use its judgment about doing this.
     *
     * It returns 0 if nothing was actually output.  (It may be that
     *              the bitmap, etc is empty.)
     *            1 if the output wasn't inverted (didn't begin with a '^')
     *            2 if the output was inverted (did begin with a '^')
     *
     * When called for outputting the bitmap of a non-ANYOF node, just pass the
     * bitmap, with the succeeding parameters set to NULL, and the final one to
     * false.
     */

    /* In general, it tries to display the 'cleanest' representation of the
     * innards, choosing whether to display them inverted or not, regardless of
     * whether the class itself is to be inverted.  However,  there are some
     * cases where it can't try inverting, as what actually matches isn't known
     * until runtime, and hence the inversion isn't either. */

    bool inverting_allowed = ! force_as_is_display;

    int i;
    STRLEN orig_sv_cur = SvCUR(sv);

    SV* invlist;            /* Inversion list we accumulate of code points that
                               are unconditionally matched */
    SV* only_utf8 = NULL;   /* Under /d, list of matches iff the target is
                               UTF-8 */
    SV* not_utf8 =  NULL;   /* /d, list of matches iff the target isn't UTF-8
                             */
    SV* posixes = NULL;     /* Under /l, string of things like [:word:], \D */
    SV* only_utf8_locale = NULL;    /* Under /l, list of matches if the locale
                                       is UTF-8 */

    SV* as_is_display;      /* The output string when we take the inputs
                               literally */
    SV* inverted_display;   /* The output string when we invert the inputs */

    bool invert = cBOOL(flags & ANYOF_INVERT);  /* Is the input to be inverted
                                                   to match? */
    /* We are biased in favor of displaying things without them being inverted,
     * as that is generally easier to understand */
    const int bias = 5;

    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS;

    /* Start off with whatever code points are passed in.  (We clone, so we
     * don't change the caller's list) */
    if (nonbitmap_invlist) {
        assert(invlist_highest(nonbitmap_invlist) < NUM_ANYOF_CODE_POINTS);
        invlist = invlist_clone(nonbitmap_invlist, NULL);
    }
    else {  /* Worst case size is every other code point is matched */
        invlist = _new_invlist(NUM_ANYOF_CODE_POINTS / 2);
    }

    if (flags) {
        if (OP(node) == ANYOFD) {

            /* This flag indicates that the code points below 0x100 in the
             * nonbitmap list are precisely the ones that match only when the
             * target is UTF-8 (they should all be non-ASCII). */
            if (flags & ANYOF_HAS_EXTRA_RUNTIME_MATCHES) {
                _invlist_intersection(invlist, PL_UpperLatin1, &only_utf8);
                _invlist_subtract(invlist, only_utf8, &invlist);
            }

            /* And this flag for matching all non-ASCII 0xFF and below */
            if (flags & ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared) {
                not_utf8 = invlist_clone(PL_UpperLatin1, NULL);
            }
        }
        else if (OP(node) == ANYOFL || OP(node) == ANYOFPOSIXL) {

            /* If either of these flags are set, what matches isn't
             * determinable except during execution, so don't know enough here
             * to invert */
            if (flags & (ANYOFL_FOLD|ANYOF_MATCHES_POSIXL)) {
                inverting_allowed = false;
            }

            /* What the posix classes match also varies at runtime, so these
             * will be output symbolically. */
            if (ANYOF_POSIXL_TEST_ANY_SET(node)) {
                int i;

                posixes = newSVpvs("");
                for (i = 0; i < ANYOF_POSIXL_MAX; i++) {
                    if (ANYOF_POSIXL_TEST(node, i)) {
                        sv_catpv(posixes, anyofs[i]);
                    }
                }
            }
        }
    }

    /* Accumulate the bit map into the unconditional match list */
    if (bitmap) {
        for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
            if (BITMAP_TEST(bitmap, i)) {
                int start = i++;
                for (;
                     i < NUM_ANYOF_CODE_POINTS && BITMAP_TEST(bitmap, i);
                     i++)
                { /* empty */ }
                invlist = _add_range_to_invlist(invlist, start, i-1);
            }
        }
    }

    /* Make sure that the conditional match lists don't have anything in them
     * that match unconditionally; otherwise the output is quite confusing.
     * This could happen if the code that populates these misses some
     * duplication. */
    if (only_utf8) {
        _invlist_subtract(only_utf8, invlist, &only_utf8);
    }
    if (not_utf8) {
        _invlist_subtract(not_utf8, invlist, &not_utf8);
    }

    if (only_utf8_locale_invlist) {

        /* Since this list is passed in, we have to make a copy before
         * modifying it */
        only_utf8_locale = invlist_clone(only_utf8_locale_invlist, NULL);

        _invlist_subtract(only_utf8_locale, invlist, &only_utf8_locale);

        /* And, it can get really weird for us to try outputting an inverted
         * form of this list when it has things above the bitmap, so don't even
         * try */
        if (invlist_highest(only_utf8_locale) >= NUM_ANYOF_CODE_POINTS) {
            inverting_allowed = false;
        }
    }

    /* Calculate what the output would be if we take the input as-is */
    as_is_display = put_charclass_bitmap_innards_common(invlist,
                                                    posixes,
                                                    only_utf8,
                                                    not_utf8,
                                                    only_utf8_locale,
                                                    invert);

    /* If have to take the output as-is, just do that */
    if (! inverting_allowed) {
        if (as_is_display) {
            sv_catsv(sv, as_is_display);
            SvREFCNT_dec_NN(as_is_display);
        }
    }
    else { /* But otherwise, create the output again on the inverted input, and
              use whichever version is shorter */

        int inverted_bias, as_is_bias;

        /* We will apply our bias to whichever of the results doesn't have
         * the '^' */
        bool trial_invert;
        if (invert) {
            trial_invert = false;
            as_is_bias = bias;
            inverted_bias = 0;
        }
        else {
            trial_invert = true;
            as_is_bias = 0;
            inverted_bias = bias;
        }

        /* Now invert each of the lists that contribute to the output,
         * excluding from the result things outside the possible range */

        /* For the unconditional inversion list, we have to add in all the
         * conditional code points, so that when inverted, they will be gone
         * from it */
        _invlist_union(only_utf8, invlist, &invlist);
        _invlist_union(not_utf8, invlist, &invlist);
        _invlist_union(only_utf8_locale, invlist, &invlist);
        _invlist_invert(invlist);
        _invlist_intersection(invlist, PL_InBitmap, &invlist);

        if (only_utf8) {
            _invlist_invert(only_utf8);
            _invlist_intersection(only_utf8, PL_UpperLatin1, &only_utf8);
        }
        else if (not_utf8) {

            /* If a code point matches iff the target string is not in UTF-8,
             * then complementing the result has it not match iff not in UTF-8,
             * which is the same thing as matching iff it is UTF-8. */
            only_utf8 = not_utf8;
            not_utf8 = NULL;
        }

        if (only_utf8_locale) {
            _invlist_invert(only_utf8_locale);
            _invlist_intersection(only_utf8_locale,
                                  PL_InBitmap,
                                  &only_utf8_locale);
        }

        inverted_display = put_charclass_bitmap_innards_common(
                                            invlist,
                                            posixes,
                                            only_utf8,
                                            not_utf8,
                                            only_utf8_locale, trial_invert);

        /* Use the shortest representation, taking into account our bias
         * against showing it inverted */
        if (   inverted_display
            && (   ! as_is_display
                || (  SvCUR(inverted_display) + inverted_bias
                    < SvCUR(as_is_display)    + as_is_bias)))
        {
            sv_catsv(sv, inverted_display);
            invert = ! invert;
        }
        else if (as_is_display) {
            sv_catsv(sv, as_is_display);
        }

        SvREFCNT_dec(as_is_display);
        SvREFCNT_dec(inverted_display);
    }

    SvREFCNT_dec_NN(invlist);
    SvREFCNT_dec(only_utf8);
    SvREFCNT_dec(not_utf8);
    SvREFCNT_dec(posixes);
    SvREFCNT_dec(only_utf8_locale);

    U8 did_output_something = (bool) (SvCUR(sv) > orig_sv_cur);
    if (did_output_something) {
        /* Distinguish between non and inverted cases */
        did_output_something += invert;
    }

    return did_output_something;
}
#endif /* DEBUGGING */