regen/embed.pl: Add ability to assert(s < e)

Where s is a pointer into a string, and e is the end of it.
2026-01-26 08:38:23 +00:00 · 2025-10-06 06:33:26 -06:00 · 2025-10-06 06:33:26 -06:00 · 1628b08aeb
commit 1628b08aeb
parent c0125f32ec
3 changed files with 232 additions and 13 deletions
--- a/autodoc.pl
+++ b/autodoc.pl
@ -620,7 +620,7 @@ sub check_and_add_proto_defn {
    $flags .= "n" if $flags =~ /#/;    # No threads, arguments for #ifdef

    my @munged_args= $args_ref->@*;
-    s/\b(?:NN|NULLOK)\b\s+//g for @munged_args;
+    s/\b(?:NN|NULLOK|[SM]PTR|EPTRQ?)\b\s+//g for @munged_args;

    my $flags_sans_d = $flags;
    my $docs_expected = $flags_sans_d =~ s/d//g;
--- a/embed.fnc
+++ b/embed.fnc
@ -182,20 +182,73 @@
 :	2)  the internal logic used by code that reads this file.
 :	3)  explicit asserts that you add in this file.
 :
-:   Sections below give more details of each item.
+:   Sections below give more details of each item.  For readability,
+:   constraints are split into two sections, one for pointer parameters, and
+:   one for the rest.
 :
 : *** Pointer Parameter Constraints
 :
-:   You must specify what checking is needed for all pointer arguments.  If the
-:   pointer is allowed to point to NULL, prefix that argument with 'NULLOK'
-:   (following the template of the many entries in this file that have that).
-:   If it can't be NULL, use 'NN' (again many entries herein do that).
-:   The reason for this requirement is to tell the maintainers that you have
-:   considered the question about the argument, and this is the answer.
+:   Every pointer parameter must have a constraint; one of the following:
+:
+:   NN	    means the called function is expecting this pointer parameter to be
+:	    non-NULL, and likely is not equipped to handle it being NULL.
+:   NULLOK  means the called function definitely can handle this parameter
+:	    being NULL.  The reason you need to specify this at all is to tell
+:	    future maintainers that you have considered the question about the
+:	    parameter, and this is the answer.
+:   SPTR    means that not only must this pointer parameter be non-NULL, it
+:	    points to a position in a character string, which the called
+:	    function is not to look behind.  If a parameter is marked with this
+:	    constraint, another parameter to the function must be marked with
+:	    one of the constraints below in this list.
+:   EPTR    means that not only must this pointer parameter be non-NULL, it
+:	    points to the position one byte beyond the end of a character
+:	    string.  The called function is not to look at the byte in that
+:	    position or any higher ones.  If a parameter is marked with this
+:	    constraint, another parameter to the function must be marked with
+:	    SPTR, or MPTR (described just below).  It also is fine to have
+:	    both an SPTR parameter and an MPTR one.
+:   MPTR    means that not only must this pointer parameter be non-NULL, it
+:	    points to a position somewhere in the middle of a character string.
+:	    If a parameter is marked with this constraint, another parameter to
+:	    the function must be marked with one of SPTR, EPTR, or EPTRQ
+:	    (described just below).  It also is fine to have both an SPTR
+:	    parameter and an EPTR (or EPTRQ) one.
+:   EPTRQ   is like EPTR, but the called function is equipped to handle the case
+:	    where the input SPTR and/or MPTR are equal to this parameter; they
+:	    don't have to be strictly less than it.  If a parameter is marked
+:	    with this constraint, no parameter may be marked as EPTR.
+:
+:   To summarize, either
+:	    SPTR <= MPTR <  EPTR
+:   or
+:	    SPTR <= MPTR <= EPTRQ
+:   In each relation, all three or any two of the constraints must be present.
+:
+:   When only two constraints are present and one of them is either EPTR or
+:   EPTRQ, the distinction between SPTR and MPTR for the other becomes somewhat
+:   fuzzy; the generated assertion will be the same whichever constraint is
+:   used.  You should choose the one that makes the most sense for the
+:   semantics of the parameter.  For example, there are currently some
+:   functions with parameters named 'curpos', and no SPTR parameter exists.
+:   The name of the parameter clearly indicates it isn't necessarily the
+:   starting position of the string, so using MPTR as the constraint makes the
+:   most sense.
+:
+:   The parameters for the function can be in any order, except if a function
+:   has multiple different character strings, all the parameters for the first
+:   string must be positioned in the function call before any of the parameters
+:   for the second, and so forth.  (This accommodates the very few existing
+:   functions that have multiple strings passed to them, without needing to
+:   create a more general mechanism, like possibly SPTR1..EPTR1, SPTR2..EPTR2.)
 :
 : *** Non-pointer Parameter Constraints
 :
-:   For a numeric argument, you may specify that it can't be 0 by using 'NZ'
+:   Only a single constraint is currently available to you to use; it is for
+:   parameters that are some sort of integer:
+:
+:   NZ	    means the called function is expecting this parameter to be
+:	    non-zero, and is not equipped to handle it being 0.
 :
 : *** Automatically generated checks
 :
--- a/regen/embed.pl
+++ b/regen/embed.pl
@ -281,6 +281,8 @@ sub generate_proto_h {
                    "$plain_func: n flag is contradicted by having arguments"
                                                            if $flags =~ /n/;
            my $n;
+            my @bounded_strings;
+
            for my $arg ( @$args ) {
                ++$n;

@ -296,8 +298,29 @@ sub generate_proto_h {
                    die_at_end "$plain_func: func: m flag required for"
                             . '"literal" argument' unless $has_mflag;
                }
-                else {
-                    my $nn =      ( $arg =~ s/\bNN\b// );
+                else {  # Look for constraints about this argument
+
+                    my $ptr_type;   # E, M, and S are the three types
+                                    # corresponding respectively to EPTR(Q)?,
+                                    # MPTR, and SPTR
+                    my $equal = ""; # EPTRQ is just an EPTR with this set to
+                                    # "="
+                    if ($arg =~ s/ \b ( [EMS] ) PTR (Q)? \b //x) {;
+                        $ptr_type = $1;
+                        if (defined $2) {
+                            die_at_end ": $func: Q only valid with EPTR"
+                                                          if $ptr_type ne 'E';
+                            $equal = "=";
+                        }
+                        elsif ($ptr_type eq 'M') {
+                            # A middle position always is <=
+                            $equal = "=";
+                        }
+                    }
+
+                    # A $ptr_type is a specialized 'nn'
+                    my $nn =  (defined $ptr_type) + ( $arg =~ s/\bNN\b// );
+
                    my $nz =      ( $arg =~ s/\bNZ\b// );
                    my $nullok =  ( $arg =~ s/\bNULLOK\b// );
                    my $nocheck = ( $arg =~ s/\bNOCHECK\b// );
@ -310,7 +333,8 @@ sub generate_proto_h {
                    # Note that we don't care if you say e.g., 'NN' multiple
                    # times
                    die_at_end
-                           ":$func: $arg Use only one of NN, NULLOK, and NZ"
+                           ":$func: $arg Use only one of NN (including"
+                         . " EPTR, EPTRQ, MPTR, SPTR), NULLOK, or NZ"
                                               if 0 + $nn + $nz + $nullok > 1;

                    push( @nonnull, $n ) if $nn;
@ -322,7 +346,8 @@ sub generate_proto_h {
                    # pointer.
                    if ($args_assert_line && $arg =~ /\*/) {
                        if ($nn + $nullok == 0) {
-                            warn "$func: $arg needs NN or NULLOK\n";
+                            warn "$func: $arg needs one of: NN, EPTR, EPTRQ,"
+                               . " MPTR, SPTR, or NULLOK\n";
                            ++$unflagged_pointers;
                        }

@ -358,10 +383,151 @@ sub generate_proto_h {
                                                                   if $nullok;
                            push @asserts, "assert($type_assert)";
                        }
+
+                        # If this is a pointer to a character string argument,
+                        # we need extra work.
+                        if ($ptr_type) {
+
+                            # For these, not only does the parameter have to
+                            # be non-NULL, but every dereference of it must
+                            # be non-NULL as well.
+                            #
+                            # First, get all the '*' derefs, except one.
+                            my $derefs = "*" x (($arg =~ tr/*//) - 1);
+
+                            # Then add the asserts that each dereferenced
+                            # layer is non-NULL.
+                            for (my $i = 1; $i <= length $derefs; $i++) {
+                                push @asserts, "assert("
+                                             . substr($derefs, 0, $i)
+                                             . "$argname)";
+                            }
+
+                            # Save the data we need later
+                            my %entry = (
+                                          argname => $argname,
+                                          equal   => $equal,
+                                          deref   => $derefs,
+                                        );
+
+                            # The motivation for all this is that some string
+                            # pointer parameters have constraints, such as
+                            # that the starting position can't be beyond the
+                            # ending one.  Unfortunately, the function's
+                            # parameters can be positioned in its prototype so
+                            # that the pointer to the ending position comes
+                            # before the pointer to the starting one, and this
+                            # can't be changed because they are API.  To cope
+                            # with this, we use the array below to save just
+                            # the crucial information about each while parsing
+                            # the parameters.  After all information is
+                            # gathered, we go through and handle it.  An entry
+                            # looks like this after all the parameters are
+                            # parsed:
+                            #   {
+                            #       'M' => {
+                            #               'equal' => '=',
+                            #               'argname' => 'curpos',
+                            #               'deref' => ''
+                            #               },
+                            #       'E' => {
+                            #               'equal' => '',
+                            #               'argname' => 'strend',
+                            #               'deref' => ''
+                            #               },
+                            #       'S' => {
+                            #               'equal' => '',
+                            #               'deref' => '',
+                            #               'argname' => 'strbeg'
+                            #               }
+                            #   }
+                            #
+                            # Only two of the keys need be present.
+                            # If the function has multiple string parameters,
+                            # the [0] entry in @bounded_strings will be for
+                            # the first string, [1] for the second, and so on.
+                            #
+                            # Here, we are in the middle of parsing the
+                            # parameters.  We add this parameter to the
+                            # current string's boundary constraints hash,
+                            # or create a new string if necessary.  The new
+                            # string's data is pushed as a new element onto
+                            # the array.
+                            #
+                            # A new element is created if the array is empty,
+                            # or if there is already an existing hash element
+                            # for the new key.  For example, you can't have
+                            # two EPTRs for the same string, so the second
+                            # must be for a new string.
+                            #
+                            # Otherwise we presume this hash value is for the
+                            # most recent string in the array.  If we have an
+                            # EPTR, and an MPTR comes along, assume that it is
+                            # for the same string as the EPTR.
+                            #
+                            # This hack works as long as all parameters for the
+                            # current string come before any of the next
+                            # string, which is the case for all existing
+                            # function calls, and any new ones can be
+                            # fashioned to conform.
+                            if (   @bounded_strings
+                                && ! defined $bounded_strings[-1]{$ptr_type})
+                            {
+                                $bounded_strings[-1]{$ptr_type} = \%entry;
+                            }
+                            else {
+                                push @bounded_strings,
+                                     { $ptr_type => \%entry };
+                            }
+                        }   # End of special handling of string bounds
                    }
                }   # End of this argument
            }   # End of loop through all arguments

+            # We have looped through all arguments, and for any bounded string
+            # ones, we have saved the information needed to generate things
+            # like
+            #   assert(s < e)
+            foreach my $string (@bounded_strings) {
+
+                # We need at least two bounds
+                if (1 == (  (defined $string->{S})
+                          + (defined $string->{M})
+                          + (defined $string->{E})))
+                {
+                    my ($type, $object) = each %$string;
+                    die_at_end
+                           "$func: Missing PTR constraint for string given by "
+                         . $object->{argname};
+                    next;
+                }
+
+                # But three or any two bounds work.  We may need to generate
+                # two asserts, so loop to do so, skipping any missing one.
+                for my $i (["S", "E"], ["S", "M"], ["M", "E"]) {
+
+                    # We don't need an assert for the whole span if we have an
+                    # intermediate one.
+                    next if defined $string->{M} &&    $i->[0] eq 'S'
+                                                    && $i->[1] eq 'E';
+
+                    my $lower = $string->{$i->[0]} or next;
+                    my $upper = $string->{$i->[1]} or next;
+
+                    # This reduces to either:
+                    #   assert(lower < upper);
+                    # or
+                    #   assert(lower <= upper);
+                    #
+                    # There might also be some dereferences, like **lower
+                    push @asserts, "assert("
+                                        . "$lower->{deref}$lower->{argname}"
+                                        . " <$upper->{equal} "
+                                        . "$upper->{deref}$upper->{argname}"
+                                        . ")";
+                }
+            }
+
            $ret .= join ", ", @$args;
        }
        else {