cmListFileLexer: Do not require null-terminated input

This commit is contained in:
Sergiu Deitsch 2025-09-11 00:38:14 +02:00
parent 0a26c08004
commit 7040406f86
No known key found for this signature in database
12 changed files with 104 additions and 83 deletions

View File

@ -784,19 +784,19 @@ struct cmListFileLexer_s
int comment;
int line;
int column;
int size;
size_t size;
FILE* file;
size_t cr;
char* string_buffer;
char* string_position;
int string_left;
size_t string_left;
yyscan_t scanner;
};
static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
int length);
size_t length);
static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
int length);
size_t length);
static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
size_t bufferSize);
static void cmListFileLexerInit(cmListFileLexer* lexer);
@ -1148,15 +1148,17 @@ case 2:
YY_RULE_SETUP
{
const char* bracket = yytext;
size_t length = yyleng;
lexer->comment = yytext[0] == '#';
if (lexer->comment) {
lexer->token.type = cmListFileLexer_Token_CommentBracket;
bracket += 1;
--length;
} else {
lexer->token.type = cmListFileLexer_Token_ArgumentBracket;
}
cmListFileLexerSetToken(lexer, "", 0);
lexer->bracket = strchr(bracket+1, '[') - bracket;
lexer->bracket = (char*)memchr(bracket + 1, '[', length - 1) - bracket;
if (yytext[yyleng-1] == '\n') {
++lexer->line;
lexer->column = 1;
@ -1223,7 +1225,6 @@ YY_RULE_SETUP
lexer->column += yyleng;
/* Erase the partial bracket from the token. */
lexer->token.length -= lexer->bracket;
lexer->token.text[lexer->token.length] = 0;
BEGIN(INITIAL);
return 1;
}
@ -2553,7 +2554,7 @@ void yyfree (void * ptr , yyscan_t yyscanner)
/*--------------------------------------------------------------------------*/
static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
int length)
size_t length)
{
/* Set the token line and column number. */
lexer->token.line = lexer->line;
@ -2562,7 +2563,7 @@ static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
/* Use the same buffer if possible. */
if (lexer->token.text) {
if (text && length < lexer->size) {
strcpy(lexer->token.text, text);
memcpy(lexer->token.text, text, length);
lexer->token.length = length;
return;
}
@ -2572,10 +2573,11 @@ static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
}
/* Need to extend the buffer. */
if (text) {
lexer->token.text = strdup(text);
if (length > 0) {
lexer->token.text = (char*)malloc(length);
memcpy(lexer->token.text, text, length);
lexer->token.length = length;
lexer->size = length + 1;
lexer->size = length;
} else {
lexer->token.length = 0;
}
@ -2583,15 +2585,15 @@ static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
/*--------------------------------------------------------------------------*/
static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
int length)
size_t length)
{
char* temp;
int newSize;
size_t newSize;
/* If the appended text will fit in the buffer, do not reallocate. */
newSize = lexer->token.length + length + 1;
newSize = lexer->token.length + length;
if (lexer->token.text && newSize <= lexer->size) {
strcpy(lexer->token.text + lexer->token.length, text);
memcpy(lexer->token.text + lexer->token.length, text, length);
lexer->token.length += length;
return;
}
@ -2603,7 +2605,6 @@ static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
free(lexer->token.text);
}
memcpy(temp + lexer->token.length, text, length);
temp[lexer->token.length + length] = 0;
lexer->token.text = temp;
lexer->token.length += length;
lexer->size = newSize;
@ -2643,9 +2644,9 @@ static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
lexer->cr = cr;
return n;
} else if (lexer->string_left) {
int length = lexer->string_left;
if ((int)bufferSize < length) {
length = (int)bufferSize;
size_t length = lexer->string_left;
if (bufferSize < length) {
length = bufferSize;
}
memcpy(buffer, lexer->string_position, length);
lexer->string_position += length;
@ -2765,15 +2766,18 @@ int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name,
}
/*--------------------------------------------------------------------------*/
int cmListFileLexer_SetString(cmListFileLexer* lexer, const char* text)
int cmListFileLexer_SetString(cmListFileLexer* lexer, char const* text,
size_t length)
{
int result = 1;
cmListFileLexerDestroy(lexer);
if (text) {
int length = (int)strlen(text);
lexer->string_buffer = (char*)malloc(length + 1);
/* text may be non-NULL while length is 0. However, on some platforms
malloc(0) returns NULL. To avoid signaling an error to the caller in
such cases, allocate only when length is nonzero. */
if (length > 0) {
lexer->string_buffer = (char*)malloc(length);
if (lexer->string_buffer) {
strcpy(lexer->string_buffer, text);
memcpy(lexer->string_buffer, text, length);
lexer->string_position = lexer->string_buffer;
lexer->string_left = length;
} else {

View File

@ -36,19 +36,19 @@ struct cmListFileLexer_s
int comment;
int line;
int column;
int size;
size_t size;
FILE* file;
size_t cr;
char* string_buffer;
char* string_position;
int string_left;
size_t string_left;
yyscan_t scanner;
};
static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
int length);
size_t length);
static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
int length);
size_t length);
static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
size_t bufferSize);
static void cmListFileLexerInit(cmListFileLexer* lexer);
@ -90,15 +90,17 @@ LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t[=])*\"
#?\[=*\[\n? {
const char* bracket = yytext;
size_t length = yyleng;
lexer->comment = yytext[0] == '#';
if (lexer->comment) {
lexer->token.type = cmListFileLexer_Token_CommentBracket;
bracket += 1;
--length;
} else {
lexer->token.type = cmListFileLexer_Token_ArgumentBracket;
}
cmListFileLexerSetToken(lexer, "", 0);
lexer->bracket = strchr(bracket+1, '[') - bracket;
lexer->bracket = (char*)memchr(bracket + 1, '[', length - 1) - bracket;
if (yytext[yyleng-1] == '\n') {
++lexer->line;
lexer->column = 1;
@ -151,7 +153,6 @@ LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t[=])*\"
lexer->column += yyleng;
/* Erase the partial bracket from the token. */
lexer->token.length -= lexer->bracket;
lexer->token.text[lexer->token.length] = 0;
BEGIN(INITIAL);
return 1;
}
@ -266,7 +267,7 @@ LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t[=])*\"
/*--------------------------------------------------------------------------*/
static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
int length)
size_t length)
{
/* Set the token line and column number. */
lexer->token.line = lexer->line;
@ -275,7 +276,7 @@ static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
/* Use the same buffer if possible. */
if (lexer->token.text) {
if (text && length < lexer->size) {
strcpy(lexer->token.text, text);
memcpy(lexer->token.text, text, length);
lexer->token.length = length;
return;
}
@ -285,10 +286,11 @@ static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
}
/* Need to extend the buffer. */
if (text) {
lexer->token.text = strdup(text);
if (length > 0) {
lexer->token.text = (char*)malloc(length);
memcpy(lexer->token.text, text, length);
lexer->token.length = length;
lexer->size = length + 1;
lexer->size = length;
} else {
lexer->token.length = 0;
}
@ -296,15 +298,15 @@ static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
/*--------------------------------------------------------------------------*/
static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
int length)
size_t length)
{
char* temp;
int newSize;
size_t newSize;
/* If the appended text will fit in the buffer, do not reallocate. */
newSize = lexer->token.length + length + 1;
newSize = lexer->token.length + length;
if (lexer->token.text && newSize <= lexer->size) {
strcpy(lexer->token.text + lexer->token.length, text);
memcpy(lexer->token.text + lexer->token.length, text, length);
lexer->token.length += length;
return;
}
@ -316,7 +318,6 @@ static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
free(lexer->token.text);
}
memcpy(temp + lexer->token.length, text, length);
temp[lexer->token.length + length] = 0;
lexer->token.text = temp;
lexer->token.length += length;
lexer->size = newSize;
@ -356,9 +357,9 @@ static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
lexer->cr = cr;
return n;
} else if (lexer->string_left) {
int length = lexer->string_left;
if ((int)bufferSize < length) {
length = (int)bufferSize;
size_t length = lexer->string_left;
if (bufferSize < length) {
length = bufferSize;
}
memcpy(buffer, lexer->string_position, length);
lexer->string_position += length;
@ -478,15 +479,18 @@ int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name,
}
/*--------------------------------------------------------------------------*/
int cmListFileLexer_SetString(cmListFileLexer* lexer, const char* text)
int cmListFileLexer_SetString(cmListFileLexer* lexer, char const* text,
size_t length)
{
int result = 1;
cmListFileLexerDestroy(lexer);
if (text) {
int length = (int)strlen(text);
lexer->string_buffer = (char*)malloc(length + 1);
/* text may be non-NULL while length is 0. However, on some platforms
malloc(0) returns NULL. To avoid signaling an error to the caller in
such cases, allocate only when length is nonzero. */
if (length > 0) {
lexer->string_buffer = (char*)malloc(length);
if (lexer->string_buffer) {
strcpy(lexer->string_buffer, text);
memcpy(lexer->string_buffer, text, length);
lexer->string_position = lexer->string_buffer;
lexer->string_left = length;
} else {

View File

@ -724,8 +724,8 @@ int cmCTest::ProcessSteps()
this->SetTimeLimit(mf.GetDefinition("CTEST_TIME_LIMIT"));
this->SetCMakeVariables(mf);
std::vector<cmListFileArgument> args{
cmListFileArgument("RETURN_VALUE", cmListFileArgument::Unquoted, 0),
cmListFileArgument("return_value", cmListFileArgument::Unquoted, 0),
cmListFileArgument("RETURN_VALUE"_s, cmListFileArgument::Unquoted, 0),
cmListFileArgument("return_value"_s, cmListFileArgument::Unquoted, 0),
};
if (this->Impl->Parts[PartStart]) {
@ -843,12 +843,12 @@ int cmCTest::ProcessSteps()
auto const func = cmListFileFunction(
"ctest_submit", 0, 0,
{
cmListFileArgument("RETRY_COUNT", cmListFileArgument::Unquoted, 0),
cmListFileArgument("RETRY_COUNT"_s, cmListFileArgument::Unquoted, 0),
cmListFileArgument(count, cmListFileArgument::Quoted, 0),
cmListFileArgument("RETRY_DELAY", cmListFileArgument::Unquoted, 0),
cmListFileArgument("RETRY_DELAY"_s, cmListFileArgument::Unquoted, 0),
cmListFileArgument(delay, cmListFileArgument::Quoted, 0),
cmListFileArgument("RETURN_VALUE", cmListFileArgument::Unquoted, 0),
cmListFileArgument("return_value", cmListFileArgument::Unquoted, 0),
cmListFileArgument("RETURN_VALUE"_s, cmListFileArgument::Unquoted, 0),
cmListFileArgument("return_value"_s, cmListFileArgument::Unquoted, 0),
});
auto status = cmExecutionStatus(mf);
if (!mf.ExecuteCommand(func, status) ||

View File

@ -80,7 +80,7 @@ void FormatLine(std::back_insert_iterator<std::vector<cm::string_view>> outIt,
}
} // anonymous namespace
std::string cmDocumentationFormatter::Format(std::string text) const
std::string cmDocumentationFormatter::Format(cm::string_view text) const
{
// Exit early on empty text
if (text.empty()) {
@ -107,9 +107,8 @@ std::string cmDocumentationFormatter::Format(std::string text) const
) // clang-format on
{
auto const isLastLine = end == std::string::npos;
auto const line = isLastLine
? cm::string_view{ text.c_str() + start }
: cm::string_view{ text.c_str() + start, end - start };
auto const line =
isLastLine ? text.substr(start) : text.substr(start, end - start);
if (!line.empty() && line.front() == ' ') {
// Preformatted lines go as is w/ a leading padding

View File

@ -8,13 +8,15 @@
#include <iosfwd>
#include <string>
#include <cm/string_view>
class cmDocumentationSection;
/** Print documentation in a simple text format. */
class cmDocumentationFormatter
{
public:
std::string Format(std::string text) const;
std::string Format(cm::string_view text) const;
void PrintSection(std::ostream& os, cmDocumentationSection const& section);
void PrintFormatted(std::ostream& os, std::string const& text) const
{

View File

@ -1171,7 +1171,7 @@ bool cmFindPackageCommand::FindPackage(
}
std::vector<cmListFileArgument> listFileArgs(argsForProvider.size() + 1);
listFileArgs[0] =
cmListFileArgument("FIND_PACKAGE", cmListFileArgument::Unquoted, 0);
cmListFileArgument("FIND_PACKAGE"_s, cmListFileArgument::Unquoted, 0);
std::transform(argsForProvider.begin(), argsForProvider.end(),
listFileArgs.begin() + 1, [](std::string const& arg) {
return cmListFileArgument(arg,

View File

@ -11,6 +11,8 @@
# include <cmsys/Encoding.hxx>
#endif
#include <cm/string_view>
#include "cmList.h"
#include "cmListFileLexer.h"
#include "cmMessageType.h"
@ -51,11 +53,11 @@ public:
cmListFileParser& operator=(cmListFileParser const&) = delete;
bool ParseFile(char const* filename);
bool ParseString(char const* str, char const* virtual_filename);
bool ParseString(cm::string_view str, char const* virtual_filename);
private:
bool Parse();
bool ParseFunction(char const* name, long line);
bool ParseFunction(cm::string_view name, long line);
bool AddArgument(cmListFileLexer_Token* token,
cmListFileArgument::Delimiter delim);
void IssueFileOpenError(std::string const& text) const;
@ -142,12 +144,13 @@ bool cmListFileParser::ParseFile(char const* filename)
return this->Parse();
}
bool cmListFileParser::ParseString(char const* str,
bool cmListFileParser::ParseString(cm::string_view str,
char const* virtual_filename)
{
this->FileName = virtual_filename;
if (!cmListFileLexer_SetString(this->Lexer.get(), str)) {
if (!cmListFileLexer_SetString(this->Lexer.get(), str.data(),
str.length())) {
this->IssueFileOpenError("cmListFileCache: cannot allocate buffer.");
return false;
}
@ -170,7 +173,8 @@ bool cmListFileParser::Parse()
} else if (token->type == cmListFileLexer_Token_Identifier) {
if (haveNewline) {
haveNewline = false;
if (this->ParseFunction(token->text, token->line)) {
if (this->ParseFunction(cm::string_view(token->text, token->length),
token->line)) {
this->ListFile->Functions.emplace_back(
std::move(this->FunctionName), this->FunctionLine,
this->FunctionLineEnd, std::move(this->FunctionArguments));
@ -181,7 +185,7 @@ bool cmListFileParser::Parse()
auto error = cmStrCat(
"Parse error. Expected a newline, got ",
cmListFileLexer_GetTypeAsString(this->Lexer.get(), token->type),
" with text \"", token->text, "\".");
" with text \"", cm::string_view(token->text, token->length), "\".");
this->IssueError(error);
return false;
}
@ -189,7 +193,7 @@ bool cmListFileParser::Parse()
auto error = cmStrCat(
"Parse error. Expected a command name, got ",
cmListFileLexer_GetTypeAsString(this->Lexer.get(), token->type),
" with text \"", token->text, "\".");
" with text \"", cm::string_view(token->text, token->length), "\".");
this->IssueError(error);
return false;
}
@ -208,10 +212,10 @@ bool cmListFileParser::Parse()
return true;
}
bool cmListFileParser::ParseFunction(char const* name, long line)
bool cmListFileParser::ParseFunction(cm::string_view name, long line)
{
// Initialize a new function call.
this->FunctionName = name;
this->FunctionName.assign(name.data(), name.size());
this->FunctionLine = line;
// Command name has already been parsed. Read the left paren.
@ -225,10 +229,10 @@ bool cmListFileParser::ParseFunction(char const* name, long line)
return false;
}
if (token->type != cmListFileLexer_Token_ParenLeft) {
auto error =
cmStrCat("Parse error. Expected \"(\", got ",
cmListFileLexer_GetTypeAsString(this->Lexer.get(), token->type),
" with text \"", token->text, "\".");
auto error = cmStrCat(
"Parse error. Expected \"(\", got ",
cmListFileLexer_GetTypeAsString(this->Lexer.get(), token->type),
" with text \"", cm::string_view(token->text, token->length), "\".");
this->IssueError(error);
return false;
}
@ -283,7 +287,7 @@ bool cmListFileParser::ParseFunction(char const* name, long line)
"Parse error. Function missing ending \")\". "
"Instead found ",
cmListFileLexer_GetTypeAsString(this->Lexer.get(), token->type),
" with text \"", token->text, "\".");
" with text \"", cm::string_view(token->text, token->length), "\".");
this->IssueError(error);
return false;
}
@ -305,7 +309,8 @@ bool cmListFileParser::ParseFunction(char const* name, long line)
bool cmListFileParser::AddArgument(cmListFileLexer_Token* token,
cmListFileArgument::Delimiter delim)
{
this->FunctionArguments.emplace_back(token->text, delim, token->line);
this->FunctionArguments.emplace_back(
cm::string_view(token->text, token->length), delim, token->line);
if (this->Separation == SeparationOkay) {
return true;
}
@ -442,7 +447,7 @@ bool cmListFile::ParseFile(char const* filename, cmMessenger* messenger,
return !parseError;
}
bool cmListFile::ParseString(char const* str, char const* virtual_filename,
bool cmListFile::ParseString(cm::string_view str, char const* virtual_filename,
cmMessenger* messenger,
cmListFileBacktrace const& lfbt)
{

View File

@ -11,6 +11,7 @@
#include <vector>
#include <cm/optional>
#include <cm/string_view>
#include "cmList.h"
#include "cmStack.h"
@ -34,8 +35,8 @@ struct cmListFileArgument
Bracket
};
cmListFileArgument() = default;
cmListFileArgument(std::string v, Delimiter d, long line)
: Value(std::move(v))
cmListFileArgument(cm::string_view v, Delimiter d, long line)
: Value(v.data(), v.size())
, Delim(d)
, Line(line)
{
@ -241,7 +242,7 @@ struct cmListFile
bool ParseFile(char const* path, cmMessenger* messenger,
cmListFileBacktrace const& lfbt);
bool ParseString(char const* str, char const* virtual_filename,
bool ParseString(cm::string_view str, char const* virtual_filename,
cmMessenger* messenger, cmListFileBacktrace const& lfbt);
std::vector<cmListFileFunction> Functions;

View File

@ -2,6 +2,8 @@
file LICENSE.rst or https://cmake.org/licensing for details. */
#pragma once
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
@ -30,7 +32,7 @@ struct cmListFileLexer_Token_s
{
cmListFileLexer_Type type;
char* text;
int length;
size_t length;
int line;
int column;
};
@ -55,7 +57,7 @@ typedef struct cmListFileLexer_s cmListFileLexer;
cmListFileLexer* cmListFileLexer_New(void);
int cmListFileLexer_SetFileName(cmListFileLexer*, char const*,
cmListFileLexer_BOM* bom);
int cmListFileLexer_SetString(cmListFileLexer*, char const*);
int cmListFileLexer_SetString(cmListFileLexer*, char const*, size_t);
cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer*);
long cmListFileLexer_GetCurrentLine(cmListFileLexer*);
long cmListFileLexer_GetCurrentColumn(cmListFileLexer*);

View File

@ -837,7 +837,7 @@ bool cmMakefile::ReadListFileAsString(std::string const& content,
ListFileScope scope(this, filenametoread);
cmListFile listFile;
if (!listFile.ParseString(content.c_str(), virtualFileName.c_str(),
if (!listFile.ParseString(content, virtualFileName.c_str(),
this->GetMessenger(), this->Backtrace)) {
return false;
}

View File

@ -6,6 +6,8 @@
#include <memory>
#include <utility>
#include <cm/string_view>
#include "cmExecutionStatus.h"
#include "cmListFileCache.h"
#include "cmMakefile.h"

View File

@ -2,6 +2,8 @@
#include <string>
#include <vector>
#include <cmext/string_view>
#include <cm3p/cppdap/optional.h>
#include <cm3p/cppdap/protocol.h>
#include <cm3p/cppdap/types.h>
@ -17,7 +19,7 @@ static bool testStackFrameFunctionName(
auto thread = std::make_shared<cmDebugger::cmDebuggerThread>(0, "name");
auto const* functionName = "function_name";
auto arguments = std::vector<cmListFileArgument>{ cmListFileArgument(
"arg", cmListFileArgument::Delimiter::Unquoted, 0) };
"arg"_s, cmListFileArgument::Delimiter::Unquoted, 0) };
cmListFileFunction func(functionName, 10, 20, arguments);
thread->PushStackFrame(nullptr, "CMakeLists.txt", func);