gh-141004: Improve make check-c-api-docs (GH-143564)

- Gather all documented names into a set in a single pass.
  This makes the check much faster.

- Do not match substrings (e.g. documenting `PyErr_WarnEx`
  doesn't mean that `PyErr_Warn` is documented).

- Consider `PY`-prefixed names too (a lot of old macros use this prefix).
This commit is contained in:
Petr Viktorin 2026-01-09 11:16:50 +01:00 committed by GitHub
parent fd6d41b292
commit 234a15dc4e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 76 additions and 24 deletions

View File

@ -45,6 +45,27 @@ Py_LL
Py_SAFE_DOWNCAST
Py_ULL
Py_VA_COPY
PYLONG_BITS_IN_DIGIT
PY_DWORD_MAX
PY_FORMAT_SIZE_T
PY_INT32_T
PY_INT64_T
PY_LITTLE_ENDIAN
PY_LLONG_MAX
PY_LLONG_MIN
PY_LONG_LONG
PY_SIZE_MAX
PY_UINT32_T
PY_UINT64_T
PY_ULLONG_MAX
# patchlevel.h
PYTHON_ABI_STRING
PYTHON_API_STRING
PY_RELEASE_LEVEL_ALPHA
PY_RELEASE_LEVEL_BETA
PY_RELEASE_LEVEL_FINAL
PY_RELEASE_LEVEL_GAMMA
PY_VERSION
# unicodeobject.h
Py_UNICODE_SIZE
# cpython/methodobject.h
@ -91,3 +112,39 @@ Py_FrozenMain
# cpython/unicodeobject.h
PyUnicode_IS_COMPACT
PyUnicode_IS_COMPACT_ASCII
# pythonrun.h
PyErr_Display
# cpython/objimpl.h
PyObject_GET_WEAKREFS_LISTPTR
# cpython/pythonrun.h
PyOS_Readline
# cpython/warnings.h
PyErr_Warn
# fileobject.h
PY_STDIOTEXTMODE
# structmember.h
PY_WRITE_RESTRICTED
# pythread.h
PY_TIMEOUT_T
PY_TIMEOUT_MAX
# cpython/pyctype.h
PY_CTF_ALNUM
PY_CTF_ALPHA
PY_CTF_DIGIT
PY_CTF_LOWER
PY_CTF_SPACE
PY_CTF_UPPER
PY_CTF_XDIGIT
# cpython/code.h
PY_DEF_EVENT
PY_FOREACH_CODE_EVENT
# cpython/funcobject.h
PY_DEF_EVENT
PY_FOREACH_FUNC_EVENT
# cpython/monitoring.h
PY_MONITORING_EVENT_BRANCH
# cpython/dictobject.h
PY_DEF_EVENT
PY_FOREACH_DICT_EVENT
# cpython/pystats.h
PYSTATS_MAX_UOP_ID

View File

@ -8,6 +8,7 @@ SIMPLE_FUNCTION_REGEX = re.compile(r"PyAPI_FUNC(.+) (\w+)\(")
# Matches a `#define` line: group 1 is the macro name, optional group 2 is a
# parenthesised parameter list. The pattern requires a trailing space.
SIMPLE_MACRO_REGEX = re.compile(r"# *define *(\w+)(\(.+\))? ")
# Matches `static inline ...` followed by a space or newline and an
# identifier; group 2 is that identifier.
SIMPLE_INLINE_REGEX = re.compile(r"static inline .+( |\n)(\w+)")
# Matches `PyAPI_DATA(<type>) <name>`; group 1 is the name.
SIMPLE_DATA_REGEX = re.compile(r"PyAPI_DATA\(.+\) (\w+)")
# Matches an identifier that starts at a word boundary with `Py` or `PY`
# and continues with at least one letter, digit or underscore.
API_NAME_REGEX = re.compile(r'\bP[yY][a-zA-Z0-9_]+')
# Directory three levels above the one containing this file.
CPYTHON = Path(__file__).parent.parent.parent
# The `Include` directory directly under CPYTHON.
INCLUDE = CPYTHON / "Include"
@ -72,24 +73,10 @@ def found_ignored_documented(singular: bool) -> str:
)
def is_documented(name: str) -> bool:
"""
Is a name present in the C API documentation?
"""
for path in C_API_DOCS.iterdir():
if path.is_dir():
continue
if path.suffix != ".rst":
continue
text = path.read_text(encoding="utf-8")
if name in text:
return True
return False
def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
def scan_file_for_docs(
filename: str,
text: str,
names: set[str]) -> tuple[list[str], list[str]]:
"""
Scan a header file for C API functions.
"""
@ -98,7 +85,7 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
colors = _colorize.get_colors()
def check_for_name(name: str) -> None:
documented = is_documented(name)
documented = name in names
if documented and (name in IGNORED):
documented_ignored.append(name)
elif not documented and (name not in IGNORED):
@ -106,14 +93,14 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
for function in SIMPLE_FUNCTION_REGEX.finditer(text):
name = function.group(2)
if not name.startswith("Py"):
if not API_NAME_REGEX.fullmatch(name):
continue
check_for_name(name)
for macro in SIMPLE_MACRO_REGEX.finditer(text):
name = macro.group(1)
if not name.startswith("Py"):
if not API_NAME_REGEX.fullmatch(name):
continue
if "(" in name:
@ -123,14 +110,14 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
for inline in SIMPLE_INLINE_REGEX.finditer(text):
name = inline.group(2)
if not name.startswith("Py"):
if not API_NAME_REGEX.fullmatch(name):
continue
check_for_name(name)
for data in SIMPLE_DATA_REGEX.finditer(text):
name = data.group(1)
if not name.startswith("Py"):
if not API_NAME_REGEX.fullmatch(name):
continue
check_for_name(name)
@ -152,6 +139,14 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
def main() -> None:
print("Gathering C API names from docs...")
names = set()
for path in C_API_DOCS.glob('**/*.rst'):
text = path.read_text(encoding="utf-8")
for name in API_NAME_REGEX.findall(text):
names.add(name)
print(f"Got {len(names)} names!")
print("Scanning for undocumented C API functions...")
files = [*INCLUDE.iterdir(), *(INCLUDE / "cpython").iterdir()]
all_missing: list[str] = []
@ -162,7 +157,7 @@ def main() -> None:
continue
assert file.exists()
text = file.read_text(encoding="utf-8")
missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text)
missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text, names)
all_found_ignored += ignored
all_missing += missing