mirror of
https://github.com/python/cpython.git
synced 2026-01-27 05:05:50 +00:00
gh-141004: Improve make check-c-api-docs (GH-143564)
- Gather all documented names into a set in a single pass, which makes the check much faster.
- Do not match substrings (e.g. documenting `PyErr_WarnEx` doesn't mean that `PyErr_Warn` is documented).
- Consider `PY`-prefixed names (many old macros use this prefix).
This commit is contained in:
parent
fd6d41b292
commit
234a15dc4e
@ -45,6 +45,27 @@ Py_LL
|
||||
Py_SAFE_DOWNCAST
|
||||
Py_ULL
|
||||
Py_VA_COPY
|
||||
PYLONG_BITS_IN_DIGIT
|
||||
PY_DWORD_MAX
|
||||
PY_FORMAT_SIZE_T
|
||||
PY_INT32_T
|
||||
PY_INT64_T
|
||||
PY_LITTLE_ENDIAN
|
||||
PY_LLONG_MAX
|
||||
PY_LLONG_MIN
|
||||
PY_LONG_LONG
|
||||
PY_SIZE_MAX
|
||||
PY_UINT32_T
|
||||
PY_UINT64_T
|
||||
PY_ULLONG_MAX
|
||||
# patchlevel.h
|
||||
PYTHON_ABI_STRING
|
||||
PYTHON_API_STRING
|
||||
PY_RELEASE_LEVEL_ALPHA
|
||||
PY_RELEASE_LEVEL_BETA
|
||||
PY_RELEASE_LEVEL_FINAL
|
||||
PY_RELEASE_LEVEL_GAMMA
|
||||
PY_VERSION
|
||||
# unicodeobject.h
|
||||
Py_UNICODE_SIZE
|
||||
# cpython/methodobject.h
|
||||
@ -91,3 +112,39 @@ Py_FrozenMain
|
||||
# cpython/unicodeobject.h
|
||||
PyUnicode_IS_COMPACT
|
||||
PyUnicode_IS_COMPACT_ASCII
|
||||
# pythonrun.h
|
||||
PyErr_Display
|
||||
# cpython/objimpl.h
|
||||
PyObject_GET_WEAKREFS_LISTPTR
|
||||
# cpython/pythonrun.h
|
||||
PyOS_Readline
|
||||
# cpython/warnings.h
|
||||
PyErr_Warn
|
||||
# fileobject.h
|
||||
PY_STDIOTEXTMODE
|
||||
# structmember.h
|
||||
PY_WRITE_RESTRICTED
|
||||
# pythread.h
|
||||
PY_TIMEOUT_T
|
||||
PY_TIMEOUT_MAX
|
||||
# cpython/pyctype.h
|
||||
PY_CTF_ALNUM
|
||||
PY_CTF_ALPHA
|
||||
PY_CTF_DIGIT
|
||||
PY_CTF_LOWER
|
||||
PY_CTF_SPACE
|
||||
PY_CTF_UPPER
|
||||
PY_CTF_XDIGIT
|
||||
# cpython/code.h
|
||||
PY_DEF_EVENT
|
||||
PY_FOREACH_CODE_EVENT
|
||||
# cpython/funcobject.h
|
||||
PY_DEF_EVENT
|
||||
PY_FOREACH_FUNC_EVENT
|
||||
# cpython/monitoring.h
|
||||
PY_MONITORING_EVENT_BRANCH
|
||||
# cpython/dictobject.h
|
||||
PY_DEF_EVENT
|
||||
PY_FOREACH_DICT_EVENT
|
||||
# cpython/pystats.h
|
||||
PYSTATS_MAX_UOP_ID
|
||||
|
||||
@ -8,6 +8,7 @@ SIMPLE_FUNCTION_REGEX = re.compile(r"PyAPI_FUNC(.+) (\w+)\(")
|
||||
SIMPLE_MACRO_REGEX = re.compile(r"# *define *(\w+)(\(.+\))? ")
|
||||
SIMPLE_INLINE_REGEX = re.compile(r"static inline .+( |\n)(\w+)")
|
||||
SIMPLE_DATA_REGEX = re.compile(r"PyAPI_DATA\(.+\) (\w+)")
|
||||
API_NAME_REGEX = re.compile(r'\bP[yY][a-zA-Z0-9_]+')
|
||||
|
||||
CPYTHON = Path(__file__).parent.parent.parent
|
||||
INCLUDE = CPYTHON / "Include"
|
||||
@ -72,24 +73,10 @@ def found_ignored_documented(singular: bool) -> str:
|
||||
)
|
||||
|
||||
|
||||
def is_documented(name: str) -> bool:
|
||||
"""
|
||||
Is a name present in the C API documentation?
|
||||
"""
|
||||
for path in C_API_DOCS.iterdir():
|
||||
if path.is_dir():
|
||||
continue
|
||||
if path.suffix != ".rst":
|
||||
continue
|
||||
|
||||
text = path.read_text(encoding="utf-8")
|
||||
if name in text:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
|
||||
def scan_file_for_docs(
|
||||
filename: str,
|
||||
text: str,
|
||||
names: set[str]) -> tuple[list[str], list[str]]:
|
||||
"""
|
||||
Scan a header file for C API functions.
|
||||
"""
|
||||
@ -98,7 +85,7 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
|
||||
colors = _colorize.get_colors()
|
||||
|
||||
def check_for_name(name: str) -> None:
|
||||
documented = is_documented(name)
|
||||
documented = name in names
|
||||
if documented and (name in IGNORED):
|
||||
documented_ignored.append(name)
|
||||
elif not documented and (name not in IGNORED):
|
||||
@ -106,14 +93,14 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
|
||||
|
||||
for function in SIMPLE_FUNCTION_REGEX.finditer(text):
|
||||
name = function.group(2)
|
||||
if not name.startswith("Py"):
|
||||
if not API_NAME_REGEX.fullmatch(name):
|
||||
continue
|
||||
|
||||
check_for_name(name)
|
||||
|
||||
for macro in SIMPLE_MACRO_REGEX.finditer(text):
|
||||
name = macro.group(1)
|
||||
if not name.startswith("Py"):
|
||||
if not API_NAME_REGEX.fullmatch(name):
|
||||
continue
|
||||
|
||||
if "(" in name:
|
||||
@ -123,14 +110,14 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
|
||||
|
||||
for inline in SIMPLE_INLINE_REGEX.finditer(text):
|
||||
name = inline.group(2)
|
||||
if not name.startswith("Py"):
|
||||
if not API_NAME_REGEX.fullmatch(name):
|
||||
continue
|
||||
|
||||
check_for_name(name)
|
||||
|
||||
for data in SIMPLE_DATA_REGEX.finditer(text):
|
||||
name = data.group(1)
|
||||
if not name.startswith("Py"):
|
||||
if not API_NAME_REGEX.fullmatch(name):
|
||||
continue
|
||||
|
||||
check_for_name(name)
|
||||
@ -152,6 +139,14 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
|
||||
|
||||
|
||||
def main() -> None:
|
||||
print("Gathering C API names from docs...")
|
||||
names = set()
|
||||
for path in C_API_DOCS.glob('**/*.rst'):
|
||||
text = path.read_text(encoding="utf-8")
|
||||
for name in API_NAME_REGEX.findall(text):
|
||||
names.add(name)
|
||||
print(f"Got {len(names)} names!")
|
||||
|
||||
print("Scanning for undocumented C API functions...")
|
||||
files = [*INCLUDE.iterdir(), *(INCLUDE / "cpython").iterdir()]
|
||||
all_missing: list[str] = []
|
||||
@ -162,7 +157,7 @@ def main() -> None:
|
||||
continue
|
||||
assert file.exists()
|
||||
text = file.read_text(encoding="utf-8")
|
||||
missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text)
|
||||
missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text, names)
|
||||
all_found_ignored += ignored
|
||||
all_missing += missing
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user