gh-142927: Hide _sync_coordinator frames from profiler output (#143337)

This commit is contained in:
Pablo Galindo Salgado 2026-01-02 12:09:36 +00:00 committed by GitHub
parent 315f474d11
commit 09ce592499
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 164 additions and 3 deletions

View File

@ -6,6 +6,7 @@ from .constants import (
THREAD_STATUS_GIL_REQUESTED,
THREAD_STATUS_UNKNOWN,
THREAD_STATUS_HAS_EXCEPTION,
_INTERNAL_FRAME_SUFFIXES,
)
try:
@ -42,6 +43,25 @@ def extract_lineno(location):
return 0
return location[0]
def _is_internal_frame(frame):
    """Return True when *frame* originates from a profiler-internal module.

    A tuple frame supplies its filename at index 0 (an empty tuple counts as
    having no filename); any other object is asked for a ``filename``
    attribute, defaulting to an empty string. A frame without a filename is
    never considered internal. Otherwise the filename is matched against
    ``_INTERNAL_FRAME_SUFFIXES`` with ``str.endswith``.
    """
    if isinstance(frame, tuple):
        path = frame[0] if frame else ""
    else:
        path = getattr(frame, "filename", "")
    # Short-circuit on a missing/empty filename so the suffix test only runs
    # when there is something to match.
    return bool(path) and path.endswith(_INTERNAL_FRAME_SUFFIXES)
def filter_internal_frames(frames):
    """Return *frames* without the entries flagged by ``_is_internal_frame``.

    Matching frames are dropped wherever they sit in the sequence and the
    relative order of the remaining frames is kept. A falsy input (``None``
    or an empty sequence) is returned unchanged, as the very same object;
    any other input yields a new list.
    """
    if frames:
        return [frame for frame in frames if not _is_internal_frame(frame)]
    # Nothing to filter: hand the caller's object straight back.
    return frames
class Collector(ABC):
@abstractmethod
def collect(self, stack_frames, timestamps_us=None):
@ -63,6 +83,10 @@ class Collector(ABC):
def export(self, filename):
"""Export collected data to a file."""
@staticmethod
def _filter_internal_frames(frames):
    """Return *frames* with profiler-internal frames removed.

    Forwards directly to the module-level ``filter_internal_frames`` helper
    and returns whatever it returns.
    """
    return filter_internal_frames(frames)
def _iter_all_frames(self, stack_frames, skip_idle=False):
for interpreter_info in stack_frames:
for thread_info in interpreter_info.threads:
@ -76,7 +100,10 @@ class Collector(ABC):
continue
frames = thread_info.frame_info
if frames:
yield frames, thread_info.thread_id
# Filter out internal profiler frames wherever they appear in the stack
frames = self._filter_internal_frames(frames)
if frames:
yield frames, thread_info.thread_id
def _iter_async_frames(self, awaited_info_list):
# Phase 1: Index tasks and build parent relationships with pre-computed selection

View File

@ -23,6 +23,12 @@ SORT_MODE_NSAMPLES_CUMUL = 5
# Format: (lineno, end_lineno, col_offset, end_col_offset)
DEFAULT_LOCATION = (0, 0, -1, -1)
# Filename suffixes identifying profiler-internal modules. Frames whose
# filename ends with one of these suffixes are hidden from user-facing
# profiling output.
# NOTE(review): matching is a plain suffix test, so a user file whose name
# merely ends with one of these strings (e.g. "my_sync_coordinator.py") would
# presumably be hidden as well -- confirm this is acceptable.
_INTERNAL_FRAME_SUFFIXES = (
"_sync_coordinator.py",
)
# Thread status flags
try:
from _remote_debugging import (

View File

@ -6,7 +6,7 @@ import sys
import threading
import time
from .collector import Collector
from .collector import Collector, filter_internal_frames
from .opcode_utils import get_opcode_info, format_opcode
try:
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED, THREAD_STATUS_HAS_EXCEPTION
@ -172,7 +172,7 @@ class GeckoCollector(Collector):
# Process threads
for interpreter_info in stack_frames:
for thread_info in interpreter_info.threads:
frames = thread_info.frame_info
frames = filter_internal_frames(thread_info.frame_info)
tid = thread_info.thread_id
# Initialize thread if needed

View File

@ -1823,3 +1823,131 @@ class TestCollectorFrameFormat(unittest.TestCase):
thread = profile["threads"][0]
# Should have recorded 3 functions
self.assertEqual(thread["funcTable"]["length"], 3)
class TestInternalFrameFiltering(unittest.TestCase):
    """Tests for filtering internal profiler frames from output."""

    @staticmethod
    def _one_thread_sample(thread_frames):
        """Wrap *thread_frames* in a one-interpreter, one-thread sample.

        The thread has id 1, holds the GIL, and lives in interpreter 0, which
        is the shape every collector test in this class feeds to ``collect``.
        """
        thread = MockThreadInfo(
            1,
            thread_frames,
            status=THREAD_STATUS_HAS_GIL,
        )
        return [MockInterpreterInfo(0, [thread])]

    def test_filter_internal_frames(self):
        """Test that _sync_coordinator frames are filtered from anywhere in stack."""
        from profiling.sampling.collector import filter_internal_frames

        # The coordinator frame sits in the middle of the stack, between the
        # user's code and runpy (realistic scenario).
        stack = [
            MockFrameInfo("user_script.py", 10, "user_func"),
            MockFrameInfo("/path/to/_sync_coordinator.py", 100, "main"),
            MockFrameInfo("<frozen runpy>", 87, "_run_code"),
        ]

        kept = filter_internal_frames(stack)

        self.assertEqual(len(kept), 2)
        self.assertEqual(
            [frame.filename for frame in kept],
            ["user_script.py", "<frozen runpy>"],
        )

    def test_pstats_collector_filters_internal_frames(self):
        """Test that PstatsCollector filters out internal frames."""
        collector = PstatsCollector(sample_interval_usec=1000)
        sample = self._one_thread_sample(
            [
                MockFrameInfo("user_script.py", 10, "user_func"),
                MockFrameInfo("/path/to/_sync_coordinator.py", 100, "main"),
                MockFrameInfo("<frozen runpy>", 87, "_run_code"),
            ]
        )

        collector.collect(sample)

        # Only the two non-internal frames may show up in the results.
        self.assertEqual(len(collector.result), 2)
        expected_keys = (
            ("user_script.py", 10, "user_func"),
            ("<frozen runpy>", 87, "_run_code"),
        )
        for key in expected_keys:
            self.assertIn(key, collector.result)

    def test_gecko_collector_filters_internal_frames(self):
        """Test that GeckoCollector filters out internal frames."""
        collector = GeckoCollector(sample_interval_usec=1000)
        sample = self._one_thread_sample(
            [
                MockFrameInfo("app.py", 50, "run"),
                MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
            ]
        )

        collector.collect(sample)
        profile = collector._build_profile()

        # Should not contain _sync_coordinator functions
        for entry in profile["shared"]["stringArray"]:
            self.assertNotIn("_sync_coordinator", entry)

    def test_flamegraph_collector_filters_internal_frames(self):
        """Test that FlamegraphCollector filters out internal frames."""
        collector = FlamegraphCollector(sample_interval_usec=1000)
        sample = self._one_thread_sample(
            [
                MockFrameInfo("app.py", 50, "run"),
                MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
                MockFrameInfo("<frozen runpy>", 87, "_run_code"),
            ]
        )

        collector.collect(sample)
        data = collector._convert_to_flamegraph_format()

        for entry in data.get("strings", []):
            self.assertNotIn("_sync_coordinator", entry)

    def test_collapsed_stack_collector_filters_internal_frames(self):
        """Test that CollapsedStackCollector filters out internal frames."""
        collector = CollapsedStackCollector(sample_interval_usec=1000)
        sample = self._one_thread_sample(
            [
                MockFrameInfo("app.py", 50, "run"),
                MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
            ]
        )

        collector.collect(sample)

        # Check that no stack contains _sync_coordinator
        for (call_tree, _thread_id), _count in collector.stack_counter.items():
            for filename, _lineno, _funcname in call_tree:
                self.assertNotIn("_sync_coordinator", filename)