From fcfb002f8bced5872923292296f82695249ee832 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Wed, 3 Jun 2026 13:46:33 +0200 Subject: [PATCH 1/9] taking a stab --- Lib/profiling/sampling/gecko_collector.py | 347 +++++++++++++++++----- 1 file changed, 266 insertions(+), 81 deletions(-) diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py index 54392af95000082..dd899c0d745a0f3 100644 --- a/Lib/profiling/sampling/gecko_collector.py +++ b/Lib/profiling/sampling/gecko_collector.py @@ -1,8 +1,11 @@ +import array import itertools +import io import json import os import platform import sys +import tempfile import threading import time @@ -61,6 +64,102 @@ PROCESS_TYPE_MAIN = 0 STACKWALK_DISABLED = 0 +# In-memory buffer before spilling to disk +DEFAULT_SPILL_BUFFER_BYTES = 128 * 1024 + + +class TypedSpillColumn: + def __init__(self, directory, basename, typecode, *, + buffer_bytes=DEFAULT_SPILL_BUFFER_BYTES): + self.path = os.path.join(directory, basename) + self.buffer = array.array(typecode) + self.max_items = max(1, buffer_bytes // self.buffer.itemsize) + + def append(self, value): + self.buffer.append(value) + if len(self.buffer) >= self.max_items: + self.flush() + + def flush(self): + with open(self.path, "ab") as file: + self.buffer.tofile(file) + self.buffer.clear() + + def iter_chunks(self): + typecode = self.buffer.typecode + block_bytes = self.max_items * self.buffer.itemsize + with open(self.path, "rb") as file: + for block in iter(lambda: file.read(block_bytes), b""): + chunk = array.array(typecode) + chunk.frombytes(block) + yield chunk + + +class NDJSONSpillColumn: + _encoder = json.JSONEncoder(separators=(",", ":")) + + def __init__(self, directory, basename, *, + buffer_bytes=DEFAULT_SPILL_BUFFER_BYTES): + self.path = os.path.join(directory, basename) + self.buffer = bytearray() + self._buffer_bytes = buffer_bytes + + def append_object(self, data): + self.buffer += (self._encoder.encode(data) + "\n").encode() + if len(self.buffer) >= self._buffer_bytes: + self.flush() + + def flush(self): + with open(self.path, "ab") as file: + file.write(self.buffer) + self.buffer.clear() + + def iter_lines(self): + with open(self.path) as file: + for line in file: + yield line.rstrip("\n") + + +class GeckoThreadSpill: + _TYPED_COLUMNS = ( + ("samples_stack", "samples-stack.bin", "q"), + ("samples_time", "samples-time.bin", "d"), + ("markers_name", "markers-name.bin", "q"), + ("markers_start_time", "markers-start-time.bin", "d"), + ("markers_end_time", "markers-end-time.bin", "d"), + ("markers_phase", "markers-phase.bin", "B"), + ("markers_category", "markers-category.bin", "I"), + ) + + def __init__(self, directory, tid): + prefix = f"thread-{tid}-" + for attr, basename, typecode in self._TYPED_COLUMNS: + setattr(self, attr, TypedSpillColumn( + directory, prefix + basename, typecode)) + self.markers_data = NDJSONSpillColumn( + directory, prefix + "markers-data.ndjson") + self.sample_count = 0 + self.marker_count = 0 + + def append_sample(self, stack_index, time_ms): + self.samples_stack.append(stack_index) + self.samples_time.append(time_ms) + self.sample_count += 1 + + def append_marker(self, name_idx, start_time, end_time, phase, category, data): + self.markers_name.append(name_idx) + self.markers_start_time.append(start_time) + self.markers_end_time.append(end_time) + self.markers_phase.append(phase) + self.markers_category.append(category) + self.markers_data.append_object(data) + self.marker_count += 1 + + def prepare_read(self): + for attr, _basename, _typecode in self._TYPED_COLUMNS: + getattr(self, attr).flush() + self.markers_data.flush() + class GeckoCollector(Collector): aggregating = True @@ -77,6 +176,9 @@ def __init__(self, sample_interval_usec, *, skip_idle=False, opcodes=False): # Per-thread data structures self.threads = {} # tid -> thread data + self.spill_dir = None + self.thread_spills = {} + self.exported = False # Global tables self.libs = [] @@ -151,6 +253,9 @@ def collect(self, stack_frames, timestamps_us=None): stack_frames: List of interpreter/thread frame info timestamps_us: List of timestamps in microseconds (None for live sampling) """ + if self.exported: + raise RuntimeError("cannot append to GeckoCollector after export") + # Handle live sampling (no timestamps provided) if timestamps_us is None: current_time = (time.monotonic() * 1000) - self.start_time @@ -259,15 +364,9 @@ def collect(self, stack_frames, timestamps_us=None): stack_index = self._process_stack(thread_data, frames) # Add samples with timestamps - samples = thread_data["samples"] - samples_stack = samples["stack"] - samples_time = samples["time"] - samples_delay = samples["eventDelay"] - + thread_spill = self.thread_spills[tid] for t in times: - samples_stack.append(stack_index) - samples_time.append(t) - samples_delay.append(None) + thread_spill.append_sample(stack_index, t) # Handle opcodes if self.opcodes_enabled and frames: @@ -294,6 +393,10 @@ def collect(self, stack_frames, timestamps_us=None): def _create_thread(self, tid, is_main_thread): """Create a new thread structure with processed profile format.""" + if self.spill_dir is None: + self.spill_dir = tempfile.TemporaryDirectory() + + self.thread_spills[tid] = GeckoThreadSpill(self.spill_dir.name, tid) thread = { "name": f"Thread-{tid}", @@ -307,15 +410,6 @@ def _create_thread(self, tid, is_main_thread): "tid": tid, "processType": "default", "processName": "Python Process", - # Sample data - processed format with direct arrays - "samples": { - "stack": [], - "time": [], - "eventDelay": [], - "weight": None, - "weightType": "samples", - "length": 0, # Will be updated on export - }, # Stack table - processed format "stackTable": { "frame": [], @@ -366,16 +460,6 @@ def _create_thread(self, tid, is_main_thread): "functionSize": [], "length": 0, }, - # Markers - processed format (arrays) - "markers": { - "data": [], - "name": [], - "startTime": [], - "endTime": [], - "phase": [], - "category": [], - "length": 0, - }, # Caches for deduplication "_stackCache": {}, "_frameCache": {}, @@ -405,17 +489,10 @@ def _add_marker(self, tid, name, start_time, end_time, category): if tid not in self.threads: return - thread_data = self.threads[tid] duration = end_time - start_time name_idx = self._intern_string(name) - markers = thread_data["markers"] - markers["name"].append(name_idx) - markers["startTime"].append(start_time) - markers["endTime"].append(end_time) - markers["phase"].append(1) # 1 = interval marker - markers["category"].append(category) - markers["data"].append({ + self.thread_spills[tid].append_marker(name_idx, start_time, end_time, 1, category, { "type": name.replace(" ", ""), "duration": duration, "tid": tid @@ -426,20 +503,13 @@ def _add_opcode_interval_marker(self, tid, opcode, lineno, col_offset, funcname, if tid not in self.threads or opcode is None: return - thread_data = self.threads[tid] opcode_info = get_opcode_info(opcode) # Use formatted opcode name (with base opcode for specialized ones) formatted_opname = format_opcode(opcode) name_idx = self._intern_string(formatted_opname) - markers = thread_data["markers"] - markers["name"].append(name_idx) - markers["startTime"].append(start_time) - markers["endTime"].append(end_time) - markers["phase"].append(1) # 1 = interval marker - markers["category"].append(CATEGORY_OPCODES) - markers["data"].append({ + self.thread_spills[tid].append_marker(name_idx, start_time, end_time, 1, CATEGORY_OPCODES, { "type": "Opcode", "opcode": opcode, "opname": formatted_opname, @@ -660,7 +730,6 @@ def _finalize_markers(self): def export(self, filename): """Export the profile to a Gecko JSON file.""" - if self.sample_count > 0 and self.last_sample_time > 0: self.interval = self.last_sample_time / self.sample_count @@ -681,19 +750,31 @@ def spin(): spinner_thread = threading.Thread(target=spin, daemon=True) spinner_thread.start() + temp_path = None + replaced = False try: - # Finalize any open markers before building profile - self._finalize_markers() - - profile = self._build_profile() - - with open(filename, "w") as f: - json.dump(profile, f, separators=(",", ":")) + self._prepare_for_serialization() + output_dir = os.path.dirname(os.path.abspath(filename)) or "." + with tempfile.NamedTemporaryFile( + "w", dir=output_dir, delete=False + ) as file: + temp_path = file.name + self._stream_profile(file) + os.replace(temp_path, filename) + replaced = True finally: + self.exported = True stop_spinner.set() spinner_thread.join(timeout=1.0) # Small delay to ensure the clear happens time.sleep(0.01) + if temp_path is not None and not replaced: + try: + os.unlink(temp_path) + except FileNotFoundError: + pass + if self.spill_dir is not None: + self.spill_dir.cleanup() print(f"Gecko profile written to {filename}") print( @@ -727,34 +808,18 @@ def _build_marker_schema(self): def _build_profile(self): """Build the complete profile structure in processed format.""" - # Convert thread data to final format - threads = [] - - for tid, thread_data in self.threads.items(): - # Update lengths - samples = thread_data["samples"] - stack_table = thread_data["stackTable"] - frame_table = thread_data["frameTable"] - func_table = thread_data["funcTable"] - resource_table = thread_data["resourceTable"] - - samples["length"] = len(samples["stack"]) - stack_table["length"] = len(stack_table["frame"]) - frame_table["length"] = len(frame_table["func"]) - func_table["length"] = len(func_table["name"]) - resource_table["length"] = len(resource_table["name"]) - thread_data["markers"]["length"] = len(thread_data["markers"]["name"]) - - # Clean up internal caches - del thread_data["_stackCache"] - del thread_data["_frameCache"] - del thread_data["_funcCache"] - del thread_data["_resourceCache"] - - threads.append(thread_data) - - # Main profile structure in processed format - profile = { + try: + self._prepare_for_serialization() + file = io.StringIO() + self._stream_profile(file) + return json.loads(file.getvalue()) + finally: + self.exported = True + if self.spill_dir is not None: + self.spill_dir.cleanup() + + def _profile_head(self): + return { "meta": { "interval": self.interval, "startTime": self.start_time, @@ -784,7 +849,10 @@ def _build_profile(self): }, }, "libs": self.libs, - "threads": threads, + } + + def _profile_tail(self): + return { "pages": [], "shared": { "stringArray": self.global_strings, @@ -792,4 +860,121 @@ def _build_profile(self): }, } - return profile + def _prepare_for_serialization(self): + if self.exported: + raise RuntimeError("GeckoCollector has already been exported") + self._finalize_markers() + for spill in self.thread_spills.values(): + spill.prepare_read() + for thread_data in self.threads.values(): + thread_data["stackTable"]["length"] = len(thread_data["stackTable"]["frame"]) + thread_data["frameTable"]["length"] = len(thread_data["frameTable"]["func"]) + thread_data["funcTable"]["length"] = len(thread_data["funcTable"]["name"]) + thread_data["resourceTable"]["length"] = len(thread_data["resourceTable"]["name"]) + + def _stream_profile(self, file): + head = json.dumps( + self._profile_head(), separators=(",", ":"), allow_nan=False + )[1:-1] + tail = json.dumps( + self._profile_tail(), separators=(",", ":"), allow_nan=False + )[1:-1] + file.write("{") + file.write(head) + file.write(',"threads":[') + for index, (tid, thread_data) in enumerate(self.threads.items()): + if index: + file.write(",") + self._stream_thread(file, tid, thread_data) + file.write("],") + file.write(tail) + file.write("}") + + def _stream_thread(self, file, tid, thread_data): + spill = self.thread_spills[tid] + metadata = { + "name": thread_data["name"], + "isMainThread": thread_data["isMainThread"], + "processStartupTime": thread_data["processStartupTime"], + "processShutdownTime": thread_data["processShutdownTime"], + "registerTime": thread_data["registerTime"], + "unregisterTime": thread_data["unregisterTime"], + "pausedRanges": thread_data["pausedRanges"], + "pid": thread_data["pid"], + "tid": thread_data["tid"], + "processType": thread_data["processType"], + "processName": thread_data["processName"], + } + file.write("{") + file.write(json.dumps(metadata, separators=(",", ":"), allow_nan=False)[1:-1]) + file.write(',"samples":') + self._stream_samples(file, spill) + for key in ( + "stackTable", + "frameTable", + "funcTable", + "resourceTable", + "nativeSymbols", + ): + file.write(',"') + file.write(key) + file.write('":') + file.write(json.dumps( + thread_data[key], separators=(",", ":"), allow_nan=False + )) + file.write(',"markers":') + self._stream_markers(file, spill) + file.write("}") + + def _stream_samples(self, file, spill): + file.write('{"stack":') + _stream_array(file, _tokens(spill.samples_stack), spill.sample_count) + file.write(',"time":') + _stream_array(file, _tokens(spill.samples_time), spill.sample_count) + file.write(',"eventDelay":') + _stream_array( + file, + ("null" for _ in range(spill.sample_count)), + spill.sample_count, + ) + file.write(',"weight":null,"weightType":"samples","length":') + file.write(repr(spill.sample_count)) + file.write("}") + + def _stream_markers(self, file, spill): + file.write('{"data":') + _stream_array(file, spill.markers_data.iter_lines(), spill.marker_count) + file.write(',"name":') + _stream_array(file, _tokens(spill.markers_name), spill.marker_count) + file.write(',"startTime":') + _stream_array(file, _tokens(spill.markers_start_time), spill.marker_count) + file.write(',"endTime":') + _stream_array(file, _tokens(spill.markers_end_time), spill.marker_count) + file.write(',"phase":') + _stream_array(file, _tokens(spill.markers_phase), spill.marker_count) + file.write(',"category":') + _stream_array(file, _tokens(spill.markers_category), spill.marker_count) + file.write(',"length":') + file.write(repr(spill.marker_count)) + file.write("}") + + +def _stream_array(file, token_iter, expected_count): + file.write("[") + count = 0 + for token in token_iter: + if count: + file.write(",") + file.write(token) + count += 1 + if count != expected_count: + raise RuntimeError( + f"streamed {count} array items, expected {expected_count}" + ) + file.write("]") + + +def _tokens(column): + for chunk in column.iter_chunks(): + for value in chunk: + yield repr(value) From 57694cb972ba6cb2f031781932a0b82f95d7efaa Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Wed, 3 Jun 2026 13:51:38 +0200 Subject: [PATCH 2/9] news --- .../Library/2026-06-03-13-51-29.gh-issue-150662.ELT8Vg.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2026-06-03-13-51-29.gh-issue-150662.ELT8Vg.rst diff --git a/Misc/NEWS.d/next/Library/2026-06-03-13-51-29.gh-issue-150662.ELT8Vg.rst b/Misc/NEWS.d/next/Library/2026-06-03-13-51-29.gh-issue-150662.ELT8Vg.rst new file mode 100644 index 000000000000000..42ed6ad7cd3c65f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-03-13-51-29.gh-issue-150662.ELT8Vg.rst @@ -0,0 +1,4 @@ +Fix the ``--gecko`` collector in :mod:`profiling.sampling` that kept every +sample in memory. It now writes sample and marker data to temporary files +and reads them back, ultimately building the output file at the end. Patch +by Pablo Galindo and Maurycy Pawłowski-Wieroński. From e0805d01a604f740d61b7527ecaeff0bb52343cf Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Wed, 3 Jun 2026 13:52:50 +0200 Subject: [PATCH 3/9] test --- .../test_profiling/test_sampling_profiler/test_collectors.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py index 390a1479fdd2975..8d196a5a334ff8d 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py @@ -2702,10 +2702,7 @@ def test_gecko_opcode_state_change_emits_marker(self): collector.collect(frames2) # Should have emitted a marker for the first opcode - thread_data = collector.threads[1] - markers = thread_data["markers"] - # At least one marker should have been added - self.assertGreater(len(markers["name"]), 0) + self.assertGreater(collector.thread_spills[1].marker_count, 0) def test_gecko_opcode_markers_not_emitted_when_disabled(self): """Test that no opcode markers when opcodes=False.""" From 866b8fde2ff38aeba069d070a613b308d8ba7dca Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Wed, 3 Jun 2026 15:15:20 +0200 Subject: [PATCH 4/9] this? --- .../test_profiling/test_sampling_profiler/test_collectors.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py index 8d196a5a334ff8d..e02b6c07bd4427f 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py @@ -2659,6 +2659,7 @@ def test_gecko_collector_opcodes_enabled(self): def test_gecko_opcode_state_tracking(self): """Test that GeckoCollector tracks opcode state changes.""" collector = GeckoCollector(sample_interval_usec=1000, opcodes=True) + self.addCleanup(lambda: collector.spill_dir.cleanup()) # First sample with opcode 90 (RAISE_VARARGS) frame1 = MockFrameInfo("test.py", 10, "func", opcode=90) @@ -2680,6 +2681,7 @@ def test_gecko_opcode_state_tracking(self): def test_gecko_opcode_state_change_emits_marker(self): """Test that opcode state change emits an interval marker.""" collector = GeckoCollector(sample_interval_usec=1000, opcodes=True) + self.addCleanup(lambda: collector.spill_dir.cleanup()) # First sample: opcode 90 frame1 = MockFrameInfo("test.py", 10, "func", opcode=90) @@ -2707,6 +2709,7 @@ def test_gecko_opcode_state_change_emits_marker(self): def test_gecko_opcode_markers_not_emitted_when_disabled(self): """Test that no opcode markers when opcodes=False.""" collector = GeckoCollector(sample_interval_usec=1000, opcodes=False) + self.addCleanup(lambda: collector.spill_dir.cleanup()) frame1 = MockFrameInfo("test.py", 10, "func", opcode=90) frames1 = [ @@ -2732,6 +2735,7 @@ def test_gecko_opcode_markers_not_emitted_when_disabled(self): def test_gecko_opcode_with_none_opcode(self): """Test that None opcode doesn't cause issues.""" collector = GeckoCollector(sample_interval_usec=1000, opcodes=True) + self.addCleanup(lambda: collector.spill_dir.cleanup()) # Frame with no opcode (None) frame = MockFrameInfo("test.py", 10, "func", opcode=None) From a23814bbda140f983c6f76250136e9bfbcc20816 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Wed, 3 Jun 2026 15:30:33 +0200 Subject: [PATCH 5/9] now? --- .../test_profiling/test_sampling_profiler/test_collectors.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py index e02b6c07bd4427f..9fe0d8f1bb66ff1 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py @@ -624,6 +624,8 @@ def test_gecko_collector_export(self): """Test Gecko profile export functionality.""" gecko_out = tempfile.NamedTemporaryFile(suffix=".json", delete=False) self.addCleanup(close_and_unlink, gecko_out) + # We cannot overwrite an open file on Windows. + gecko_out.close() collector = GeckoCollector(1000) From 8d5c68779d658b9b6197411300550c7dbebcf91a Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Thu, 4 Jun 2026 11:57:18 +0200 Subject: [PATCH 6/9] don't be too smart. bye `array` --- Lib/profiling/sampling/gecko_collector.py | 103 +++++++++------------- 1 file changed, 41 insertions(+), 62 deletions(-) diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py index dd899c0d745a0f3..36c47d64298fc1f 100644 --- a/Lib/profiling/sampling/gecko_collector.py +++ b/Lib/profiling/sampling/gecko_collector.py @@ -1,4 +1,3 @@ -import array import itertools import io import json @@ -68,34 +67,7 @@ DEFAULT_SPILL_BUFFER_BYTES = 128 * 1024 -class TypedSpillColumn: - def __init__(self, directory, basename, typecode, *, - buffer_bytes=DEFAULT_SPILL_BUFFER_BYTES): - self.path = os.path.join(directory, basename) - self.buffer = array.array(typecode) - self.max_items = max(1, buffer_bytes // self.buffer.itemsize) - - def append(self, value): - self.buffer.append(value) - if len(self.buffer) >= self.max_items: - self.flush() - - def flush(self): - with open(self.path, "ab") as file: - self.buffer.tofile(file) - self.buffer.clear() - - def iter_chunks(self): - typecode = self.buffer.typecode - block_bytes = self.max_items * self.buffer.itemsize - with open(self.path, "rb") as file: - for block in iter(lambda: file.read(block_bytes), b""): - chunk = array.array(typecode) - chunk.frombytes(block) - yield chunk - - -class NDJSONSpillColumn: +class SpillColumn: _encoder = json.JSONEncoder(separators=(",", ":")) def __init__(self, directory, basename, *, @@ -104,8 +76,8 @@ def __init__(self, directory, basename, *, self.buffer = bytearray() self._buffer_bytes = buffer_bytes - def append_object(self, data): - self.buffer += (self._encoder.encode(data) + "\n").encode() + def append(self, value): + self.buffer += (self._encoder.encode(value) + "\n").encode() if len(self.buffer) >= self._buffer_bytes: self.flush() @@ -114,30 +86,28 @@ def flush(self): file.write(self.buffer) self.buffer.clear() - def iter_lines(self): + def iter_tokens(self): with open(self.path) as file: for line in file: yield line.rstrip("\n") class GeckoThreadSpill: - _TYPED_COLUMNS = ( - ("samples_stack", "samples-stack.bin", "q"), - ("samples_time", "samples-time.bin", "d"), - ("markers_name", "markers-name.bin", "q"), - ("markers_start_time", "markers-start-time.bin", "d"), - ("markers_end_time", "markers-end-time.bin", "d"), - ("markers_phase", "markers-phase.bin", "B"), - ("markers_category", "markers-category.bin", "I"), + _COLUMNS = ( + ("samples_stack", "samples-stack.json"), + ("samples_time", "samples-time.json"), + ("markers_name", "markers-name.json"), + ("markers_start_time", "markers-start-time.json"), + ("markers_end_time", "markers-end-time.json"), + ("markers_phase", "markers-phase.json"), + ("markers_category", "markers-category.json"), + ("markers_data", "markers-data.json"), ) def __init__(self, directory, tid): prefix = f"thread-{tid}-" - for attr, basename, typecode in self._TYPED_COLUMNS: - setattr(self, attr, TypedSpillColumn( - directory, prefix + basename, typecode)) - self.markers_data = NDJSONSpillColumn( - directory, prefix + "markers-data.ndjson") + for attr, basename in self._COLUMNS: + setattr(self, attr, SpillColumn(directory, prefix + basename)) self.sample_count = 0 self.marker_count = 0 @@ -152,13 +122,12 @@ def append_marker(self, name_idx, start_time, end_time, phase, category, data): self.markers_end_time.append(end_time) self.markers_phase.append(phase) self.markers_category.append(category) - self.markers_data.append_object(data) + self.markers_data.append(data) self.marker_count += 1 def prepare_read(self): - for attr, _basename, _typecode in self._TYPED_COLUMNS: + for attr, _basename in self._COLUMNS: getattr(self, attr).flush() - self.markers_data.flush() class GeckoCollector(Collector): @@ -928,9 +897,13 @@ def _stream_thread(self, file, tid, thread_data): def _stream_samples(self, file, spill): file.write('{"stack":') - _stream_array(file, _tokens(spill.samples_stack), spill.sample_count) + _stream_array( + file, spill.samples_stack.iter_tokens(), spill.sample_count + ) file.write(',"time":') - _stream_array(file, _tokens(spill.samples_time), spill.sample_count) + _stream_array( + file, spill.samples_time.iter_tokens(), spill.sample_count + ) file.write(',"eventDelay":') _stream_array( file, @@ -943,17 +916,29 @@ def _stream_samples(self, file, spill): def _stream_markers(self, file, spill): file.write('{"data":') - _stream_array(file, spill.markers_data.iter_lines(), spill.marker_count) + _stream_array( + file, spill.markers_data.iter_tokens(), spill.marker_count + ) file.write(',"name":') - _stream_array(file, _tokens(spill.markers_name), spill.marker_count) + _stream_array( + file, spill.markers_name.iter_tokens(), spill.marker_count + ) file.write(',"startTime":') - _stream_array(file, _tokens(spill.markers_start_time), spill.marker_count) + _stream_array( + file, spill.markers_start_time.iter_tokens(), spill.marker_count + ) file.write(',"endTime":') - _stream_array(file, _tokens(spill.markers_end_time), spill.marker_count) + _stream_array( + file, spill.markers_end_time.iter_tokens(), spill.marker_count + ) file.write(',"phase":') - _stream_array(file, _tokens(spill.markers_phase), spill.marker_count) + _stream_array( + file, spill.markers_phase.iter_tokens(), spill.marker_count + ) file.write(',"category":') - _stream_array(file, _tokens(spill.markers_category), spill.marker_count) + _stream_array( + file, spill.markers_category.iter_tokens(), spill.marker_count + ) file.write(',"length":') file.write(repr(spill.marker_count)) file.write("}") @@ -972,9 +957,3 @@ def _stream_array(file, token_iter, expected_count): f"streamed {count} array items, expected {expected_count}" ) file.write("]") - - -def _tokens(column): - for chunk in column.iter_chunks(): - for value in chunk: - yield repr(value) From 2e5871602ff4edcd241c2ba1d3f2b4477008e07c Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Thu, 4 Jun 2026 12:01:52 +0200 Subject: [PATCH 7/9] explicit encoding --- Lib/profiling/sampling/gecko_collector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py index 36c47d64298fc1f..06bc02db726408b 100644 --- a/Lib/profiling/sampling/gecko_collector.py +++ b/Lib/profiling/sampling/gecko_collector.py @@ -87,7 +87,7 @@ def flush(self): self.buffer.clear() def iter_tokens(self): - with open(self.path) as file: + with open(self.path, encoding="utf-8") as file: for line in file: yield line.rstrip("\n") From 89695bf6ad7cb3ce21fb8a05f9ca4b7ae26f7d28 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sat, 6 Jun 2026 02:53:15 +0100 Subject: [PATCH 8/9] fixup! don't be too smart. bye `array` --- Lib/profiling/sampling/gecko_collector.py | 209 ++++++++++++---------- 1 file changed, 115 insertions(+), 94 deletions(-) diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py index 06bc02db726408b..361f6037f216fdc 100644 --- a/Lib/profiling/sampling/gecko_collector.py +++ b/Lib/profiling/sampling/gecko_collector.py @@ -65,19 +65,24 @@ # In-memory buffer before spilling to disk DEFAULT_SPILL_BUFFER_BYTES = 128 * 1024 +_JSON_SEPARATORS = (",", ":") +_JSON_ENCODER = json.JSONEncoder( + separators=_JSON_SEPARATORS, allow_nan=False +) class SpillColumn: - _encoder = json.JSONEncoder(separators=(",", ":")) - def __init__(self, directory, basename, *, - buffer_bytes=DEFAULT_SPILL_BUFFER_BYTES): + buffer_bytes=None): self.path = os.path.join(directory, basename) self.buffer = bytearray() - self._buffer_bytes = buffer_bytes + self._buffer_bytes = ( + DEFAULT_SPILL_BUFFER_BYTES if buffer_bytes is None + else buffer_bytes + ) def append(self, value): - self.buffer += (self._encoder.encode(value) + "\n").encode() + self.buffer += (_JSON_ENCODER.encode(value) + "\n").encode("utf-8") if len(self.buffer) >= self._buffer_bytes: self.flush() @@ -146,7 +151,6 @@ def __init__(self, sample_interval_usec, *, skip_idle=False, opcodes=False): # Per-thread data structures self.threads = {} # tid -> thread data self.spill_dir = None - self.thread_spills = {} self.exported = False # Global tables @@ -333,7 +337,7 @@ def collect(self, stack_frames, timestamps_us=None): stack_index = self._process_stack(thread_data, frames) # Add samples with timestamps - thread_spill = self.thread_spills[tid] + thread_spill = thread_data["_spill"] for t in times: thread_spill.append_sample(stack_index, t) @@ -365,8 +369,6 @@ def _create_thread(self, tid, is_main_thread): if self.spill_dir is None: self.spill_dir = tempfile.TemporaryDirectory() - self.thread_spills[tid] = GeckoThreadSpill(self.spill_dir.name, tid) - thread = { "name": f"Thread-{tid}", "isMainThread": is_main_thread, @@ -434,6 +436,7 @@ def _create_thread(self, tid, is_main_thread): "_frameCache": {}, "_funcCache": {}, "_resourceCache": {}, + "_spill": GeckoThreadSpill(self.spill_dir.name, tid), } return thread @@ -461,13 +464,16 @@ def _add_marker(self, tid, name, start_time, end_time, category): duration = end_time - start_time name_idx = self._intern_string(name) - self.thread_spills[tid].append_marker(name_idx, start_time, end_time, 1, category, { - "type": name.replace(" ", ""), - "duration": duration, - "tid": tid - }) + self.threads[tid]["_spill"].append_marker( + name_idx, start_time, end_time, 1, category, { + "type": name.replace(" ", ""), + "duration": duration, + "tid": tid, + } + ) - def _add_opcode_interval_marker(self, tid, opcode, lineno, col_offset, funcname, start_time, end_time): + def _add_opcode_interval_marker(self, tid, opcode, lineno, col_offset, + funcname, start_time, end_time): """Add an interval marker for opcode execution span.""" if tid not in self.threads or opcode is None: return @@ -478,17 +484,19 @@ def _add_opcode_interval_marker(self, tid, opcode, lineno, col_offset, funcname, name_idx = self._intern_string(formatted_opname) - self.thread_spills[tid].append_marker(name_idx, start_time, end_time, 1, CATEGORY_OPCODES, { - "type": "Opcode", - "opcode": opcode, - "opname": formatted_opname, - "base_opname": opcode_info["base_opname"], - "is_specialized": opcode_info["is_specialized"], - "line": lineno, - "column": col_offset if col_offset >= 0 else None, - "function": funcname, - "duration": end_time - start_time, - }) + self.threads[tid]["_spill"].append_marker( + name_idx, start_time, end_time, 1, CATEGORY_OPCODES, { + "type": "Opcode", + "opcode": opcode, + "opname": formatted_opname, + "base_opname": opcode_info["base_opname"], + "is_specialized": opcode_info["is_specialized"], + "line": lineno, + "column": col_offset if col_offset >= 0 else None, + "function": funcname, + "duration": end_time - start_time, + } + ) def _process_stack(self, thread_data, frames): """Process a stack and return the stack index.""" @@ -742,8 +750,7 @@ def spin(): os.unlink(temp_path) except FileNotFoundError: pass - if self.spill_dir is not None: - self.spill_dir.cleanup() + self._cleanup_spills() print(f"Gecko profile written to {filename}") print( @@ -784,8 +791,7 @@ def _build_profile(self): return json.loads(file.getvalue()) finally: self.exported = True - if self.spill_dir is not None: - self.spill_dir.cleanup() + self._cleanup_spills() def _profile_head(self): return { @@ -833,34 +839,38 @@ def _prepare_for_serialization(self): if self.exported: raise RuntimeError("GeckoCollector has already been exported") self._finalize_markers() - for spill in self.thread_spills.values(): - spill.prepare_read() for thread_data in self.threads.values(): + thread_data["_spill"].prepare_read() thread_data["stackTable"]["length"] = len(thread_data["stackTable"]["frame"]) thread_data["frameTable"]["length"] = len(thread_data["frameTable"]["func"]) thread_data["funcTable"]["length"] = len(thread_data["funcTable"]["name"]) thread_data["resourceTable"]["length"] = len(thread_data["resourceTable"]["name"]) + def _cleanup_spills(self): + if self.spill_dir is not None: + self.spill_dir.cleanup() + self.spill_dir = None + def _stream_profile(self, file): - head = json.dumps( - self._profile_head(), separators=(",", ":"), allow_nan=False - )[1:-1] - tail = json.dumps( - self._profile_tail(), separators=(",", ":"), allow_nan=False - )[1:-1] file.write("{") - file.write(head) - file.write(',"threads":[') + first = True + for key, value in self._profile_head().items(): + first = _write_json_member(file, key, value, first) + + first = _write_member_name(file, "threads", first) + file.write("[") for index, (tid, thread_data) in enumerate(self.threads.items()): if index: file.write(",") self._stream_thread(file, tid, thread_data) - file.write("],") - file.write(tail) + file.write("]") + + for key, value in self._profile_tail().items(): + first = _write_json_member(file, key, value, first) file.write("}") def _stream_thread(self, file, tid, thread_data): - spill = self.thread_spills[tid] + spill = thread_data["_spill"] metadata = { "name": thread_data["name"], "isMainThread": thread_data["isMainThread"], @@ -875,8 +885,11 @@ def _stream_thread(self, file, tid, thread_data): "processName": thread_data["processName"], } file.write("{") - file.write(json.dumps(metadata, separators=(",", ":"), allow_nan=False)[1:-1]) - file.write(',"samples":') + first = True + for key, value in metadata.items(): + first = _write_json_member(file, key, value, first) + + first = _write_member_name(file, "samples", first) self._stream_samples(file, spill) for key in ( "stackTable", @@ -885,66 +898,74 @@ def _stream_thread(self, file, tid, thread_data): "resourceTable", "nativeSymbols", ): - file.write(',"') - file.write(key) - file.write('":') - file.write(json.dumps( - thread_data[key], separators=(",", ":"), allow_nan=False - )) - file.write(',"markers":') + first = _write_json_member(file, key, thread_data[key], first) + first = _write_member_name(file, "markers", first) self._stream_markers(file, spill) file.write("}") def _stream_samples(self, file, spill): - file.write('{"stack":') - _stream_array( - file, spill.samples_stack.iter_tokens(), spill.sample_count - ) - file.write(',"time":') - _stream_array( - file, spill.samples_time.iter_tokens(), spill.sample_count - ) - file.write(',"eventDelay":') - _stream_array( + _stream_column_table( file, - ("null" for _ in range(spill.sample_count)), + ( + ("stack", spill.samples_stack.iter_tokens()), + ("time", spill.samples_time.iter_tokens()), + ("eventDelay", ("null" for _ in range(spill.sample_count))), + ), spill.sample_count, + ( + ("weight", None), + ("weightType", "samples"), + ("length", spill.sample_count), + ), ) - file.write(',"weight":null,"weightType":"samples","length":') - file.write(repr(spill.sample_count)) - file.write("}") def _stream_markers(self, file, spill): - file.write('{"data":') - _stream_array( - file, spill.markers_data.iter_tokens(), spill.marker_count - ) - file.write(',"name":') - _stream_array( - file, spill.markers_name.iter_tokens(), spill.marker_count - ) - file.write(',"startTime":') - _stream_array( - file, spill.markers_start_time.iter_tokens(), spill.marker_count - ) - file.write(',"endTime":') - _stream_array( - file, spill.markers_end_time.iter_tokens(), spill.marker_count - ) - file.write(',"phase":') - _stream_array( - file, spill.markers_phase.iter_tokens(), spill.marker_count - ) - file.write(',"category":') - _stream_array( - file, spill.markers_category.iter_tokens(), spill.marker_count + _stream_column_table( + file, + ( + ("data", spill.markers_data.iter_tokens()), + ("name", spill.markers_name.iter_tokens()), + ("startTime", spill.markers_start_time.iter_tokens()), + ("endTime", spill.markers_end_time.iter_tokens()), + ("phase", spill.markers_phase.iter_tokens()), + ("category", spill.markers_category.iter_tokens()), + ), + spill.marker_count, + (("length", spill.marker_count),), ) - file.write(',"length":') - file.write(repr(spill.marker_count)) - file.write("}") -def _stream_array(file, token_iter, expected_count): +def _write_json(file, value): + for chunk in _JSON_ENCODER.iterencode(value): + file.write(chunk) + + +def _write_member_name(file, name, first): + if not first: + file.write(",") + _write_json(file, name) + file.write(":") + return False + + +def _write_json_member(file, name, value, first): + first = _write_member_name(file, name, first) + _write_json(file, value) + return first + + +def _stream_column_table(file, columns, expected_count, trailing_members=()): + file.write("{") + first = True + for name, token_iter in columns: + first = _write_member_name(file, name, first) + _stream_array(file, token_iter, expected_count, name) + for name, value in trailing_members: + first = _write_json_member(file, name, value, first) + file.write("}") + + +def _stream_array(file, token_iter, expected_count, label="array"): file.write("[") count = 0 for token in token_iter: @@ -954,6 +975,6 @@ def _stream_array(file, token_iter, expected_count): count += 1 if count != expected_count: raise RuntimeError( - f"streamed {count} array items, expected {expected_count}" + f"streamed {count} {label} items, expected {expected_count}" ) file.write("]") From 2846e2521b5dbaf7296c9f84160ec2f03da1ecfb Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sat, 6 Jun 2026 02:53:19 +0100 Subject: [PATCH 9/9] fixup! test --- .../test_sampling_profiler/test_collectors.py | 206 ++++++++++++++---- 1 file changed, 168 insertions(+), 38 deletions(-) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py index 9fe0d8f1bb66ff1..1ab31af67fec522 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py @@ -11,6 +11,7 @@ try: import _remote_debugging # noqa: F401 + from profiling.sampling import gecko_collector from profiling.sampling.pstats_collector import PstatsCollector from profiling.sampling.stack_collector import ( CollapsedStackCollector, @@ -59,6 +60,42 @@ def find_child_by_name(children, strings, substr): return None +def export_gecko_profile(testcase, collector): + gecko_out = tempfile.NamedTemporaryFile(suffix=".json", delete=False) + testcase.addCleanup(close_and_unlink, gecko_out) + # We cannot overwrite an open file on Windows. + gecko_out.close() + + with captured_stdout(), captured_stderr(): + collector.export(gecko_out.name) + + testcase.assertGreater(os.path.getsize(gecko_out.name), 0) + with open(gecko_out.name, encoding="utf-8") as file: + return json.load(file) + + +def assert_gecko_column_lengths(testcase, table, columns): + expected = table["length"] + for column in columns: + testcase.assertEqual( + len(table[column]), expected, + f"{column!r} has wrong length", + ) + + +def gecko_marker_names(profile, markers): + string_array = profile["shared"]["stringArray"] + return [string_array[idx] for idx in markers["name"]] + + +def gecko_opcode_marker_data(profile): + markers = profile["threads"][0]["markers"] + return [ + data for data in markers["data"] + if data.get("type") == "Opcode" + ] + + class TestSampleProfilerComponents(unittest.TestCase): """Unit tests for individual profiler components.""" @@ -583,9 +620,10 @@ def test_gecko_collector_basic(self): # Verify samples samples = thread_data["samples"] - self.assertEqual(len(samples["stack"]), 1) - self.assertEqual(len(samples["time"]), 1) self.assertEqual(samples["length"], 1) + assert_gecko_column_lengths( + self, samples, ("stack", "time", "eventDelay") + ) # Verify function table structure and content func_table = thread_data["funcTable"] @@ -622,11 +660,6 @@ def test_gecko_collector_basic(self): @unittest.skipIf(is_emscripten, "threads not available") def test_gecko_collector_export(self): """Test Gecko profile export functionality.""" - gecko_out = tempfile.NamedTemporaryFile(suffix=".json", delete=False) - self.addCleanup(close_and_unlink, gecko_out) - # We cannot overwrite an open file on Windows. - gecko_out.close() - collector = GeckoCollector(1000) test_frames1 = [ @@ -659,17 +692,7 @@ def test_gecko_collector_export(self): collector.collect(test_frames2) collector.collect(test_frames3) - # Export gecko profile - with captured_stdout(), captured_stderr(): - collector.export(gecko_out.name) - - # Verify file was created and contains valid data - self.assertTrue(os.path.exists(gecko_out.name)) - self.assertGreater(os.path.getsize(gecko_out.name), 0) - - # Check file contains valid JSON - with open(gecko_out.name, "r") as f: - profile_data = json.load(f) + profile_data = export_gecko_profile(self, collector) # Should be valid Gecko profile format self.assertIn("meta", profile_data) @@ -690,6 +713,100 @@ def test_gecko_collector_export(self): self.assertIn("func2", string_array) self.assertIn("other_func", string_array) + thread_data = profile_data["threads"][0] + assert_gecko_column_lengths( + self, thread_data["samples"], ("stack", "time", "eventDelay") + ) + + @unittest.skipIf(is_emscripten, "threads not available") + def test_gecko_collector_export_after_spill_flush(self): + """Test Gecko profile export after spill buffers flush to disk.""" + old_buffer_bytes = gecko_collector.DEFAULT_SPILL_BUFFER_BYTES + gecko_collector.DEFAULT_SPILL_BUFFER_BYTES = 1 + self.addCleanup( + setattr, gecko_collector, "DEFAULT_SPILL_BUFFER_BYTES", + old_buffer_bytes + ) + + collector = GeckoCollector(1000) + test_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [MockFrameInfo("file.py", 10, "func")], + status=THREAD_STATUS_HAS_GIL, + ) + ], + ) + ] + collector.collect(test_frames, timestamps_us=[1000, 2000, 3000]) + + profile_data = export_gecko_profile(self, collector) + samples = profile_data["threads"][0]["samples"] + self.assertEqual(samples["length"], 3) + assert_gecko_column_lengths( + self, samples, ("stack", "time", "eventDelay") + ) + + @unittest.skipIf(is_emscripten, "threads not available") + def test_gecko_collector_rejects_collect_after_export(self): + collector = GeckoCollector(1000) + test_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [MockFrameInfo("file.py", 10, "func")], + status=THREAD_STATUS_HAS_GIL, + ) + ], + ) + ] + collector.collect(test_frames) + export_gecko_profile(self, collector) + + with self.assertRaisesRegex(RuntimeError, "after export"): + collector.collect(test_frames) + + @unittest.skipIf(is_emscripten, "threads not available") + def test_gecko_collector_export_failure_keeps_existing_file(self): + collector = GeckoCollector(1000) + test_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [MockFrameInfo("file.py", 10, "func")], + status=THREAD_STATUS_HAS_GIL, + ) + ], + ) + ] + collector.collect(test_frames) + + with tempfile.TemporaryDirectory() as temp_dir: + filename = os.path.join(temp_dir, "profile.json") + with open(filename, "w", encoding="utf-8") as file: + file.write("existing") + + before = set(os.listdir(temp_dir)) + + def fail(file): + raise OSError("boom") + + collector._stream_profile = fail + with captured_stdout(), captured_stderr(): + with self.assertRaisesRegex(OSError, "boom"): + collector.export(filename) + + with open(filename, encoding="utf-8") as file: + self.assertEqual(file.read(), "existing") + self.assertEqual(set(os.listdir(temp_dir)), before) + def test_gecko_collector_markers(self): """Test Gecko profile markers for GIL and CPU state tracking.""" collector = GeckoCollector(1000) @@ -773,21 +890,16 @@ def test_gecko_collector_markers(self): self.assertIn("markers", thread_data) markers = thread_data["markers"] - # Should have marker arrays - self.assertIn("name", markers) - self.assertIn("startTime", markers) - self.assertIn("endTime", markers) - self.assertIn("category", markers) self.assertGreater( markers["length"], 0, "Should have generated markers" ) - - # Get marker names from string table - string_array = profile_data["shared"]["stringArray"] - marker_names = [string_array[idx] for idx in markers["name"]] + assert_gecko_column_lengths( + self, markers, + ("data", "name", "startTime", "endTime", "phase", "category"), + ) # Verify we have different marker types - marker_name_set = set(marker_names) + marker_name_set = set(gecko_marker_names(profile_data, markers)) # Should have "Has GIL" markers (when thread had GIL) self.assertIn( @@ -2661,7 +2773,7 @@ def test_gecko_collector_opcodes_enabled(self): def test_gecko_opcode_state_tracking(self): """Test that GeckoCollector tracks opcode state changes.""" collector = GeckoCollector(sample_interval_usec=1000, opcodes=True) - self.addCleanup(lambda: collector.spill_dir.cleanup()) + self.addCleanup(collector._cleanup_spills) # First sample with opcode 90 (RAISE_VARARGS) frame1 = MockFrameInfo("test.py", 10, "func", opcode=90) @@ -2683,7 +2795,6 @@ def test_gecko_opcode_state_tracking(self): def test_gecko_opcode_state_change_emits_marker(self): """Test that opcode state change emits an interval marker.""" collector = GeckoCollector(sample_interval_usec=1000, opcodes=True) - self.addCleanup(lambda: collector.spill_dir.cleanup()) # First sample: opcode 90 frame1 = MockFrameInfo("test.py", 10, "func", opcode=90) @@ -2706,12 +2817,32 @@ def test_gecko_opcode_state_change_emits_marker(self): collector.collect(frames2) # Should have emitted a marker for the first opcode - self.assertGreater(collector.thread_spills[1].marker_count, 0) + profile = collector._build_profile() + markers = profile["threads"][0]["markers"] + assert_gecko_column_lengths( + self, markers, + ("data", "name", "startTime", "endTime", "phase", "category"), + ) + opcode_markers = gecko_opcode_marker_data(profile) + self.assertIn( + { + "opcode": 90, + "line": 10, + "function": "func", + }, + [ + { + "opcode": marker["opcode"], + "line": marker["line"], + "function": marker["function"], + } + for marker in opcode_markers + ], + ) def test_gecko_opcode_markers_not_emitted_when_disabled(self): """Test that no opcode markers when opcodes=False.""" collector = GeckoCollector(sample_interval_usec=1000, opcodes=False) - self.addCleanup(lambda: collector.spill_dir.cleanup()) frame1 = MockFrameInfo("test.py", 10, "func", opcode=90) frames1 = [ @@ -2731,13 +2862,13 @@ def test_gecko_opcode_markers_not_emitted_when_disabled(self): ] collector.collect(frames2) - # opcode_state should not be tracked - self.assertEqual(len(collector.opcode_state), 0) + profile = collector._build_profile() + self.assertEqual(gecko_opcode_marker_data(profile), []) + self.assertEqual(profile["meta"]["markerSchema"], []) def test_gecko_opcode_with_none_opcode(self): """Test that None opcode doesn't cause issues.""" collector = GeckoCollector(sample_interval_usec=1000, opcodes=True) - self.addCleanup(lambda: collector.spill_dir.cleanup()) # Frame with no opcode (None) frame = MockFrameInfo("test.py", 10, "func", opcode=None) @@ -2749,9 +2880,8 @@ def test_gecko_opcode_with_none_opcode(self): ] collector.collect(frames) - # Should track the state but opcode is None - self.assertIn(1, collector.opcode_state) - self.assertIsNone(collector.opcode_state[1][0]) + profile = collector._build_profile() + self.assertEqual(gecko_opcode_marker_data(profile), []) class TestCollectorFrameFormat(unittest.TestCase):