Skip to content

Commit d995922

Browse files
authored
gh-136396: Include instrumentation when creating new copies of the bytecode (#136525)
Previously, we assumed that instrumentation would happen for all copies of the bytecode if the instrumentation version on the code object didn't match the per-interpreter instrumentation version. That assumption was incorrect: instrumentation will exit early if there are no new "events," even if there is an instrumentation version mismatch. To fix this, include the instrumented opcodes when creating new copies of the bytecode, rather than replacing them with their uninstrumented variants. I don't think we have to worry about races between instrumentation and creating new copies of the bytecode: instrumentation and new bytecode creation cannot happen concurrently. Instrumentation requires that either the world is stopped or the code object's per-object lock is held and new bytecode creation requires holding the code object's per-object lock.
1 parent 3d8c38f commit d995922

File tree

3 files changed

+161
-1
lines changed

3 files changed

+161
-1
lines changed

Lib/test/test_free_threading/test_monitoring.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22
environment to verify things are thread-safe in a free-threaded build"""
33

44
import sys
5+
import threading
56
import time
67
import unittest
78
import weakref
89

10+
from contextlib import contextmanager
911
from sys import monitoring
1012
from test.support import threading_helper
1113
from threading import Thread, _PyRLock, Barrier
@@ -192,6 +194,16 @@ def during_threads(self):
192194
self.set = not self.set
193195

194196

197+
class TraceBuf:
    """Collects trace records appended from several threads.

    ``traces`` is a plain list of the appended records; ``traces_lock``
    is held for the duration of every ``append`` call.
    """

    def __init__(self):
        self.traces_lock = threading.Lock()
        self.traces = []

    def append(self, trace):
        """Add *trace* to ``traces`` while holding ``traces_lock``."""
        with self.traces_lock:
            self.traces.append(trace)
205+
206+
195207
@threading_helper.requires_working_threading()
196208
class MonitoringMisc(MonitoringTestMixin, TestCase):
197209
def register_callback(self, barrier):
@@ -246,6 +258,135 @@ def f():
246258
finally:
247259
sys.settrace(None)
248260

261+
def test_toggle_setprofile_no_new_events(self):
262+
# gh-136396: Make sure that profile functions are called for newly
263+
# created threads when profiling is toggled but the set of monitoring
264+
# events doesn't change
265+
traces = []
266+
267+
def profiler(frame, event, arg):
268+
traces.append((frame.f_code.co_name, event, arg))
269+
270+
def a(x, y):
271+
return b(x, y)
272+
273+
def b(x, y):
274+
return max(x, y)
275+
276+
sys.setprofile(profiler)
277+
try:
278+
a(1, 2)
279+
finally:
280+
sys.setprofile(None)
281+
traces.clear()
282+
283+
def thread_main(x, y):
284+
sys.setprofile(profiler)
285+
try:
286+
a(x, y)
287+
finally:
288+
sys.setprofile(None)
289+
t = Thread(target=thread_main, args=(100, 200))
290+
t.start()
291+
t.join()
292+
293+
expected = [
294+
("a", "call", None),
295+
("b", "call", None),
296+
("b", "c_call", max),
297+
("b", "c_return", max),
298+
("b", "return", 200),
299+
("a", "return", 200),
300+
("thread_main", "c_call", sys.setprofile),
301+
]
302+
self.assertEqual(traces, expected)
303+
304+
def observe_threads(self, observer, buf):
    # Shared driver for the profile/trace/monitor thread tests.
    #
    # Starts five "parent" threads that rendezvous on a barrier. Each
    # parent enters observer(), runs a "child" thread (which enters
    # observer() itself and calls in_child), and then calls in_parent.
    # Afterwards buf.traces must contain a "return" record for in_parent
    # and in_child for every thread index.
    def in_child(ident):
        return ident

    def child(ident):
        with observer():
            in_child(ident)

    def in_parent(ident):
        return ident

    def parent(barrier, ident):
        barrier.wait()
        with observer():
            t = Thread(target=child, args=(ident,))
            t.start()
            t.join()
            in_parent(ident)

    num_threads = 5
    barrier = Barrier(num_threads)
    workers = [
        Thread(target=parent, args=(barrier, i)) for i in range(num_threads)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    for i in range(num_threads):
        for name in ("in_parent", "in_child"):
            self.assertIn((name, "return", i), buf.traces)
336+
337+
def test_profile_threads(self):
    # Run the observe_threads() scenario with a sys.setprofile() function
    # installed in each parent and child thread.
    recorded = TraceBuf()

    def profiler(frame, event, arg):
        record = (frame.f_code.co_name, event, arg)
        recorded.append(record)

    @contextmanager
    def profile():
        # Install the profile function for the calling thread and always
        # remove it again on exit.
        sys.setprofile(profiler)
        try:
            yield
        finally:
            sys.setprofile(None)

    self.observe_threads(profile, recorded)
352+
353+
def test_trace_threads(self):
    # Run the observe_threads() scenario with a sys.settrace() function
    # installed in each parent and child thread.
    recorded = TraceBuf()

    def tracer(frame, event, arg):
        record = (frame.f_code.co_name, event, arg)
        recorded.append(record)
        # Returning the tracer keeps it as the local trace function, so
        # the frame's later events (including "return") are reported too.
        return tracer

    @contextmanager
    def trace():
        # Install the trace function for the calling thread and always
        # remove it again on exit.
        sys.settrace(tracer)
        try:
            yield
        finally:
            sys.settrace(None)

    self.observe_threads(trace, recorded)
369+
370+
def test_monitor_threads(self):
    # Run the observe_threads() scenario with a sys.monitoring PY_RETURN
    # callback registered for this test's tool id. The events are enabled
    # globally, so the threads need no per-thread setup and the observer
    # handed to observe_threads() does nothing.
    buf = TraceBuf()

    def monitor_py_return(code, off, retval):
        buf.append((code.co_name, "return", retval))

    @contextmanager
    def noop():
        yield

    monitoring.register_callback(
        self.tool_id, monitoring.events.PY_RETURN, monitor_py_return
    )
    try:
        monitoring.set_events(
            self.tool_id, monitoring.events.PY_RETURN
        )
        self.observe_threads(noop, buf)
    finally:
        # Undo the global changes even when an assertion fails, mirroring
        # the profile/trace variants which reset their hooks in a
        # finally block.
        monitoring.set_events(self.tool_id, monitoring.events.NO_EVENTS)
        monitoring.register_callback(
            self.tool_id, monitoring.events.PY_RETURN, None
        )
389+
249390

250391
if __name__ == "__main__":
251392
unittest.main()
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix issue where per-thread bytecode was not instrumented for newly created
2+
threads.

Objects/codeobject.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3330,12 +3330,29 @@ _PyCodeArray_New(Py_ssize_t size)
33303330
return arr;
33313331
}
33323332

3333+
// Get the underlying code unit, leaving instrumentation
3334+
static _Py_CODEUNIT
3335+
deopt_code_unit(PyCodeObject *code, int i)
3336+
{
3337+
_Py_CODEUNIT *src_instr = _PyCode_CODE(code) + i;
3338+
_Py_CODEUNIT inst = {
3339+
.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t *)src_instr)};
3340+
int opcode = inst.op.code;
3341+
if (opcode < MIN_INSTRUMENTED_OPCODE) {
3342+
inst.op.code = _PyOpcode_Deopt[opcode];
3343+
assert(inst.op.code < MIN_SPECIALIZED_OPCODE);
3344+
}
3345+
// JIT should not be enabled with free-threading
3346+
assert(inst.op.code != ENTER_EXECUTOR);
3347+
return inst;
3348+
}
3349+
33333350
static void
33343351
copy_code(_Py_CODEUNIT *dst, PyCodeObject *co)
33353352
{
33363353
int code_len = (int) Py_SIZE(co);
33373354
for (int i = 0; i < code_len; i += _PyInstruction_GetLength(co, i)) {
3338-
dst[i] = _Py_GetBaseCodeUnit(co, i);
3355+
dst[i] = deopt_code_unit(co, i);
33393356
}
33403357
_PyCode_Quicken(dst, code_len, 1);
33413358
}

0 commit comments

Comments
 (0)